ipvs: netfilter connection tracking changes
[pandora-kernel.git] / net / netfilter / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39
40 #include <net/net_namespace.h>
41 #include <net/ip.h>
42 #ifdef CONFIG_IP_VS_IPV6
43 #include <net/ipv6.h>
44 #include <net/ip6_route.h>
45 #endif
46 #include <net/route.h>
47 #include <net/sock.h>
48 #include <net/genetlink.h>
49
50 #include <asm/uaccess.h>
51
52 #include <net/ip_vs.h>
53
54 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55 static DEFINE_MUTEX(__ip_vs_mutex);
56
57 /* lock for service table */
58 static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60 /* lock for table with the real services */
61 static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63 /* lock for state and timeout tables */
64 static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66 /* lock for drop entry handling */
67 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69 /* lock for drop packet handling */
70 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72 /* 1/rate drop and drop-entry variables */
73 int ip_vs_drop_rate = 0;
74 int ip_vs_drop_counter = 0;
75 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77 /* number of virtual services */
78 static int ip_vs_num_services = 0;
79
80 /* sysctl variables */
81 static int sysctl_ip_vs_drop_entry = 0;
82 static int sysctl_ip_vs_drop_packet = 0;
83 static int sysctl_ip_vs_secure_tcp = 0;
84 static int sysctl_ip_vs_amemthresh = 1024;
85 static int sysctl_ip_vs_am_droprate = 10;
86 int sysctl_ip_vs_cache_bypass = 0;
87 int sysctl_ip_vs_expire_nodest_conn = 0;
88 int sysctl_ip_vs_expire_quiescent_template = 0;
89 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90 int sysctl_ip_vs_nat_icmp_send = 0;
91 #ifdef CONFIG_IP_VS_NFCT
92 int sysctl_ip_vs_conntrack;
93 #endif
94
95
#ifdef CONFIG_IP_VS_DEBUG
/* current debug verbosity; zero until something stores another value */
static int sysctl_ip_vs_debug_level;

/* Accessor returning the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
104
105 #ifdef CONFIG_IP_VS_IPV6
106 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
107 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
108 {
109         struct rt6_info *rt;
110         struct flowi fl = {
111                 .oif = 0,
112                 .nl_u = {
113                         .ip6_u = {
114                                 .daddr = *addr,
115                                 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
116         };
117
118         rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
119         if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
120                         return 1;
121
122         return 0;
123 }
124 #endif
125 /*
126  *      update_defense_level is called from keventd and from sysctl,
127  *      so it needs to protect itself from softirqs
128  */
129 static void update_defense_level(void)
130 {
131         struct sysinfo i;
132         static int old_secure_tcp = 0;
133         int availmem;
134         int nomem;
135         int to_change = -1;
136
137         /* we only count free and buffered memory (in pages) */
138         si_meminfo(&i);
139         availmem = i.freeram + i.bufferram;
140         /* however in linux 2.5 the i.bufferram is total page cache size,
141            we need adjust it */
142         /* si_swapinfo(&i); */
143         /* availmem = availmem - (i.totalswap - i.freeswap); */
144
145         nomem = (availmem < sysctl_ip_vs_amemthresh);
146
147         local_bh_disable();
148
149         /* drop_entry */
150         spin_lock(&__ip_vs_dropentry_lock);
151         switch (sysctl_ip_vs_drop_entry) {
152         case 0:
153                 atomic_set(&ip_vs_dropentry, 0);
154                 break;
155         case 1:
156                 if (nomem) {
157                         atomic_set(&ip_vs_dropentry, 1);
158                         sysctl_ip_vs_drop_entry = 2;
159                 } else {
160                         atomic_set(&ip_vs_dropentry, 0);
161                 }
162                 break;
163         case 2:
164                 if (nomem) {
165                         atomic_set(&ip_vs_dropentry, 1);
166                 } else {
167                         atomic_set(&ip_vs_dropentry, 0);
168                         sysctl_ip_vs_drop_entry = 1;
169                 };
170                 break;
171         case 3:
172                 atomic_set(&ip_vs_dropentry, 1);
173                 break;
174         }
175         spin_unlock(&__ip_vs_dropentry_lock);
176
177         /* drop_packet */
178         spin_lock(&__ip_vs_droppacket_lock);
179         switch (sysctl_ip_vs_drop_packet) {
180         case 0:
181                 ip_vs_drop_rate = 0;
182                 break;
183         case 1:
184                 if (nomem) {
185                         ip_vs_drop_rate = ip_vs_drop_counter
186                                 = sysctl_ip_vs_amemthresh /
187                                 (sysctl_ip_vs_amemthresh-availmem);
188                         sysctl_ip_vs_drop_packet = 2;
189                 } else {
190                         ip_vs_drop_rate = 0;
191                 }
192                 break;
193         case 2:
194                 if (nomem) {
195                         ip_vs_drop_rate = ip_vs_drop_counter
196                                 = sysctl_ip_vs_amemthresh /
197                                 (sysctl_ip_vs_amemthresh-availmem);
198                 } else {
199                         ip_vs_drop_rate = 0;
200                         sysctl_ip_vs_drop_packet = 1;
201                 }
202                 break;
203         case 3:
204                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
205                 break;
206         }
207         spin_unlock(&__ip_vs_droppacket_lock);
208
209         /* secure_tcp */
210         spin_lock(&ip_vs_securetcp_lock);
211         switch (sysctl_ip_vs_secure_tcp) {
212         case 0:
213                 if (old_secure_tcp >= 2)
214                         to_change = 0;
215                 break;
216         case 1:
217                 if (nomem) {
218                         if (old_secure_tcp < 2)
219                                 to_change = 1;
220                         sysctl_ip_vs_secure_tcp = 2;
221                 } else {
222                         if (old_secure_tcp >= 2)
223                                 to_change = 0;
224                 }
225                 break;
226         case 2:
227                 if (nomem) {
228                         if (old_secure_tcp < 2)
229                                 to_change = 1;
230                 } else {
231                         if (old_secure_tcp >= 2)
232                                 to_change = 0;
233                         sysctl_ip_vs_secure_tcp = 1;
234                 }
235                 break;
236         case 3:
237                 if (old_secure_tcp < 2)
238                         to_change = 1;
239                 break;
240         }
241         old_secure_tcp = sysctl_ip_vs_secure_tcp;
242         if (to_change >= 0)
243                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
244         spin_unlock(&ip_vs_securetcp_lock);
245
246         local_bh_enable();
247 }
248
249
250 /*
251  *      Timer for checking the defense
252  */
253 #define DEFENSE_TIMER_PERIOD    1*HZ
254 static void defense_work_handler(struct work_struct *work);
255 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
256
257 static void defense_work_handler(struct work_struct *work)
258 {
259         update_defense_level();
260         if (atomic_read(&ip_vs_dropentry))
261                 ip_vs_random_dropentry();
262
263         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
264 }
265
266 int
267 ip_vs_use_count_inc(void)
268 {
269         return try_module_get(THIS_MODULE);
270 }
271
272 void
273 ip_vs_use_count_dec(void)
274 {
275         module_put(THIS_MODULE);
276 }
277
278
279 /*
280  *      Hash table: for virtual service lookups
281  */
282 #define IP_VS_SVC_TAB_BITS 8
283 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
284 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
285
286 /* the service table hashed by <protocol, addr, port> */
287 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
288 /* the service table hashed by fwmark */
289 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
290
291 /*
292  *      Hash table: for real service lookups
293  */
294 #define IP_VS_RTAB_BITS 4
295 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
296 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
297
298 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
299
300 /*
301  *      Trash for destinations
302  */
303 static LIST_HEAD(ip_vs_dest_trash);
304
305 /*
306  *      FTP & NULL virtual service counters
307  */
308 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
309 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
310
311
312 /*
313  *      Returns hash value for virtual service
314  */
315 static __inline__ unsigned
316 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
317                   __be16 port)
318 {
319         register unsigned porth = ntohs(port);
320         __be32 addr_fold = addr->ip;
321
322 #ifdef CONFIG_IP_VS_IPV6
323         if (af == AF_INET6)
324                 addr_fold = addr->ip6[0]^addr->ip6[1]^
325                             addr->ip6[2]^addr->ip6[3];
326 #endif
327
328         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
329                 & IP_VS_SVC_TAB_MASK;
330 }
331
332 /*
333  *      Returns hash value of fwmark for virtual service lookup
334  */
335 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
336 {
337         return fwmark & IP_VS_SVC_TAB_MASK;
338 }
339
340 /*
341  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
342  *      or in the ip_vs_svc_fwm_table by fwmark.
343  *      Should be called with locked tables.
344  */
345 static int ip_vs_svc_hash(struct ip_vs_service *svc)
346 {
347         unsigned hash;
348
349         if (svc->flags & IP_VS_SVC_F_HASHED) {
350                 pr_err("%s(): request for already hashed, called from %pF\n",
351                        __func__, __builtin_return_address(0));
352                 return 0;
353         }
354
355         if (svc->fwmark == 0) {
356                 /*
357                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
358                  */
359                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
360                                          svc->port);
361                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
362         } else {
363                 /*
364                  *  Hash it by fwmark in ip_vs_svc_fwm_table
365                  */
366                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
367                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
368         }
369
370         svc->flags |= IP_VS_SVC_F_HASHED;
371         /* increase its refcnt because it is referenced by the svc table */
372         atomic_inc(&svc->refcnt);
373         return 1;
374 }
375
376
377 /*
378  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
379  *      Should be called with locked tables.
380  */
381 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
382 {
383         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
384                 pr_err("%s(): request for unhash flagged, called from %pF\n",
385                        __func__, __builtin_return_address(0));
386                 return 0;
387         }
388
389         if (svc->fwmark == 0) {
390                 /* Remove it from the ip_vs_svc_table table */
391                 list_del(&svc->s_list);
392         } else {
393                 /* Remove it from the ip_vs_svc_fwm_table table */
394                 list_del(&svc->f_list);
395         }
396
397         svc->flags &= ~IP_VS_SVC_F_HASHED;
398         atomic_dec(&svc->refcnt);
399         return 1;
400 }
401
402
403 /*
404  *      Get service by {proto,addr,port} in the service table.
405  */
406 static inline struct ip_vs_service *
407 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
408                     __be16 vport)
409 {
410         unsigned hash;
411         struct ip_vs_service *svc;
412
413         /* Check for "full" addressed entries */
414         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
415
416         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
417                 if ((svc->af == af)
418                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
419                     && (svc->port == vport)
420                     && (svc->protocol == protocol)) {
421                         /* HIT */
422                         atomic_inc(&svc->usecnt);
423                         return svc;
424                 }
425         }
426
427         return NULL;
428 }
429
430
431 /*
432  *      Get service by {fwmark} in the service table.
433  */
434 static inline struct ip_vs_service *
435 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
436 {
437         unsigned hash;
438         struct ip_vs_service *svc;
439
440         /* Check for fwmark addressed entries */
441         hash = ip_vs_svc_fwm_hashkey(fwmark);
442
443         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
444                 if (svc->fwmark == fwmark && svc->af == af) {
445                         /* HIT */
446                         atomic_inc(&svc->usecnt);
447                         return svc;
448                 }
449         }
450
451         return NULL;
452 }
453
454 struct ip_vs_service *
455 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
456                   const union nf_inet_addr *vaddr, __be16 vport)
457 {
458         struct ip_vs_service *svc;
459
460         read_lock(&__ip_vs_svc_lock);
461
462         /*
463          *      Check the table hashed by fwmark first
464          */
465         if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
466                 goto out;
467
468         /*
469          *      Check the table hashed by <protocol,addr,port>
470          *      for "full" addressed entries
471          */
472         svc = __ip_vs_service_get(af, protocol, vaddr, vport);
473
474         if (svc == NULL
475             && protocol == IPPROTO_TCP
476             && atomic_read(&ip_vs_ftpsvc_counter)
477             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
478                 /*
479                  * Check if ftp service entry exists, the packet
480                  * might belong to FTP data connections.
481                  */
482                 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
483         }
484
485         if (svc == NULL
486             && atomic_read(&ip_vs_nullsvc_counter)) {
487                 /*
488                  * Check if the catch-all port (port zero) exists
489                  */
490                 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
491         }
492
493   out:
494         read_unlock(&__ip_vs_svc_lock);
495
496         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
497                       fwmark, ip_vs_proto_name(protocol),
498                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
499                       svc ? "hit" : "not hit");
500
501         return svc;
502 }
503
504
505 static inline void
506 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
507 {
508         atomic_inc(&svc->refcnt);
509         dest->svc = svc;
510 }
511
512 static inline void
513 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
514 {
515         struct ip_vs_service *svc = dest->svc;
516
517         dest->svc = NULL;
518         if (atomic_dec_and_test(&svc->refcnt))
519                 kfree(svc);
520 }
521
522
523 /*
524  *      Returns hash value for real service
525  */
526 static inline unsigned ip_vs_rs_hashkey(int af,
527                                             const union nf_inet_addr *addr,
528                                             __be16 port)
529 {
530         register unsigned porth = ntohs(port);
531         __be32 addr_fold = addr->ip;
532
533 #ifdef CONFIG_IP_VS_IPV6
534         if (af == AF_INET6)
535                 addr_fold = addr->ip6[0]^addr->ip6[1]^
536                             addr->ip6[2]^addr->ip6[3];
537 #endif
538
539         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
540                 & IP_VS_RTAB_MASK;
541 }
542
543 /*
544  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
545  *      should be called with locked tables.
546  */
547 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
548 {
549         unsigned hash;
550
551         if (!list_empty(&dest->d_list)) {
552                 return 0;
553         }
554
555         /*
556          *      Hash by proto,addr,port,
557          *      which are the parameters of the real service.
558          */
559         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
560
561         list_add(&dest->d_list, &ip_vs_rtable[hash]);
562
563         return 1;
564 }
565
566 /*
567  *      UNhashes ip_vs_dest from ip_vs_rtable.
568  *      should be called with locked tables.
569  */
570 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
571 {
572         /*
573          * Remove it from the ip_vs_rtable table.
574          */
575         if (!list_empty(&dest->d_list)) {
576                 list_del(&dest->d_list);
577                 INIT_LIST_HEAD(&dest->d_list);
578         }
579
580         return 1;
581 }
582
583 /*
584  *      Lookup real service by <proto,addr,port> in the real service table.
585  */
586 struct ip_vs_dest *
587 ip_vs_lookup_real_service(int af, __u16 protocol,
588                           const union nf_inet_addr *daddr,
589                           __be16 dport)
590 {
591         unsigned hash;
592         struct ip_vs_dest *dest;
593
594         /*
595          *      Check for "full" addressed entries
596          *      Return the first found entry
597          */
598         hash = ip_vs_rs_hashkey(af, daddr, dport);
599
600         read_lock(&__ip_vs_rs_lock);
601         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
602                 if ((dest->af == af)
603                     && ip_vs_addr_equal(af, &dest->addr, daddr)
604                     && (dest->port == dport)
605                     && ((dest->protocol == protocol) ||
606                         dest->vfwmark)) {
607                         /* HIT */
608                         read_unlock(&__ip_vs_rs_lock);
609                         return dest;
610                 }
611         }
612         read_unlock(&__ip_vs_rs_lock);
613
614         return NULL;
615 }
616
617 /*
618  *      Lookup destination by {addr,port} in the given service
619  */
620 static struct ip_vs_dest *
621 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
622                   __be16 dport)
623 {
624         struct ip_vs_dest *dest;
625
626         /*
627          * Find the destination for the given service
628          */
629         list_for_each_entry(dest, &svc->destinations, n_list) {
630                 if ((dest->af == svc->af)
631                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
632                     && (dest->port == dport)) {
633                         /* HIT */
634                         return dest;
635                 }
636         }
637
638         return NULL;
639 }
640
641 /*
642  * Find destination by {daddr,dport,vaddr,protocol}
643  * Cretaed to be used in ip_vs_process_message() in
644  * the backup synchronization daemon. It finds the
645  * destination to be bound to the received connection
646  * on the backup.
647  *
648  * ip_vs_lookup_real_service() looked promissing, but
649  * seems not working as expected.
650  */
651 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
652                                    __be16 dport,
653                                    const union nf_inet_addr *vaddr,
654                                    __be16 vport, __u16 protocol)
655 {
656         struct ip_vs_dest *dest;
657         struct ip_vs_service *svc;
658
659         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
660         if (!svc)
661                 return NULL;
662         dest = ip_vs_lookup_dest(svc, daddr, dport);
663         if (dest)
664                 atomic_inc(&dest->refcnt);
665         ip_vs_service_put(svc);
666         return dest;
667 }
668
669 /*
670  *  Lookup dest by {svc,addr,port} in the destination trash.
671  *  The destination trash is used to hold the destinations that are removed
672  *  from the service table but are still referenced by some conn entries.
673  *  The reason to add the destination trash is when the dest is temporary
674  *  down (either by administrator or by monitor program), the dest can be
675  *  picked back from the trash, the remaining connections to the dest can
676  *  continue, and the counting information of the dest is also useful for
677  *  scheduling.
678  */
679 static struct ip_vs_dest *
680 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
681                      __be16 dport)
682 {
683         struct ip_vs_dest *dest, *nxt;
684
685         /*
686          * Find the destination in trash
687          */
688         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
689                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
690                               "dest->refcnt=%d\n",
691                               dest->vfwmark,
692                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
693                               ntohs(dest->port),
694                               atomic_read(&dest->refcnt));
695                 if (dest->af == svc->af &&
696                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
697                     dest->port == dport &&
698                     dest->vfwmark == svc->fwmark &&
699                     dest->protocol == svc->protocol &&
700                     (svc->fwmark ||
701                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
702                       dest->vport == svc->port))) {
703                         /* HIT */
704                         return dest;
705                 }
706
707                 /*
708                  * Try to purge the destination from trash if not referenced
709                  */
710                 if (atomic_read(&dest->refcnt) == 1) {
711                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
712                                       "from trash\n",
713                                       dest->vfwmark,
714                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
715                                       ntohs(dest->port));
716                         list_del(&dest->n_list);
717                         ip_vs_dst_reset(dest);
718                         __ip_vs_unbind_svc(dest);
719                         kfree(dest);
720                 }
721         }
722
723         return NULL;
724 }
725
726
727 /*
728  *  Clean up all the destinations in the trash
729  *  Called by the ip_vs_control_cleanup()
730  *
731  *  When the ip_vs_control_clearup is activated by ipvs module exit,
732  *  the service tables must have been flushed and all the connections
733  *  are expired, and the refcnt of each destination in the trash must
734  *  be 1, so we simply release them here.
735  */
736 static void ip_vs_trash_cleanup(void)
737 {
738         struct ip_vs_dest *dest, *nxt;
739
740         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
741                 list_del(&dest->n_list);
742                 ip_vs_dst_reset(dest);
743                 __ip_vs_unbind_svc(dest);
744                 kfree(dest);
745         }
746 }
747
748
749 static void
750 ip_vs_zero_stats(struct ip_vs_stats *stats)
751 {
752         spin_lock_bh(&stats->lock);
753
754         memset(&stats->ustats, 0, sizeof(stats->ustats));
755         ip_vs_zero_estimator(stats);
756
757         spin_unlock_bh(&stats->lock);
758 }
759
760 /*
761  *      Update a destination in the given service
762  */
763 static void
764 __ip_vs_update_dest(struct ip_vs_service *svc,
765                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
766 {
767         int conn_flags;
768
769         /* set the weight and the flags */
770         atomic_set(&dest->weight, udest->weight);
771         conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
772         conn_flags |= IP_VS_CONN_F_INACTIVE;
773
774         /* check if local node and update the flags */
775 #ifdef CONFIG_IP_VS_IPV6
776         if (svc->af == AF_INET6) {
777                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
778                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
779                                 | IP_VS_CONN_F_LOCALNODE;
780                 }
781         } else
782 #endif
783                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
784                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
785                                 | IP_VS_CONN_F_LOCALNODE;
786                 }
787
788         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
789         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
790                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
791         } else {
792                 /*
793                  *    Put the real service in ip_vs_rtable if not present.
794                  *    For now only for NAT!
795                  */
796                 write_lock_bh(&__ip_vs_rs_lock);
797                 ip_vs_rs_hash(dest);
798                 write_unlock_bh(&__ip_vs_rs_lock);
799         }
800         atomic_set(&dest->conn_flags, conn_flags);
801
802         /* bind the service */
803         if (!dest->svc) {
804                 __ip_vs_bind_svc(dest, svc);
805         } else {
806                 if (dest->svc != svc) {
807                         __ip_vs_unbind_svc(dest);
808                         ip_vs_zero_stats(&dest->stats);
809                         __ip_vs_bind_svc(dest, svc);
810                 }
811         }
812
813         /* set the dest status flags */
814         dest->flags |= IP_VS_DEST_F_AVAILABLE;
815
816         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
817                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
818         dest->u_threshold = udest->u_threshold;
819         dest->l_threshold = udest->l_threshold;
820 }
821
822
823 /*
824  *      Create a destination for the given service
825  */
826 static int
827 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
828                struct ip_vs_dest **dest_p)
829 {
830         struct ip_vs_dest *dest;
831         unsigned atype;
832
833         EnterFunction(2);
834
835 #ifdef CONFIG_IP_VS_IPV6
836         if (svc->af == AF_INET6) {
837                 atype = ipv6_addr_type(&udest->addr.in6);
838                 if ((!(atype & IPV6_ADDR_UNICAST) ||
839                         atype & IPV6_ADDR_LINKLOCAL) &&
840                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
841                         return -EINVAL;
842         } else
843 #endif
844         {
845                 atype = inet_addr_type(&init_net, udest->addr.ip);
846                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
847                         return -EINVAL;
848         }
849
850         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
851         if (dest == NULL) {
852                 pr_err("%s(): no memory.\n", __func__);
853                 return -ENOMEM;
854         }
855
856         dest->af = svc->af;
857         dest->protocol = svc->protocol;
858         dest->vaddr = svc->addr;
859         dest->vport = svc->port;
860         dest->vfwmark = svc->fwmark;
861         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
862         dest->port = udest->port;
863
864         atomic_set(&dest->activeconns, 0);
865         atomic_set(&dest->inactconns, 0);
866         atomic_set(&dest->persistconns, 0);
867         atomic_set(&dest->refcnt, 0);
868
869         INIT_LIST_HEAD(&dest->d_list);
870         spin_lock_init(&dest->dst_lock);
871         spin_lock_init(&dest->stats.lock);
872         __ip_vs_update_dest(svc, dest, udest);
873         ip_vs_new_estimator(&dest->stats);
874
875         *dest_p = dest;
876
877         LeaveFunction(2);
878         return 0;
879 }
880
881
882 /*
883  *      Add a destination into an existing service
884  */
885 static int
886 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
887 {
888         struct ip_vs_dest *dest;
889         union nf_inet_addr daddr;
890         __be16 dport = udest->port;
891         int ret;
892
893         EnterFunction(2);
894
895         if (udest->weight < 0) {
896                 pr_err("%s(): server weight less than zero\n", __func__);
897                 return -ERANGE;
898         }
899
900         if (udest->l_threshold > udest->u_threshold) {
901                 pr_err("%s(): lower threshold is higher than upper threshold\n",
902                         __func__);
903                 return -ERANGE;
904         }
905
906         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
907
908         /*
909          * Check if the dest already exists in the list
910          */
911         dest = ip_vs_lookup_dest(svc, &daddr, dport);
912
913         if (dest != NULL) {
914                 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
915                 return -EEXIST;
916         }
917
918         /*
919          * Check if the dest already exists in the trash and
920          * is from the same service
921          */
922         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
923
924         if (dest != NULL) {
925                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
926                               "dest->refcnt=%d, service %u/%s:%u\n",
927                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
928                               atomic_read(&dest->refcnt),
929                               dest->vfwmark,
930                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
931                               ntohs(dest->vport));
932
933                 __ip_vs_update_dest(svc, dest, udest);
934
935                 /*
936                  * Get the destination from the trash
937                  */
938                 list_del(&dest->n_list);
939
940                 ip_vs_new_estimator(&dest->stats);
941
942                 write_lock_bh(&__ip_vs_svc_lock);
943
944                 /*
945                  * Wait until all other svc users go away.
946                  */
947                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
948
949                 list_add(&dest->n_list, &svc->destinations);
950                 svc->num_dests++;
951
952                 /* call the update_service function of its scheduler */
953                 if (svc->scheduler->update_service)
954                         svc->scheduler->update_service(svc);
955
956                 write_unlock_bh(&__ip_vs_svc_lock);
957                 return 0;
958         }
959
960         /*
961          * Allocate and initialize the dest structure
962          */
963         ret = ip_vs_new_dest(svc, udest, &dest);
964         if (ret) {
965                 return ret;
966         }
967
968         /*
969          * Add the dest entry into the list
970          */
971         atomic_inc(&dest->refcnt);
972
973         write_lock_bh(&__ip_vs_svc_lock);
974
975         /*
976          * Wait until all other svc users go away.
977          */
978         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
979
980         list_add(&dest->n_list, &svc->destinations);
981         svc->num_dests++;
982
983         /* call the update_service function of its scheduler */
984         if (svc->scheduler->update_service)
985                 svc->scheduler->update_service(svc);
986
987         write_unlock_bh(&__ip_vs_svc_lock);
988
989         LeaveFunction(2);
990
991         return 0;
992 }
993
994
995 /*
996  *      Edit a destination in the given service
997  */
998 static int
999 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1000 {
1001         struct ip_vs_dest *dest;
1002         union nf_inet_addr daddr;
1003         __be16 dport = udest->port;
1004
1005         EnterFunction(2);
1006
1007         if (udest->weight < 0) {
1008                 pr_err("%s(): server weight less than zero\n", __func__);
1009                 return -ERANGE;
1010         }
1011
1012         if (udest->l_threshold > udest->u_threshold) {
1013                 pr_err("%s(): lower threshold is higher than upper threshold\n",
1014                         __func__);
1015                 return -ERANGE;
1016         }
1017
1018         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1019
1020         /*
1021          *  Lookup the destination list
1022          */
1023         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1024
1025         if (dest == NULL) {
1026                 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1027                 return -ENOENT;
1028         }
1029
1030         __ip_vs_update_dest(svc, dest, udest);
1031
1032         write_lock_bh(&__ip_vs_svc_lock);
1033
1034         /* Wait until all other svc users go away */
1035         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1036
1037         /* call the update_service, because server weight may be changed */
1038         if (svc->scheduler->update_service)
1039                 svc->scheduler->update_service(svc);
1040
1041         write_unlock_bh(&__ip_vs_svc_lock);
1042
1043         LeaveFunction(2);
1044
1045         return 0;
1046 }
1047
1048
1049 /*
1050  *      Delete a destination (must be already unlinked from the service)
1051  */
1052 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1053 {
1054         ip_vs_kill_estimator(&dest->stats);
1055
1056         /*
1057          *  Remove it from the d-linked list with the real services.
1058          */
1059         write_lock_bh(&__ip_vs_rs_lock);
1060         ip_vs_rs_unhash(dest);
1061         write_unlock_bh(&__ip_vs_rs_lock);
1062
1063         /*
1064          *  Decrease the refcnt of the dest, and free the dest
1065          *  if nobody refers to it (refcnt=0). Otherwise, throw
1066          *  the destination into the trash.
1067          */
1068         if (atomic_dec_and_test(&dest->refcnt)) {
1069                 ip_vs_dst_reset(dest);
1070                 /* simply decrease svc->refcnt here, let the caller check
1071                    and release the service if nobody refers to it.
1072                    Only user context can release destination and service,
1073                    and only one user context can update virtual service at a
1074                    time, so the operation here is OK */
1075                 atomic_dec(&dest->svc->refcnt);
1076                 kfree(dest);
1077         } else {
1078                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1079                               "dest->refcnt=%d\n",
1080                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1081                               ntohs(dest->port),
1082                               atomic_read(&dest->refcnt));
1083                 list_add(&dest->n_list, &ip_vs_dest_trash);
1084                 atomic_inc(&dest->refcnt);
1085         }
1086 }
1087
1088
1089 /*
1090  *      Unlink a destination from the given service
1091  */
1092 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1093                                 struct ip_vs_dest *dest,
1094                                 int svcupd)
1095 {
1096         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1097
1098         /*
1099          *  Remove it from the d-linked destination list.
1100          */
1101         list_del(&dest->n_list);
1102         svc->num_dests--;
1103
1104         /*
1105          *  Call the update_service function of its scheduler
1106          */
1107         if (svcupd && svc->scheduler->update_service)
1108                         svc->scheduler->update_service(svc);
1109 }
1110
1111
1112 /*
1113  *      Delete a destination server in the given service
1114  */
1115 static int
1116 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1117 {
1118         struct ip_vs_dest *dest;
1119         __be16 dport = udest->port;
1120
1121         EnterFunction(2);
1122
1123         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1124
1125         if (dest == NULL) {
1126                 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1127                 return -ENOENT;
1128         }
1129
1130         write_lock_bh(&__ip_vs_svc_lock);
1131
1132         /*
1133          *      Wait until all other svc users go away.
1134          */
1135         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1136
1137         /*
1138          *      Unlink dest from the service
1139          */
1140         __ip_vs_unlink_dest(svc, dest, 1);
1141
1142         write_unlock_bh(&__ip_vs_svc_lock);
1143
1144         /*
1145          *      Delete the destination
1146          */
1147         __ip_vs_del_dest(dest);
1148
1149         LeaveFunction(2);
1150
1151         return 0;
1152 }
1153
1154
1155 /*
1156  *      Add a service into the service hash table
1157  */
1158 static int
1159 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1160                   struct ip_vs_service **svc_p)
1161 {
1162         int ret = 0;
1163         struct ip_vs_scheduler *sched = NULL;
1164         struct ip_vs_service *svc = NULL;
1165
1166         /* increase the module use count */
1167         ip_vs_use_count_inc();
1168
1169         /* Lookup the scheduler by 'u->sched_name' */
1170         sched = ip_vs_scheduler_get(u->sched_name);
1171         if (sched == NULL) {
1172                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1173                 ret = -ENOENT;
1174                 goto out_mod_dec;
1175         }
1176
1177 #ifdef CONFIG_IP_VS_IPV6
1178         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1179                 ret = -EINVAL;
1180                 goto out_err;
1181         }
1182 #endif
1183
1184         svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1185         if (svc == NULL) {
1186                 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1187                 ret = -ENOMEM;
1188                 goto out_err;
1189         }
1190
1191         /* I'm the first user of the service */
1192         atomic_set(&svc->usecnt, 1);
1193         atomic_set(&svc->refcnt, 0);
1194
1195         svc->af = u->af;
1196         svc->protocol = u->protocol;
1197         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1198         svc->port = u->port;
1199         svc->fwmark = u->fwmark;
1200         svc->flags = u->flags;
1201         svc->timeout = u->timeout * HZ;
1202         svc->netmask = u->netmask;
1203
1204         INIT_LIST_HEAD(&svc->destinations);
1205         rwlock_init(&svc->sched_lock);
1206         spin_lock_init(&svc->stats.lock);
1207
1208         /* Bind the scheduler */
1209         ret = ip_vs_bind_scheduler(svc, sched);
1210         if (ret)
1211                 goto out_err;
1212         sched = NULL;
1213
1214         /* Update the virtual service counters */
1215         if (svc->port == FTPPORT)
1216                 atomic_inc(&ip_vs_ftpsvc_counter);
1217         else if (svc->port == 0)
1218                 atomic_inc(&ip_vs_nullsvc_counter);
1219
1220         ip_vs_new_estimator(&svc->stats);
1221
1222         /* Count only IPv4 services for old get/setsockopt interface */
1223         if (svc->af == AF_INET)
1224                 ip_vs_num_services++;
1225
1226         /* Hash the service into the service table */
1227         write_lock_bh(&__ip_vs_svc_lock);
1228         ip_vs_svc_hash(svc);
1229         write_unlock_bh(&__ip_vs_svc_lock);
1230
1231         *svc_p = svc;
1232         return 0;
1233
1234   out_err:
1235         if (svc != NULL) {
1236                 if (svc->scheduler)
1237                         ip_vs_unbind_scheduler(svc);
1238                 if (svc->inc) {
1239                         local_bh_disable();
1240                         ip_vs_app_inc_put(svc->inc);
1241                         local_bh_enable();
1242                 }
1243                 kfree(svc);
1244         }
1245         ip_vs_scheduler_put(sched);
1246
1247   out_mod_dec:
1248         /* decrease the module use count */
1249         ip_vs_use_count_dec();
1250
1251         return ret;
1252 }
1253
1254
1255 /*
1256  *      Edit a service and bind it with a new scheduler
1257  */
1258 static int
1259 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1260 {
1261         struct ip_vs_scheduler *sched, *old_sched;
1262         int ret = 0;
1263
1264         /*
1265          * Lookup the scheduler, by 'u->sched_name'
1266          */
1267         sched = ip_vs_scheduler_get(u->sched_name);
1268         if (sched == NULL) {
1269                 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1270                 return -ENOENT;
1271         }
1272         old_sched = sched;
1273
1274 #ifdef CONFIG_IP_VS_IPV6
1275         if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1276                 ret = -EINVAL;
1277                 goto out;
1278         }
1279 #endif
1280
1281         write_lock_bh(&__ip_vs_svc_lock);
1282
1283         /*
1284          * Wait until all other svc users go away.
1285          */
1286         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1287
1288         /*
1289          * Set the flags and timeout value
1290          */
1291         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1292         svc->timeout = u->timeout * HZ;
1293         svc->netmask = u->netmask;
1294
1295         old_sched = svc->scheduler;
1296         if (sched != old_sched) {
1297                 /*
1298                  * Unbind the old scheduler
1299                  */
1300                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1301                         old_sched = sched;
1302                         goto out_unlock;
1303                 }
1304
1305                 /*
1306                  * Bind the new scheduler
1307                  */
1308                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1309                         /*
1310                          * If ip_vs_bind_scheduler fails, restore the old
1311                          * scheduler.
1312                          * The main reason of failure is out of memory.
1313                          *
1314                          * The question is if the old scheduler can be
1315                          * restored all the time. TODO: if it cannot be
1316                          * restored some time, we must delete the service,
1317                          * otherwise the system may crash.
1318                          */
1319                         ip_vs_bind_scheduler(svc, old_sched);
1320                         old_sched = sched;
1321                         goto out_unlock;
1322                 }
1323         }
1324
1325   out_unlock:
1326         write_unlock_bh(&__ip_vs_svc_lock);
1327 #ifdef CONFIG_IP_VS_IPV6
1328   out:
1329 #endif
1330
1331         if (old_sched)
1332                 ip_vs_scheduler_put(old_sched);
1333
1334         return ret;
1335 }
1336
1337
1338 /*
1339  *      Delete a service from the service list
1340  *      - The service must be unlinked, unlocked and not referenced!
1341  *      - We are called under _bh lock
1342  */
1343 static void __ip_vs_del_service(struct ip_vs_service *svc)
1344 {
1345         struct ip_vs_dest *dest, *nxt;
1346         struct ip_vs_scheduler *old_sched;
1347
1348         /* Count only IPv4 services for old get/setsockopt interface */
1349         if (svc->af == AF_INET)
1350                 ip_vs_num_services--;
1351
1352         ip_vs_kill_estimator(&svc->stats);
1353
1354         /* Unbind scheduler */
1355         old_sched = svc->scheduler;
1356         ip_vs_unbind_scheduler(svc);
1357         if (old_sched)
1358                 ip_vs_scheduler_put(old_sched);
1359
1360         /* Unbind app inc */
1361         if (svc->inc) {
1362                 ip_vs_app_inc_put(svc->inc);
1363                 svc->inc = NULL;
1364         }
1365
1366         /*
1367          *    Unlink the whole destination list
1368          */
1369         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1370                 __ip_vs_unlink_dest(svc, dest, 0);
1371                 __ip_vs_del_dest(dest);
1372         }
1373
1374         /*
1375          *    Update the virtual service counters
1376          */
1377         if (svc->port == FTPPORT)
1378                 atomic_dec(&ip_vs_ftpsvc_counter);
1379         else if (svc->port == 0)
1380                 atomic_dec(&ip_vs_nullsvc_counter);
1381
1382         /*
1383          *    Free the service if nobody refers to it
1384          */
1385         if (atomic_read(&svc->refcnt) == 0)
1386                 kfree(svc);
1387
1388         /* decrease the module use count */
1389         ip_vs_use_count_dec();
1390 }
1391
1392 /*
1393  *      Delete a service from the service list
1394  */
1395 static int ip_vs_del_service(struct ip_vs_service *svc)
1396 {
1397         if (svc == NULL)
1398                 return -EEXIST;
1399
1400         /*
1401          * Unhash it from the service table
1402          */
1403         write_lock_bh(&__ip_vs_svc_lock);
1404
1405         ip_vs_svc_unhash(svc);
1406
1407         /*
1408          * Wait until all the svc users go away.
1409          */
1410         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1411
1412         __ip_vs_del_service(svc);
1413
1414         write_unlock_bh(&__ip_vs_svc_lock);
1415
1416         return 0;
1417 }
1418
1419
1420 /*
1421  *      Flush all the virtual services
1422  */
1423 static int ip_vs_flush(void)
1424 {
1425         int idx;
1426         struct ip_vs_service *svc, *nxt;
1427
1428         /*
1429          * Flush the service table hashed by <protocol,addr,port>
1430          */
1431         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1432                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1433                         write_lock_bh(&__ip_vs_svc_lock);
1434                         ip_vs_svc_unhash(svc);
1435                         /*
1436                          * Wait until all the svc users go away.
1437                          */
1438                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1439                         __ip_vs_del_service(svc);
1440                         write_unlock_bh(&__ip_vs_svc_lock);
1441                 }
1442         }
1443
1444         /*
1445          * Flush the service table hashed by fwmark
1446          */
1447         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1448                 list_for_each_entry_safe(svc, nxt,
1449                                          &ip_vs_svc_fwm_table[idx], f_list) {
1450                         write_lock_bh(&__ip_vs_svc_lock);
1451                         ip_vs_svc_unhash(svc);
1452                         /*
1453                          * Wait until all the svc users go away.
1454                          */
1455                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1456                         __ip_vs_del_service(svc);
1457                         write_unlock_bh(&__ip_vs_svc_lock);
1458                 }
1459         }
1460
1461         return 0;
1462 }
1463
1464
1465 /*
1466  *      Zero counters in a service or all services
1467  */
1468 static int ip_vs_zero_service(struct ip_vs_service *svc)
1469 {
1470         struct ip_vs_dest *dest;
1471
1472         write_lock_bh(&__ip_vs_svc_lock);
1473         list_for_each_entry(dest, &svc->destinations, n_list) {
1474                 ip_vs_zero_stats(&dest->stats);
1475         }
1476         ip_vs_zero_stats(&svc->stats);
1477         write_unlock_bh(&__ip_vs_svc_lock);
1478         return 0;
1479 }
1480
1481 static int ip_vs_zero_all(void)
1482 {
1483         int idx;
1484         struct ip_vs_service *svc;
1485
1486         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1488                         ip_vs_zero_service(svc);
1489                 }
1490         }
1491
1492         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1493                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1494                         ip_vs_zero_service(svc);
1495                 }
1496         }
1497
1498         ip_vs_zero_stats(&ip_vs_stats);
1499         return 0;
1500 }
1501
1502
1503 static int
1504 proc_do_defense_mode(ctl_table *table, int write,
1505                      void __user *buffer, size_t *lenp, loff_t *ppos)
1506 {
1507         int *valp = table->data;
1508         int val = *valp;
1509         int rc;
1510
1511         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1512         if (write && (*valp != val)) {
1513                 if ((*valp < 0) || (*valp > 3)) {
1514                         /* Restore the correct value */
1515                         *valp = val;
1516                 } else {
1517                         update_defense_level();
1518                 }
1519         }
1520         return rc;
1521 }
1522
1523
1524 static int
1525 proc_do_sync_threshold(ctl_table *table, int write,
1526                        void __user *buffer, size_t *lenp, loff_t *ppos)
1527 {
1528         int *valp = table->data;
1529         int val[2];
1530         int rc;
1531
1532         /* backup the value first */
1533         memcpy(val, valp, sizeof(val));
1534
1535         rc = proc_dointvec(table, write, buffer, lenp, ppos);
1536         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1537                 /* Restore the correct value */
1538                 memcpy(valp, val, sizeof(val));
1539         }
1540         return rc;
1541 }
1542
1543
1544 /*
1545  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1546  */
1547
1548 static struct ctl_table vs_vars[] = {
1549         {
1550                 .procname       = "amemthresh",
1551                 .data           = &sysctl_ip_vs_amemthresh,
1552                 .maxlen         = sizeof(int),
1553                 .mode           = 0644,
1554                 .proc_handler   = proc_dointvec,
1555         },
1556 #ifdef CONFIG_IP_VS_DEBUG
1557         {
1558                 .procname       = "debug_level",
1559                 .data           = &sysctl_ip_vs_debug_level,
1560                 .maxlen         = sizeof(int),
1561                 .mode           = 0644,
1562                 .proc_handler   = proc_dointvec,
1563         },
1564 #endif
1565         {
1566                 .procname       = "am_droprate",
1567                 .data           = &sysctl_ip_vs_am_droprate,
1568                 .maxlen         = sizeof(int),
1569                 .mode           = 0644,
1570                 .proc_handler   = proc_dointvec,
1571         },
1572         {
1573                 .procname       = "drop_entry",
1574                 .data           = &sysctl_ip_vs_drop_entry,
1575                 .maxlen         = sizeof(int),
1576                 .mode           = 0644,
1577                 .proc_handler   = proc_do_defense_mode,
1578         },
1579         {
1580                 .procname       = "drop_packet",
1581                 .data           = &sysctl_ip_vs_drop_packet,
1582                 .maxlen         = sizeof(int),
1583                 .mode           = 0644,
1584                 .proc_handler   = proc_do_defense_mode,
1585         },
1586 #ifdef CONFIG_IP_VS_NFCT
1587         {
1588                 .procname       = "conntrack",
1589                 .data           = &sysctl_ip_vs_conntrack,
1590                 .maxlen         = sizeof(int),
1591                 .mode           = 0644,
1592                 .proc_handler   = &proc_dointvec,
1593         },
1594 #endif
1595         {
1596                 .procname       = "secure_tcp",
1597                 .data           = &sysctl_ip_vs_secure_tcp,
1598                 .maxlen         = sizeof(int),
1599                 .mode           = 0644,
1600                 .proc_handler   = proc_do_defense_mode,
1601         },
1602 #if 0
1603         {
1604                 .procname       = "timeout_established",
1605                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1606                 .maxlen         = sizeof(int),
1607                 .mode           = 0644,
1608                 .proc_handler   = proc_dointvec_jiffies,
1609         },
1610         {
1611                 .procname       = "timeout_synsent",
1612                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1613                 .maxlen         = sizeof(int),
1614                 .mode           = 0644,
1615                 .proc_handler   = proc_dointvec_jiffies,
1616         },
1617         {
1618                 .procname       = "timeout_synrecv",
1619                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1620                 .maxlen         = sizeof(int),
1621                 .mode           = 0644,
1622                 .proc_handler   = proc_dointvec_jiffies,
1623         },
1624         {
1625                 .procname       = "timeout_finwait",
1626                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1627                 .maxlen         = sizeof(int),
1628                 .mode           = 0644,
1629                 .proc_handler   = proc_dointvec_jiffies,
1630         },
1631         {
1632                 .procname       = "timeout_timewait",
1633                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1634                 .maxlen         = sizeof(int),
1635                 .mode           = 0644,
1636                 .proc_handler   = proc_dointvec_jiffies,
1637         },
1638         {
1639                 .procname       = "timeout_close",
1640                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1641                 .maxlen         = sizeof(int),
1642                 .mode           = 0644,
1643                 .proc_handler   = proc_dointvec_jiffies,
1644         },
1645         {
1646                 .procname       = "timeout_closewait",
1647                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = proc_dointvec_jiffies,
1651         },
1652         {
1653                 .procname       = "timeout_lastack",
1654                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1655                 .maxlen         = sizeof(int),
1656                 .mode           = 0644,
1657                 .proc_handler   = proc_dointvec_jiffies,
1658         },
1659         {
1660                 .procname       = "timeout_listen",
1661                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1662                 .maxlen         = sizeof(int),
1663                 .mode           = 0644,
1664                 .proc_handler   = proc_dointvec_jiffies,
1665         },
1666         {
1667                 .procname       = "timeout_synack",
1668                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1669                 .maxlen         = sizeof(int),
1670                 .mode           = 0644,
1671                 .proc_handler   = proc_dointvec_jiffies,
1672         },
1673         {
1674                 .procname       = "timeout_udp",
1675                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1676                 .maxlen         = sizeof(int),
1677                 .mode           = 0644,
1678                 .proc_handler   = proc_dointvec_jiffies,
1679         },
1680         {
1681                 .procname       = "timeout_icmp",
1682                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1683                 .maxlen         = sizeof(int),
1684                 .mode           = 0644,
1685                 .proc_handler   = proc_dointvec_jiffies,
1686         },
1687 #endif
1688         {
1689                 .procname       = "cache_bypass",
1690                 .data           = &sysctl_ip_vs_cache_bypass,
1691                 .maxlen         = sizeof(int),
1692                 .mode           = 0644,
1693                 .proc_handler   = proc_dointvec,
1694         },
1695         {
1696                 .procname       = "expire_nodest_conn",
1697                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1698                 .maxlen         = sizeof(int),
1699                 .mode           = 0644,
1700                 .proc_handler   = proc_dointvec,
1701         },
1702         {
1703                 .procname       = "expire_quiescent_template",
1704                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1705                 .maxlen         = sizeof(int),
1706                 .mode           = 0644,
1707                 .proc_handler   = proc_dointvec,
1708         },
1709         {
1710                 .procname       = "sync_threshold",
1711                 .data           = &sysctl_ip_vs_sync_threshold,
1712                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1713                 .mode           = 0644,
1714                 .proc_handler   = proc_do_sync_threshold,
1715         },
1716         {
1717                 .procname       = "nat_icmp_send",
1718                 .data           = &sysctl_ip_vs_nat_icmp_send,
1719                 .maxlen         = sizeof(int),
1720                 .mode           = 0644,
1721                 .proc_handler   = proc_dointvec,
1722         },
1723         { }
1724 };
1725
1726 const struct ctl_path net_vs_ctl_path[] = {
1727         { .procname = "net", },
1728         { .procname = "ipv4", },
1729         { .procname = "vs", },
1730         { }
1731 };
1732 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1733
/* sysctl table header kept at file scope (presumably for vs_vars - confirm) */
static struct ctl_table_header *sysctl_header;
1735
1736 #ifdef CONFIG_PROC_FS
1737
/* Per-open cursor of the service seq_file walk (kept in seq->private) */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* bucket index within that table */
};
1742
1743 /*
1744  *      Write the contents of the VS rule table to a PROCfs file.
1745  *      (It is kept just for backward compatibility)
1746  */
1747 static inline const char *ip_vs_fwd_name(unsigned flags)
1748 {
1749         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1750         case IP_VS_CONN_F_LOCALNODE:
1751                 return "Local";
1752         case IP_VS_CONN_F_TUNNEL:
1753                 return "Tunnel";
1754         case IP_VS_CONN_F_DROUTE:
1755                 return "Route";
1756         default:
1757                 return "Masq";
1758         }
1759 }
1760
1761
1762 /* Get the Nth entry in the two lists */
1763 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1764 {
1765         struct ip_vs_iter *iter = seq->private;
1766         int idx;
1767         struct ip_vs_service *svc;
1768
1769         /* look in hash by protocol */
1770         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1771                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1772                         if (pos-- == 0){
1773                                 iter->table = ip_vs_svc_table;
1774                                 iter->bucket = idx;
1775                                 return svc;
1776                         }
1777                 }
1778         }
1779
1780         /* keep looking in fwmark */
1781         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1782                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1783                         if (pos-- == 0) {
1784                                 iter->table = ip_vs_svc_fwm_table;
1785                                 iter->bucket = idx;
1786                                 return svc;
1787                         }
1788                 }
1789         }
1790
1791         return NULL;
1792 }
1793
1794 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1795 __acquires(__ip_vs_svc_lock)
1796 {
1797
1798         read_lock_bh(&__ip_vs_svc_lock);
1799         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1800 }
1801
1802
1803 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1804 {
1805         struct list_head *e;
1806         struct ip_vs_iter *iter;
1807         struct ip_vs_service *svc;
1808
1809         ++*pos;
1810         if (v == SEQ_START_TOKEN)
1811                 return ip_vs_info_array(seq,0);
1812
1813         svc = v;
1814         iter = seq->private;
1815
1816         if (iter->table == ip_vs_svc_table) {
1817                 /* next service in table hashed by protocol */
1818                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1819                         return list_entry(e, struct ip_vs_service, s_list);
1820
1821
1822                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1823                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1824                                             s_list) {
1825                                 return svc;
1826                         }
1827                 }
1828
1829                 iter->table = ip_vs_svc_fwm_table;
1830                 iter->bucket = -1;
1831                 goto scan_fwmark;
1832         }
1833
1834         /* next service in hashed by fwmark */
1835         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1836                 return list_entry(e, struct ip_vs_service, f_list);
1837
1838  scan_fwmark:
1839         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1840                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1841                                     f_list)
1842                         return svc;
1843         }
1844
1845         return NULL;
1846 }
1847
1848 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1849 __releases(__ip_vs_svc_lock)
1850 {
1851         read_unlock_bh(&__ip_vs_svc_lock);
1852 }
1853
1854
1855 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1856 {
1857         if (v == SEQ_START_TOKEN) {
1858                 seq_printf(seq,
1859                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1860                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1861                 seq_puts(seq,
1862                          "Prot LocalAddress:Port Scheduler Flags\n");
1863                 seq_puts(seq,
1864                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1865         } else {
1866                 const struct ip_vs_service *svc = v;
1867                 const struct ip_vs_iter *iter = seq->private;
1868                 const struct ip_vs_dest *dest;
1869
1870                 if (iter->table == ip_vs_svc_table) {
1871 #ifdef CONFIG_IP_VS_IPV6
1872                         if (svc->af == AF_INET6)
1873                                 seq_printf(seq, "%s  [%pI6]:%04X %s ",
1874                                            ip_vs_proto_name(svc->protocol),
1875                                            &svc->addr.in6,
1876                                            ntohs(svc->port),
1877                                            svc->scheduler->name);
1878                         else
1879 #endif
1880                                 seq_printf(seq, "%s  %08X:%04X %s %s ",
1881                                            ip_vs_proto_name(svc->protocol),
1882                                            ntohl(svc->addr.ip),
1883                                            ntohs(svc->port),
1884                                            svc->scheduler->name,
1885                                            (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1886                 } else {
1887                         seq_printf(seq, "FWM  %08X %s %s",
1888                                    svc->fwmark, svc->scheduler->name,
1889                                    (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1890                 }
1891
1892                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1893                         seq_printf(seq, "persistent %d %08X\n",
1894                                 svc->timeout,
1895                                 ntohl(svc->netmask));
1896                 else
1897                         seq_putc(seq, '\n');
1898
1899                 list_for_each_entry(dest, &svc->destinations, n_list) {
1900 #ifdef CONFIG_IP_VS_IPV6
1901                         if (dest->af == AF_INET6)
1902                                 seq_printf(seq,
1903                                            "  -> [%pI6]:%04X"
1904                                            "      %-7s %-6d %-10d %-10d\n",
1905                                            &dest->addr.in6,
1906                                            ntohs(dest->port),
1907                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1908                                            atomic_read(&dest->weight),
1909                                            atomic_read(&dest->activeconns),
1910                                            atomic_read(&dest->inactconns));
1911                         else
1912 #endif
1913                                 seq_printf(seq,
1914                                            "  -> %08X:%04X      "
1915                                            "%-7s %-6d %-10d %-10d\n",
1916                                            ntohl(dest->addr.ip),
1917                                            ntohs(dest->port),
1918                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1919                                            atomic_read(&dest->weight),
1920                                            atomic_read(&dest->activeconns),
1921                                            atomic_read(&dest->inactconns));
1922
1923                 }
1924         }
1925         return 0;
1926 }
1927
1928 static const struct seq_operations ip_vs_info_seq_ops = {
1929         .start = ip_vs_info_seq_start,
1930         .next  = ip_vs_info_seq_next,
1931         .stop  = ip_vs_info_seq_stop,
1932         .show  = ip_vs_info_seq_show,
1933 };
1934
1935 static int ip_vs_info_open(struct inode *inode, struct file *file)
1936 {
1937         return seq_open_private(file, &ip_vs_info_seq_ops,
1938                         sizeof(struct ip_vs_iter));
1939 }
1940
1941 static const struct file_operations ip_vs_info_fops = {
1942         .owner   = THIS_MODULE,
1943         .open    = ip_vs_info_open,
1944         .read    = seq_read,
1945         .llseek  = seq_lseek,
1946         .release = seq_release_private,
1947 };
1948
1949 #endif
1950
1951 struct ip_vs_stats ip_vs_stats = {
1952         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1953 };
1954
1955 #ifdef CONFIG_PROC_FS
1956 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1957 {
1958
1959 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1960         seq_puts(seq,
1961                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1962         seq_printf(seq,
1963                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1964
1965         spin_lock_bh(&ip_vs_stats.lock);
1966         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1967                    ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1968                    (unsigned long long) ip_vs_stats.ustats.inbytes,
1969                    (unsigned long long) ip_vs_stats.ustats.outbytes);
1970
1971 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1972         seq_puts(seq,
1973                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1974         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1975                         ip_vs_stats.ustats.cps,
1976                         ip_vs_stats.ustats.inpps,
1977                         ip_vs_stats.ustats.outpps,
1978                         ip_vs_stats.ustats.inbps,
1979                         ip_vs_stats.ustats.outbps);
1980         spin_unlock_bh(&ip_vs_stats.lock);
1981
1982         return 0;
1983 }
1984
1985 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1986 {
1987         return single_open(file, ip_vs_stats_show, NULL);
1988 }
1989
1990 static const struct file_operations ip_vs_stats_fops = {
1991         .owner = THIS_MODULE,
1992         .open = ip_vs_stats_seq_open,
1993         .read = seq_read,
1994         .llseek = seq_lseek,
1995         .release = single_release,
1996 };
1997
1998 #endif
1999
2000 /*
2001  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2002  */
2003 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2004 {
2005         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2006                   u->tcp_timeout,
2007                   u->tcp_fin_timeout,
2008                   u->udp_timeout);
2009
2010 #ifdef CONFIG_IP_VS_PROTO_TCP
2011         if (u->tcp_timeout) {
2012                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2013                         = u->tcp_timeout * HZ;
2014         }
2015
2016         if (u->tcp_fin_timeout) {
2017                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2018                         = u->tcp_fin_timeout * HZ;
2019         }
2020 #endif
2021
2022 #ifdef CONFIG_IP_VS_PROTO_UDP
2023         if (u->udp_timeout) {
2024                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2025                         = u->udp_timeout * HZ;
2026         }
2027 #endif
2028         return 0;
2029 }
2030
2031
2032 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2033 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2034 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2035                                  sizeof(struct ip_vs_dest_user))
2036 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2037 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2038 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2039
2040 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2041         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2042         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2043         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2044         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2045         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2046         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2047         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2048         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2049         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2050         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2051         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2052 };
2053
2054 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2055                                   struct ip_vs_service_user *usvc_compat)
2056 {
2057         usvc->af                = AF_INET;
2058         usvc->protocol          = usvc_compat->protocol;
2059         usvc->addr.ip           = usvc_compat->addr;
2060         usvc->port              = usvc_compat->port;
2061         usvc->fwmark            = usvc_compat->fwmark;
2062
2063         /* Deep copy of sched_name is not needed here */
2064         usvc->sched_name        = usvc_compat->sched_name;
2065
2066         usvc->flags             = usvc_compat->flags;
2067         usvc->timeout           = usvc_compat->timeout;
2068         usvc->netmask           = usvc_compat->netmask;
2069 }
2070
2071 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2072                                    struct ip_vs_dest_user *udest_compat)
2073 {
2074         udest->addr.ip          = udest_compat->addr;
2075         udest->port             = udest_compat->port;
2076         udest->conn_flags       = udest_compat->conn_flags;
2077         udest->weight           = udest_compat->weight;
2078         udest->u_threshold      = udest_compat->u_threshold;
2079         udest->l_threshold      = udest_compat->l_threshold;
2080 }
2081
2082 static int
2083 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2084 {
2085         int ret;
2086         unsigned char arg[MAX_ARG_LEN];
2087         struct ip_vs_service_user *usvc_compat;
2088         struct ip_vs_service_user_kern usvc;
2089         struct ip_vs_service *svc;
2090         struct ip_vs_dest_user *udest_compat;
2091         struct ip_vs_dest_user_kern udest;
2092
2093         if (!capable(CAP_NET_ADMIN))
2094                 return -EPERM;
2095
2096         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2097                 return -EINVAL;
2098         if (len < 0 || len >  MAX_ARG_LEN)
2099                 return -EINVAL;
2100         if (len != set_arglen[SET_CMDID(cmd)]) {
2101                 pr_err("set_ctl: len %u != %u\n",
2102                        len, set_arglen[SET_CMDID(cmd)]);
2103                 return -EINVAL;
2104         }
2105
2106         if (copy_from_user(arg, user, len) != 0)
2107                 return -EFAULT;
2108
2109         /* increase the module use count */
2110         ip_vs_use_count_inc();
2111
2112         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2113                 ret = -ERESTARTSYS;
2114                 goto out_dec;
2115         }
2116
2117         if (cmd == IP_VS_SO_SET_FLUSH) {
2118                 /* Flush the virtual service */
2119                 ret = ip_vs_flush();
2120                 goto out_unlock;
2121         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2122                 /* Set timeout values for (tcp tcpfin udp) */
2123                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2124                 goto out_unlock;
2125         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2126                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2127                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2128                 goto out_unlock;
2129         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2130                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2131                 ret = stop_sync_thread(dm->state);
2132                 goto out_unlock;
2133         }
2134
2135         usvc_compat = (struct ip_vs_service_user *)arg;
2136         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2137
2138         /* We only use the new structs internally, so copy userspace compat
2139          * structs to extended internal versions */
2140         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2141         ip_vs_copy_udest_compat(&udest, udest_compat);
2142
2143         if (cmd == IP_VS_SO_SET_ZERO) {
2144                 /* if no service address is set, zero counters in all */
2145                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2146                         ret = ip_vs_zero_all();
2147                         goto out_unlock;
2148                 }
2149         }
2150
2151         /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2152         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2153             usvc.protocol != IPPROTO_SCTP) {
2154                 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2155                        usvc.protocol, &usvc.addr.ip,
2156                        ntohs(usvc.port), usvc.sched_name);
2157                 ret = -EFAULT;
2158                 goto out_unlock;
2159         }
2160
2161         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2162         if (usvc.fwmark == 0)
2163                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2164                                           &usvc.addr, usvc.port);
2165         else
2166                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2167
2168         if (cmd != IP_VS_SO_SET_ADD
2169             && (svc == NULL || svc->protocol != usvc.protocol)) {
2170                 ret = -ESRCH;
2171                 goto out_drop_service;
2172         }
2173
2174         switch (cmd) {
2175         case IP_VS_SO_SET_ADD:
2176                 if (svc != NULL)
2177                         ret = -EEXIST;
2178                 else
2179                         ret = ip_vs_add_service(&usvc, &svc);
2180                 break;
2181         case IP_VS_SO_SET_EDIT:
2182                 ret = ip_vs_edit_service(svc, &usvc);
2183                 break;
2184         case IP_VS_SO_SET_DEL:
2185                 ret = ip_vs_del_service(svc);
2186                 if (!ret)
2187                         goto out_unlock;
2188                 break;
2189         case IP_VS_SO_SET_ZERO:
2190                 ret = ip_vs_zero_service(svc);
2191                 break;
2192         case IP_VS_SO_SET_ADDDEST:
2193                 ret = ip_vs_add_dest(svc, &udest);
2194                 break;
2195         case IP_VS_SO_SET_EDITDEST:
2196                 ret = ip_vs_edit_dest(svc, &udest);
2197                 break;
2198         case IP_VS_SO_SET_DELDEST:
2199                 ret = ip_vs_del_dest(svc, &udest);
2200                 break;
2201         default:
2202                 ret = -EINVAL;
2203         }
2204
2205 out_drop_service:
2206         if (svc)
2207                 ip_vs_service_put(svc);
2208
2209   out_unlock:
2210         mutex_unlock(&__ip_vs_mutex);
2211   out_dec:
2212         /* decrease the module use count */
2213         ip_vs_use_count_dec();
2214
2215         return ret;
2216 }
2217
2218
2219 static void
2220 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2221 {
2222         spin_lock_bh(&src->lock);
2223         memcpy(dst, &src->ustats, sizeof(*dst));
2224         spin_unlock_bh(&src->lock);
2225 }
2226
2227 static void
2228 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2229 {
2230         dst->protocol = src->protocol;
2231         dst->addr = src->addr.ip;
2232         dst->port = src->port;
2233         dst->fwmark = src->fwmark;
2234         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2235         dst->flags = src->flags;
2236         dst->timeout = src->timeout / HZ;
2237         dst->netmask = src->netmask;
2238         dst->num_dests = src->num_dests;
2239         ip_vs_copy_stats(&dst->stats, &src->stats);
2240 }
2241
2242 static inline int
2243 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2244                             struct ip_vs_get_services __user *uptr)
2245 {
2246         int idx, count=0;
2247         struct ip_vs_service *svc;
2248         struct ip_vs_service_entry entry;
2249         int ret = 0;
2250
2251         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2252                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2253                         /* Only expose IPv4 entries to old interface */
2254                         if (svc->af != AF_INET)
2255                                 continue;
2256
2257                         if (count >= get->num_services)
2258                                 goto out;
2259                         memset(&entry, 0, sizeof(entry));
2260                         ip_vs_copy_service(&entry, svc);
2261                         if (copy_to_user(&uptr->entrytable[count],
2262                                          &entry, sizeof(entry))) {
2263                                 ret = -EFAULT;
2264                                 goto out;
2265                         }
2266                         count++;
2267                 }
2268         }
2269
2270         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2271                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2272                         /* Only expose IPv4 entries to old interface */
2273                         if (svc->af != AF_INET)
2274                                 continue;
2275
2276                         if (count >= get->num_services)
2277                                 goto out;
2278                         memset(&entry, 0, sizeof(entry));
2279                         ip_vs_copy_service(&entry, svc);
2280                         if (copy_to_user(&uptr->entrytable[count],
2281                                          &entry, sizeof(entry))) {
2282                                 ret = -EFAULT;
2283                                 goto out;
2284                         }
2285                         count++;
2286                 }
2287         }
2288   out:
2289         return ret;
2290 }
2291
2292 static inline int
2293 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2294                          struct ip_vs_get_dests __user *uptr)
2295 {
2296         struct ip_vs_service *svc;
2297         union nf_inet_addr addr = { .ip = get->addr };
2298         int ret = 0;
2299
2300         if (get->fwmark)
2301                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2302         else
2303                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2304                                           get->port);
2305
2306         if (svc) {
2307                 int count = 0;
2308                 struct ip_vs_dest *dest;
2309                 struct ip_vs_dest_entry entry;
2310
2311                 list_for_each_entry(dest, &svc->destinations, n_list) {
2312                         if (count >= get->num_dests)
2313                                 break;
2314
2315                         entry.addr = dest->addr.ip;
2316                         entry.port = dest->port;
2317                         entry.conn_flags = atomic_read(&dest->conn_flags);
2318                         entry.weight = atomic_read(&dest->weight);
2319                         entry.u_threshold = dest->u_threshold;
2320                         entry.l_threshold = dest->l_threshold;
2321                         entry.activeconns = atomic_read(&dest->activeconns);
2322                         entry.inactconns = atomic_read(&dest->inactconns);
2323                         entry.persistconns = atomic_read(&dest->persistconns);
2324                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2325                         if (copy_to_user(&uptr->entrytable[count],
2326                                          &entry, sizeof(entry))) {
2327                                 ret = -EFAULT;
2328                                 break;
2329                         }
2330                         count++;
2331                 }
2332                 ip_vs_service_put(svc);
2333         } else
2334                 ret = -ESRCH;
2335         return ret;
2336 }
2337
2338 static inline void
2339 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2340 {
2341 #ifdef CONFIG_IP_VS_PROTO_TCP
2342         u->tcp_timeout =
2343                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2344         u->tcp_fin_timeout =
2345                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2346 #endif
2347 #ifdef CONFIG_IP_VS_PROTO_UDP
2348         u->udp_timeout =
2349                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2350 #endif
2351 }
2352
2353
2354 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2355 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2356 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2357 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2358 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2359 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2360 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2361
2362 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2363         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2364         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2365         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2366         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2367         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2368         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2369         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2370 };
2371
2372 static int
2373 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2374 {
2375         unsigned char arg[128];
2376         int ret = 0;
2377         unsigned int copylen;
2378
2379         if (!capable(CAP_NET_ADMIN))
2380                 return -EPERM;
2381
2382         if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2383                 return -EINVAL;
2384
2385         if (*len < get_arglen[GET_CMDID(cmd)]) {
2386                 pr_err("get_ctl: len %u < %u\n",
2387                        *len, get_arglen[GET_CMDID(cmd)]);
2388                 return -EINVAL;
2389         }
2390
2391         copylen = get_arglen[GET_CMDID(cmd)];
2392         if (copylen > 128)
2393                 return -EINVAL;
2394
2395         if (copy_from_user(arg, user, copylen) != 0)
2396                 return -EFAULT;
2397
2398         if (mutex_lock_interruptible(&__ip_vs_mutex))
2399                 return -ERESTARTSYS;
2400
2401         switch (cmd) {
2402         case IP_VS_SO_GET_VERSION:
2403         {
2404                 char buf[64];
2405
2406                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2407                         NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2408                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2409                         ret = -EFAULT;
2410                         goto out;
2411                 }
2412                 *len = strlen(buf)+1;
2413         }
2414         break;
2415
2416         case IP_VS_SO_GET_INFO:
2417         {
2418                 struct ip_vs_getinfo info;
2419                 info.version = IP_VS_VERSION_CODE;
2420                 info.size = ip_vs_conn_tab_size;
2421                 info.num_services = ip_vs_num_services;
2422                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2423                         ret = -EFAULT;
2424         }
2425         break;
2426
2427         case IP_VS_SO_GET_SERVICES:
2428         {
2429                 struct ip_vs_get_services *get;
2430                 int size;
2431
2432                 get = (struct ip_vs_get_services *)arg;
2433                 size = sizeof(*get) +
2434                         sizeof(struct ip_vs_service_entry) * get->num_services;
2435                 if (*len != size) {
2436                         pr_err("length: %u != %u\n", *len, size);
2437                         ret = -EINVAL;
2438                         goto out;
2439                 }
2440                 ret = __ip_vs_get_service_entries(get, user);
2441         }
2442         break;
2443
2444         case IP_VS_SO_GET_SERVICE:
2445         {
2446                 struct ip_vs_service_entry *entry;
2447                 struct ip_vs_service *svc;
2448                 union nf_inet_addr addr;
2449
2450                 entry = (struct ip_vs_service_entry *)arg;
2451                 addr.ip = entry->addr;
2452                 if (entry->fwmark)
2453                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2454                 else
2455                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2456                                                   &addr, entry->port);
2457                 if (svc) {
2458                         ip_vs_copy_service(entry, svc);
2459                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2460                                 ret = -EFAULT;
2461                         ip_vs_service_put(svc);
2462                 } else
2463                         ret = -ESRCH;
2464         }
2465         break;
2466
2467         case IP_VS_SO_GET_DESTS:
2468         {
2469                 struct ip_vs_get_dests *get;
2470                 int size;
2471
2472                 get = (struct ip_vs_get_dests *)arg;
2473                 size = sizeof(*get) +
2474                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2475                 if (*len != size) {
2476                         pr_err("length: %u != %u\n", *len, size);
2477                         ret = -EINVAL;
2478                         goto out;
2479                 }
2480                 ret = __ip_vs_get_dest_entries(get, user);
2481         }
2482         break;
2483
2484         case IP_VS_SO_GET_TIMEOUT:
2485         {
2486                 struct ip_vs_timeout_user t;
2487
2488                 __ip_vs_get_timeouts(&t);
2489                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2490                         ret = -EFAULT;
2491         }
2492         break;
2493
2494         case IP_VS_SO_GET_DAEMON:
2495         {
2496                 struct ip_vs_daemon_user d[2];
2497
2498                 memset(&d, 0, sizeof(d));
2499                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2500                         d[0].state = IP_VS_STATE_MASTER;
2501                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2502                         d[0].syncid = ip_vs_master_syncid;
2503                 }
2504                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2505                         d[1].state = IP_VS_STATE_BACKUP;
2506                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2507                         d[1].syncid = ip_vs_backup_syncid;
2508                 }
2509                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2510                         ret = -EFAULT;
2511         }
2512         break;
2513
2514         default:
2515                 ret = -EINVAL;
2516         }
2517
2518   out:
2519         mutex_unlock(&__ip_vs_mutex);
2520         return ret;
2521 }
2522
2523
2524 static struct nf_sockopt_ops ip_vs_sockopts = {
2525         .pf             = PF_INET,
2526         .set_optmin     = IP_VS_BASE_CTL,
2527         .set_optmax     = IP_VS_SO_SET_MAX+1,
2528         .set            = do_ip_vs_set_ctl,
2529         .get_optmin     = IP_VS_BASE_CTL,
2530         .get_optmax     = IP_VS_SO_GET_MAX+1,
2531         .get            = do_ip_vs_get_ctl,
2532         .owner          = THIS_MODULE,
2533 };
2534
2535 /*
2536  * Generic Netlink interface
2537  */
2538
2539 /* IPVS genetlink family */
2540 static struct genl_family ip_vs_genl_family = {
2541         .id             = GENL_ID_GENERATE,
2542         .hdrsize        = 0,
2543         .name           = IPVS_GENL_NAME,
2544         .version        = IPVS_GENL_VERSION,
2545         .maxattr        = IPVS_CMD_MAX,
2546 };
2547
2548 /* Policy used for first-level command attributes */
2549 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2550         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2551         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2552         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2553         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2554         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2555         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2556 };
2557
2558 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2559 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2560         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2561         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2562                                             .len = IP_VS_IFNAME_MAXLEN },
2563         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2564 };
2565
2566 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2567 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2568         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2569         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2570         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2571                                             .len = sizeof(union nf_inet_addr) },
2572         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2573         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2574         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2575                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2576         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2577                                             .len = sizeof(struct ip_vs_flags) },
2578         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2579         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2580         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2581 };
2582
2583 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2584 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2585         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2586                                             .len = sizeof(union nf_inet_addr) },
2587         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2588         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2589         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2590         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2591         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2592         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2593         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2594         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2595         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2596 };
2597
2598 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2599                                  struct ip_vs_stats *stats)
2600 {
2601         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2602         if (!nl_stats)
2603                 return -EMSGSIZE;
2604
2605         spin_lock_bh(&stats->lock);
2606
2607         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2608         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2609         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2610         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2611         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2612         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2613         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2614         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2615         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2616         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2617
2618         spin_unlock_bh(&stats->lock);
2619
2620         nla_nest_end(skb, nl_stats);
2621
2622         return 0;
2623
2624 nla_put_failure:
2625         spin_unlock_bh(&stats->lock);
2626         nla_nest_cancel(skb, nl_stats);
2627         return -EMSGSIZE;
2628 }
2629
2630 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2631                                    struct ip_vs_service *svc)
2632 {
2633         struct nlattr *nl_service;
2634         struct ip_vs_flags flags = { .flags = svc->flags,
2635                                      .mask = ~0 };
2636
2637         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2638         if (!nl_service)
2639                 return -EMSGSIZE;
2640
2641         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2642
2643         if (svc->fwmark) {
2644                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2645         } else {
2646                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2647                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2648                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2649         }
2650
2651         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2652         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2653         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2654         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2655
2656         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2657                 goto nla_put_failure;
2658
2659         nla_nest_end(skb, nl_service);
2660
2661         return 0;
2662
2663 nla_put_failure:
2664         nla_nest_cancel(skb, nl_service);
2665         return -EMSGSIZE;
2666 }
2667
2668 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2669                                    struct ip_vs_service *svc,
2670                                    struct netlink_callback *cb)
2671 {
2672         void *hdr;
2673
2674         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2675                           &ip_vs_genl_family, NLM_F_MULTI,
2676                           IPVS_CMD_NEW_SERVICE);
2677         if (!hdr)
2678                 return -EMSGSIZE;
2679
2680         if (ip_vs_genl_fill_service(skb, svc) < 0)
2681                 goto nla_put_failure;
2682
2683         return genlmsg_end(skb, hdr);
2684
2685 nla_put_failure:
2686         genlmsg_cancel(skb, hdr);
2687         return -EMSGSIZE;
2688 }
2689
2690 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2691                                     struct netlink_callback *cb)
2692 {
2693         int idx = 0, i;
2694         int start = cb->args[0];
2695         struct ip_vs_service *svc;
2696
2697         mutex_lock(&__ip_vs_mutex);
2698         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2699                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2700                         if (++idx <= start)
2701                                 continue;
2702                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2703                                 idx--;
2704                                 goto nla_put_failure;
2705                         }
2706                 }
2707         }
2708
2709         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2710                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2711                         if (++idx <= start)
2712                                 continue;
2713                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2714                                 idx--;
2715                                 goto nla_put_failure;
2716                         }
2717                 }
2718         }
2719
2720 nla_put_failure:
2721         mutex_unlock(&__ip_vs_mutex);
2722         cb->args[0] = idx;
2723
2724         return skb->len;
2725 }
2726
2727 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2728                                     struct nlattr *nla, int full_entry)
2729 {
2730         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2731         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2732
2733         /* Parse mandatory identifying service fields first */
2734         if (nla == NULL ||
2735             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2736                 return -EINVAL;
2737
2738         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2739         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2740         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2741         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2742         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2743
2744         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2745                 return -EINVAL;
2746
2747         memset(usvc, 0, sizeof(*usvc));
2748
2749         usvc->af = nla_get_u16(nla_af);
2750 #ifdef CONFIG_IP_VS_IPV6
2751         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2752 #else
2753         if (usvc->af != AF_INET)
2754 #endif
2755                 return -EAFNOSUPPORT;
2756
2757         if (nla_fwmark) {
2758                 usvc->protocol = IPPROTO_TCP;
2759                 usvc->fwmark = nla_get_u32(nla_fwmark);
2760         } else {
2761                 usvc->protocol = nla_get_u16(nla_protocol);
2762                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2763                 usvc->port = nla_get_u16(nla_port);
2764                 usvc->fwmark = 0;
2765         }
2766
2767         /* If a full entry was requested, check for the additional fields */
2768         if (full_entry) {
2769                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2770                               *nla_netmask;
2771                 struct ip_vs_flags flags;
2772                 struct ip_vs_service *svc;
2773
2774                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2775                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2776                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2777                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2778
2779                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2780                         return -EINVAL;
2781
2782                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2783
2784                 /* prefill flags from service if it already exists */
2785                 if (usvc->fwmark)
2786                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2787                 else
2788                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2789                                                   &usvc->addr, usvc->port);
2790                 if (svc) {
2791                         usvc->flags = svc->flags;
2792                         ip_vs_service_put(svc);
2793                 } else
2794                         usvc->flags = 0;
2795
2796                 /* set new flags from userland */
2797                 usvc->flags = (usvc->flags & ~flags.mask) |
2798                               (flags.flags & flags.mask);
2799                 usvc->sched_name = nla_data(nla_sched);
2800                 usvc->timeout = nla_get_u32(nla_timeout);
2801                 usvc->netmask = nla_get_u32(nla_netmask);
2802         }
2803
2804         return 0;
2805 }
2806
2807 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2808 {
2809         struct ip_vs_service_user_kern usvc;
2810         int ret;
2811
2812         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2813         if (ret)
2814                 return ERR_PTR(ret);
2815
2816         if (usvc.fwmark)
2817                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2818         else
2819                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2820                                            &usvc.addr, usvc.port);
2821 }
2822
2823 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2824 {
2825         struct nlattr *nl_dest;
2826
2827         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2828         if (!nl_dest)
2829                 return -EMSGSIZE;
2830
2831         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2832         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2833
2834         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2835                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2836         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2837         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2838         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2839         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2840                     atomic_read(&dest->activeconns));
2841         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2842                     atomic_read(&dest->inactconns));
2843         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2844                     atomic_read(&dest->persistconns));
2845
2846         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2847                 goto nla_put_failure;
2848
2849         nla_nest_end(skb, nl_dest);
2850
2851         return 0;
2852
2853 nla_put_failure:
2854         nla_nest_cancel(skb, nl_dest);
2855         return -EMSGSIZE;
2856 }
2857
2858 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2859                                 struct netlink_callback *cb)
2860 {
2861         void *hdr;
2862
2863         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2864                           &ip_vs_genl_family, NLM_F_MULTI,
2865                           IPVS_CMD_NEW_DEST);
2866         if (!hdr)
2867                 return -EMSGSIZE;
2868
2869         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2870                 goto nla_put_failure;
2871
2872         return genlmsg_end(skb, hdr);
2873
2874 nla_put_failure:
2875         genlmsg_cancel(skb, hdr);
2876         return -EMSGSIZE;
2877 }
2878
2879 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2880                                  struct netlink_callback *cb)
2881 {
2882         int idx = 0;
2883         int start = cb->args[0];
2884         struct ip_vs_service *svc;
2885         struct ip_vs_dest *dest;
2886         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2887
2888         mutex_lock(&__ip_vs_mutex);
2889
2890         /* Try to find the service for which to dump destinations */
2891         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2892                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2893                 goto out_err;
2894
2895         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2896         if (IS_ERR(svc) || svc == NULL)
2897                 goto out_err;
2898
2899         /* Dump the destinations */
2900         list_for_each_entry(dest, &svc->destinations, n_list) {
2901                 if (++idx <= start)
2902                         continue;
2903                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2904                         idx--;
2905                         goto nla_put_failure;
2906                 }
2907         }
2908
2909 nla_put_failure:
2910         cb->args[0] = idx;
2911         ip_vs_service_put(svc);
2912
2913 out_err:
2914         mutex_unlock(&__ip_vs_mutex);
2915
2916         return skb->len;
2917 }
2918
2919 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2920                                  struct nlattr *nla, int full_entry)
2921 {
2922         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2923         struct nlattr *nla_addr, *nla_port;
2924
2925         /* Parse mandatory identifying destination fields first */
2926         if (nla == NULL ||
2927             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2928                 return -EINVAL;
2929
2930         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2931         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2932
2933         if (!(nla_addr && nla_port))
2934                 return -EINVAL;
2935
2936         memset(udest, 0, sizeof(*udest));
2937
2938         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2939         udest->port = nla_get_u16(nla_port);
2940
2941         /* If a full entry was requested, check for the additional fields */
2942         if (full_entry) {
2943                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2944                               *nla_l_thresh;
2945
2946                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2947                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2948                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2949                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2950
2951                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2952                         return -EINVAL;
2953
2954                 udest->conn_flags = nla_get_u32(nla_fwd)
2955                                     & IP_VS_CONN_F_FWD_MASK;
2956                 udest->weight = nla_get_u32(nla_weight);
2957                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2958                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2959         }
2960
2961         return 0;
2962 }
2963
2964 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2965                                   const char *mcast_ifn, __be32 syncid)
2966 {
2967         struct nlattr *nl_daemon;
2968
2969         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2970         if (!nl_daemon)
2971                 return -EMSGSIZE;
2972
2973         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2974         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2975         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2976
2977         nla_nest_end(skb, nl_daemon);
2978
2979         return 0;
2980
2981 nla_put_failure:
2982         nla_nest_cancel(skb, nl_daemon);
2983         return -EMSGSIZE;
2984 }
2985
2986 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2987                                   const char *mcast_ifn, __be32 syncid,
2988                                   struct netlink_callback *cb)
2989 {
2990         void *hdr;
2991         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2992                           &ip_vs_genl_family, NLM_F_MULTI,
2993                           IPVS_CMD_NEW_DAEMON);
2994         if (!hdr)
2995                 return -EMSGSIZE;
2996
2997         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2998                 goto nla_put_failure;
2999
3000         return genlmsg_end(skb, hdr);
3001
3002 nla_put_failure:
3003         genlmsg_cancel(skb, hdr);
3004         return -EMSGSIZE;
3005 }
3006
3007 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3008                                    struct netlink_callback *cb)
3009 {
3010         mutex_lock(&__ip_vs_mutex);
3011         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3012                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3013                                            ip_vs_master_mcast_ifn,
3014                                            ip_vs_master_syncid, cb) < 0)
3015                         goto nla_put_failure;
3016
3017                 cb->args[0] = 1;
3018         }
3019
3020         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3021                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3022                                            ip_vs_backup_mcast_ifn,
3023                                            ip_vs_backup_syncid, cb) < 0)
3024                         goto nla_put_failure;
3025
3026                 cb->args[1] = 1;
3027         }
3028
3029 nla_put_failure:
3030         mutex_unlock(&__ip_vs_mutex);
3031
3032         return skb->len;
3033 }
3034
3035 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3036 {
3037         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3038               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3039               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3040                 return -EINVAL;
3041
3042         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3043                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3044                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3045 }
3046
3047 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3048 {
3049         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3050                 return -EINVAL;
3051
3052         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3053 }
3054
3055 static int ip_vs_genl_set_config(struct nlattr **attrs)
3056 {
3057         struct ip_vs_timeout_user t;
3058
3059         __ip_vs_get_timeouts(&t);
3060
3061         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3062                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3063
3064         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3065                 t.tcp_fin_timeout =
3066                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3067
3068         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3069                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3070
3071         return ip_vs_set_timeout(&t);
3072 }
3073
3074 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3075 {
3076         struct ip_vs_service *svc = NULL;
3077         struct ip_vs_service_user_kern usvc;
3078         struct ip_vs_dest_user_kern udest;
3079         int ret = 0, cmd;
3080         int need_full_svc = 0, need_full_dest = 0;
3081
3082         cmd = info->genlhdr->cmd;
3083
3084         mutex_lock(&__ip_vs_mutex);
3085
3086         if (cmd == IPVS_CMD_FLUSH) {
3087                 ret = ip_vs_flush();
3088                 goto out;
3089         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3090                 ret = ip_vs_genl_set_config(info->attrs);
3091                 goto out;
3092         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3093                    cmd == IPVS_CMD_DEL_DAEMON) {
3094
3095                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3096
3097                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3098                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3099                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3100                                      ip_vs_daemon_policy)) {
3101                         ret = -EINVAL;
3102                         goto out;
3103                 }
3104
3105                 if (cmd == IPVS_CMD_NEW_DAEMON)
3106                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3107                 else
3108                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3109                 goto out;
3110         } else if (cmd == IPVS_CMD_ZERO &&
3111                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3112                 ret = ip_vs_zero_all();
3113                 goto out;
3114         }
3115
3116         /* All following commands require a service argument, so check if we
3117          * received a valid one. We need a full service specification when
3118          * adding / editing a service. Only identifying members otherwise. */
3119         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3120                 need_full_svc = 1;
3121
3122         ret = ip_vs_genl_parse_service(&usvc,
3123                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3124                                        need_full_svc);
3125         if (ret)
3126                 goto out;
3127
3128         /* Lookup the exact service by <protocol, addr, port> or fwmark */
3129         if (usvc.fwmark == 0)
3130                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3131                                           &usvc.addr, usvc.port);
3132         else
3133                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3134
3135         /* Unless we're adding a new service, the service must already exist */
3136         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3137                 ret = -ESRCH;
3138                 goto out;
3139         }
3140
3141         /* Destination commands require a valid destination argument. For
3142          * adding / editing a destination, we need a full destination
3143          * specification. */
3144         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3145             cmd == IPVS_CMD_DEL_DEST) {
3146                 if (cmd != IPVS_CMD_DEL_DEST)
3147                         need_full_dest = 1;
3148
3149                 ret = ip_vs_genl_parse_dest(&udest,
3150                                             info->attrs[IPVS_CMD_ATTR_DEST],
3151                                             need_full_dest);
3152                 if (ret)
3153                         goto out;
3154         }
3155
3156         switch (cmd) {
3157         case IPVS_CMD_NEW_SERVICE:
3158                 if (svc == NULL)
3159                         ret = ip_vs_add_service(&usvc, &svc);
3160                 else
3161                         ret = -EEXIST;
3162                 break;
3163         case IPVS_CMD_SET_SERVICE:
3164                 ret = ip_vs_edit_service(svc, &usvc);
3165                 break;
3166         case IPVS_CMD_DEL_SERVICE:
3167                 ret = ip_vs_del_service(svc);
3168                 break;
3169         case IPVS_CMD_NEW_DEST:
3170                 ret = ip_vs_add_dest(svc, &udest);
3171                 break;
3172         case IPVS_CMD_SET_DEST:
3173                 ret = ip_vs_edit_dest(svc, &udest);
3174                 break;
3175         case IPVS_CMD_DEL_DEST:
3176                 ret = ip_vs_del_dest(svc, &udest);
3177                 break;
3178         case IPVS_CMD_ZERO:
3179                 ret = ip_vs_zero_service(svc);
3180                 break;
3181         default:
3182                 ret = -EINVAL;
3183         }
3184
3185 out:
3186         if (svc)
3187                 ip_vs_service_put(svc);
3188         mutex_unlock(&__ip_vs_mutex);
3189
3190         return ret;
3191 }
3192
3193 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3194 {
3195         struct sk_buff *msg;
3196         void *reply;
3197         int ret, cmd, reply_cmd;
3198
3199         cmd = info->genlhdr->cmd;
3200
3201         if (cmd == IPVS_CMD_GET_SERVICE)
3202                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3203         else if (cmd == IPVS_CMD_GET_INFO)
3204                 reply_cmd = IPVS_CMD_SET_INFO;
3205         else if (cmd == IPVS_CMD_GET_CONFIG)
3206                 reply_cmd = IPVS_CMD_SET_CONFIG;
3207         else {
3208                 pr_err("unknown Generic Netlink command\n");
3209                 return -EINVAL;
3210         }
3211
3212         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3213         if (!msg)
3214                 return -ENOMEM;
3215
3216         mutex_lock(&__ip_vs_mutex);
3217
3218         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3219         if (reply == NULL)
3220                 goto nla_put_failure;
3221
3222         switch (cmd) {
3223         case IPVS_CMD_GET_SERVICE:
3224         {
3225                 struct ip_vs_service *svc;
3226
3227                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3228                 if (IS_ERR(svc)) {
3229                         ret = PTR_ERR(svc);
3230                         goto out_err;
3231                 } else if (svc) {
3232                         ret = ip_vs_genl_fill_service(msg, svc);
3233                         ip_vs_service_put(svc);
3234                         if (ret)
3235                                 goto nla_put_failure;
3236                 } else {
3237                         ret = -ESRCH;
3238                         goto out_err;
3239                 }
3240
3241                 break;
3242         }
3243
3244         case IPVS_CMD_GET_CONFIG:
3245         {
3246                 struct ip_vs_timeout_user t;
3247
3248                 __ip_vs_get_timeouts(&t);
3249 #ifdef CONFIG_IP_VS_PROTO_TCP
3250                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3251                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3252                             t.tcp_fin_timeout);
3253 #endif
3254 #ifdef CONFIG_IP_VS_PROTO_UDP
3255                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3256 #endif
3257
3258                 break;
3259         }
3260
3261         case IPVS_CMD_GET_INFO:
3262                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3263                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3264                             ip_vs_conn_tab_size);
3265                 break;
3266         }
3267
3268         genlmsg_end(msg, reply);
3269         ret = genlmsg_reply(msg, info);
3270         goto out;
3271
3272 nla_put_failure:
3273         pr_err("not enough space in Netlink message\n");
3274         ret = -EMSGSIZE;
3275
3276 out_err:
3277         nlmsg_free(msg);
3278 out:
3279         mutex_unlock(&__ip_vs_mutex);
3280
3281         return ret;
3282 }
3283
3284
3285 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3286         {
3287                 .cmd    = IPVS_CMD_NEW_SERVICE,
3288                 .flags  = GENL_ADMIN_PERM,
3289                 .policy = ip_vs_cmd_policy,
3290                 .doit   = ip_vs_genl_set_cmd,
3291         },
3292         {
3293                 .cmd    = IPVS_CMD_SET_SERVICE,
3294                 .flags  = GENL_ADMIN_PERM,
3295                 .policy = ip_vs_cmd_policy,
3296                 .doit   = ip_vs_genl_set_cmd,
3297         },
3298         {
3299                 .cmd    = IPVS_CMD_DEL_SERVICE,
3300                 .flags  = GENL_ADMIN_PERM,
3301                 .policy = ip_vs_cmd_policy,
3302                 .doit   = ip_vs_genl_set_cmd,
3303         },
3304         {
3305                 .cmd    = IPVS_CMD_GET_SERVICE,
3306                 .flags  = GENL_ADMIN_PERM,
3307                 .doit   = ip_vs_genl_get_cmd,
3308                 .dumpit = ip_vs_genl_dump_services,
3309                 .policy = ip_vs_cmd_policy,
3310         },
3311         {
3312                 .cmd    = IPVS_CMD_NEW_DEST,
3313                 .flags  = GENL_ADMIN_PERM,
3314                 .policy = ip_vs_cmd_policy,
3315                 .doit   = ip_vs_genl_set_cmd,
3316         },
3317         {
3318                 .cmd    = IPVS_CMD_SET_DEST,
3319                 .flags  = GENL_ADMIN_PERM,
3320                 .policy = ip_vs_cmd_policy,
3321                 .doit   = ip_vs_genl_set_cmd,
3322         },
3323         {
3324                 .cmd    = IPVS_CMD_DEL_DEST,
3325                 .flags  = GENL_ADMIN_PERM,
3326                 .policy = ip_vs_cmd_policy,
3327                 .doit   = ip_vs_genl_set_cmd,
3328         },
3329         {
3330                 .cmd    = IPVS_CMD_GET_DEST,
3331                 .flags  = GENL_ADMIN_PERM,
3332                 .policy = ip_vs_cmd_policy,
3333                 .dumpit = ip_vs_genl_dump_dests,
3334         },
3335         {
3336                 .cmd    = IPVS_CMD_NEW_DAEMON,
3337                 .flags  = GENL_ADMIN_PERM,
3338                 .policy = ip_vs_cmd_policy,
3339                 .doit   = ip_vs_genl_set_cmd,
3340         },
3341         {
3342                 .cmd    = IPVS_CMD_DEL_DAEMON,
3343                 .flags  = GENL_ADMIN_PERM,
3344                 .policy = ip_vs_cmd_policy,
3345                 .doit   = ip_vs_genl_set_cmd,
3346         },
3347         {
3348                 .cmd    = IPVS_CMD_GET_DAEMON,
3349                 .flags  = GENL_ADMIN_PERM,
3350                 .dumpit = ip_vs_genl_dump_daemons,
3351         },
3352         {
3353                 .cmd    = IPVS_CMD_SET_CONFIG,
3354                 .flags  = GENL_ADMIN_PERM,
3355                 .policy = ip_vs_cmd_policy,
3356                 .doit   = ip_vs_genl_set_cmd,
3357         },
3358         {
3359                 .cmd    = IPVS_CMD_GET_CONFIG,
3360                 .flags  = GENL_ADMIN_PERM,
3361                 .doit   = ip_vs_genl_get_cmd,
3362         },
3363         {
3364                 .cmd    = IPVS_CMD_GET_INFO,
3365                 .flags  = GENL_ADMIN_PERM,
3366                 .doit   = ip_vs_genl_get_cmd,
3367         },
3368         {
3369                 .cmd    = IPVS_CMD_ZERO,
3370                 .flags  = GENL_ADMIN_PERM,
3371                 .policy = ip_vs_cmd_policy,
3372                 .doit   = ip_vs_genl_set_cmd,
3373         },
3374         {
3375                 .cmd    = IPVS_CMD_FLUSH,
3376                 .flags  = GENL_ADMIN_PERM,
3377                 .doit   = ip_vs_genl_set_cmd,
3378         },
3379 };
3380
3381 static int __init ip_vs_genl_register(void)
3382 {
3383         return genl_register_family_with_ops(&ip_vs_genl_family,
3384                 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3385 }
3386
3387 static void ip_vs_genl_unregister(void)
3388 {
3389         genl_unregister_family(&ip_vs_genl_family);
3390 }
3391
3392 /* End of Generic Netlink interface definitions */
3393
3394
3395 int __init ip_vs_control_init(void)
3396 {
3397         int ret;
3398         int idx;
3399
3400         EnterFunction(2);
3401
3402         ret = nf_register_sockopt(&ip_vs_sockopts);
3403         if (ret) {
3404                 pr_err("cannot register sockopt.\n");
3405                 return ret;
3406         }
3407
3408         ret = ip_vs_genl_register();
3409         if (ret) {
3410                 pr_err("cannot register Generic Netlink interface.\n");
3411                 nf_unregister_sockopt(&ip_vs_sockopts);
3412                 return ret;
3413         }
3414
3415         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3416         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3417
3418         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3419
3420         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3421         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3422                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3423                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3424         }
3425         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3426                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3427         }
3428
3429         ip_vs_new_estimator(&ip_vs_stats);
3430
3431         /* Hook the defense timer */
3432         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3433
3434         LeaveFunction(2);
3435         return 0;
3436 }
3437
3438
3439 void ip_vs_control_cleanup(void)
3440 {
3441         EnterFunction(2);
3442         ip_vs_trash_cleanup();
3443         cancel_rearming_delayed_work(&defense_work);
3444         cancel_work_sync(&defense_work.work);
3445         ip_vs_kill_estimator(&ip_vs_stats);
3446         unregister_sysctl_table(sysctl_header);
3447         proc_net_remove(&init_net, "ip_vs_stats");
3448         proc_net_remove(&init_net, "ip_vs");
3449         ip_vs_genl_unregister();
3450         nf_unregister_sockopt(&ip_vs_sockopts);
3451         LeaveFunction(2);
3452 }