sched: remove wait_runtime limit
[pandora-kernel.git] / kernel / sysctl.c
1 /*
2  * sysctl.c: General linux system control interface
3  *
4  * Begun 24 March 1995, Stephen Tweedie
5  * Added /proc support, Dec 1995
6  * Added bdflush entry and intvec min/max checking, 2/23/96, Tom Dyas.
7  * Added hooks for /proc/sys/net (minor, minor patch), 96/4/1, Mike Shaver.
8  * Added kernel/java-{interpreter,appletviewer}, 96/5/10, Mike Shaver.
9  * Dynamic registration fixes, Stephen Tweedie.
10  * Added kswapd-interval, ctrl-alt-del, printk stuff, 1/8/97, Chris Horn.
11  * Made sysctl support optional via CONFIG_SYSCTL, 1/10/97, Chris
12  *  Horn.
13  * Added proc_doulongvec_ms_jiffies_minmax, 09/08/99, Carlos H. Bauer.
14  * Added proc_doulongvec_minmax, 09/08/99, Carlos H. Bauer.
15  * Changed linked lists to use list.h instead of lists.h, 02/24/00, Bill
16  *  Wendling.
17  * The list_for_each() macro wasn't appropriate for the sysctl loop.
18  *  Removed it and replaced it with older style, 03/23/00, Bill Wendling
19  */
20
21 #include <linux/module.h>
22 #include <linux/mm.h>
23 #include <linux/swap.h>
24 #include <linux/slab.h>
25 #include <linux/sysctl.h>
26 #include <linux/proc_fs.h>
27 #include <linux/capability.h>
28 #include <linux/ctype.h>
29 #include <linux/utsname.h>
30 #include <linux/smp_lock.h>
31 #include <linux/fs.h>
32 #include <linux/init.h>
33 #include <linux/kernel.h>
34 #include <linux/kobject.h>
35 #include <linux/net.h>
36 #include <linux/sysrq.h>
37 #include <linux/highuid.h>
38 #include <linux/writeback.h>
39 #include <linux/hugetlb.h>
40 #include <linux/security.h>
41 #include <linux/initrd.h>
42 #include <linux/times.h>
43 #include <linux/limits.h>
44 #include <linux/dcache.h>
45 #include <linux/syscalls.h>
46 #include <linux/nfs_fs.h>
47 #include <linux/acpi.h>
48 #include <linux/reboot.h>
49
50 #include <asm/uaccess.h>
51 #include <asm/processor.h>
52
53 #ifdef CONFIG_X86
54 #include <asm/nmi.h>
55 #include <asm/stacktrace.h>
56 #endif
57
58 #if defined(CONFIG_SYSCTL)
59
60 /* External variables not in a header file. */
61 extern int C_A_D;
62 extern int print_fatal_signals;
63 extern int sysctl_overcommit_memory;
64 extern int sysctl_overcommit_ratio;
65 extern int sysctl_panic_on_oom;
66 extern int max_threads;
67 extern int core_uses_pid;
68 extern int suid_dumpable;
69 extern char core_pattern[];
70 extern int pid_max;
71 extern int min_free_kbytes;
72 extern int printk_ratelimit_jiffies;
73 extern int printk_ratelimit_burst;
74 extern int pid_max_min, pid_max_max;
75 extern int sysctl_drop_caches;
76 extern int percpu_pagelist_fraction;
77 extern int compat_log;
78 extern int maps_protect;
79 extern int sysctl_stat_interval;
80 extern int audit_argv_kb;
81
82 /*
 * Limits for the overflow UID/GID sysctls: the "overflowuid" and
 * "overflowgid" entries of kern_table pass minolduid/maxolduid to
 * proc_dointvec_minmax through .extra1/.extra2.  (The original note also
 * mentions fs_ variants; those entries are not in this part of the file.)
 */
83 static int maxolduid = 65535;
84 static int minolduid;
/*
 * Presumably the lower bound for percpu_pagelist_fraction (declared extern
 * above); the entry that uses it is not in this part of the file.
 */
85 static int min_percpu_pagelist_fract = 8;
86
/* Backing variable of the 0444 "ngroups_max" entry in kern_table. */
87 static int ngroups_max = NGROUPS_MAX;
88
89 #ifdef CONFIG_KMOD
90 extern char modprobe_path[];
91 #endif
92 #ifdef CONFIG_CHR_DEV_SG
93 extern int sg_big_buff;
94 #endif
95
96 #ifdef __sparc__
97 extern char reboot_command [];
98 extern int stop_a_enabled;
99 extern int scons_pwroff;
100 #endif
101
102 #ifdef __hppa__
103 extern int pwrsw_enabled;
104 extern int unaligned_enabled;
105 #endif
106
107 #ifdef CONFIG_S390
108 #ifdef CONFIG_MATHEMU
109 extern int sysctl_ieee_emulation_warnings;
110 #endif
111 extern int sysctl_userprocess_debug;
112 extern int spin_retry;
113 #endif
114
115 extern int sysctl_hz_timer;
116
117 #ifdef CONFIG_BSD_PROCESS_ACCT
118 extern int acct_parm[];
119 #endif
120
121 #ifdef CONFIG_IA64
122 extern int no_unaligned_warning;
123 #endif
124
125 #ifdef CONFIG_RT_MUTEXES
126 extern int max_lock_depth;
127 #endif
128
129 #ifdef CONFIG_SYSCTL_SYSCALL
130 static int parse_table(int __user *, int, void __user *, size_t __user *,
131                 void __user *, size_t, ctl_table *);
132 #endif
133
134
135 #ifdef CONFIG_PROC_SYSCTL
136 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
137                   void __user *buffer, size_t *lenp, loff_t *ppos);
138 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
139                                void __user *buffer, size_t *lenp, loff_t *ppos);
140 #endif
141
/*
 * Forward declaration: root_table is defined further down, but the header
 * object below already needs to refer to it.
 */
142 static ctl_table root_table[];
/*
 * Header object for the built-in tables.  The initializer is positional:
 * the first member receives root_table and the second is set with
 * LIST_HEAD_INIT() applied to the header's own ctl_entry member (presumably
 * an empty, self-referencing list -- LIST_HEAD_INIT is defined outside this
 * file).  The member order of struct ctl_table_header is not visible here,
 * so the two values must stay in this order.
 */
143 static struct ctl_table_header root_table_header =
144         { root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
145
146 static ctl_table kern_table[];
147 static ctl_table vm_table[];
148 static ctl_table fs_table[];
149 static ctl_table debug_table[];
150 static ctl_table dev_table[];
151 extern ctl_table random_table[];
152 #ifdef CONFIG_UNIX98_PTYS
153 extern ctl_table pty_table[];
154 #endif
155 #ifdef CONFIG_INOTIFY_USER
156 extern ctl_table inotify_table[];
157 #endif
158
/*
 * Defined only when HAVE_ARCH_PICK_MMAP_LAYOUT is set.  Not static, so it
 * can be referenced from other files.  Presumably backs a legacy_va_layout
 * sysctl; the table entry exposing it is not in this part of the file.
 */
159 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
160 int sysctl_legacy_va_layout;
161 #endif
162
163 extern int prove_locking;
164 extern int lock_stat;
165
166 /* The default sysctl tables: */
167
168 static ctl_table root_table[] = {
169         {
170                 .ctl_name       = CTL_KERN,
171                 .procname       = "kernel",
172                 .mode           = 0555,
173                 .child          = kern_table,
174         },
175         {
176                 .ctl_name       = CTL_VM,
177                 .procname       = "vm",
178                 .mode           = 0555,
179                 .child          = vm_table,
180         },
181 #ifdef CONFIG_NET
182         {
183                 .ctl_name       = CTL_NET,
184                 .procname       = "net",
185                 .mode           = 0555,
186                 .child          = net_table,
187         },
188 #endif
189         {
190                 .ctl_name       = CTL_FS,
191                 .procname       = "fs",
192                 .mode           = 0555,
193                 .child          = fs_table,
194         },
195         {
196                 .ctl_name       = CTL_DEBUG,
197                 .procname       = "debug",
198                 .mode           = 0555,
199                 .child          = debug_table,
200         },
201         {
202                 .ctl_name       = CTL_DEV,
203                 .procname       = "dev",
204                 .mode           = 0555,
205                 .child          = dev_table,
206         },
207 /*
208  * NOTE: do not add new entries to this table unless you have read
209  * Documentation/sysctl/ctl_unnumbered.txt
210  */
211         { .ctl_name = 0 }
212 };
213
#ifdef CONFIG_SCHED_DEBUG
/*
 * Lower/upper limits for the scheduler tunables in kern_table.
 *
 * The table entries hand these to proc_dointvec_minmax / sysctl_intvec
 * through .extra1/.extra2, exactly like the other minmax limits in this
 * file (minolduid/maxolduid, pid_max_min/pid_max_max), which are all plain
 * ints.  The limits therefore have to be ints too: with a wider type the
 * integer handler would look at only part of each object on 64-bit builds.
 * Every value below fits comfortably in an int.
 */
static int min_sched_granularity_ns = 100000;           /* 100 usecs */
static int max_sched_granularity_ns = 1000000000;       /* 1 second */
static int min_wakeup_granularity_ns;                   /* 0 usecs */
static int max_wakeup_granularity_ns = 1000000000;      /* 1 second */
#endif
220
/*
 * kern_table: the entries hooked under the "kernel" entry of root_table
 * (root_table sets .child = kern_table).
 *
 * Reading the entries below:
 *  - .ctl_name is either a KERN_* id or CTL_UNNUMBERED.
 *  - .data / .maxlen name the backing variable and a size in bytes;
 *    .mode is the permission mask (0444 entries have no write bits,
 *    0600 entries are owner-only).
 *  - .proc_handler (and .strategy where given) is the access routine.
 *  - entries using proc_dointvec_minmax pass a lower and an upper limit
 *    through .extra1 and .extra2.
 *  - "random" and "pty" carry a .child table instead of data.
 *
 * Many entries exist only under the #ifdef that surrounds them.  The final
 * { .ctl_name = 0 } entry closes the table (presumably the terminator the
 * table walker stops at -- the walker is not in this part of the file).
 */
221 static ctl_table kern_table[] = {
        /* Scheduler tunables, present only with CONFIG_SCHED_DEBUG. */
222 #ifdef CONFIG_SCHED_DEBUG
223         {
224                 .ctl_name       = CTL_UNNUMBERED,
225                 .procname       = "sched_min_granularity_ns",
226                 .data           = &sysctl_sched_min_granularity,
227                 .maxlen         = sizeof(unsigned int),
228                 .mode           = 0644,
229                 .proc_handler   = &proc_dointvec_minmax,
230                 .strategy       = &sysctl_intvec,
231                 .extra1         = &min_sched_granularity_ns,
232                 .extra2         = &max_sched_granularity_ns,
233         },
        /* Uses the same limits as sched_min_granularity_ns above. */
234         {
235                 .ctl_name       = CTL_UNNUMBERED,
236                 .procname       = "sched_latency_ns",
237                 .data           = &sysctl_sched_latency,
238                 .maxlen         = sizeof(unsigned int),
239                 .mode           = 0644,
240                 .proc_handler   = &proc_dointvec_minmax,
241                 .strategy       = &sysctl_intvec,
242                 .extra1         = &min_sched_granularity_ns,
243                 .extra2         = &max_sched_granularity_ns,
244         },
245         {
246                 .ctl_name       = CTL_UNNUMBERED,
247                 .procname       = "sched_wakeup_granularity_ns",
248                 .data           = &sysctl_sched_wakeup_granularity,
249                 .maxlen         = sizeof(unsigned int),
250                 .mode           = 0644,
251                 .proc_handler   = &proc_dointvec_minmax,
252                 .strategy       = &sysctl_intvec,
253                 .extra1         = &min_wakeup_granularity_ns,
254                 .extra2         = &max_wakeup_granularity_ns,
255         },
        /* Uses the same limits as sched_wakeup_granularity_ns above. */
256         {
257                 .ctl_name       = CTL_UNNUMBERED,
258                 .procname       = "sched_batch_wakeup_granularity_ns",
259                 .data           = &sysctl_sched_batch_wakeup_granularity,
260                 .maxlen         = sizeof(unsigned int),
261                 .mode           = 0644,
262                 .proc_handler   = &proc_dointvec_minmax,
263                 .strategy       = &sysctl_intvec,
264                 .extra1         = &min_wakeup_granularity_ns,
265                 .extra2         = &max_wakeup_granularity_ns,
266         },
267         {
268                 .ctl_name       = CTL_UNNUMBERED,
269                 .procname       = "sched_child_runs_first",
270                 .data           = &sysctl_sched_child_runs_first,
271                 .maxlen         = sizeof(unsigned int),
272                 .mode           = 0644,
273                 .proc_handler   = &proc_dointvec,
274         },
275         {
276                 .ctl_name       = CTL_UNNUMBERED,
277                 .procname       = "sched_features",
278                 .data           = &sysctl_sched_features,
279                 .maxlen         = sizeof(unsigned int),
280                 .mode           = 0644,
281                 .proc_handler   = &proc_dointvec,
282         },
283 #endif
        /* Outside the CONFIG_SCHED_DEBUG block, so always present. */
284         {
285                 .ctl_name       = CTL_UNNUMBERED,
286                 .procname       = "sched_compat_yield",
287                 .data           = &sysctl_sched_compat_yield,
288                 .maxlen         = sizeof(unsigned int),
289                 .mode           = 0644,
290                 .proc_handler   = &proc_dointvec,
291         },
292 #ifdef CONFIG_PROVE_LOCKING
293         {
294                 .ctl_name       = CTL_UNNUMBERED,
295                 .procname       = "prove_locking",
296                 .data           = &prove_locking,
297                 .maxlen         = sizeof(int),
298                 .mode           = 0644,
299                 .proc_handler   = &proc_dointvec,
300         },
301 #endif
302 #ifdef CONFIG_LOCK_STAT
303         {
304                 .ctl_name       = CTL_UNNUMBERED,
305                 .procname       = "lock_stat",
306                 .data           = &lock_stat,
307                 .maxlen         = sizeof(int),
308                 .mode           = 0644,
309                 .proc_handler   = &proc_dointvec,
310         },
311 #endif
312         {
313                 .ctl_name       = KERN_PANIC,
314                 .procname       = "panic",
315                 .data           = &panic_timeout,
316                 .maxlen         = sizeof(int),
317                 .mode           = 0644,
318                 .proc_handler   = &proc_dointvec,
319         },
320         {
321                 .ctl_name       = KERN_CORE_USES_PID,
322                 .procname       = "core_uses_pid",
323                 .data           = &core_uses_pid,
324                 .maxlen         = sizeof(int),
325                 .mode           = 0644,
326                 .proc_handler   = &proc_dointvec,
327         },
328 #ifdef CONFIG_AUDITSYSCALL
329         {
330                 .ctl_name       = CTL_UNNUMBERED,
331                 .procname       = "audit_argv_kb",
332                 .data           = &audit_argv_kb,
333                 .maxlen         = sizeof(int),
334                 .mode           = 0644,
335                 .proc_handler   = &proc_dointvec,
336         },
337 #endif
        /* String entry backed by the core_pattern[] buffer. */
338         {
339                 .ctl_name       = KERN_CORE_PATTERN,
340                 .procname       = "core_pattern",
341                 .data           = core_pattern,
342                 .maxlen         = CORENAME_MAX_SIZE,
343                 .mode           = 0644,
344                 .proc_handler   = &proc_dostring,
345                 .strategy       = &sysctl_string,
346         },
        /*
         * Uses the file-local proc_dointvec_taint handler, whose prototype
         * is declared above under the same CONFIG_PROC_SYSCTL condition.
         */
347 #ifdef CONFIG_PROC_SYSCTL
348         {
349                 .ctl_name       = KERN_TAINTED,
350                 .procname       = "tainted",
351                 .data           = &tainted,
352                 .maxlen         = sizeof(int),
353                 .mode           = 0644,
354                 .proc_handler   = &proc_dointvec_taint,
355         },
356 #endif
        /* Mode 0600: no access bits for group or others. */
357         {
358                 .ctl_name       = KERN_CAP_BSET,
359                 .procname       = "cap-bound",
360                 .data           = &cap_bset,
361                 .maxlen         = sizeof(kernel_cap_t),
362                 .mode           = 0600,
363                 .proc_handler   = &proc_dointvec_bset,
364         },
365 #ifdef CONFIG_BLK_DEV_INITRD
366         {
367                 .ctl_name       = KERN_REALROOTDEV,
368                 .procname       = "real-root-dev",
369                 .data           = &real_root_dev,
370                 .maxlen         = sizeof(int),
371                 .mode           = 0644,
372                 .proc_handler   = &proc_dointvec,
373         },
374 #endif
375         {
376                 .ctl_name       = CTL_UNNUMBERED,
377                 .procname       = "print-fatal-signals",
378                 .data           = &print_fatal_signals,
379                 .maxlen         = sizeof(int),
380                 .mode           = 0644,
381                 .proc_handler   = &proc_dointvec,
382         },
        /* sparc-only entries; their variables are declared extern above. */
383 #ifdef __sparc__
384         {
385                 .ctl_name       = KERN_SPARC_REBOOT,
386                 .procname       = "reboot-cmd",
387                 .data           = reboot_command,
388                 .maxlen         = 256,
389                 .mode           = 0644,
390                 .proc_handler   = &proc_dostring,
391                 .strategy       = &sysctl_string,
392         },
393         {
394                 .ctl_name       = KERN_SPARC_STOP_A,
395                 .procname       = "stop-a",
396                 .data           = &stop_a_enabled,
397                 .maxlen         = sizeof (int),
398                 .mode           = 0644,
399                 .proc_handler   = &proc_dointvec,
400         },
401         {
402                 .ctl_name       = KERN_SPARC_SCONS_PWROFF,
403                 .procname       = "scons-poweroff",
404                 .data           = &scons_pwroff,
405                 .maxlen         = sizeof (int),
406                 .mode           = 0644,
407                 .proc_handler   = &proc_dointvec,
408         },
409 #endif
        /* hppa-only entries; their variables are declared extern above. */
410 #ifdef __hppa__
411         {
412                 .ctl_name       = KERN_HPPA_PWRSW,
413                 .procname       = "soft-power",
414                 .data           = &pwrsw_enabled,
415                 .maxlen         = sizeof (int),
416                 .mode           = 0644,
417                 .proc_handler   = &proc_dointvec,
418         },
419         {
420                 .ctl_name       = KERN_HPPA_UNALIGNED,
421                 .procname       = "unaligned-trap",
422                 .data           = &unaligned_enabled,
423                 .maxlen         = sizeof (int),
424                 .mode           = 0644,
425                 .proc_handler   = &proc_dointvec,
426         },
427 #endif
428         {
429                 .ctl_name       = KERN_CTLALTDEL,
430                 .procname       = "ctrl-alt-del",
431                 .data           = &C_A_D,
432                 .maxlen         = sizeof(int),
433                 .mode           = 0644,
434                 .proc_handler   = &proc_dointvec,
435         },
        /* .maxlen covers four ints, starting at console_loglevel. */
436         {
437                 .ctl_name       = KERN_PRINTK,
438                 .procname       = "printk",
439                 .data           = &console_loglevel,
440                 .maxlen         = 4*sizeof(int),
441                 .mode           = 0644,
442                 .proc_handler   = &proc_dointvec,
443         },
444 #ifdef CONFIG_KMOD
445         {
446                 .ctl_name       = KERN_MODPROBE,
447                 .procname       = "modprobe",
448                 .data           = &modprobe_path,
449                 .maxlen         = KMOD_PATH_LEN,
450                 .mode           = 0644,
451                 .proc_handler   = &proc_dostring,
452                 .strategy       = &sysctl_string,
453         },
454 #endif
455 #if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
456         {
457                 .ctl_name       = KERN_HOTPLUG,
458                 .procname       = "hotplug",
459                 .data           = &uevent_helper,
460                 .maxlen         = UEVENT_HELPER_PATH_LEN,
461                 .mode           = 0644,
462                 .proc_handler   = &proc_dostring,
463                 .strategy       = &sysctl_string,
464         },
465 #endif
        /* Mode 0444: no write bits. */
466 #ifdef CONFIG_CHR_DEV_SG
467         {
468                 .ctl_name       = KERN_SG_BIG_BUFF,
469                 .procname       = "sg-big-buff",
470                 .data           = &sg_big_buff,
471                 .maxlen         = sizeof (int),
472                 .mode           = 0444,
473                 .proc_handler   = &proc_dointvec,
474         },
475 #endif
        /* .maxlen covers three ints of acct_parm[]. */
476 #ifdef CONFIG_BSD_PROCESS_ACCT
477         {
478                 .ctl_name       = KERN_ACCT,
479                 .procname       = "acct",
480                 .data           = &acct_parm,
481                 .maxlen         = 3*sizeof(int),
482                 .mode           = 0644,
483                 .proc_handler   = &proc_dointvec,
484         },
485 #endif
486 #ifdef CONFIG_MAGIC_SYSRQ
487         {
488                 .ctl_name       = KERN_SYSRQ,
489                 .procname       = "sysrq",
490                 .data           = &__sysrq_enabled,
491                 .maxlen         = sizeof (int),
492                 .mode           = 0644,
493                 .proc_handler   = &proc_dointvec,
494         },
495 #endif
        /*
         * No backing variable (.data is NULL); access goes through the
         * file-local proc_do_cad_pid handler declared above.  Mode 0600.
         */
496 #ifdef CONFIG_PROC_SYSCTL
497         {
498                 .ctl_name       = KERN_CADPID,
499                 .procname       = "cad_pid",
500                 .data           = NULL,
501                 .maxlen         = sizeof (int),
502                 .mode           = 0600,
503                 .proc_handler   = &proc_do_cad_pid,
504         },
505 #endif
506         {
507                 .ctl_name       = KERN_MAX_THREADS,
508                 .procname       = "threads-max",
509                 .data           = &max_threads,
510                 .maxlen         = sizeof(int),
511                 .mode           = 0644,
512                 .proc_handler   = &proc_dointvec,
513         },
        /* Sub-table: entries come from random_table (declared extern above). */
514         {
515                 .ctl_name       = KERN_RANDOM,
516                 .procname       = "random",
517                 .mode           = 0555,
518                 .child          = random_table,
519         },
        /* Sub-table: entries come from pty_table (declared extern above). */
520 #ifdef CONFIG_UNIX98_PTYS
521         {
522                 .ctl_name       = KERN_PTY,
523                 .procname       = "pty",
524                 .mode           = 0555,
525                 .child          = pty_table,
526         },
527 #endif
        /* Limited to the range minolduid..maxolduid defined in this file. */
528         {
529                 .ctl_name       = KERN_OVERFLOWUID,
530                 .procname       = "overflowuid",
531                 .data           = &overflowuid,
532                 .maxlen         = sizeof(int),
533                 .mode           = 0644,
534                 .proc_handler   = &proc_dointvec_minmax,
535                 .strategy       = &sysctl_intvec,
536                 .extra1         = &minolduid,
537                 .extra2         = &maxolduid,
538         },
        /* The GID entry reuses the UID limits. */
539         {
540                 .ctl_name       = KERN_OVERFLOWGID,
541                 .procname       = "overflowgid",
542                 .data           = &overflowgid,
543                 .maxlen         = sizeof(int),
544                 .mode           = 0644,
545                 .proc_handler   = &proc_dointvec_minmax,
546                 .strategy       = &sysctl_intvec,
547                 .extra1         = &minolduid,
548                 .extra2         = &maxolduid,
549         },
        /* s390-only entries; two of them have an additional inner #ifdef. */
550 #ifdef CONFIG_S390
551 #ifdef CONFIG_MATHEMU
552         {
553                 .ctl_name       = KERN_IEEE_EMULATION_WARNINGS,
554                 .procname       = "ieee_emulation_warnings",
555                 .data           = &sysctl_ieee_emulation_warnings,
556                 .maxlen         = sizeof(int),
557                 .mode           = 0644,
558                 .proc_handler   = &proc_dointvec,
559         },
560 #endif
561 #ifdef CONFIG_NO_IDLE_HZ
562         {
563                 .ctl_name       = KERN_HZ_TIMER,
564                 .procname       = "hz_timer",
565                 .data           = &sysctl_hz_timer,
566                 .maxlen         = sizeof(int),
567                 .mode           = 0644,
568                 .proc_handler   = &proc_dointvec,
569         },
570 #endif
571         {
572                 .ctl_name       = KERN_S390_USER_DEBUG_LOGGING,
573                 .procname       = "userprocess_debug",
574                 .data           = &sysctl_userprocess_debug,
575                 .maxlen         = sizeof(int),
576                 .mode           = 0644,
577                 .proc_handler   = &proc_dointvec,
578         },
579 #endif
        /*
         * Limits are the extern ints pid_max_min/pid_max_max.  .strategy is
         * written without '&' here; for a function name that designates the
         * same pointer as the &sysctl_intvec form used by the other entries.
         */
580         {
581                 .ctl_name       = KERN_PIDMAX,
582                 .procname       = "pid_max",
583                 .data           = &pid_max,
584                 .maxlen         = sizeof (int),
585                 .mode           = 0644,
586                 .proc_handler   = &proc_dointvec_minmax,
587                 .strategy       = sysctl_intvec,
588                 .extra1         = &pid_max_min,
589                 .extra2         = &pid_max_max,
590         },
591         {
592                 .ctl_name       = KERN_PANIC_ON_OOPS,
593                 .procname       = "panic_on_oops",
594                 .data           = &panic_on_oops,
595                 .maxlen         = sizeof(int),
596                 .mode           = 0644,
597                 .proc_handler   = &proc_dointvec,
598         },
        /*
         * Backed by printk_ratelimit_jiffies and served by the *_jiffies
         * handler/strategy pair (presumably converting between the
         * user-visible unit and jiffies -- TODO confirm in the handlers).
         */
599         {
600                 .ctl_name       = KERN_PRINTK_RATELIMIT,
601                 .procname       = "printk_ratelimit",
602                 .data           = &printk_ratelimit_jiffies,
603                 .maxlen         = sizeof(int),
604                 .mode           = 0644,
605                 .proc_handler   = &proc_dointvec_jiffies,
606                 .strategy       = &sysctl_jiffies,
607         },
608         {
609                 .ctl_name       = KERN_PRINTK_RATELIMIT_BURST,
610                 .procname       = "printk_ratelimit_burst",
611                 .data           = &printk_ratelimit_burst,
612                 .maxlen         = sizeof(int),
613                 .mode           = 0644,
614                 .proc_handler   = &proc_dointvec,
615         },
        /* Mode 0444; backed by the file-local ngroups_max variable. */
616         {
617                 .ctl_name       = KERN_NGROUPS_MAX,
618                 .procname       = "ngroups_max",
619                 .data           = &ngroups_max,
620                 .maxlen         = sizeof (int),
621                 .mode           = 0444,
622                 .proc_handler   = &proc_dointvec,
623         },
624 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
625         {
626                 .ctl_name       = KERN_UNKNOWN_NMI_PANIC,
627                 .procname       = "unknown_nmi_panic",
628                 .data           = &unknown_nmi_panic,
629                 .maxlen         = sizeof (int),
630                 .mode           = 0644,
631                 .proc_handler   = &proc_dointvec,
632         },
        /* Uses the dedicated proc_nmi_enabled handler, not proc_dointvec. */
633         {
634                 .ctl_name       = KERN_NMI_WATCHDOG,
635                 .procname       = "nmi_watchdog",
636                 .data           = &nmi_watchdog_enabled,
637                 .maxlen         = sizeof (int),
638                 .mode           = 0644,
639                 .proc_handler   = &proc_nmi_enabled,
640         },
641 #endif
642 #if defined(CONFIG_X86)
643         {
644                 .ctl_name       = KERN_PANIC_ON_NMI,
645                 .procname       = "panic_on_unrecovered_nmi",
646                 .data           = &panic_on_unrecovered_nmi,
647                 .maxlen         = sizeof(int),
648                 .mode           = 0644,
649                 .proc_handler   = &proc_dointvec,
650         },
        /* Mode 0444: no write bits. */
651         {
652                 .ctl_name       = KERN_BOOTLOADER_TYPE,
653                 .procname       = "bootloader_type",
654                 .data           = &bootloader_type,
655                 .maxlen         = sizeof (int),
656                 .mode           = 0444,
657                 .proc_handler   = &proc_dointvec,
658         },
659         {
660                 .ctl_name       = CTL_UNNUMBERED,
661                 .procname       = "kstack_depth_to_print",
662                 .data           = &kstack_depth_to_print,
663                 .maxlen         = sizeof(int),
664                 .mode           = 0644,
665                 .proc_handler   = &proc_dointvec,
666         },
667 #endif
668 #if defined(CONFIG_MMU)
669         {
670                 .ctl_name       = KERN_RANDOMIZE,
671                 .procname       = "randomize_va_space",
672                 .data           = &randomize_va_space,
673                 .maxlen         = sizeof(int),
674                 .mode           = 0644,
675                 .proc_handler   = &proc_dointvec,
676         },
677 #endif
678 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
679         {
680                 .ctl_name       = KERN_SPIN_RETRY,
681                 .procname       = "spin_retry",
682                 .data           = &spin_retry,
683                 .maxlen         = sizeof (int),
684                 .mode           = 0644,
685                 .proc_handler   = &proc_dointvec,
686         },
687 #endif
        /*
         * The one unsigned-long entry in this table: sized with
         * sizeof (unsigned long) and served by proc_doulongvec_minmax,
         * with no .extra1/.extra2 limits set.
         */
688 #if     defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
689         {
690                 .ctl_name       = KERN_ACPI_VIDEO_FLAGS,
691                 .procname       = "acpi_video_flags",
692                 .data           = &acpi_realmode_flags,
693                 .maxlen         = sizeof (unsigned long),
694                 .mode           = 0644,
695                 .proc_handler   = &proc_doulongvec_minmax,
696         },
697 #endif
698 #ifdef CONFIG_IA64
699         {
700                 .ctl_name       = KERN_IA64_UNALIGNED,
701                 .procname       = "ignore-unaligned-usertrap",
702                 .data           = &no_unaligned_warning,
703                 .maxlen         = sizeof (int),
704                 .mode           = 0644,
705                 .proc_handler   = &proc_dointvec,
706         },
707 #endif
708 #ifdef CONFIG_COMPAT
709         {
710                 .ctl_name       = KERN_COMPAT_LOG,
711                 .procname       = "compat-log",
712                 .data           = &compat_log,
713                 .maxlen         = sizeof (int),
714                 .mode           = 0644,
715                 .proc_handler   = &proc_dointvec,
716         },
717 #endif
718 #ifdef CONFIG_RT_MUTEXES
719         {
720                 .ctl_name       = KERN_MAX_LOCK_DEPTH,
721                 .procname       = "max_lock_depth",
722                 .data           = &max_lock_depth,
723                 .maxlen         = sizeof(int),
724                 .mode           = 0644,
725                 .proc_handler   = &proc_dointvec,
726         },
727 #endif
728 #ifdef CONFIG_PROC_FS
729         {
730                 .ctl_name       = CTL_UNNUMBERED,
731                 .procname       = "maps_protect",
732                 .data           = &maps_protect,
733                 .maxlen         = sizeof(int),
734                 .mode           = 0644,
735                 .proc_handler   = &proc_dointvec,
736         },
737 #endif
        /* String entry sized by POWEROFF_CMD_PATH_LEN. */
738         {
739                 .ctl_name       = CTL_UNNUMBERED,
740                 .procname       = "poweroff_cmd",
741                 .data           = &poweroff_cmd,
742                 .maxlen         = POWEROFF_CMD_PATH_LEN,
743                 .mode           = 0644,
744                 .proc_handler   = &proc_dostring,
745                 .strategy       = &sysctl_string,
746         },
747 /*
748  * NOTE: do not add new entries to this table unless you have read
749  * Documentation/sysctl/ctl_unnumbered.txt
750  */
751         { .ctl_name = 0 }
752 };
753
754 /*
 * Constants for minimum and maximum testing in vm_table.
 * We use these as one-element integer vectors: entries such as
 * "dirty_background_ratio", "dirty_ratio" and "swappiness" point their
 * .extra1/.extra2 at zero and one_hundred.  No user of `two` appears in
 * the part of vm_table visible here.
 */
756 static int zero;
757 static int two = 2;
758 static int one_hundred = 100;
759
760
761 static ctl_table vm_table[] = {
762         {
763                 .ctl_name       = VM_OVERCOMMIT_MEMORY,
764                 .procname       = "overcommit_memory",
765                 .data           = &sysctl_overcommit_memory,
766                 .maxlen         = sizeof(sysctl_overcommit_memory),
767                 .mode           = 0644,
768                 .proc_handler   = &proc_dointvec,
769         },
770         {
771                 .ctl_name       = VM_PANIC_ON_OOM,
772                 .procname       = "panic_on_oom",
773                 .data           = &sysctl_panic_on_oom,
774                 .maxlen         = sizeof(sysctl_panic_on_oom),
775                 .mode           = 0644,
776                 .proc_handler   = &proc_dointvec,
777         },
778         {
779                 .ctl_name       = VM_OVERCOMMIT_RATIO,
780                 .procname       = "overcommit_ratio",
781                 .data           = &sysctl_overcommit_ratio,
782                 .maxlen         = sizeof(sysctl_overcommit_ratio),
783                 .mode           = 0644,
784                 .proc_handler   = &proc_dointvec,
785         },
786         {
787                 .ctl_name       = VM_PAGE_CLUSTER,
788                 .procname       = "page-cluster", 
789                 .data           = &page_cluster,
790                 .maxlen         = sizeof(int),
791                 .mode           = 0644,
792                 .proc_handler   = &proc_dointvec,
793         },
794         {
795                 .ctl_name       = VM_DIRTY_BACKGROUND,
796                 .procname       = "dirty_background_ratio",
797                 .data           = &dirty_background_ratio,
798                 .maxlen         = sizeof(dirty_background_ratio),
799                 .mode           = 0644,
800                 .proc_handler   = &proc_dointvec_minmax,
801                 .strategy       = &sysctl_intvec,
802                 .extra1         = &zero,
803                 .extra2         = &one_hundred,
804         },
805         {
806                 .ctl_name       = VM_DIRTY_RATIO,
807                 .procname       = "dirty_ratio",
808                 .data           = &vm_dirty_ratio,
809                 .maxlen         = sizeof(vm_dirty_ratio),
810                 .mode           = 0644,
811                 .proc_handler   = &proc_dointvec_minmax,
812                 .strategy       = &sysctl_intvec,
813                 .extra1         = &zero,
814                 .extra2         = &one_hundred,
815         },
816         {
817                 .ctl_name       = VM_DIRTY_WB_CS,
818                 .procname       = "dirty_writeback_centisecs",
819                 .data           = &dirty_writeback_interval,
820                 .maxlen         = sizeof(dirty_writeback_interval),
821                 .mode           = 0644,
822                 .proc_handler   = &dirty_writeback_centisecs_handler,
823         },
824         {
825                 .ctl_name       = VM_DIRTY_EXPIRE_CS,
826                 .procname       = "dirty_expire_centisecs",
827                 .data           = &dirty_expire_interval,
828                 .maxlen         = sizeof(dirty_expire_interval),
829                 .mode           = 0644,
830                 .proc_handler   = &proc_dointvec_userhz_jiffies,
831         },
832         {
833                 .ctl_name       = VM_NR_PDFLUSH_THREADS,
834                 .procname       = "nr_pdflush_threads",
835                 .data           = &nr_pdflush_threads,
836                 .maxlen         = sizeof nr_pdflush_threads,
837                 .mode           = 0444 /* read-only*/,
838                 .proc_handler   = &proc_dointvec,
839         },
840         {
841                 .ctl_name       = VM_SWAPPINESS,
842                 .procname       = "swappiness",
843                 .data           = &vm_swappiness,
844                 .maxlen         = sizeof(vm_swappiness),
845                 .mode           = 0644,
846                 .proc_handler   = &proc_dointvec_minmax,
847                 .strategy       = &sysctl_intvec,
848                 .extra1         = &zero,
849                 .extra2         = &one_hundred,
850         },
851 #ifdef CONFIG_HUGETLB_PAGE
852          {
853                 .ctl_name       = VM_HUGETLB_PAGES,
854                 .procname       = "nr_hugepages",
855                 .data           = &max_huge_pages,
856                 .maxlen         = sizeof(unsigned long),
857                 .mode           = 0644,
858                 .proc_handler   = &hugetlb_sysctl_handler,
859                 .extra1         = (void *)&hugetlb_zero,
860                 .extra2         = (void *)&hugetlb_infinity,
861          },
862          {
863                 .ctl_name       = VM_HUGETLB_GROUP,
864                 .procname       = "hugetlb_shm_group",
865                 .data           = &sysctl_hugetlb_shm_group,
866                 .maxlen         = sizeof(gid_t),
867                 .mode           = 0644,
868                 .proc_handler   = &proc_dointvec,
869          },
870          {
871                 .ctl_name       = CTL_UNNUMBERED,
872                 .procname       = "hugepages_treat_as_movable",
873                 .data           = &hugepages_treat_as_movable,
874                 .maxlen         = sizeof(int),
875                 .mode           = 0644,
876                 .proc_handler   = &hugetlb_treat_movable_handler,
877         },
878 #endif
879         {
880                 .ctl_name       = VM_LOWMEM_RESERVE_RATIO,
881                 .procname       = "lowmem_reserve_ratio",
882                 .data           = &sysctl_lowmem_reserve_ratio,
883                 .maxlen         = sizeof(sysctl_lowmem_reserve_ratio),
884                 .mode           = 0644,
885                 .proc_handler   = &lowmem_reserve_ratio_sysctl_handler,
886                 .strategy       = &sysctl_intvec,
887         },
888         {
889                 .ctl_name       = VM_DROP_PAGECACHE,
890                 .procname       = "drop_caches",
891                 .data           = &sysctl_drop_caches,
892                 .maxlen         = sizeof(int),
893                 .mode           = 0644,
894                 .proc_handler   = drop_caches_sysctl_handler,
895                 .strategy       = &sysctl_intvec,
896         },
897         {
898                 .ctl_name       = VM_MIN_FREE_KBYTES,
899                 .procname       = "min_free_kbytes",
900                 .data           = &min_free_kbytes,
901                 .maxlen         = sizeof(min_free_kbytes),
902                 .mode           = 0644,
903                 .proc_handler   = &min_free_kbytes_sysctl_handler,
904                 .strategy       = &sysctl_intvec,
905                 .extra1         = &zero,
906         },
907         {
908                 .ctl_name       = VM_PERCPU_PAGELIST_FRACTION,
909                 .procname       = "percpu_pagelist_fraction",
910                 .data           = &percpu_pagelist_fraction,
911                 .maxlen         = sizeof(percpu_pagelist_fraction),
912                 .mode           = 0644,
913                 .proc_handler   = &percpu_pagelist_fraction_sysctl_handler,
914                 .strategy       = &sysctl_intvec,
915                 .extra1         = &min_percpu_pagelist_fract,
916         },
917 #ifdef CONFIG_MMU
918         {
919                 .ctl_name       = VM_MAX_MAP_COUNT,
920                 .procname       = "max_map_count",
921                 .data           = &sysctl_max_map_count,
922                 .maxlen         = sizeof(sysctl_max_map_count),
923                 .mode           = 0644,
924                 .proc_handler   = &proc_dointvec
925         },
926 #endif
927         {
928                 .ctl_name       = VM_LAPTOP_MODE,
929                 .procname       = "laptop_mode",
930                 .data           = &laptop_mode,
931                 .maxlen         = sizeof(laptop_mode),
932                 .mode           = 0644,
933                 .proc_handler   = &proc_dointvec_jiffies,
934                 .strategy       = &sysctl_jiffies,
935         },
936         {
937                 .ctl_name       = VM_BLOCK_DUMP,
938                 .procname       = "block_dump",
939                 .data           = &block_dump,
940                 .maxlen         = sizeof(block_dump),
941                 .mode           = 0644,
942                 .proc_handler   = &proc_dointvec,
943                 .strategy       = &sysctl_intvec,
944                 .extra1         = &zero,
945         },
946         {
947                 .ctl_name       = VM_VFS_CACHE_PRESSURE,
948                 .procname       = "vfs_cache_pressure",
949                 .data           = &sysctl_vfs_cache_pressure,
950                 .maxlen         = sizeof(sysctl_vfs_cache_pressure),
951                 .mode           = 0644,
952                 .proc_handler   = &proc_dointvec,
953                 .strategy       = &sysctl_intvec,
954                 .extra1         = &zero,
955         },
956 #ifdef HAVE_ARCH_PICK_MMAP_LAYOUT
957         {
958                 .ctl_name       = VM_LEGACY_VA_LAYOUT,
959                 .procname       = "legacy_va_layout",
960                 .data           = &sysctl_legacy_va_layout,
961                 .maxlen         = sizeof(sysctl_legacy_va_layout),
962                 .mode           = 0644,
963                 .proc_handler   = &proc_dointvec,
964                 .strategy       = &sysctl_intvec,
965                 .extra1         = &zero,
966         },
967 #endif
968 #ifdef CONFIG_NUMA
969         {
970                 .ctl_name       = VM_ZONE_RECLAIM_MODE,
971                 .procname       = "zone_reclaim_mode",
972                 .data           = &zone_reclaim_mode,
973                 .maxlen         = sizeof(zone_reclaim_mode),
974                 .mode           = 0644,
975                 .proc_handler   = &proc_dointvec,
976                 .strategy       = &sysctl_intvec,
977                 .extra1         = &zero,
978         },
979         {
980                 .ctl_name       = VM_MIN_UNMAPPED,
981                 .procname       = "min_unmapped_ratio",
982                 .data           = &sysctl_min_unmapped_ratio,
983                 .maxlen         = sizeof(sysctl_min_unmapped_ratio),
984                 .mode           = 0644,
985                 .proc_handler   = &sysctl_min_unmapped_ratio_sysctl_handler,
986                 .strategy       = &sysctl_intvec,
987                 .extra1         = &zero,
988                 .extra2         = &one_hundred,
989         },
990         {
991                 .ctl_name       = VM_MIN_SLAB,
992                 .procname       = "min_slab_ratio",
993                 .data           = &sysctl_min_slab_ratio,
994                 .maxlen         = sizeof(sysctl_min_slab_ratio),
995                 .mode           = 0644,
996                 .proc_handler   = &sysctl_min_slab_ratio_sysctl_handler,
997                 .strategy       = &sysctl_intvec,
998                 .extra1         = &zero,
999                 .extra2         = &one_hundred,
1000         },
1001 #endif
1002 #ifdef CONFIG_SMP
1003         {
1004                 .ctl_name       = CTL_UNNUMBERED,
1005                 .procname       = "stat_interval",
1006                 .data           = &sysctl_stat_interval,
1007                 .maxlen         = sizeof(sysctl_stat_interval),
1008                 .mode           = 0644,
1009                 .proc_handler   = &proc_dointvec_jiffies,
1010                 .strategy       = &sysctl_jiffies,
1011         },
1012 #endif
1013 #ifdef CONFIG_SECURITY
1014         {
1015                 .ctl_name       = CTL_UNNUMBERED,
1016                 .procname       = "mmap_min_addr",
1017                 .data           = &mmap_min_addr,
1018                 .maxlen         = sizeof(unsigned long),
1019                 .mode           = 0644,
1020                 .proc_handler   = &proc_doulongvec_minmax,
1021         },
1022 #endif
1023 #ifdef CONFIG_NUMA
1024         {
1025                 .ctl_name       = CTL_UNNUMBERED,
1026                 .procname       = "numa_zonelist_order",
1027                 .data           = &numa_zonelist_order,
1028                 .maxlen         = NUMA_ZONELIST_ORDER_LEN,
1029                 .mode           = 0644,
1030                 .proc_handler   = &numa_zonelist_order_handler,
1031                 .strategy       = &sysctl_string,
1032         },
1033 #endif
1034 #if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
1035    (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1036         {
1037                 .ctl_name       = VM_VDSO_ENABLED,
1038                 .procname       = "vdso_enabled",
1039                 .data           = &vdso_enabled,
1040                 .maxlen         = sizeof(vdso_enabled),
1041                 .mode           = 0644,
1042                 .proc_handler   = &proc_dointvec,
1043                 .strategy       = &sysctl_intvec,
1044                 .extra1         = &zero,
1045         },
1046 #endif
1047 /*
1048  * NOTE: do not add new entries to this table unless you have read
1049  * Documentation/sysctl/ctl_unnumbered.txt
1050  */
1051         { .ctl_name = 0 }
1052 };
1053
1054 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1055 static ctl_table binfmt_misc_table[] = {
1056         { .ctl_name = 0 }
1057 };
1058 #endif
1059
1060 static ctl_table fs_table[] = {
1061         {
1062                 .ctl_name       = FS_NRINODE,
1063                 .procname       = "inode-nr",
1064                 .data           = &inodes_stat,
1065                 .maxlen         = 2*sizeof(int),
1066                 .mode           = 0444,
1067                 .proc_handler   = &proc_dointvec,
1068         },
1069         {
1070                 .ctl_name       = FS_STATINODE,
1071                 .procname       = "inode-state",
1072                 .data           = &inodes_stat,
1073                 .maxlen         = 7*sizeof(int),
1074                 .mode           = 0444,
1075                 .proc_handler   = &proc_dointvec,
1076         },
1077         {
1078                 .ctl_name       = FS_NRFILE,
1079                 .procname       = "file-nr",
1080                 .data           = &files_stat,
1081                 .maxlen         = 3*sizeof(int),
1082                 .mode           = 0444,
1083                 .proc_handler   = &proc_nr_files,
1084         },
1085         {
1086                 .ctl_name       = FS_MAXFILE,
1087                 .procname       = "file-max",
1088                 .data           = &files_stat.max_files,
1089                 .maxlen         = sizeof(int),
1090                 .mode           = 0644,
1091                 .proc_handler   = &proc_dointvec,
1092         },
1093         {
1094                 .ctl_name       = FS_DENTRY,
1095                 .procname       = "dentry-state",
1096                 .data           = &dentry_stat,
1097                 .maxlen         = 6*sizeof(int),
1098                 .mode           = 0444,
1099                 .proc_handler   = &proc_dointvec,
1100         },
1101         {
1102                 .ctl_name       = FS_OVERFLOWUID,
1103                 .procname       = "overflowuid",
1104                 .data           = &fs_overflowuid,
1105                 .maxlen         = sizeof(int),
1106                 .mode           = 0644,
1107                 .proc_handler   = &proc_dointvec_minmax,
1108                 .strategy       = &sysctl_intvec,
1109                 .extra1         = &minolduid,
1110                 .extra2         = &maxolduid,
1111         },
1112         {
1113                 .ctl_name       = FS_OVERFLOWGID,
1114                 .procname       = "overflowgid",
1115                 .data           = &fs_overflowgid,
1116                 .maxlen         = sizeof(int),
1117                 .mode           = 0644,
1118                 .proc_handler   = &proc_dointvec_minmax,
1119                 .strategy       = &sysctl_intvec,
1120                 .extra1         = &minolduid,
1121                 .extra2         = &maxolduid,
1122         },
1123         {
1124                 .ctl_name       = FS_LEASES,
1125                 .procname       = "leases-enable",
1126                 .data           = &leases_enable,
1127                 .maxlen         = sizeof(int),
1128                 .mode           = 0644,
1129                 .proc_handler   = &proc_dointvec,
1130         },
1131 #ifdef CONFIG_DNOTIFY
1132         {
1133                 .ctl_name       = FS_DIR_NOTIFY,
1134                 .procname       = "dir-notify-enable",
1135                 .data           = &dir_notify_enable,
1136                 .maxlen         = sizeof(int),
1137                 .mode           = 0644,
1138                 .proc_handler   = &proc_dointvec,
1139         },
1140 #endif
1141 #ifdef CONFIG_MMU
1142         {
1143                 .ctl_name       = FS_LEASE_TIME,
1144                 .procname       = "lease-break-time",
1145                 .data           = &lease_break_time,
1146                 .maxlen         = sizeof(int),
1147                 .mode           = 0644,
1148                 .proc_handler   = &proc_dointvec_minmax,
1149                 .strategy       = &sysctl_intvec,
1150                 .extra1         = &zero,
1151                 .extra2         = &two,
1152         },
1153         {
1154                 .ctl_name       = FS_AIO_NR,
1155                 .procname       = "aio-nr",
1156                 .data           = &aio_nr,
1157                 .maxlen         = sizeof(aio_nr),
1158                 .mode           = 0444,
1159                 .proc_handler   = &proc_doulongvec_minmax,
1160         },
1161         {
1162                 .ctl_name       = FS_AIO_MAX_NR,
1163                 .procname       = "aio-max-nr",
1164                 .data           = &aio_max_nr,
1165                 .maxlen         = sizeof(aio_max_nr),
1166                 .mode           = 0644,
1167                 .proc_handler   = &proc_doulongvec_minmax,
1168         },
1169 #ifdef CONFIG_INOTIFY_USER
1170         {
1171                 .ctl_name       = FS_INOTIFY,
1172                 .procname       = "inotify",
1173                 .mode           = 0555,
1174                 .child          = inotify_table,
1175         },
1176 #endif  
1177 #endif
1178         {
1179                 .ctl_name       = KERN_SETUID_DUMPABLE,
1180                 .procname       = "suid_dumpable",
1181                 .data           = &suid_dumpable,
1182                 .maxlen         = sizeof(int),
1183                 .mode           = 0644,
1184                 .proc_handler   = &proc_dointvec,
1185         },
1186 #if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
1187         {
1188                 .ctl_name       = CTL_UNNUMBERED,
1189                 .procname       = "binfmt_misc",
1190                 .mode           = 0555,
1191                 .child          = binfmt_misc_table,
1192         },
1193 #endif
1194 /*
1195  * NOTE: do not add new entries to this table unless you have read
1196  * Documentation/sysctl/ctl_unnumbered.txt
1197  */
1198         { .ctl_name = 0 }
1199 };
1200
1201 static ctl_table debug_table[] = {
1202 #if defined(CONFIG_X86) || defined(CONFIG_PPC)
1203         {
1204                 .ctl_name       = CTL_UNNUMBERED,
1205                 .procname       = "exception-trace",
1206                 .data           = &show_unhandled_signals,
1207                 .maxlen         = sizeof(int),
1208                 .mode           = 0644,
1209                 .proc_handler   = proc_dointvec
1210         },
1211 #endif
1212         { .ctl_name = 0 }
1213 };
1214
1215 static ctl_table dev_table[] = {
1216         { .ctl_name = 0 }
1217 };
1218
1219 static DEFINE_SPINLOCK(sysctl_lock);
1220
1221 /* called under sysctl_lock */
1222 static int use_table(struct ctl_table_header *p)
1223 {
1224         if (unlikely(p->unregistering))
1225                 return 0;
1226         p->used++;
1227         return 1;
1228 }
1229
1230 /* called under sysctl_lock */
1231 static void unuse_table(struct ctl_table_header *p)
1232 {
1233         if (!--p->used)
1234                 if (unlikely(p->unregistering))
1235                         complete(p->unregistering);
1236 }
1237
1238 /* called under sysctl_lock, will reacquire if has to wait */
1239 static void start_unregistering(struct ctl_table_header *p)
1240 {
1241         /*
1242          * if p->used is 0, nobody will ever touch that entry again;
1243          * we'll eliminate all paths to it before dropping sysctl_lock
1244          */
1245         if (unlikely(p->used)) {
1246                 struct completion wait;
1247                 init_completion(&wait);
1248                 p->unregistering = &wait;
1249                 spin_unlock(&sysctl_lock);
1250                 wait_for_completion(&wait);
1251                 spin_lock(&sysctl_lock);
1252         }
1253         /*
1254          * do not remove from the list until nobody holds it; walking the
1255          * list in do_sysctl() relies on that.
1256          */
1257         list_del_init(&p->ctl_entry);
1258 }
1259
1260 void sysctl_head_finish(struct ctl_table_header *head)
1261 {
1262         if (!head)
1263                 return;
1264         spin_lock(&sysctl_lock);
1265         unuse_table(head);
1266         spin_unlock(&sysctl_lock);
1267 }
1268
1269 struct ctl_table_header *sysctl_head_next(struct ctl_table_header *prev)
1270 {
1271         struct ctl_table_header *head;
1272         struct list_head *tmp;
1273         spin_lock(&sysctl_lock);
1274         if (prev) {
1275                 tmp = &prev->ctl_entry;
1276                 unuse_table(prev);
1277                 goto next;
1278         }
1279         tmp = &root_table_header.ctl_entry;
1280         for (;;) {
1281                 head = list_entry(tmp, struct ctl_table_header, ctl_entry);
1282
1283                 if (!use_table(head))
1284                         goto next;
1285                 spin_unlock(&sysctl_lock);
1286                 return head;
1287         next:
1288                 tmp = tmp->next;
1289                 if (tmp == &root_table_header.ctl_entry)
1290                         break;
1291         }
1292         spin_unlock(&sysctl_lock);
1293         return NULL;
1294 }
1295
1296 #ifdef CONFIG_SYSCTL_SYSCALL
1297 int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp,
1298                void __user *newval, size_t newlen)
1299 {
1300         struct ctl_table_header *head;
1301         int error = -ENOTDIR;
1302
1303         if (nlen <= 0 || nlen >= CTL_MAXNAME)
1304                 return -ENOTDIR;
1305         if (oldval) {
1306                 int old_len;
1307                 if (!oldlenp || get_user(old_len, oldlenp))
1308                         return -EFAULT;
1309         }
1310
1311         for (head = sysctl_head_next(NULL); head;
1312                         head = sysctl_head_next(head)) {
1313                 error = parse_table(name, nlen, oldval, oldlenp, 
1314                                         newval, newlen, head->ctl_table);
1315                 if (error != -ENOTDIR) {
1316                         sysctl_head_finish(head);
1317                         break;
1318                 }
1319         }
1320         return error;
1321 }
1322
1323 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
1324 {
1325         struct __sysctl_args tmp;
1326         int error;
1327
1328         if (copy_from_user(&tmp, args, sizeof(tmp)))
1329                 return -EFAULT;
1330
1331         lock_kernel();
1332         error = do_sysctl(tmp.name, tmp.nlen, tmp.oldval, tmp.oldlenp,
1333                           tmp.newval, tmp.newlen);
1334         unlock_kernel();
1335         return error;
1336 }
1337 #endif /* CONFIG_SYSCTL_SYSCALL */
1338
1339 /*
1340  * sysctl_perm does NOT grant the superuser all rights automatically, because
1341  * some sysctl variables are readonly even to root.
1342  */
1343
1344 static int test_perm(int mode, int op)
1345 {
1346         if (!current->euid)
1347                 mode >>= 6;
1348         else if (in_egroup_p(0))
1349                 mode >>= 3;
1350         if ((mode & op & 0007) == op)
1351                 return 0;
1352         return -EACCES;
1353 }
1354
1355 int sysctl_perm(ctl_table *table, int op)
1356 {
1357         int error;
1358         error = security_sysctl(table, op);
1359         if (error)
1360                 return error;
1361         return test_perm(table->mode, op);
1362 }
1363
1364 #ifdef CONFIG_SYSCTL_SYSCALL
1365 static int parse_table(int __user *name, int nlen,
1366                        void __user *oldval, size_t __user *oldlenp,
1367                        void __user *newval, size_t newlen,
1368                        ctl_table *table)
1369 {
1370         int n;
1371 repeat:
1372         if (!nlen)
1373                 return -ENOTDIR;
1374         if (get_user(n, name))
1375                 return -EFAULT;
1376         for ( ; table->ctl_name || table->procname; table++) {
1377                 if (!table->ctl_name)
1378                         continue;
1379                 if (n == table->ctl_name) {
1380                         int error;
1381                         if (table->child) {
1382                                 if (sysctl_perm(table, 001))
1383                                         return -EPERM;
1384                                 name++;
1385                                 nlen--;
1386                                 table = table->child;
1387                                 goto repeat;
1388                         }
1389                         error = do_sysctl_strategy(table, name, nlen,
1390                                                    oldval, oldlenp,
1391                                                    newval, newlen);
1392                         return error;
1393                 }
1394         }
1395         return -ENOTDIR;
1396 }
1397
1398 /* Perform the actual read/write of a sysctl table entry. */
1399 int do_sysctl_strategy (ctl_table *table, 
1400                         int __user *name, int nlen,
1401                         void __user *oldval, size_t __user *oldlenp,
1402                         void __user *newval, size_t newlen)
1403 {
1404         int op = 0, rc;
1405         size_t len;
1406
1407         if (oldval)
1408                 op |= 004;
1409         if (newval) 
1410                 op |= 002;
1411         if (sysctl_perm(table, op))
1412                 return -EPERM;
1413
1414         if (table->strategy) {
1415                 rc = table->strategy(table, name, nlen, oldval, oldlenp,
1416                                      newval, newlen);
1417                 if (rc < 0)
1418                         return rc;
1419                 if (rc > 0)
1420                         return 0;
1421         }
1422
1423         /* If there is no strategy routine, or if the strategy returns
1424          * zero, proceed with automatic r/w */
1425         if (table->data && table->maxlen) {
1426                 if (oldval && oldlenp) {
1427                         if (get_user(len, oldlenp))
1428                                 return -EFAULT;
1429                         if (len) {
1430                                 if (len > table->maxlen)
1431                                         len = table->maxlen;
1432                                 if(copy_to_user(oldval, table->data, len))
1433                                         return -EFAULT;
1434                                 if(put_user(len, oldlenp))
1435                                         return -EFAULT;
1436                         }
1437                 }
1438                 if (newval && newlen) {
1439                         len = newlen;
1440                         if (len > table->maxlen)
1441                                 len = table->maxlen;
1442                         if(copy_from_user(table->data, newval, len))
1443                                 return -EFAULT;
1444                 }
1445         }
1446         return 0;
1447 }
1448 #endif /* CONFIG_SYSCTL_SYSCALL */
1449
1450 static void sysctl_set_parent(struct ctl_table *parent, struct ctl_table *table)
1451 {
1452         for (; table->ctl_name || table->procname; table++) {
1453                 table->parent = parent;
1454                 if (table->child)
1455                         sysctl_set_parent(table, table->child);
1456         }
1457 }
1458
1459 static __init int sysctl_init(void)
1460 {
1461         sysctl_set_parent(NULL, root_table);
1462         return 0;
1463 }
1464
1465 core_initcall(sysctl_init);
1466
1467 /**
1468  * register_sysctl_table - register a sysctl hierarchy
1469  * @table: the top-level table structure
1470  *
1471  * Register a sysctl table hierarchy. @table should be a filled in ctl_table
1472  * array. An entry with a ctl_name of 0 terminates the table. 
1473  *
1474  * The members of the &ctl_table structure are used as follows:
1475  *
1476  * ctl_name - This is the numeric sysctl value used by sysctl(2). The number
1477  *            must be unique within that level of sysctl
1478  *
1479  * procname - the name of the sysctl file under /proc/sys. Set to %NULL to not
1480  *            enter a sysctl file
1481  *
1482  * data - a pointer to data for use by proc_handler
1483  *
1484  * maxlen - the maximum size in bytes of the data
1485  *
1486  * mode - the file permissions for the /proc/sys file, and for sysctl(2)
1487  *
1488  * child - a pointer to the child sysctl table if this entry is a directory, or
1489  *         %NULL.
1490  *
1491  * proc_handler - the text handler routine (described below)
1492  *
1493  * strategy - the strategy routine (described below)
1494  *
1495  * de - for internal use by the sysctl routines
1496  *
1497  * extra1, extra2 - extra pointers usable by the proc handler routines
1498  *
1499  * Leaf nodes in the sysctl tree will be represented by a single file
1500  * under /proc; non-leaf nodes will be represented by directories.
1501  *
1502  * sysctl(2) can automatically manage read and write requests through
1503  * the sysctl table.  The data and maxlen fields of the ctl_table
1504  * struct enable minimal validation of the values being written to be
1505  * performed, and the mode field allows minimal authentication.
1506  *
1507  * More sophisticated management can be enabled by the provision of a
1508  * strategy routine with the table entry.  This will be called before
1509  * any automatic read or write of the data is performed.
1510  *
1511  * The strategy routine may return
1512  *
1513  * < 0 - Error occurred (error is passed to user process)
1514  *
1515  * 0   - OK - proceed with automatic read or write.
1516  *
1517  * > 0 - OK - read or write has been done by the strategy routine, so
1518  *       return immediately.
1519  *
1520  * There must be a proc_handler routine for any terminal nodes
1521  * mirrored under /proc/sys (non-terminals are handled by a built-in
1522  * directory handler).  Several default handlers are available to
1523  * cover common cases -
1524  *
1525  * proc_dostring(), proc_dointvec(), proc_dointvec_jiffies(),
1526  * proc_dointvec_userhz_jiffies(), proc_dointvec_minmax(), 
1527  * proc_doulongvec_ms_jiffies_minmax(), proc_doulongvec_minmax()
1528  *
1529  * It is the handler's job to read the input buffer from user memory
1530  * and process it. The handler should return 0 on success.
1531  *
1532  * This routine returns %NULL on a failure to register, and a pointer
1533  * to the table header on success.
1534  */
1535 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1536 {
1537         struct ctl_table_header *tmp;
1538         tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
1539         if (!tmp)
1540                 return NULL;
1541         tmp->ctl_table = table;
1542         INIT_LIST_HEAD(&tmp->ctl_entry);
1543         tmp->used = 0;
1544         tmp->unregistering = NULL;
1545         sysctl_set_parent(NULL, table);
1546         spin_lock(&sysctl_lock);
1547         list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
1548         spin_unlock(&sysctl_lock);
1549         return tmp;
1550 }
1551
1552 /**
1553  * unregister_sysctl_table - unregister a sysctl table hierarchy
1554  * @header: the header returned from register_sysctl_table
1555  *
1556  * Unregisters the sysctl table and all children. proc entries may not
1557  * actually be removed until they are no longer used by anyone.
1558  */
1559 void unregister_sysctl_table(struct ctl_table_header * header)
1560 {
1561         might_sleep();
1562         spin_lock(&sysctl_lock);
1563         start_unregistering(header);
1564         spin_unlock(&sysctl_lock);
1565         kfree(header);
1566 }
1567
1568 #else /* !CONFIG_SYSCTL */
1569 struct ctl_table_header *register_sysctl_table(ctl_table * table)
1570 {
1571         return NULL;
1572 }
1573
/* Stub for kernels built without CONFIG_SYSCTL: nothing to undo. */
void unregister_sysctl_table(struct ctl_table_header *table)
{
}
1577
1578 #endif /* CONFIG_SYSCTL */
1579
1580 /*
1581  * /proc/sys support
1582  */
1583
1584 #ifdef CONFIG_PROC_SYSCTL
1585
1586 static int _proc_do_string(void* data, int maxlen, int write,
1587                            struct file *filp, void __user *buffer,
1588                            size_t *lenp, loff_t *ppos)
1589 {
1590         size_t len;
1591         char __user *p;
1592         char c;
1593
1594         if (!data || !maxlen || !*lenp) {
1595                 *lenp = 0;
1596                 return 0;
1597         }
1598
1599         if (write) {
1600                 len = 0;
1601                 p = buffer;
1602                 while (len < *lenp) {
1603                         if (get_user(c, p++))
1604                                 return -EFAULT;
1605                         if (c == 0 || c == '\n')
1606                                 break;
1607                         len++;
1608                 }
1609                 if (len >= maxlen)
1610                         len = maxlen-1;
1611                 if(copy_from_user(data, buffer, len))
1612                         return -EFAULT;
1613                 ((char *) data)[len] = 0;
1614                 *ppos += *lenp;
1615         } else {
1616                 len = strlen(data);
1617                 if (len > maxlen)
1618                         len = maxlen;
1619
1620                 if (*ppos > len) {
1621                         *lenp = 0;
1622                         return 0;
1623                 }
1624
1625                 data += *ppos;
1626                 len  -= *ppos;
1627
1628                 if (len > *lenp)
1629                         len = *lenp;
1630                 if (len)
1631                         if(copy_to_user(buffer, data, len))
1632                                 return -EFAULT;
1633                 if (len < *lenp) {
1634                         if(put_user('\n', ((char __user *) buffer) + len))
1635                                 return -EFAULT;
1636                         len++;
1637                 }
1638                 *lenp = len;
1639                 *ppos += len;
1640         }
1641         return 0;
1642 }
1643
1644 /**
1645  * proc_dostring - read a string sysctl
1646  * @table: the sysctl table
1647  * @write: %TRUE if this is a write to the sysctl file
1648  * @filp: the file structure
1649  * @buffer: the user buffer
1650  * @lenp: the size of the user buffer
1651  * @ppos: file position
1652  *
1653  * Reads/writes a string from/to the user buffer. If the kernel
1654  * buffer provided is not large enough to hold the string, the
1655  * string is truncated. The copied string is %NULL-terminated.
1656  * If the string is being read by the user process, it is copied
1657  * and a newline '\n' is added. It is truncated if the buffer is
1658  * not large enough.
1659  *
1660  * Returns 0 on success.
1661  */
1662 int proc_dostring(ctl_table *table, int write, struct file *filp,
1663                   void __user *buffer, size_t *lenp, loff_t *ppos)
1664 {
1665         return _proc_do_string(table->data, table->maxlen, write, filp,
1666                                buffer, lenp, ppos);
1667 }
1668
1669
/*
 * do_proc_dointvec_conv - default int <-> (sign, magnitude) converter
 * @negp: sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude; read on write, filled in on read
 * @valp: the integer being accessed
 * @write: non-zero to store *@lvalp (negated if *@negp) into *@valp,
 *         zero to split *@valp into *@negp / *@lvalp
 * @data: unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
				 int *valp,
				 int write, void *data)
{
	if (write) {
		*valp = *negp ? -*lvalp : *lvalp;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1688
1689 static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
1690                   int write, struct file *filp, void __user *buffer,
1691                   size_t *lenp, loff_t *ppos,
1692                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1693                               int write, void *data),
1694                   void *data)
1695 {
1696 #define TMPBUFLEN 21
1697         int *i, vleft, first=1, neg, val;
1698         unsigned long lval;
1699         size_t left, len;
1700         
1701         char buf[TMPBUFLEN], *p;
1702         char __user *s = buffer;
1703         
1704         if (!tbl_data || !table->maxlen || !*lenp ||
1705             (*ppos && !write)) {
1706                 *lenp = 0;
1707                 return 0;
1708         }
1709         
1710         i = (int *) tbl_data;
1711         vleft = table->maxlen / sizeof(*i);
1712         left = *lenp;
1713
1714         if (!conv)
1715                 conv = do_proc_dointvec_conv;
1716
1717         for (; left && vleft--; i++, first=0) {
1718                 if (write) {
1719                         while (left) {
1720                                 char c;
1721                                 if (get_user(c, s))
1722                                         return -EFAULT;
1723                                 if (!isspace(c))
1724                                         break;
1725                                 left--;
1726                                 s++;
1727                         }
1728                         if (!left)
1729                                 break;
1730                         neg = 0;
1731                         len = left;
1732                         if (len > sizeof(buf) - 1)
1733                                 len = sizeof(buf) - 1;
1734                         if (copy_from_user(buf, s, len))
1735                                 return -EFAULT;
1736                         buf[len] = 0;
1737                         p = buf;
1738                         if (*p == '-' && left > 1) {
1739                                 neg = 1;
1740                                 p++;
1741                         }
1742                         if (*p < '0' || *p > '9')
1743                                 break;
1744
1745                         lval = simple_strtoul(p, &p, 0);
1746
1747                         len = p-buf;
1748                         if ((len < left) && *p && !isspace(*p))
1749                                 break;
1750                         if (neg)
1751                                 val = -val;
1752                         s += len;
1753                         left -= len;
1754
1755                         if (conv(&neg, &lval, i, 1, data))
1756                                 break;
1757                 } else {
1758                         p = buf;
1759                         if (!first)
1760                                 *p++ = '\t';
1761         
1762                         if (conv(&neg, &lval, i, 0, data))
1763                                 break;
1764
1765                         sprintf(p, "%s%lu", neg ? "-" : "", lval);
1766                         len = strlen(buf);
1767                         if (len > left)
1768                                 len = left;
1769                         if(copy_to_user(s, buf, len))
1770                                 return -EFAULT;
1771                         left -= len;
1772                         s += len;
1773                 }
1774         }
1775
1776         if (!write && !first && left) {
1777                 if(put_user('\n', s))
1778                         return -EFAULT;
1779                 left--, s++;
1780         }
1781         if (write) {
1782                 while (left) {
1783                         char c;
1784                         if (get_user(c, s++))
1785                                 return -EFAULT;
1786                         if (!isspace(c))
1787                                 break;
1788                         left--;
1789                 }
1790         }
1791         if (write && first)
1792                 return -EINVAL;
1793         *lenp -= left;
1794         *ppos += *lenp;
1795         return 0;
1796 #undef TMPBUFLEN
1797 }
1798
1799 static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
1800                   void __user *buffer, size_t *lenp, loff_t *ppos,
1801                   int (*conv)(int *negp, unsigned long *lvalp, int *valp,
1802                               int write, void *data),
1803                   void *data)
1804 {
1805         return __do_proc_dointvec(table->data, table, write, filp,
1806                         buffer, lenp, ppos, conv, data);
1807 }
1808
1809 /**
1810  * proc_dointvec - read a vector of integers
1811  * @table: the sysctl table
1812  * @write: %TRUE if this is a write to the sysctl file
1813  * @filp: the file structure
1814  * @buffer: the user buffer
1815  * @lenp: the size of the user buffer
1816  * @ppos: file position
1817  *
1818  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1819  * values from/to the user buffer, treated as an ASCII string. 
1820  *
1821  * Returns 0 on success.
1822  */
1823 int proc_dointvec(ctl_table *table, int write, struct file *filp,
1824                      void __user *buffer, size_t *lenp, loff_t *ppos)
1825 {
1826     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1827                             NULL,NULL);
1828 }
1829
/* How a written value is combined with the stored one. */
#define OP_SET	0
#define OP_AND	1
#define OP_OR	2

/*
 * do_proc_dointvec_bset_conv - converter that merges writes bitwise
 * @negp: sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude; read on write, filled in on read
 * @valp: the integer being accessed
 * @write: non-zero to combine the written value into *@valp
 * @data: points to an int holding OP_SET, OP_AND or OP_OR
 *
 * On write the new value replaces, is ANDed into, or is ORed into
 * *@valp depending on the operation; any other operation code leaves
 * *@valp untouched. Always returns 0.
 */
static int do_proc_dointvec_bset_conv(int *negp, unsigned long *lvalp,
				      int *valp,
				      int write, void *data)
{
	int op = *(int *)data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		switch(op) {
		case OP_SET:	*valp = val; break;
		case OP_AND:	*valp &= val; break;
		case OP_OR:	*valp |= val; break;
		default:	break;
		}
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1858
1859 /*
1860  *      init may raise the set.
1861  */
1862  
1863 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
1864                         void __user *buffer, size_t *lenp, loff_t *ppos)
1865 {
1866         int op;
1867
1868         if (write && !capable(CAP_SYS_MODULE)) {
1869                 return -EPERM;
1870         }
1871
1872         op = is_init(current) ? OP_SET : OP_AND;
1873         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1874                                 do_proc_dointvec_bset_conv,&op);
1875 }
1876
1877 /*
1878  *      Taint values can only be increased
1879  */
1880 static int proc_dointvec_taint(ctl_table *table, int write, struct file *filp,
1881                                void __user *buffer, size_t *lenp, loff_t *ppos)
1882 {
1883         int op;
1884
1885         if (write && !capable(CAP_SYS_ADMIN))
1886                 return -EPERM;
1887
1888         op = OP_OR;
1889         return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
1890                                 do_proc_dointvec_bset_conv,&op);
1891 }
1892
/* Optional bounds for do_proc_dointvec_minmax_conv(); NULL means "no bound". */
struct do_proc_dointvec_minmax_conv_param {
	int *min;
	int *max;
};

/*
 * do_proc_dointvec_minmax_conv - range-checking int converter
 * @negp: sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude; read on write, filled in on read
 * @valp: the integer being accessed
 * @write: non-zero to validate and store the written value
 * @data: struct do_proc_dointvec_minmax_conv_param with the bounds
 *
 * Returns 0 on success, or -EINVAL (leaving *@valp untouched) when a
 * written value lies outside the configured range.
 */
static int do_proc_dointvec_minmax_conv(int *negp, unsigned long *lvalp,
					int *valp,
					int write, void *data)
{
	struct do_proc_dointvec_minmax_conv_param *param = data;
	if (write) {
		int val = *negp ? -*lvalp : *lvalp;
		if ((param->min && *param->min > val) ||
		    (param->max && *param->max < val))
			return -EINVAL;
		*valp = val;
	} else {
		int val = *valp;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would yield a bogus magnitude.
			 */
			*lvalp = -(unsigned long)val;
		} else {
			*negp = 0;
			*lvalp = (unsigned long)val;
		}
	}
	return 0;
}
1921
1922 /**
1923  * proc_dointvec_minmax - read a vector of integers with min/max values
1924  * @table: the sysctl table
1925  * @write: %TRUE if this is a write to the sysctl file
1926  * @filp: the file structure
1927  * @buffer: the user buffer
1928  * @lenp: the size of the user buffer
1929  * @ppos: file position
1930  *
1931  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
1932  * values from/to the user buffer, treated as an ASCII string.
1933  *
1934  * This routine will ensure the values are within the range specified by
1935  * table->extra1 (min) and table->extra2 (max).
1936  *
1937  * Returns 0 on success.
1938  */
1939 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
1940                   void __user *buffer, size_t *lenp, loff_t *ppos)
1941 {
1942         struct do_proc_dointvec_minmax_conv_param param = {
1943                 .min = (int *) table->extra1,
1944                 .max = (int *) table->extra2,
1945         };
1946         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
1947                                 do_proc_dointvec_minmax_conv, &param);
1948 }
1949
1950 static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
1951                                      struct file *filp,
1952                                      void __user *buffer,
1953                                      size_t *lenp, loff_t *ppos,
1954                                      unsigned long convmul,
1955                                      unsigned long convdiv)
1956 {
1957 #define TMPBUFLEN 21
1958         unsigned long *i, *min, *max, val;
1959         int vleft, first=1, neg;
1960         size_t len, left;
1961         char buf[TMPBUFLEN], *p;
1962         char __user *s = buffer;
1963         
1964         if (!data || !table->maxlen || !*lenp ||
1965             (*ppos && !write)) {
1966                 *lenp = 0;
1967                 return 0;
1968         }
1969         
1970         i = (unsigned long *) data;
1971         min = (unsigned long *) table->extra1;
1972         max = (unsigned long *) table->extra2;
1973         vleft = table->maxlen / sizeof(unsigned long);
1974         left = *lenp;
1975         
1976         for (; left && vleft--; i++, min++, max++, first=0) {
1977                 if (write) {
1978                         while (left) {
1979                                 char c;
1980                                 if (get_user(c, s))
1981                                         return -EFAULT;
1982                                 if (!isspace(c))
1983                                         break;
1984                                 left--;
1985                                 s++;
1986                         }
1987                         if (!left)
1988                                 break;
1989                         neg = 0;
1990                         len = left;
1991                         if (len > TMPBUFLEN-1)
1992                                 len = TMPBUFLEN-1;
1993                         if (copy_from_user(buf, s, len))
1994                                 return -EFAULT;
1995                         buf[len] = 0;
1996                         p = buf;
1997                         if (*p == '-' && left > 1) {
1998                                 neg = 1;
1999                                 p++;
2000                         }
2001                         if (*p < '0' || *p > '9')
2002                                 break;
2003                         val = simple_strtoul(p, &p, 0) * convmul / convdiv ;
2004                         len = p-buf;
2005                         if ((len < left) && *p && !isspace(*p))
2006                                 break;
2007                         if (neg)
2008                                 val = -val;
2009                         s += len;
2010                         left -= len;
2011
2012                         if(neg)
2013                                 continue;
2014                         if ((min && val < *min) || (max && val > *max))
2015                                 continue;
2016                         *i = val;
2017                 } else {
2018                         p = buf;
2019                         if (!first)
2020                                 *p++ = '\t';
2021                         sprintf(p, "%lu", convdiv * (*i) / convmul);
2022                         len = strlen(buf);
2023                         if (len > left)
2024                                 len = left;
2025                         if(copy_to_user(s, buf, len))
2026                                 return -EFAULT;
2027                         left -= len;
2028                         s += len;
2029                 }
2030         }
2031
2032         if (!write && !first && left) {
2033                 if(put_user('\n', s))
2034                         return -EFAULT;
2035                 left--, s++;
2036         }
2037         if (write) {
2038                 while (left) {
2039                         char c;
2040                         if (get_user(c, s++))
2041                                 return -EFAULT;
2042                         if (!isspace(c))
2043                                 break;
2044                         left--;
2045                 }
2046         }
2047         if (write && first)
2048                 return -EINVAL;
2049         *lenp -= left;
2050         *ppos += *lenp;
2051         return 0;
2052 #undef TMPBUFLEN
2053 }
2054
2055 static int do_proc_doulongvec_minmax(ctl_table *table, int write,
2056                                      struct file *filp,
2057                                      void __user *buffer,
2058                                      size_t *lenp, loff_t *ppos,
2059                                      unsigned long convmul,
2060                                      unsigned long convdiv)
2061 {
2062         return __do_proc_doulongvec_minmax(table->data, table, write,
2063                         filp, buffer, lenp, ppos, convmul, convdiv);
2064 }
2065
2066 /**
2067  * proc_doulongvec_minmax - read a vector of long integers with min/max values
2068  * @table: the sysctl table
2069  * @write: %TRUE if this is a write to the sysctl file
2070  * @filp: the file structure
2071  * @buffer: the user buffer
2072  * @lenp: the size of the user buffer
2073  * @ppos: file position
2074  *
2075  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2076  * values from/to the user buffer, treated as an ASCII string.
2077  *
2078  * This routine will ensure the values are within the range specified by
2079  * table->extra1 (min) and table->extra2 (max).
2080  *
2081  * Returns 0 on success.
2082  */
2083 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2084                            void __user *buffer, size_t *lenp, loff_t *ppos)
2085 {
2086     return do_proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos, 1l, 1l);
2087 }
2088
2089 /**
2090  * proc_doulongvec_ms_jiffies_minmax - read a vector of millisecond values with min/max values
2091  * @table: the sysctl table
2092  * @write: %TRUE if this is a write to the sysctl file
2093  * @filp: the file structure
2094  * @buffer: the user buffer
2095  * @lenp: the size of the user buffer
2096  * @ppos: file position
2097  *
2098  * Reads/writes up to table->maxlen/sizeof(unsigned long) unsigned long
2099  * values from/to the user buffer, treated as an ASCII string. The values
2100  * are treated as milliseconds, and converted to jiffies when they are stored.
2101  *
2102  * This routine will ensure the values are within the range specified by
2103  * table->extra1 (min) and table->extra2 (max).
2104  *
2105  * Returns 0 on success.
2106  */
2107 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2108                                       struct file *filp,
2109                                       void __user *buffer,
2110                                       size_t *lenp, loff_t *ppos)
2111 {
2112     return do_proc_doulongvec_minmax(table, write, filp, buffer,
2113                                      lenp, ppos, HZ, 1000l);
2114 }
2115
2116
2117 static int do_proc_dointvec_jiffies_conv(int *negp, unsigned long *lvalp,
2118                                          int *valp,
2119                                          int write, void *data)
2120 {
2121         if (write) {
2122                 if (*lvalp > LONG_MAX / HZ)
2123                         return 1;
2124                 *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
2125         } else {
2126                 int val = *valp;
2127                 unsigned long lval;
2128                 if (val < 0) {
2129                         *negp = -1;
2130                         lval = (unsigned long)-val;
2131                 } else {
2132                         *negp = 0;
2133                         lval = (unsigned long)val;
2134                 }
2135                 *lvalp = lval / HZ;
2136         }
2137         return 0;
2138 }
2139
2140 static int do_proc_dointvec_userhz_jiffies_conv(int *negp, unsigned long *lvalp,
2141                                                 int *valp,
2142                                                 int write, void *data)
2143 {
2144         if (write) {
2145                 if (USER_HZ < HZ && *lvalp > (LONG_MAX / HZ) * USER_HZ)
2146                         return 1;
2147                 *valp = clock_t_to_jiffies(*negp ? -*lvalp : *lvalp);
2148         } else {
2149                 int val = *valp;
2150                 unsigned long lval;
2151                 if (val < 0) {
2152                         *negp = -1;
2153                         lval = (unsigned long)-val;
2154                 } else {
2155                         *negp = 0;
2156                         lval = (unsigned long)val;
2157                 }
2158                 *lvalp = jiffies_to_clock_t(lval);
2159         }
2160         return 0;
2161 }
2162
/*
 * do_proc_dointvec_ms_jiffies_conv - milliseconds (user) <-> jiffies
 * @negp: sign flag; read on write, set to -1 (negative) or 0 on read
 * @lvalp: magnitude in milliseconds
 * @valp: the stored jiffies value
 * @write: non-zero to convert via msecs_to_jiffies() and store
 * @data: unused
 *
 * Always returns 0.
 */
static int do_proc_dointvec_ms_jiffies_conv(int *negp, unsigned long *lvalp,
					    int *valp,
					    int write, void *data)
{
	if (write) {
		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
	} else {
		int val = *valp;
		unsigned long lval;
		if (val < 0) {
			*negp = -1;
			/*
			 * Negate in unsigned arithmetic: "-val" overflows
			 * for INT_MIN and would yield a bogus magnitude.
			 */
			lval = -(unsigned long)val;
		} else {
			*negp = 0;
			lval = (unsigned long)val;
		}
		*lvalp = jiffies_to_msecs(lval);
	}
	return 0;
}
2183
2184 /**
2185  * proc_dointvec_jiffies - read a vector of integers as seconds
2186  * @table: the sysctl table
2187  * @write: %TRUE if this is a write to the sysctl file
2188  * @filp: the file structure
2189  * @buffer: the user buffer
2190  * @lenp: the size of the user buffer
2191  * @ppos: file position
2192  *
2193  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2194  * values from/to the user buffer, treated as an ASCII string. 
2195  * The values read are assumed to be in seconds, and are converted into
2196  * jiffies.
2197  *
2198  * Returns 0 on success.
2199  */
2200 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2201                           void __user *buffer, size_t *lenp, loff_t *ppos)
2202 {
2203     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2204                             do_proc_dointvec_jiffies_conv,NULL);
2205 }
2206
2207 /**
2208  * proc_dointvec_userhz_jiffies - read a vector of integers as 1/USER_HZ seconds
2209  * @table: the sysctl table
2210  * @write: %TRUE if this is a write to the sysctl file
2211  * @filp: the file structure
2212  * @buffer: the user buffer
2213  * @lenp: the size of the user buffer
2214  * @ppos: pointer to the file position
2215  *
2216  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2217  * values from/to the user buffer, treated as an ASCII string. 
2218  * The values read are assumed to be in 1/USER_HZ seconds, and 
2219  * are converted into jiffies.
2220  *
2221  * Returns 0 on success.
2222  */
2223 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2224                                  void __user *buffer, size_t *lenp, loff_t *ppos)
2225 {
2226     return do_proc_dointvec(table,write,filp,buffer,lenp,ppos,
2227                             do_proc_dointvec_userhz_jiffies_conv,NULL);
2228 }
2229
2230 /**
2231  * proc_dointvec_ms_jiffies - read a vector of integers as 1 milliseconds
2232  * @table: the sysctl table
2233  * @write: %TRUE if this is a write to the sysctl file
2234  * @filp: the file structure
2235  * @buffer: the user buffer
2236  * @lenp: the size of the user buffer
2237  * @ppos: file position
2238  * @ppos: the current position in the file
2239  *
2240  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
2241  * values from/to the user buffer, treated as an ASCII string. 
2242  * The values read are assumed to be in 1/1000 seconds, and 
2243  * are converted into jiffies.
2244  *
2245  * Returns 0 on success.
2246  */
2247 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2248                              void __user *buffer, size_t *lenp, loff_t *ppos)
2249 {
2250         return do_proc_dointvec(table, write, filp, buffer, lenp, ppos,
2251                                 do_proc_dointvec_ms_jiffies_conv, NULL);
2252 }
2253
2254 static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
2255                            void __user *buffer, size_t *lenp, loff_t *ppos)
2256 {
2257         struct pid *new_pid;
2258         pid_t tmp;
2259         int r;
2260
2261         tmp = pid_nr(cad_pid);
2262
2263         r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
2264                                lenp, ppos, NULL, NULL);
2265         if (r || !write)
2266                 return r;
2267
2268         new_pid = find_get_pid(tmp);
2269         if (!new_pid)
2270                 return -ESRCH;
2271
2272         put_pid(xchg(&cad_pid, new_pid));
2273         return 0;
2274 }
2275
2276 #else /* CONFIG_PROC_FS */
2277
2278 int proc_dostring(ctl_table *table, int write, struct file *filp,
2279                   void __user *buffer, size_t *lenp, loff_t *ppos)
2280 {
2281         return -ENOSYS;
2282 }
2283
2284 int proc_dointvec(ctl_table *table, int write, struct file *filp,
2285                   void __user *buffer, size_t *lenp, loff_t *ppos)
2286 {
2287         return -ENOSYS;
2288 }
2289
2290 int proc_dointvec_bset(ctl_table *table, int write, struct file *filp,
2291                         void __user *buffer, size_t *lenp, loff_t *ppos)
2292 {
2293         return -ENOSYS;
2294 }
2295
2296 int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
2297                     void __user *buffer, size_t *lenp, loff_t *ppos)
2298 {
2299         return -ENOSYS;
2300 }
2301
2302 int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
2303                     void __user *buffer, size_t *lenp, loff_t *ppos)
2304 {
2305         return -ENOSYS;
2306 }
2307
2308 int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
2309                     void __user *buffer, size_t *lenp, loff_t *ppos)
2310 {
2311         return -ENOSYS;
2312 }
2313
2314 int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
2315                              void __user *buffer, size_t *lenp, loff_t *ppos)
2316 {
2317         return -ENOSYS;
2318 }
2319
2320 int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp,
2321                     void __user *buffer, size_t *lenp, loff_t *ppos)
2322 {
2323         return -ENOSYS;
2324 }
2325
2326 int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write,
2327                                       struct file *filp,
2328                                       void __user *buffer,
2329                                       size_t *lenp, loff_t *ppos)
2330 {
2331     return -ENOSYS;
2332 }
2333
2334
2335 #endif /* CONFIG_PROC_FS */
2336
2337
2338 #ifdef CONFIG_SYSCTL_SYSCALL
2339 /*
2340  * General sysctl support routines 
2341  */
2342
2343 /* The generic string strategy routine: */
2344 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2345                   void __user *oldval, size_t __user *oldlenp,
2346                   void __user *newval, size_t newlen)
2347 {
2348         if (!table->data || !table->maxlen) 
2349                 return -ENOTDIR;
2350         
2351         if (oldval && oldlenp) {
2352                 size_t bufsize;
2353                 if (get_user(bufsize, oldlenp))
2354                         return -EFAULT;
2355                 if (bufsize) {
2356                         size_t len = strlen(table->data), copied;
2357
2358                         /* This shouldn't trigger for a well-formed sysctl */
2359                         if (len > table->maxlen)
2360                                 len = table->maxlen;
2361
2362                         /* Copy up to a max of bufsize-1 bytes of the string */
2363                         copied = (len >= bufsize) ? bufsize - 1 : len;
2364
2365                         if (copy_to_user(oldval, table->data, copied) ||
2366                             put_user(0, (char __user *)(oldval + copied)))
2367                                 return -EFAULT;
2368                         if (put_user(len, oldlenp))
2369                                 return -EFAULT;
2370                 }
2371         }
2372         if (newval && newlen) {
2373                 size_t len = newlen;
2374                 if (len > table->maxlen)
2375                         len = table->maxlen;
2376                 if(copy_from_user(table->data, newval, len))
2377                         return -EFAULT;
2378                 if (len == table->maxlen)
2379                         len--;
2380                 ((char *) table->data)[len] = 0;
2381         }
2382         return 1;
2383 }
2384
2385 /*
2386  * This function makes sure that all of the integers in the vector
2387  * are between the minimum and maximum values given in the arrays
2388  * table->extra1 and table->extra2, respectively.
2389  */
2390 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2391                 void __user *oldval, size_t __user *oldlenp,
2392                 void __user *newval, size_t newlen)
2393 {
2394
2395         if (newval && newlen) {
2396                 int __user *vec = (int __user *) newval;
2397                 int *min = (int *) table->extra1;
2398                 int *max = (int *) table->extra2;
2399                 size_t length;
2400                 int i;
2401
2402                 if (newlen % sizeof(int) != 0)
2403                         return -EINVAL;
2404
2405                 if (!table->extra1 && !table->extra2)
2406                         return 0;
2407
2408                 if (newlen > table->maxlen)
2409                         newlen = table->maxlen;
2410                 length = newlen / sizeof(int);
2411
2412                 for (i = 0; i < length; i++) {
2413                         int value;
2414                         if (get_user(value, vec + i))
2415                                 return -EFAULT;
2416                         if (min && value < min[i])
2417                                 return -EINVAL;
2418                         if (max && value > max[i])
2419                                 return -EINVAL;
2420                 }
2421         }
2422         return 0;
2423 }
2424
2425 /* Strategy function to convert jiffies to seconds */ 
2426 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2427                 void __user *oldval, size_t __user *oldlenp,
2428                 void __user *newval, size_t newlen)
2429 {
2430         if (oldval && oldlenp) {
2431                 size_t olen;
2432
2433                 if (get_user(olen, oldlenp))
2434                         return -EFAULT;
2435                 if (olen) {
2436                         int val;
2437
2438                         if (olen < sizeof(int))
2439                                 return -EINVAL;
2440
2441                         val = *(int *)(table->data) / HZ;
2442                         if (put_user(val, (int __user *)oldval))
2443                                 return -EFAULT;
2444                         if (put_user(sizeof(int), oldlenp))
2445                                 return -EFAULT;
2446                 }
2447         }
2448         if (newval && newlen) { 
2449                 int new;
2450                 if (newlen != sizeof(int))
2451                         return -EINVAL; 
2452                 if (get_user(new, (int __user *)newval))
2453                         return -EFAULT;
2454                 *(int *)(table->data) = new*HZ; 
2455         }
2456         return 1;
2457 }
2458
2459 /* Strategy function to convert jiffies to seconds */ 
2460 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2461                 void __user *oldval, size_t __user *oldlenp,
2462                 void __user *newval, size_t newlen)
2463 {
2464         if (oldval && oldlenp) {
2465                 size_t olen;
2466
2467                 if (get_user(olen, oldlenp))
2468                         return -EFAULT;
2469                 if (olen) {
2470                         int val;
2471
2472                         if (olen < sizeof(int))
2473                                 return -EINVAL;
2474
2475                         val = jiffies_to_msecs(*(int *)(table->data));
2476                         if (put_user(val, (int __user *)oldval))
2477                                 return -EFAULT;
2478                         if (put_user(sizeof(int), oldlenp))
2479                                 return -EFAULT;
2480                 }
2481         }
2482         if (newval && newlen) { 
2483                 int new;
2484                 if (newlen != sizeof(int))
2485                         return -EINVAL; 
2486                 if (get_user(new, (int __user *)newval))
2487                         return -EFAULT;
2488                 *(int *)(table->data) = msecs_to_jiffies(new);
2489         }
2490         return 1;
2491 }
2492
2493
2494
2495 #else /* CONFIG_SYSCTL_SYSCALL */
2496
2497
2498 asmlinkage long sys_sysctl(struct __sysctl_args __user *args)
2499 {
2500         static int msg_count;
2501         struct __sysctl_args tmp;
2502         int name[CTL_MAXNAME];
2503         int i;
2504
2505         /* Read in the sysctl name for better debug message logging */
2506         if (copy_from_user(&tmp, args, sizeof(tmp)))
2507                 return -EFAULT;
2508         if (tmp.nlen <= 0 || tmp.nlen >= CTL_MAXNAME)
2509                 return -ENOTDIR;
2510         for (i = 0; i < tmp.nlen; i++)
2511                 if (get_user(name[i], tmp.name + i))
2512                         return -EFAULT;
2513
2514         /* Ignore accesses to kernel.version */
2515         if ((tmp.nlen == 2) && (name[0] == CTL_KERN) && (name[1] == KERN_VERSION))
2516                 goto out;
2517
2518         if (msg_count < 5) {
2519                 msg_count++;
2520                 printk(KERN_INFO
2521                         "warning: process `%s' used the removed sysctl "
2522                         "system call with ", current->comm);
2523                 for (i = 0; i < tmp.nlen; i++)
2524                         printk("%d.", name[i]);
2525                 printk("\n");
2526         }
2527 out:
2528         return -ENOSYS;
2529 }
2530
2531 int sysctl_string(ctl_table *table, int __user *name, int nlen,
2532                   void __user *oldval, size_t __user *oldlenp,
2533                   void __user *newval, size_t newlen)
2534 {
2535         return -ENOSYS;
2536 }
2537
2538 int sysctl_intvec(ctl_table *table, int __user *name, int nlen,
2539                 void __user *oldval, size_t __user *oldlenp,
2540                 void __user *newval, size_t newlen)
2541 {
2542         return -ENOSYS;
2543 }
2544
2545 int sysctl_jiffies(ctl_table *table, int __user *name, int nlen,
2546                 void __user *oldval, size_t __user *oldlenp,
2547                 void __user *newval, size_t newlen)
2548 {
2549         return -ENOSYS;
2550 }
2551
2552 int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen,
2553                 void __user *oldval, size_t __user *oldlenp,
2554                 void __user *newval, size_t newlen)
2555 {
2556         return -ENOSYS;
2557 }
2558
2559 #endif /* CONFIG_SYSCTL_SYSCALL */
2560
/*
 * Symbol exports are collected here in one place instead of following each
 * definition: some of these symbols (the sysctl_* strategy routines, for
 * instance) have two alternative definitions under preprocessor
 * conditionals, and exporting after each one would mean writing every
 * export twice.
 */

/* Table registration */
EXPORT_SYMBOL(register_sysctl_table);
EXPORT_SYMBOL(unregister_sysctl_table);

/* proc_* handlers */
EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);

/* sysctl_* strategy routines */
EXPORT_SYMBOL(sysctl_intvec);
EXPORT_SYMBOL(sysctl_jiffies);
EXPORT_SYMBOL(sysctl_ms_jiffies);
EXPORT_SYMBOL(sysctl_string);