Merge branch 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus
authorLinus Torvalds <torvalds@g5.osdl.org>
Mon, 2 Oct 2006 15:18:43 +0000 (08:18 -0700)
committerLinus Torvalds <torvalds@g5.osdl.org>
Mon, 2 Oct 2006 15:18:43 +0000 (08:18 -0700)
* 'upstream' of git://ftp.linux-mips.org/pub/scm/upstream-linus:
  [MIPS] Remove unused galileo-boards header files
  [MIPS] Rename SERIAL_PORT_DEFNS for EV64120
  [MIPS] Add UART IRQ number for EV64120
  [MIPS] Remove excite_flash.c
  [MIPS] Update i8259 resources.
  [MIPS] Make unwind_stack() can dig into interrupted context
  [MIPS] Stacktrace build-fix and improvement
  [MIPS] QEMU: Add support for little endian mips
  [MIPS] Remove __flush_icache_page
  [MIPS] lockdep: update defconfigs
  [MIPS] lockdep: Add STACKTRACE_SUPPORT and enable LOCKDEP_SUPPORT
  [MIPS] lockdep: fix TRACE_IRQFLAGS_SUPPORT

296 files changed:
Documentation/kprobes.txt
arch/alpha/kernel/alpha_ksyms.c
arch/alpha/kernel/entry.S
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/srmcons.c
arch/arm/kernel/setup.c
arch/arm/kernel/smp.c
arch/arm/kernel/sys_arm.c
arch/arm26/kernel/setup.c
arch/arm26/kernel/sys_arm.c
arch/avr32/kernel/sys_avr32.c
arch/cris/arch-v32/kernel/smp.c
arch/cris/kernel/setup.c
arch/frv/kernel/Makefile
arch/frv/kernel/kernel_execve.S [new file with mode: 0644]
arch/h8300/kernel/sys_h8300.c
arch/i386/Kconfig
arch/i386/kernel/kprobes.c
arch/i386/kernel/process.c
arch/i386/kernel/smpboot.c
arch/i386/kernel/sys_i386.c
arch/i386/kernel/traps.c
arch/ia64/Kconfig
arch/ia64/hp/sim/simserial.c
arch/ia64/kernel/entry.S
arch/ia64/kernel/kprobes.c
arch/ia64/kernel/numa.c
arch/ia64/kernel/process.c
arch/ia64/sn/kernel/sn2/sn_hwperf.c
arch/m32r/kernel/sys_m32r.c
arch/m68k/kernel/sys_m68k.c
arch/m68knommu/kernel/sys_m68k.c
arch/mips/kernel/linux32.c
arch/mips/kernel/signal_n32.c
arch/mips/kernel/syscall.c
arch/mips/kernel/sysirix.c
arch/mips/sgi-ip22/ip22-reset.c
arch/mips/sgi-ip32/ip32-reset.c
arch/parisc/hpux/sys_hpux.c
arch/parisc/kernel/process.c
arch/powerpc/Kconfig
arch/powerpc/kernel/kprobes.c
arch/powerpc/kernel/misc_32.S
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/process.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/syscalls.c
arch/powerpc/platforms/iseries/mf.c
arch/powerpc/platforms/pseries/setup.c
arch/ppc/4xx_io/serial_sicc.c
arch/ppc/kernel/misc.S
arch/s390/kernel/kprobes.c
arch/s390/kernel/sys_s390.c
arch/sh/kernel/setup.c
arch/sh/kernel/smp.c
arch/sh/kernel/sys_sh.c
arch/sh64/kernel/process.c
arch/sh64/kernel/sys_sh64.c
arch/sparc/kernel/sys_sparc.c
arch/sparc/kernel/sys_sunos.c
arch/sparc64/Kconfig
arch/sparc64/kernel/power.c
arch/sparc64/kernel/sys_sparc.c
arch/sparc64/kernel/sys_sunos32.c
arch/sparc64/solaris/misc.c
arch/um/drivers/line.c
arch/um/drivers/mconsole_kern.c
arch/um/include/line.h
arch/um/kernel/syscall.c
arch/um/kernel/um_arch.c
arch/um/os-Linux/process.c
arch/um/os-Linux/sys-i386/tls.c
arch/um/os-Linux/tls.c
arch/um/sys-i386/unmap.c
arch/um/sys-x86_64/syscalls.c
arch/um/sys-x86_64/sysrq.c
arch/um/sys-x86_64/unmap.c
arch/v850/kernel/memcons.c
arch/v850/kernel/simcons.c
arch/v850/kernel/syscalls.c
arch/x86_64/Kconfig
arch/x86_64/ia32/sys_ia32.c
arch/x86_64/kernel/entry.S
arch/x86_64/kernel/kprobes.c
arch/x86_64/kernel/process.c
arch/x86_64/kernel/sys_x86_64.c
arch/xtensa/kernel/syscalls.c
arch/xtensa/platform-iss/console.c
drivers/char/amiserial.c
drivers/char/cyclades.c
drivers/char/epca.c
drivers/char/esp.c
drivers/char/hvc_console.c
drivers/char/hvcs.c
drivers/char/hvsi.c
drivers/char/ip2/ip2main.c
drivers/char/isicom.c
drivers/char/istallion.c
drivers/char/keyboard.c
drivers/char/moxa.c
drivers/char/mxser.c
drivers/char/nwbutton.c
drivers/char/pcmcia/synclink_cs.c
drivers/char/pty.c
drivers/char/random.c
drivers/char/rio/rio_linux.c
drivers/char/riscom8.c
drivers/char/rocket.c
drivers/char/ser_a2232.c
drivers/char/serial167.c
drivers/char/snsc_event.c
drivers/char/specialix.c
drivers/char/stallion.c
drivers/char/sx.c
drivers/char/synclink.c
drivers/char/synclink_gt.c
drivers/char/synclinkmp.c
drivers/char/tty_io.c
drivers/char/viocons.c
drivers/char/vme_scc.c
drivers/char/vt.c
drivers/char/vt_ioctl.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/isdn/capi/capi.c
drivers/isdn/gigaset/interface.c
drivers/isdn/gigaset/proc.c
drivers/isdn/hisax/hisax.h
drivers/isdn/i4l/isdn_tty.c
drivers/media/dvb/dvb-core/dvb_ringbuffer.c
drivers/misc/Makefile
drivers/misc/lkdtm.c [new file with mode: 0644]
drivers/net/tun.c
drivers/net/wireless/ipw2100.c
drivers/parisc/led.c
drivers/parisc/power.c
drivers/s390/char/con3215.c
drivers/s390/char/fs3270.c
drivers/s390/char/sclp_tty.c
drivers/s390/char/sclp_vt220.c
drivers/s390/char/tty3270.c
drivers/s390/s390mach.c
drivers/sbus/char/aurora.c
drivers/sbus/char/bbc_envctrl.c
drivers/sbus/char/envctrl.c
drivers/scsi/lpfc/lpfc_ct.c
drivers/serial/68328serial.c
drivers/serial/68360serial.c
drivers/serial/crisv10.c
drivers/serial/mcfserial.c
drivers/serial/serial_core.c
drivers/tc/zs.c
drivers/usb/class/cdc-acm.c
drivers/usb/core/devio.c
drivers/usb/core/hcd.c
drivers/usb/core/inode.c
drivers/usb/core/usb.h
drivers/usb/gadget/ether.c
drivers/usb/gadget/file_storage.c
drivers/usb/gadget/gmidi.c
drivers/usb/gadget/serial.c
drivers/usb/gadget/zero.c
drivers/usb/serial/usb-serial.c
fs/cifs/connect.c
fs/cifs/sess.c
fs/compat.c
fs/dnotify.c
fs/exec.c
fs/fcntl.c
fs/file_table.c
fs/inode.c
fs/lockd/clntlock.c
fs/lockd/clntproc.c
fs/lockd/mon.c
fs/lockd/svc.c
fs/lockd/svclock.c
fs/lockd/xdr.c
fs/locks.c
fs/namespace.c
fs/nfs/callback.c
fs/nfs/client.c
fs/nfs/nfsroot.c
fs/nfsd/export.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfsctl.c
fs/nfsd/nfssvc.c
fs/nfsd/vfs.c
fs/nls/nls_base.c
fs/proc/array.c
fs/proc/base.c
fs/proc/proc_misc.c
fs/proc/root.c
include/asm-alpha/unistd.h
include/asm-arm/unistd.h
include/asm-arm26/unistd.h
include/asm-avr32/unistd.h
include/asm-cris/unistd.h
include/asm-frv/unistd.h
include/asm-h8300/unistd.h
include/asm-i386/bugs.h
include/asm-i386/elf.h
include/asm-i386/ptrace.h
include/asm-i386/unistd.h
include/asm-ia64/ptrace.h
include/asm-ia64/unistd.h
include/asm-m32r/unistd.h
include/asm-m68k/unistd.h
include/asm-m68knommu/unistd.h
include/asm-mips/unistd.h
include/asm-parisc/unistd.h
include/asm-powerpc/kprobes.h
include/asm-powerpc/ptrace.h
include/asm-powerpc/unistd.h
include/asm-s390/ptrace.h
include/asm-s390/unistd.h
include/asm-sh/bugs.h
include/asm-sh/unistd.h
include/asm-sh64/unistd.h
include/asm-sparc/unistd.h
include/asm-sparc64/unistd.h
include/asm-um/unistd.h
include/asm-v850/unistd.h
include/asm-x86_64/ptrace.h
include/asm-x86_64/unistd.h
include/asm-xtensa/unistd.h
include/linux/compat.h
include/linux/console_struct.h
include/linux/fs.h
include/linux/genalloc.h
include/linux/init_task.h
include/linux/ipc.h
include/linux/kprobes.h
include/linux/lockd/bind.h
include/linux/lockd/lockd.h
include/linux/module.h
include/linux/namespace.h
include/linux/nfsd/nfsd.h
include/linux/nfsd/nfsfh.h
include/linux/nfsd/syscall.h
include/linux/nodemask.h
include/linux/nsproxy.h [new file with mode: 0644]
include/linux/pid.h
include/linux/proc_fs.h
include/linux/pspace.h [new file with mode: 0644]
include/linux/sched.h
include/linux/sunrpc/svc.h
include/linux/sunrpc/svcsock.h
include/linux/syscalls.h
include/linux/tty_driver.h
include/linux/unistd.h
include/linux/utsname.h
include/linux/vt_kern.h
init/Kconfig
init/do_mounts_initrd.c
init/main.c
init/version.c
ipc/mqueue.c
ipc/msg.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/Makefile
kernel/compat.c
kernel/exit.c
kernel/fork.c
kernel/futex.c
kernel/kallsyms.c
kernel/kmod.c
kernel/kprobes.c
kernel/lockdep.c
kernel/module.c
kernel/nsproxy.c [new file with mode: 0644]
kernel/pid.c
kernel/power/snapshot.c
kernel/sched.c
kernel/signal.c
kernel/sys.c
kernel/sysctl.c
kernel/utsname.c [new file with mode: 0644]
lib/Kconfig.debug
lib/Makefile
lib/cpumask.c
lib/errno.c [deleted file]
lib/genalloc.c
net/bluetooth/rfcomm/tty.c
net/ipv4/ipconfig.c
net/ipv4/tcp_probe.c
net/irda/ircomm/ircomm_tty.c
net/socket.c
net/sunrpc/clnt.c
net/sunrpc/sunrpc_syms.c
net/sunrpc/svc.c
net/sunrpc/svcauth_unix.c
net/sunrpc/svcsock.c
sound/core/info_oss.c

index 2c3b1ea..ba26201 100644 (file)
@@ -151,9 +151,9 @@ So that you can load and unload Kprobes-based instrumentation modules,
 make sure "Loadable module support" (CONFIG_MODULES) and "Module
 unloading" (CONFIG_MODULE_UNLOAD) are set to "y".
 
-You may also want to ensure that CONFIG_KALLSYMS and perhaps even
-CONFIG_KALLSYMS_ALL are set to "y", since kallsyms_lookup_name()
-is a handy, version-independent way to find a function's address.
+Also make sure that CONFIG_KALLSYMS and perhaps even CONFIG_KALLSYMS_ALL
+are set to "y", since kallsyms_lookup_name() is used by the in-kernel
+kprobe address resolution code.
 
 If you need to insert a probe in the middle of a function, you may find
 it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
@@ -179,6 +179,27 @@ occurs during execution of kp->pre_handler or kp->post_handler,
 or during single-stepping of the probed instruction, Kprobes calls
 kp->fault_handler.  Any or all handlers can be NULL.
 
+NOTE:
+1. With the introduction of the "symbol_name" field to struct kprobe,
+the probepoint address resolution will now be taken care of by the kernel.
+The following will now work:
+
+       kp.symbol_name = "symbol_name";
+
+(64-bit powerpc intricacies such as function descriptors are handled
+transparently)
+
+2. Use the "offset" field of struct kprobe if the offset into the symbol
+at which to install the probepoint is known. This field is used to
+calculate the probepoint.
+
+3. Specify either the kprobe "symbol_name" OR the "addr". If both are
+specified, kprobe registration will fail with -EINVAL.
+
+4. With CISC architectures (such as i386 and x86_64), the kprobes code
+does not validate whether kprobe.addr is at an instruction boundary.
+Use "offset" with caution.
+
 register_kprobe() returns 0 on success, or a negative errno otherwise.
 
 User's pre-handler (kp->pre_handler):
@@ -225,6 +246,12 @@ control to Kprobes.)  If the probed function is declared asmlinkage,
 fastcall, or anything else that affects how args are passed, the
 handler's declaration must match.
 
+NOTE: A macro JPROBE_ENTRY is provided to handle architecture-specific
+aliasing of jp->entry. In the interest of portability, it is advised
+to use:
+
+       jp->entry = JPROBE_ENTRY(handler);
+
 register_jprobe() returns 0 on success, or a negative errno otherwise.
 
 4.3 register_kretprobe
@@ -251,6 +278,11 @@ of interest:
 - ret_addr: the return address
 - rp: points to the corresponding kretprobe object
 - task: points to the corresponding task struct
+
+The regs_return_value(regs) macro provides a simple abstraction to
+extract the return value from the appropriate register as defined by
+the architecture's ABI.
+
 The handler's return value is currently ignored.
 
 4.4 unregister_*probe
@@ -369,7 +401,6 @@ stack trace and selected i386 registers when do_fork() is called.
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
-#include <linux/kallsyms.h>
 #include <linux/sched.h>
 
 /*For each probe you need to allocate a kprobe structure*/
@@ -403,18 +434,14 @@ int handler_fault(struct kprobe *p, struct pt_regs *regs, int trapnr)
        return 0;
 }
 
-int init_module(void)
+static int __init kprobe_init(void)
 {
        int ret;
        kp.pre_handler = handler_pre;
        kp.post_handler = handler_post;
        kp.fault_handler = handler_fault;
-       kp.addr = (kprobe_opcode_t*) kallsyms_lookup_name("do_fork");
-       /* register the kprobe now */
-       if (!kp.addr) {
-               printk("Couldn't find %s to plant kprobe\n", "do_fork");
-               return -1;
-       }
+       kp.symbol_name = "do_fork";
+
        if ((ret = register_kprobe(&kp)) < 0) {
                printk("register_kprobe failed, returned %d\n", ret);
                return -1;
@@ -423,12 +450,14 @@ int init_module(void)
        return 0;
 }
 
-void cleanup_module(void)
+static void __exit kprobe_exit(void)
 {
        unregister_kprobe(&kp);
        printk("kprobe unregistered\n");
 }
 
+module_init(kprobe_init)
+module_exit(kprobe_exit)
 MODULE_LICENSE("GPL");
 ----- cut here -----
 
@@ -463,7 +492,6 @@ the arguments of do_fork().
 #include <linux/fs.h>
 #include <linux/uio.h>
 #include <linux/kprobes.h>
-#include <linux/kallsyms.h>
 
 /*
  * Jumper probe for do_fork.
@@ -485,17 +513,13 @@ long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
 }
 
 static struct jprobe my_jprobe = {
-       .entry = (kprobe_opcode_t *) jdo_fork
+       .entry = JPROBE_ENTRY(jdo_fork)
 };
 
-int init_module(void)
+static int __init jprobe_init(void)
 {
        int ret;
-       my_jprobe.kp.addr = (kprobe_opcode_t *) kallsyms_lookup_name("do_fork");
-       if (!my_jprobe.kp.addr) {
-               printk("Couldn't find %s to plant jprobe\n", "do_fork");
-               return -1;
-       }
+       my_jprobe.kp.symbol_name = "do_fork";
 
        if ((ret = register_jprobe(&my_jprobe)) <0) {
                printk("register_jprobe failed, returned %d\n", ret);
@@ -506,12 +530,14 @@ int init_module(void)
        return 0;
 }
 
-void cleanup_module(void)
+static void __exit jprobe_exit(void)
 {
        unregister_jprobe(&my_jprobe);
        printk("jprobe unregistered\n");
 }
 
+module_init(jprobe_init)
+module_exit(jprobe_exit)
 MODULE_LICENSE("GPL");
 ----- cut here -----
 
@@ -530,16 +556,13 @@ report failed calls to sys_open().
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/kprobes.h>
-#include <linux/kallsyms.h>
 
 static const char *probed_func = "sys_open";
 
 /* Return-probe handler: If the probed function fails, log the return value. */
 static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
 {
-       // Substitute the appropriate register name for your architecture --
-       // e.g., regs->rax for x86_64, regs->gpr[3] for ppc64.
-       int retval = (int) regs->eax;
+       int retval = regs_return_value(regs);
        if (retval < 0) {
                printk("%s returns %d\n", probed_func, retval);
        }
@@ -552,15 +575,11 @@ static struct kretprobe my_kretprobe = {
        .maxactive = 20
 };
 
-int init_module(void)
+static int __init kretprobe_init(void)
 {
        int ret;
-       my_kretprobe.kp.addr =
-               (kprobe_opcode_t *) kallsyms_lookup_name(probed_func);
-       if (!my_kretprobe.kp.addr) {
-               printk("Couldn't find %s to plant return probe\n", probed_func);
-               return -1;
-       }
+       my_kretprobe.kp.symbol_name = (char *)probed_func;
+
        if ((ret = register_kretprobe(&my_kretprobe)) < 0) {
                printk("register_kretprobe failed, returned %d\n", ret);
                return -1;
@@ -569,7 +588,7 @@ int init_module(void)
        return 0;
 }
 
-void cleanup_module(void)
+static void __exit kretprobe_exit(void)
 {
        unregister_kretprobe(&my_kretprobe);
        printk("kretprobe unregistered\n");
@@ -578,6 +597,8 @@ void cleanup_module(void)
                my_kretprobe.nmissed, probed_func);
 }
 
+module_init(kretprobe_init)
+module_exit(kretprobe_exit)
 MODULE_LICENSE("GPL");
 ----- cut here -----
 
@@ -590,3 +611,5 @@ messages.)
 For additional information on Kprobes, refer to the following URLs:
 http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
 http://www.redhat.com/magazine/005mar05/features/kprobes/
+http://www-users.cs.umn.edu/~boutcher/kprobes/
+http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
index f042cc4..dbe327d 100644 (file)
@@ -36,7 +36,6 @@
 #include <asm/cacheflush.h>
 #include <asm/vga.h>
 
-#define __KERNEL_SYSCALLS__
 #include <asm/unistd.h>
 
 extern struct hwrpb_struct *hwrpb;
@@ -116,7 +115,7 @@ EXPORT_SYMBOL(sys_dup);
 EXPORT_SYMBOL(sys_exit);
 EXPORT_SYMBOL(sys_write);
 EXPORT_SYMBOL(sys_lseek);
-EXPORT_SYMBOL(execve);
+EXPORT_SYMBOL(kernel_execve);
 EXPORT_SYMBOL(sys_setsid);
 EXPORT_SYMBOL(sys_wait4);
 
index 01ecd09..c95e95e 100644 (file)
@@ -655,12 +655,12 @@ kernel_thread:
 .end kernel_thread
 
 /*
- * execve(path, argv, envp)
+ * kernel_execve(path, argv, envp)
  */
        .align  4
-       .globl  execve
-       .ent    execve
-execve:
+       .globl  kernel_execve
+       .ent    kernel_execve
+kernel_execve:
        /* We can be called from a module.  */
        ldgp    $gp, 0($27)
        lda     $sp, -(32+SIZEOF_PT_REGS+8)($sp)
@@ -704,7 +704,7 @@ execve:
 
 1:     lda     $sp, 32+SIZEOF_PT_REGS+8($sp)
        ret
-.end execve
+.end kernel_execve
 
 \f
 /*
index 73c7622..8a31fc1 100644 (file)
@@ -402,15 +402,15 @@ osf_utsname(char __user *name)
 
        down_read(&uts_sem);
        error = -EFAULT;
-       if (copy_to_user(name + 0, system_utsname.sysname, 32))
+       if (copy_to_user(name + 0, utsname()->sysname, 32))
                goto out;
-       if (copy_to_user(name + 32, system_utsname.nodename, 32))
+       if (copy_to_user(name + 32, utsname()->nodename, 32))
                goto out;
-       if (copy_to_user(name + 64, system_utsname.release, 32))
+       if (copy_to_user(name + 64, utsname()->release, 32))
                goto out;
-       if (copy_to_user(name + 96, system_utsname.version, 32))
+       if (copy_to_user(name + 96, utsname()->version, 32))
                goto out;
-       if (copy_to_user(name + 128, system_utsname.machine, 32))
+       if (copy_to_user(name + 128, utsname()->machine, 32))
                goto out;
 
        error = 0;
@@ -449,8 +449,8 @@ osf_getdomainname(char __user *name, int namelen)
 
        down_read(&uts_sem);
        for (i = 0; i < len; ++i) {
-               __put_user(system_utsname.domainname[i], name + i);
-               if (system_utsname.domainname[i] == '\0')
+               __put_user(utsname()->domainname[i], name + i);
+               if (utsname()->domainname[i] == '\0')
                        break;
        }
        up_read(&uts_sem);
@@ -607,12 +607,12 @@ osf_sigstack(struct sigstack __user *uss, struct sigstack __user *uoss)
 asmlinkage long
 osf_sysinfo(int command, char __user *buf, long count)
 {
-       static char * sysinfo_table[] = {
-               system_utsname.sysname,
-               system_utsname.nodename,
-               system_utsname.release,
-               system_utsname.version,
-               system_utsname.machine,
+       char *sysinfo_table[] = {
+               utsname()->sysname,
+               utsname()->nodename,
+               utsname()->release,
+               utsname()->version,
+               utsname()->machine,
                "alpha",        /* instruction set architecture */
                "dummy",        /* hardware serial number */
                "dummy",        /* hardware manufacturer */
index 9d7dff2..7569232 100644 (file)
@@ -229,7 +229,7 @@ srmcons_close(struct tty_struct *tty, struct file *filp)
 
 static struct tty_driver *srmcons_driver;
 
-static struct tty_operations srmcons_ops = {
+static const struct tty_operations srmcons_ops = {
        .open           = srmcons_open,
        .close          = srmcons_close,
        .write          = srmcons_write,
index 0a722e7..6bbd93d 100644 (file)
@@ -348,7 +348,7 @@ static void __init setup_processor(void)
               cpu_name, processor_id, (int)processor_id & 15,
               proc_arch[cpu_architecture()], cr_alignment);
 
-       sprintf(system_utsname.machine, "%s%c", list->arch_name, ENDIANNESS);
+       sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS);
        sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS);
        elf_hwcap = list->elf_hwcap;
 #ifndef CONFIG_ARM_THUMB
index 68e9634..421329f 100644 (file)
@@ -36,7 +36,9 @@
  * The online bitmask indicates that the CPU is up and running.
  */
 cpumask_t cpu_possible_map;
+EXPORT_SYMBOL(cpu_possible_map);
 cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
 
 /*
  * as from 2.5, kernels no longer have an init_tasks structure
index 8170af4..00c18d3 100644 (file)
@@ -279,7 +279,7 @@ out:
        return error;
 }
 
-long execve(const char *filename, char **argv, char **envp)
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
 {
        struct pt_regs regs;
        int ret;
@@ -317,7 +317,7 @@ long execve(const char *filename, char **argv, char **envp)
  out:
        return ret;
 }
-EXPORT_SYMBOL(execve);
+EXPORT_SYMBOL(kernel_execve);
 
 /*
  * Since loff_t is a 64 bit type we avoid a lot of ABI hassle
index e7eb070..466ddb5 100644 (file)
@@ -143,7 +143,7 @@ static void __init setup_processor(void)
 
        dump_cpu_info();
 
-       sprintf(system_utsname.machine, "%s", list->arch_name);
+       sprintf(init_utsname()->machine, "%s", list->arch_name);
        sprintf(elf_platform, "%s", list->elf_name);
        elf_hwcap = list->elf_hwcap;
 
index 8545789..dc05aba 100644 (file)
@@ -283,7 +283,7 @@ out:
 }
 
 /* FIXME - see if this is correct for arm26 */
-long execve(const char *filename, char **argv, char **envp)
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
 {
        struct pt_regs regs;
         int ret;
@@ -320,4 +320,4 @@ long execve(const char *filename, char **argv, char **envp)
         return ret;
 }
 
-EXPORT_SYMBOL(execve);
+EXPORT_SYMBOL(kernel_execve);
index 6ec5693..8deb600 100644 (file)
@@ -49,3 +49,17 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
                fput(file);
        return error;
 }
+
+int kernel_execve(const char *file, char **argv, char **envp)
+{
+       register long scno asm("r8") = __NR_execve;
+       register long sc1 asm("r12") = (long)file;
+       register long sc2 asm("r11") = (long)argv;
+       register long sc3 asm("r10") = (long)envp;
+
+       asm volatile("scall"
+                    : "=r"(sc1)
+                    : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
+                    : "cc", "memory");
+       return sc1;
+}
index 464ecae..2d0023f 100644 (file)
@@ -28,6 +28,7 @@ spinlock_t cris_atomic_locks[] = { [0 ... LOCK_COUNT - 1] = SPIN_LOCK_UNLOCKED};
 
 /* CPU masks */
 cpumask_t cpu_online_map = CPU_MASK_NONE;
+EXPORT_SYMBOL(cpu_online_map);
 cpumask_t phys_cpu_present_map = CPU_MASK_NONE;
 EXPORT_SYMBOL(phys_cpu_present_map);
 
index 7af3d5d..ca8b45a 100644 (file)
@@ -160,7 +160,7 @@ setup_arch(char **cmdline_p)
        show_etrax_copyright();
 
        /* Setup utsname */
-       strcpy(system_utsname.machine, cris_machine_name);
+       strcpy(init_utsname()->machine, cris_machine_name);
 }
 
 static void *c_start(struct seq_file *m, loff_t *pos)
index 32db349..e8f73ed 100644 (file)
@@ -8,7 +8,7 @@ heads-$(CONFIG_MMU)             := head-mmu-fr451.o
 extra-y:= head.o init_task.o vmlinux.lds
 
 obj-y := $(heads-y) entry.o entry-table.o break.o switch_to.o kernel_thread.o \
-        process.o traps.o ptrace.o signal.o dma.o \
+        kernel_execve.o process.o traps.o ptrace.o signal.o dma.o \
         sys_frv.o time.o semaphore.o setup.o frv_ksyms.o \
         debug-stub.o irq.o sleep.o uaccess.o
 
diff --git a/arch/frv/kernel/kernel_execve.S b/arch/frv/kernel/kernel_execve.S
new file mode 100644 (file)
index 0000000..9b074a1
--- /dev/null
@@ -0,0 +1,33 @@
+/* in-kernel program execution
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+###############################################################################
+#
+# Do a system call from kernel instead of calling sys_execve so we end up with
+# proper pt_regs.
+#
+# int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+#
+# On entry: GR8/GR9/GR10: arguments to function
+# On return: GR8: syscall return.
+#
+###############################################################################
+       .globl          kernel_execve
+       .type           kernel_execve,@function
+kernel_execve:
+       setlos          __NR_execve,gr7
+       tira            gr0,#0
+       bralr
+
+       .size           kernel_execve,.-kernel_execve
index 0f61b7a..302a2df 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/cachectl.h>
 #include <asm/traps.h>
 #include <asm/ipc.h>
+#include <asm/unistd.h>
 
 /*
  * sys_pipe() is the normal C calling standard for creating
@@ -280,3 +281,26 @@ asmlinkage void syscall_print(void *dummy,...)
                ((regs->pc)&0xffffff)-2,regs->orig_er0,regs->er1,regs->er2,regs->er3,regs->er0);
 }
 #endif
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register long res __asm__("er0");
+       register const char * _a __asm__("er1") = filename;
+       register void *_b __asm__("er2") = argv;
+       register void *_c __asm__("er3") = envp;
+       __asm__ __volatile__ ("mov.l %1,er0\n\t"
+                       "trapa  #0\n\t"
+                       : "=r" (res)
+                       : "g" (__NR_execve),
+                         "g" (_a),
+                         "g" (_b),
+                         "g" (_c)
+                       : "cc", "memory");
+       return res;
+}
+
+
index 3fd2f25..af219e5 100644 (file)
@@ -1142,7 +1142,7 @@ source "arch/i386/oprofile/Kconfig"
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && MODULES
+       depends on KALLSYMS && EXPERIMENTAL && MODULES
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
index afe6505..d98e44b 100644 (file)
@@ -230,20 +230,20 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
                                      struct pt_regs *regs)
 {
        unsigned long *sara = (unsigned long *)&regs->esp;
-        struct kretprobe_instance *ri;
 
-        if ((ri = get_free_rp_inst(rp)) != NULL) {
-                ri->rp = rp;
-                ri->task = current;
+       struct kretprobe_instance *ri;
+
+       if ((ri = get_free_rp_inst(rp)) != NULL) {
+               ri->rp = rp;
+               ri->task = current;
                ri->ret_addr = (kprobe_opcode_t *) *sara;
 
                /* Replace the return addr with trampoline addr */
                *sara = (unsigned long) &kretprobe_trampoline;
-
-                add_rp_inst(ri);
-        } else {
-                rp->nmissed++;
-        }
+               add_rp_inst(ri);
+       } else {
+               rp->nmissed++;
+       }
 }
 
 /*
@@ -359,7 +359,7 @@ no_kprobe:
  void __kprobes kretprobe_trampoline_holder(void)
  {
        asm volatile ( ".global kretprobe_trampoline\n"
-                       "kretprobe_trampoline: \n"
+                       "kretprobe_trampoline: \n"
                        "       pushf\n"
                        /* skip cs, eip, orig_eax, es, ds */
                        "       subl $20, %esp\n"
@@ -395,14 +395,15 @@ no_kprobe:
  */
 fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
 {
-        struct kretprobe_instance *ri = NULL;
-        struct hlist_head *head;
-        struct hlist_node *node, *tmp;
+       struct kretprobe_instance *ri = NULL;
+       struct hlist_head *head, empty_rp;
+       struct hlist_node *node, *tmp;
        unsigned long flags, orig_ret_address = 0;
        unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
+       INIT_HLIST_HEAD(&empty_rp);
        spin_lock_irqsave(&kretprobe_lock, flags);
-        head = kretprobe_inst_table_head(current);
+       head = kretprobe_inst_table_head(current);
 
        /*
         * It is possible to have multiple instances associated with a given
@@ -413,14 +414,14 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
         * We can handle this because:
         *     - instances are always inserted at the head of the list
         *     - when multiple return probes are registered for the same
-         *       function, the first instance's ret_addr will point to the
+        *       function, the first instance's ret_addr will point to the
         *       real return address, and all the rest will point to
         *       kretprobe_trampoline
         */
        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-                if (ri->task != current)
+               if (ri->task != current)
                        /* another task is sharing our hash bucket */
-                        continue;
+                       continue;
 
                if (ri->rp && ri->rp->handler){
                        __get_cpu_var(current_kprobe) = &ri->rp->kp;
@@ -429,7 +430,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
                }
 
                orig_ret_address = (unsigned long)ri->ret_addr;
-               recycle_rp_inst(ri);
+               recycle_rp_inst(ri, &empty_rp);
 
                if (orig_ret_address != trampoline_address)
                        /*
@@ -444,6 +445,10 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs)
 
        spin_unlock_irqrestore(&kretprobe_lock, flags);
 
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
        return (void*)orig_ret_address;
 }
 
index 96cd023..dad02a9 100644 (file)
@@ -297,9 +297,9 @@ void show_regs(struct pt_regs * regs)
        if (user_mode_vm(regs))
                printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
        printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
-              regs->eflags, print_tainted(), system_utsname.release,
-              (int)strcspn(system_utsname.version, " "),
-              system_utsname.version);
+              regs->eflags, print_tainted(), init_utsname()->release,
+              (int)strcspn(init_utsname()->version, " "),
+              init_utsname()->version);
        printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
                regs->eax,regs->ebx,regs->ecx,regs->edx);
        printk("ESI: %08lx EDI: %08lx EBP: %08lx",
index 0831f70..9d93ecf 100644 (file)
@@ -612,6 +612,7 @@ extern struct {
 /* which logical CPUs are on which nodes */
 cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
                                { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
+EXPORT_SYMBOL(node_2_cpu_mask);
 /* which node each logical CPU is on */
 int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
 EXPORT_SYMBOL(cpu_2_node);
index 8fdb1fb..4048397 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/utsname.h>
 
 #include <asm/uaccess.h>
+#include <asm/unistd.h>
 #include <asm/ipc.h>
 
 /*
@@ -210,7 +211,7 @@ asmlinkage int sys_uname(struct old_utsname __user * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -226,16 +227,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
   
        down_read(&uts_sem);
        
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-       error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-       error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-       error |= __put_user(0,name->release+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-       error |= __put_user(0,name->version+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-       error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+       error = __copy_to_user(&name->sysname, &utsname()->sysname,
+                              __OLD_UTS_LEN);
+       error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+                               __OLD_UTS_LEN);
+       error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->release, &utsname()->release,
+                               __OLD_UTS_LEN);
+       error |= __put_user(0, name->release + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->version, &utsname()->version,
+                               __OLD_UTS_LEN);
+       error |= __put_user(0, name->version + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->machine, &utsname()->machine,
+                               __OLD_UTS_LEN);
+       error |= __put_user(0, name->machine + __OLD_UTS_LEN);
        
        up_read(&uts_sem);
        
@@ -243,3 +249,17 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
 
        return error;
 }
+
+
+/*
+ * Enter the kernel through int $0x80 with __NR_execve instead of calling
+ * sys_execve directly, so we end up with proper pt_regs; returns __res as-is.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       long __res;     /* operand 0 ("=a"): syscall number in, result out */
+       asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" /* %2 = filename; %ebx saved and restored around the trap */
+       : "=a" (__res)
+       : "0" (__NR_execve),"ri" (filename),"c" (argv), "d" (envp) : "memory"); /* "0" ties __NR_execve to operand 0 */
+       return __res;
+}
index 6820b8d..00489b7 100644 (file)
@@ -357,9 +357,9 @@ void show_registers(struct pt_regs *regs)
                KERN_EMERG "EIP:    %04x:[<%08lx>]    %s VLI\n"
                KERN_EMERG "EFLAGS: %08lx   (%s %.*s)\n",
                smp_processor_id(), 0xffff & regs->xcs, regs->eip,
-               print_tainted(), regs->eflags, system_utsname.release,
-               (int)strcspn(system_utsname.version, " "),
-               system_utsname.version);
+               print_tainted(), regs->eflags, init_utsname()->release,
+               (int)strcspn(init_utsname()->version, " "),
+               init_utsname()->version);
        print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
        printk(KERN_EMERG "eax: %08lx   ebx: %08lx   ecx: %08lx   edx: %08lx\n",
                regs->eax, regs->ebx, regs->ecx, regs->edx);
index 0b7f701..70f7eb9 100644 (file)
@@ -516,7 +516,7 @@ source "arch/ia64/oprofile/Kconfig"
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && MODULES
+       depends on KALLSYMS && EXPERIMENTAL && MODULES
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
index 0daacc2..246eb3d 100644 (file)
@@ -940,7 +940,7 @@ static inline void show_serial_version(void)
        printk(KERN_INFO " no serial options enabled\n");
 }
 
-static struct tty_operations hp_ops = {
+static const struct tty_operations hp_ops = {
        .open = rs_open,
        .close = rs_close,
        .write = rs_write,
index 12701cf..e5b1be5 100644 (file)
@@ -492,11 +492,11 @@ GLOBAL_ENTRY(prefetch_stack)
        br.ret.sptk.many rp
 END(prefetch_stack)
 
-GLOBAL_ENTRY(execve)
+GLOBAL_ENTRY(kernel_execve)
        mov r15=__NR_execve                     // put syscall number in place
        break __BREAK_SYSCALL
        br.ret.sptk.many rp
-END(execve)
+END(kernel_execve)
 
 GLOBAL_ENTRY(clone)
        mov r15=__NR_clone                      // put syscall number in place
index 169ec3a..51217d6 100644 (file)
@@ -90,7 +90,7 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
        p->ainsn.target_br_reg = 0;
 
        /* Check for Break instruction
-        * Bits 37:40 Major opcode to be zero
+        * Bits 37:40 Major opcode to be zero
         * Bits 27:32 X6 to be zero
         * Bits 32:35 X3 to be zero
         */
@@ -104,19 +104,19 @@ static void __kprobes update_kprobe_inst_flag(uint template, uint  slot,
                switch (major_opcode) {
                  case INDIRECT_CALL_OPCODE:
                        p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
-                       p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
-                       break;
+                       p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+                       break;
                  case IP_RELATIVE_PREDICT_OPCODE:
                  case IP_RELATIVE_BRANCH_OPCODE:
                        p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
-                       break;
+                       break;
                  case IP_RELATIVE_CALL_OPCODE:
-                       p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
-                       p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
-                       p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
-                       break;
+                       p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR;
+                       p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
+                       p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7);
+                       break;
                }
-       } else if (bundle_encoding[template][slot] == X) {
+       } else if (bundle_encoding[template][slot] == X) {
                switch (major_opcode) {
                  case LONG_CALL_OPCODE:
                        p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG;
@@ -258,18 +258,18 @@ static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot,
 
        switch (slot) {
          case 0:
-               *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
-               *kprobe_inst = bundle->quad0.slot0;
-               break;
+               *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT);
+               *kprobe_inst = bundle->quad0.slot0;
+                 break;
          case 1:
-               *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
-               kprobe_inst_p0 = bundle->quad0.slot1_p0;
-               kprobe_inst_p1 = bundle->quad1.slot1_p1;
-               *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
+               *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT);
+               kprobe_inst_p0 = bundle->quad0.slot1_p0;
+               kprobe_inst_p1 = bundle->quad1.slot1_p1;
+               *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46));
                break;
          case 2:
-               *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
-               *kprobe_inst = bundle->quad1.slot2;
+               *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT);
+               *kprobe_inst = bundle->quad1.slot2;
                break;
        }
 }
@@ -290,11 +290,11 @@ static int __kprobes valid_kprobe_addr(int template, int slot,
                return -EINVAL;
        }
 
-       if (in_ivt_functions(addr)) {
-               printk(KERN_WARNING "Kprobes can't be inserted inside "
+       if (in_ivt_functions(addr)) {
+               printk(KERN_WARNING "Kprobes can't be inserted inside "
                                "IVT functions at 0x%lx\n", addr);
-               return -EINVAL;
-       }
+               return -EINVAL;
+       }
 
        if (slot == 1 && bundle_encoding[template][1] != L) {
                printk(KERN_WARNING "Inserting kprobes on slot #1 "
@@ -338,12 +338,13 @@ static void kretprobe_trampoline(void)
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct kretprobe_instance *ri = NULL;
-       struct hlist_head *head;
+       struct hlist_head *head, empty_rp;
        struct hlist_node *node, *tmp;
        unsigned long flags, orig_ret_address = 0;
        unsigned long trampoline_address =
                ((struct fnptr *)kretprobe_trampoline)->ip;
 
+       INIT_HLIST_HEAD(&empty_rp);
        spin_lock_irqsave(&kretprobe_lock, flags);
        head = kretprobe_inst_table_head(current);
 
@@ -369,7 +370,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                        ri->rp->handler(ri, regs);
 
                orig_ret_address = (unsigned long)ri->ret_addr;
-               recycle_rp_inst(ri);
+               recycle_rp_inst(ri, &empty_rp);
 
                if (orig_ret_address != trampoline_address)
                        /*
@@ -387,6 +388,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
        spin_unlock_irqrestore(&kretprobe_lock, flags);
        preempt_enable_no_resched();
 
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
        /*
         * By returning a non-zero value, we are telling
         * kprobe_handler() that we don't want the post_handler
@@ -424,14 +429,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
        bundle_t *bundle;
 
        bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle;
-       template = bundle->quad0.template;
+       template = bundle->quad0.template;
 
        if(valid_kprobe_addr(template, slot, addr))
                return -EINVAL;
 
        /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
-       if (slot == 1 && bundle_encoding[template][1] == L)
-               slot++;
+       if (slot == 1 && bundle_encoding[template][1] == L)
+               slot++;
 
        /* Get kprobe_inst and major_opcode from the bundle */
        get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode);
@@ -489,21 +494,22 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
  */
 static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
 {
-       unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
-       unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
-       unsigned long template;
-       int slot = ((unsigned long)p->addr & 0xf);
+       unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle);
+       unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL;
+       unsigned long template;
+       int slot = ((unsigned long)p->addr & 0xf);
 
        template = p->ainsn.insn->bundle.quad0.template;
 
-       if (slot == 1 && bundle_encoding[template][1] == L)
-               slot = 2;
+       if (slot == 1 && bundle_encoding[template][1] == L)
+               slot = 2;
 
        if (p->ainsn.inst_flag) {
 
                if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
                        /* Fix relative IP address */
-                       regs->cr_iip = (regs->cr_iip - bundle_addr) + resume_addr;
+                       regs->cr_iip = (regs->cr_iip - bundle_addr) +
+                                       resume_addr;
                }
 
                if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) {
@@ -540,18 +546,18 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
        }
 
        if (slot == 2) {
-               if (regs->cr_iip == bundle_addr + 0x10) {
-                       regs->cr_iip = resume_addr + 0x10;
-               }
-       } else {
-               if (regs->cr_iip == bundle_addr) {
-                       regs->cr_iip = resume_addr;
-               }
+               if (regs->cr_iip == bundle_addr + 0x10) {
+                       regs->cr_iip = resume_addr + 0x10;
+               }
+       } else {
+               if (regs->cr_iip == bundle_addr) {
+                       regs->cr_iip = resume_addr;
+               }
        }
 
 turn_ss_off:
-       /* Turn off Single Step bit */
-       ia64_psr(regs)->ss = 0;
+       /* Turn off Single Step bit */
+       ia64_psr(regs)->ss = 0;
 }
 
 static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs)
@@ -587,7 +593,7 @@ static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
 
        /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */
        if (slot == 1 && bundle_encoding[template][1] == L)
-               slot++;
+               slot++;
 
        /* Get Kprobe probe instruction at given slot*/
        get_kprobe_inst(&bundle, slot, &kprobe_inst, &major_opcode);
@@ -627,7 +633,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
                if (p) {
                        if ((kcb->kprobe_status == KPROBE_HIT_SS) &&
                             (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) {
-                               ia64_psr(regs)->ss = 0;
+                               ia64_psr(regs)->ss = 0;
                                goto no_kprobe;
                        }
                        /* We have reentered the pre_kprobe_handler(), since
@@ -887,7 +893,7 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
         * fix the return address to our jprobe_inst_return() function
         * in the jprobes.S file
         */
-       regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
+       regs->b0 = ((struct fnptr *)(jprobe_inst_return))->ip;
 
        return 1;
 }
index 2034063..a78b45f 100644 (file)
@@ -28,6 +28,7 @@ u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
 EXPORT_SYMBOL(cpu_to_node_map);
 
 cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
+EXPORT_SYMBOL(node_to_cpu_mask);
 
 void __cpuinit map_cpu_to_node(int cpu, int nid)
 {
index ea914cc..51922b9 100644 (file)
@@ -8,8 +8,6 @@
  * 2005-10-07 Keith Owens <kaos@sgi.com>
  *           Add notify_die() hooks.
  */
-#define __KERNEL_SYSCALLS__    /* see <asm/unistd.h> */
-
 #include <linux/cpu.h>
 #include <linux/pm.h>
 #include <linux/elf.h>
index b632b9c..462ea17 100644 (file)
@@ -423,7 +423,7 @@ static int sn_topology_show(struct seq_file *s, void *d)
                        "coherency_domain %d, "
                        "region_size %d\n",
 
-                       partid, system_utsname.nodename,
+                       partid, utsname()->nodename,
                        shubtype ? "shub2" : "shub1", 
                        (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift,
                        system_size, sharing_size, coher, region_size);
index a9cea32..b567351 100644 (file)
@@ -25,6 +25,8 @@
 #include <asm/cachectl.h>
 #include <asm/cacheflush.h>
 #include <asm/ipc.h>
+#include <asm/syscall.h>
+#include <asm/unistd.h>
 
 /*
  * sys_tas() - test-and-set
@@ -205,7 +207,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -223,3 +225,21 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op)
        return -ENOSYS;
 }
 
+/*
+ * Issue execve through the trap at SYSCALL_VECTOR instead of calling
+ * sys_execve directly, so we end up with proper pt_regs; returns r0 as-is.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register long __scno __asm__ ("r7") = __NR_execve;      /* syscall number */
+       register long __arg3 __asm__ ("r2") = (long)(envp);
+       register long __arg2 __asm__ ("r1") = (long)(argv);
+       register long __res __asm__ ("r0") = (long)(filename);  /* filename in, result out */
+       __asm__ __volatile__ (
+               "trap #" SYSCALL_VECTOR "|| nop"
+               : "=r" (__res)
+               : "r" (__scno), "0" (__res), "r" (__arg2),      /* "0" reuses r0 as an input */
+                       "r" (__arg3)
+               : "memory");
+       return __res;
+}
index 143c552..90238a8 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/traps.h>
 #include <asm/ipc.h>
 #include <asm/page.h>
+#include <asm/unistd.h>
 
 /*
  * sys_pipe() is the normal C calling standard for creating
@@ -663,3 +664,18 @@ asmlinkage int sys_getpagesize(void)
 {
        return PAGE_SIZE;
 }
+
+/*
+ * Issue execve through "trap #0" instead of calling sys_execve directly, so
+ * we end up with proper pt_regs; returns whatever the trap leaves in %d0.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register long __res asm ("%d0") = __NR_execve;  /* syscall number in, result out */
+       register long __a asm ("%d1") = (long)(filename);
+       register long __b asm ("%d2") = (long)(argv);
+       register long __c asm ("%d3") = (long)(envp);
+       asm volatile ("trap  #0" : "+d" (__res) /* "+d": %d0 is both read and written */
+                       : "d" (__a), "d" (__b), "d" (__c));
+       return __res;
+}
index d87e1e0..c3494b8 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/traps.h>
 #include <asm/ipc.h>
 #include <asm/cacheflush.h>
+#include <asm/unistd.h>
 
 /*
  * sys_pipe() is the normal C calling standard for creating
@@ -206,3 +207,17 @@ asmlinkage int sys_getpagesize(void)
        return PAGE_SIZE;
 }
 
+/*
+ * Issue execve through "trap #0" instead of calling sys_execve directly, so
+ * we end up with proper pt_regs; returns whatever the trap leaves in %d0.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register long __res asm ("%d0") = __NR_execve;  /* syscall number in, result out */
+       register long __a asm ("%d1") = (long)(filename);
+       register long __b asm ("%d2") = (long)(argv);
+       register long __c asm ("%d3") = (long)(envp);
+       asm volatile ("trap  #0" : "+d" (__res) /* "+d": %d0 is both read and written */
+                       : "d" (__a), "d" (__b), "d" (__c));
+       return __res;
+}
index 43b1162..52cada4 100644 (file)
@@ -1039,7 +1039,7 @@ asmlinkage long sys32_newuname(struct new_utsname __user * name)
        int ret = 0;
 
        down_read(&uts_sem);
-       if (copy_to_user(name,&system_utsname,sizeof *name))
+       if (copy_to_user(name, utsname(), sizeof *name))
                ret = -EFAULT;
        up_read(&uts_sem);
 
index 50c17ea..477c533 100644 (file)
@@ -42,8 +42,6 @@
 
 #include "signal-common.h"
 
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
-
 /*
  * Including <asm/unistd.h> would give use the 64-bit syscall numbers ...
  */
@@ -83,6 +81,8 @@ struct rt_sigframe_n32 {
 #endif
 };
 
+extern void sigset_from_compat (sigset_t *set, compat_sigset_t *compat);
+
 save_static_function(sysn32_rt_sigsuspend);
 __attribute_used__ noinline static int
 _sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs)
index 9951240..26e1a7e 100644 (file)
@@ -231,7 +231,7 @@ out:
  */
 asmlinkage int sys_uname(struct old_utsname __user * name)
 {
-       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+       if (name && !copy_to_user(name, utsname(), sizeof (*name)))
                return 0;
        return -EFAULT;
 }
@@ -248,16 +248,21 @@ asmlinkage int sys_olduname(struct oldold_utsname __user * name)
        if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
                return -EFAULT;
 
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,__OLD_UTS_LEN);
-       error -= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->nodename,&system_utsname.nodename,__OLD_UTS_LEN);
-       error -= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->release,&system_utsname.release,__OLD_UTS_LEN);
-       error -= __put_user(0,name->release+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->version,&system_utsname.version,__OLD_UTS_LEN);
-       error -= __put_user(0,name->version+__OLD_UTS_LEN);
-       error -= __copy_to_user(&name->machine,&system_utsname.machine,__OLD_UTS_LEN);
-       error = __put_user(0,name->machine+__OLD_UTS_LEN);
+       error = __copy_to_user(&name->sysname, &utsname()->sysname,
+                              __OLD_UTS_LEN);
+       error -= __put_user(0, name->sysname + __OLD_UTS_LEN);
+       error -= __copy_to_user(&name->nodename, &utsname()->nodename,
+                               __OLD_UTS_LEN);
+       error -= __put_user(0, name->nodename + __OLD_UTS_LEN);
+       error -= __copy_to_user(&name->release, &utsname()->release,
+                               __OLD_UTS_LEN);
+       error -= __put_user(0, name->release + __OLD_UTS_LEN);
+       error -= __copy_to_user(&name->version, &utsname()->version,
+                               __OLD_UTS_LEN);
+       error -= __put_user(0, name->version + __OLD_UTS_LEN);
+       error -= __copy_to_user(&name->machine, &utsname()->machine,
+                               __OLD_UTS_LEN);
+       error = __put_user(0, name->machine + __OLD_UTS_LEN);
        error = error ? -EFAULT : 0;
 
        return error;
@@ -401,3 +406,32 @@ asmlinkage void bad_stack(void)
 {
        do_exit(SIGSEGV);
 }
+
+/*
+ * Issue execve through the syscall instruction instead of calling sys_execve,
+ * so we end up with proper pt_regs; returns $2, negated when $7 is nonzero.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register unsigned long __a0 asm("$4") = (unsigned long) filename;
+       register unsigned long __a1 asm("$5") = (unsigned long) argv;
+       register unsigned long __a2 asm("$6") = (unsigned long) envp;
+       register unsigned long __a3 asm("$7");  /* asm output only; tested after the syscall */
+       unsigned long __v0;     /* receives $2 through "move %0, $2" */
+
+       __asm__ volatile ("                                     \n"
+       "       .set    noreorder                               \n"
+       "       li      $2, %5          # __NR_execve           \n"
+       "       syscall                                         \n"
+       "       move    %0, $2                                  \n"
+       "       .set    reorder                                 \n"
+       : "=&r" (__v0), "=r" (__a3)
+       : "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_execve)
+       : "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24",
+         "memory");
+
+       if (__a3 == 0)  /* zero: hand back __v0 unchanged */
+               return __v0;
+
+       return -__v0;   /* nonzero __a3: return the negated value */
+}
index 1137dd6..11bb971 100644 (file)
@@ -884,7 +884,7 @@ asmlinkage int irix_getdomainname(char __user *name, int len)
        down_read(&uts_sem);
        if (len > __NEW_UTS_LEN)
                len = __NEW_UTS_LEN;
-       err = copy_to_user(name, system_utsname.domainname, len) ? -EFAULT : 0;
+       err = copy_to_user(name, utsname()->domainname, len) ? -EFAULT : 0;
        up_read(&uts_sem);
 
        return err;
@@ -1127,11 +1127,11 @@ struct iuname {
 asmlinkage int irix_uname(struct iuname __user *buf)
 {
        down_read(&uts_sem);
-       if (copy_from_user(system_utsname.sysname, buf->sysname, 65)
-           || copy_from_user(system_utsname.nodename, buf->nodename, 65)
-           || copy_from_user(system_utsname.release, buf->release, 65)
-           || copy_from_user(system_utsname.version, buf->version, 65)
-           || copy_from_user(system_utsname.machine, buf->machine, 65)) {
+       if (copy_from_user(utsname()->sysname, buf->sysname, 65)
+           || copy_from_user(utsname()->nodename, buf->nodename, 65)
+           || copy_from_user(utsname()->release, buf->release, 65)
+           || copy_from_user(utsname()->version, buf->version, 65)
+           || copy_from_user(utsname()->machine, buf->machine, 65)) {
                return -EFAULT;
        }
        up_read(&uts_sem);
index 8134220..7a941ec 100644 (file)
@@ -123,7 +123,8 @@ static inline void power_button(void)
        if (machine_state & MACHINE_PANICED)
                return;
 
-       if ((machine_state & MACHINE_SHUTTING_DOWN) || kill_proc(1,SIGINT,1)) {
+       if ((machine_state & MACHINE_SHUTTING_DOWN) ||
+                       kill_cad_pid(SIGINT, 1)) {
                /* No init process or button pressed twice.  */
                sgi_machine_power_off();
        }
index 79ddb46..fd0932b 100644 (file)
@@ -120,7 +120,7 @@ static inline void ip32_power_button(void)
        if (has_panicked)
                return;
 
-       if (shuting_down || kill_proc(1, SIGINT, 1)) {
+       if (shuting_down || kill_cad_pid(SIGINT, 1)) {
                /* No init process or button pressed twice.  */
                ip32_machine_power_off();
        }
index cb69727..2e2dc4f 100644 (file)
@@ -266,16 +266,21 @@ static int hpux_uname(struct hpux_utsname *name)
 
        down_read(&uts_sem);
 
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,HPUX_UTSLEN-1);
-       error |= __put_user(0,name->sysname+HPUX_UTSLEN-1);
-       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,HPUX_UTSLEN-1);
-       error |= __put_user(0,name->nodename+HPUX_UTSLEN-1);
-       error |= __copy_to_user(&name->release,&system_utsname.release,HPUX_UTSLEN-1);
-       error |= __put_user(0,name->release+HPUX_UTSLEN-1);
-       error |= __copy_to_user(&name->version,&system_utsname.version,HPUX_UTSLEN-1);
-       error |= __put_user(0,name->version+HPUX_UTSLEN-1);
-       error |= __copy_to_user(&name->machine,&system_utsname.machine,HPUX_UTSLEN-1);
-       error |= __put_user(0,name->machine+HPUX_UTSLEN-1);
+       error = __copy_to_user(&name->sysname, &utsname()->sysname,
+                              HPUX_UTSLEN - 1);
+       error |= __put_user(0, name->sysname + HPUX_UTSLEN - 1);
+       error |= __copy_to_user(&name->nodename, &utsname()->nodename,
+                               HPUX_UTSLEN - 1);
+       error |= __put_user(0, name->nodename + HPUX_UTSLEN - 1);
+       error |= __copy_to_user(&name->release, &utsname()->release,
+                               HPUX_UTSLEN - 1);
+       error |= __put_user(0, name->release + HPUX_UTSLEN - 1);
+       error |= __copy_to_user(&name->version, &utsname()->version,
+                               HPUX_UTSLEN - 1);
+       error |= __put_user(0, name->version + HPUX_UTSLEN - 1);
+       error |= __copy_to_user(&name->machine, &utsname()->machine,
+                               HPUX_UTSLEN - 1);
+       error |= __put_user(0, name->machine + HPUX_UTSLEN - 1);
 
        up_read(&uts_sem);
 
@@ -373,8 +378,8 @@ int hpux_utssys(char *ubuf, int n, int type)
                /*  TODO:  print a warning about using this?  */
                down_write(&uts_sem);
                error = -EFAULT;
-               if (!copy_from_user(system_utsname.sysname, ubuf, len)) {
-                       system_utsname.sysname[len] = 0;
+               if (!copy_from_user(utsname()->sysname, ubuf, len)) {
+                       utsname()->sysname[len] = 0;
                        error = 0;
                }
                up_write(&uts_sem);
@@ -400,8 +405,8 @@ int hpux_utssys(char *ubuf, int n, int type)
                /*  TODO:  print a warning about this?  */
                down_write(&uts_sem);
                error = -EFAULT;
-               if (!copy_from_user(system_utsname.release, ubuf, len)) {
-                       system_utsname.release[len] = 0;
+               if (!copy_from_user(utsname()->release, ubuf, len)) {
+                       utsname()->release[len] = 0;
                        error = 0;
                }
                up_write(&uts_sem);
@@ -422,13 +427,13 @@ int hpux_getdomainname(char *name, int len)
        
        down_read(&uts_sem);
        
-       nlen = strlen(system_utsname.domainname) + 1;
+       nlen = strlen(utsname()->domainname) + 1;
 
        if (nlen < len)
                len = nlen;
        if(len > __NEW_UTS_LEN)
                goto done;
-       if(copy_to_user(name, system_utsname.domainname, len))
+       if(copy_to_user(name, utsname()->domainname, len))
                goto done;
        err = 0;
 done:
index 0b485ef..2f9f9df 100644 (file)
@@ -368,7 +368,14 @@ out:
        return error;
 }
 
-unsigned long 
+extern int __execve(const char *filename, char *const argv[],
+               char *const envp[], struct task_struct *task); /* defined outside this hunk */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       return __execve(filename, argv, envp, current); /* forwards all three arguments, adding current as the task */
+}
+
+unsigned long
 get_wchan(struct task_struct *p)
 {
        struct unwind_frame_info info;
index a0dd1b0..032e6ab 100644 (file)
@@ -1069,7 +1069,7 @@ source "arch/powerpc/oprofile/Kconfig"
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
-       depends on PPC64 && EXPERIMENTAL && MODULES
+       depends on PPC64 && KALLSYMS && EXPERIMENTAL && MODULES
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
index cd65c36..7b8d12b 100644 (file)
@@ -259,14 +259,15 @@ void kretprobe_trampoline_holder(void)
  */
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-        struct kretprobe_instance *ri = NULL;
-        struct hlist_head *head;
-        struct hlist_node *node, *tmp;
+       struct kretprobe_instance *ri = NULL;
+       struct hlist_head *head, empty_rp;
+       struct hlist_node *node, *tmp;
        unsigned long flags, orig_ret_address = 0;
        unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
+       INIT_HLIST_HEAD(&empty_rp);
        spin_lock_irqsave(&kretprobe_lock, flags);
-        head = kretprobe_inst_table_head(current);
+       head = kretprobe_inst_table_head(current);
 
        /*
         * It is possible to have multiple instances associated with a given
@@ -277,20 +278,20 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         * We can handle this because:
         *     - instances are always inserted at the head of the list
         *     - when multiple return probes are registered for the same
-         *       function, the first instance's ret_addr will point to the
+        *       function, the first instance's ret_addr will point to the
         *       real return address, and all the rest will point to
         *       kretprobe_trampoline
         */
        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-                if (ri->task != current)
+               if (ri->task != current)
                        /* another task is sharing our hash bucket */
-                        continue;
+                       continue;
 
                if (ri->rp && ri->rp->handler)
                        ri->rp->handler(ri, regs);
 
                orig_ret_address = (unsigned long)ri->ret_addr;
-               recycle_rp_inst(ri);
+               recycle_rp_inst(ri, &empty_rp);
 
                if (orig_ret_address != trampoline_address)
                        /*
@@ -308,12 +309,16 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
        spin_unlock_irqrestore(&kretprobe_lock, flags);
        preempt_enable_no_resched();
 
-        /*
-         * By returning a non-zero value, we are telling
-         * kprobe_handler() that we don't want the post_handler
-         * to run (and have re-enabled preemption)
-         */
-        return 1;
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
+       /*
+        * By returning a non-zero value, we are telling
+        * kprobe_handler() that we don't want the post_handler
+        * to run (and have re-enabled preemption)
+        */
+       return 1;
 }
 
 /*
index 58758d8..88fd73f 100644 (file)
@@ -843,7 +843,7 @@ _GLOBAL(kernel_thread)
        addi    r1,r1,16
        blr
 
-_GLOBAL(execve)
+_GLOBAL(kernel_execve)
        li      r0,__NR_execve
        sc
        bnslr
index e3ed21c..9c54ecc 100644 (file)
@@ -556,7 +556,7 @@ _GLOBAL(giveup_altivec)
 
 #endif /* CONFIG_ALTIVEC */
 
-_GLOBAL(execve)
+_GLOBAL(kernel_execve)
        li      r0,__NR_execve
        sc
        bnslr
index a127a1e..7b2f645 100644 (file)
@@ -424,7 +424,7 @@ void show_regs(struct pt_regs * regs)
        printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
               regs->nip, regs->link, regs->ctr);
        printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
-              regs, regs->trap, print_tainted(), system_utsname.release);
+              regs, regs->trap, print_tainted(), init_utsname()->release);
        printk("MSR: "REG" ", regs->msr);
        printbits(regs->msr, msr_bits);
        printk("  CR: %08lX  XER: %08lX\n", regs->ccr, regs->xer);
index 962ad5e..cda2dbe 100644 (file)
@@ -414,7 +414,7 @@ void __init setup_system(void)
        smp_release_cpus();
 #endif
 
-       printk("Starting Linux PPC64 %s\n", system_utsname.version);
+       printk("Starting Linux PPC64 %s\n", init_utsname()->version);
 
        printk("-----------------------------------------------------\n");
        printk("ppc64_pft_size                = 0x%lx\n", ppc64_pft_size);
index 9b69d99..d358866 100644 (file)
@@ -260,7 +260,7 @@ long ppc_newuname(struct new_utsname __user * name)
        int err = 0;
 
        down_read(&uts_sem);
-       if (copy_to_user(name, &system_utsname, sizeof(*name)))
+       if (copy_to_user(name, utsname(), sizeof(*name)))
                err = -EFAULT;
        up_read(&uts_sem);
        if (!err)
@@ -273,7 +273,7 @@ int sys_uname(struct old_utsname __user *name)
        int err = 0;
        
        down_read(&uts_sem);
-       if (copy_to_user(name, &system_utsname, sizeof(*name)))
+       if (copy_to_user(name, utsname(), sizeof(*name)))
                err = -EFAULT;
        up_read(&uts_sem);
        if (!err)
@@ -289,19 +289,19 @@ int sys_olduname(struct oldold_utsname __user *name)
                return -EFAULT;
   
        down_read(&uts_sem);
-       error = __copy_to_user(&name->sysname, &system_utsname.sysname,
+       error = __copy_to_user(&name->sysname, &utsname()->sysname,
                               __OLD_UTS_LEN);
        error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
-       error |= __copy_to_user(&name->nodename, &system_utsname.nodename,
+       error |= __copy_to_user(&name->nodename, &utsname()->nodename,
                                __OLD_UTS_LEN);
        error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
-       error |= __copy_to_user(&name->release, &system_utsname.release,
+       error |= __copy_to_user(&name->release, &utsname()->release,
                                __OLD_UTS_LEN);
        error |= __put_user(0, name->release + __OLD_UTS_LEN);
-       error |= __copy_to_user(&name->version, &system_utsname.version,
+       error |= __copy_to_user(&name->version, &utsname()->version,
                                __OLD_UTS_LEN);
        error |= __put_user(0, name->version + __OLD_UTS_LEN);
-       error |= __copy_to_user(&name->machine, &system_utsname.machine,
+       error |= __copy_to_user(&name->machine, &utsname()->machine,
                                __OLD_UTS_LEN);
        error |= override_machine(name->machine);
        up_read(&uts_sem);
index 1a2c2a5..1983b64 100644 (file)
@@ -357,7 +357,7 @@ static int dma_and_signal_ce_msg(char *ce_msg,
  */
 static int shutdown(void)
 {
-       int rc = kill_proc(1, SIGINT, 1);
+       int rc = kill_cad_pid(SIGINT, 1);
 
        if (rc) {
                printk(KERN_ALERT "mf.c: SIGINT to init failed (%d), "
index 8ed3621..98189d8 100644 (file)
@@ -342,7 +342,7 @@ static int __init pSeries_init_panel(void)
 {
        /* Manually leave the kernel version on the panel. */
        ppc_md.progress("Linux ppc64\n", 0);
-       ppc_md.progress(system_utsname.version, 0);
+       ppc_md.progress(init_utsname()->version, 0);
 
        return 0;
 }
index b81a367..87fe9a8 100644 (file)
@@ -1720,7 +1720,7 @@ static int siccuart_open(struct tty_struct *tty, struct file *filp)
     return 0;
 }
 
-static struct tty_operations sicc_ops = {
+static const struct tty_operations sicc_ops = {
        .open = siccuart_open,
        .close = siccuart_close,
        .write = siccuart_write,
index 50b4bbd..5f66840 100644 (file)
@@ -942,20 +942,16 @@ _GLOBAL(kernel_thread)
        addi    r1,r1,16
        blr
 
+_GLOBAL(kernel_execve)
+       li      r0,__NR_execve
+       sc
+       bnslr
+       neg     r3,r3
+       blr
+
 /*
  * This routine is just here to keep GCC happy - sigh...
  */
 _GLOBAL(__main)
        blr
 
-#define SYSCALL(name) \
-_GLOBAL(name) \
-       li      r0,__NR_##name; \
-       sc; \
-       bnslr; \
-       lis     r4,errno@ha; \
-       stw     r3,errno@l(r4); \
-       li      r3,-1; \
-       blr
-
-SYSCALL(execve)
index ca28fb0..4d9ff5c 100644 (file)
@@ -369,11 +369,12 @@ void __kprobes kretprobe_trampoline_holder(void)
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
        struct kretprobe_instance *ri = NULL;
-       struct hlist_head *head;
+       struct hlist_head *head, empty_rp;
        struct hlist_node *node, *tmp;
        unsigned long flags, orig_ret_address = 0;
        unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
 
+       INIT_HLIST_HEAD(&empty_rp);
        spin_lock_irqsave(&kretprobe_lock, flags);
        head = kretprobe_inst_table_head(current);
 
@@ -399,7 +400,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
                        ri->rp->handler(ri, regs);
 
                orig_ret_address = (unsigned long)ri->ret_addr;
-               recycle_rp_inst(ri);
+               recycle_rp_inst(ri, &empty_rp);
 
                if (orig_ret_address != trampoline_address) {
                        /*
@@ -417,6 +418,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
        spin_unlock_irqrestore(&kretprobe_lock, flags);
        preempt_enable_no_resched();
 
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
        /*
         * By returning a non-zero value, we are telling
         * kprobe_handler() that we don't want the post_handler
index e351780..584ed95 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/file.h>
 #include <linux/utsname.h>
 #include <linux/personality.h>
+#include <linux/unistd.h>
 
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
@@ -266,3 +267,22 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args)
        return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice);
 }
 
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register const char *__arg1 asm("2") = filename;
+       register char *const*__arg2 asm("3") = argv;
+       register char *const*__arg3 asm("4") = envp;
+       register long __svcres asm("2");
+       asm volatile(
+               "svc %b1"
+               : "=d" (__svcres)
+               : "i" (__NR_execve),
+                 "0" (__arg1),
+                 "d" (__arg2),
+                 "d" (__arg3) : "memory");
+       return __svcres;
+}
index 5f58733..77491cf 100644 (file)
@@ -459,7 +459,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                seq_printf(m, "machine\t\t: %s\n", get_system_type());
 
        seq_printf(m, "processor\t: %d\n", cpu);
-       seq_printf(m, "cpu family\t: %s\n", system_utsname.machine);
+       seq_printf(m, "cpu family\t: %s\n", init_utsname()->machine);
        seq_printf(m, "cpu type\t: %s\n", get_cpu_subtype());
 
        show_cpuflags(m);
index 6c0fb7c..dbebadd 100644 (file)
@@ -42,6 +42,7 @@ cpumask_t cpu_possible_map;
 EXPORT_SYMBOL(cpu_possible_map);
 
 cpumask_t cpu_online_map;
+EXPORT_SYMBOL(cpu_online_map);
 static atomic_t cpus_booted = ATOMIC_INIT(0);
 
 /* These are defined by the board-specific code. */
index b68ff70..8fde950 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/cacheflush.h>
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
+#include <asm/unistd.h>
 
 /*
  * sys_pipe() is the normal C calling standard for creating
@@ -281,7 +282,7 @@ asmlinkage int sys_uname(struct old_utsname * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -309,3 +310,19 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
                                (u64)len0 << 32 | len1, advice);
 #endif
 }
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register long __sc0 __asm__ ("r3") = __NR_execve;
+       register long __sc4 __asm__ ("r4") = (long) filename;
+       register long __sc5 __asm__ ("r5") = (long) argv;
+       register long __sc6 __asm__ ("r6") = (long) envp;
+       __asm__ __volatile__ ("trapa    #0x13" : "=z" (__sc0)
+                       : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6)
+                       : "memory");
+       return __sc0;
+}
index db475b7..525d0ec 100644 (file)
 /*
  * This file handles the architecture-dependent parts of process handling..
  */
-
-/* Temporary flags/tests. All to be removed/undefined. BEGIN */
-#define IDLE_TRACE
-#define VM_SHOW_TABLES
-#define VM_TEST_FAULT
-#define VM_TEST_RTLBMISS
-#define VM_TEST_WTLBMISS
-
-#undef VM_SHOW_TABLES
-#undef IDLE_TRACE
-/* Temporary flags/tests. All to be removed/undefined. END */
-
-#define __KERNEL_SYSCALLS__
-#include <stdarg.h>
-
-#include <linux/kernel.h>
-#include <linux/rwsem.h>
 #include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/ptrace.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/user.h>
-#include <linux/a.out.h>
-#include <linux/interrupt.h>
-#include <linux/unistd.h>
-#include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/init.h>
-
+#include <linux/module.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
-#include <asm/system.h>
-#include <asm/io.h>
-#include <asm/processor.h>             /* includes also <asm/registers.h> */
-#include <asm/mmu_context.h>
-#include <asm/elf.h>
-#include <asm/page.h>
-
-#include <linux/irq.h>
 
 struct task_struct *last_task_used_math = NULL;
 
-#ifdef IDLE_TRACE
-#ifdef VM_SHOW_TABLES
-/* For testing */
-static void print_PTE(long base)
-{
-       int i, skip=0;
-       long long x, y, *p = (long long *) base;
-
-       for (i=0; i< 512; i++, p++){
-               if (*p == 0) {
-                       if (!skip) {
-                               skip++;
-                               printk("(0s) ");
-                       }
-               } else {
-                       skip=0;
-                       x = (*p) >> 32;
-                       y = (*p) & 0xffffffff;
-                       printk("%08Lx%08Lx ", x, y);
-                       if (!((i+1)&0x3)) printk("\n");
-               }
-       }
-}
-
-/* For testing */
-static void print_DIR(long base)
-{
-       int i, skip=0;
-       long *p = (long *) base;
-
-       for (i=0; i< 512; i++, p++){
-               if (*p == 0) {
-                       if (!skip) {
-                               skip++;
-                               printk("(0s) ");
-                       }
-               } else {
-                       skip=0;
-                       printk("%08lx ", *p);
-                       if (!((i+1)&0x7)) printk("\n");
-               }
-       }
-}
-
-/* For testing */
-static void print_vmalloc_first_tables(void)
-{
-
-#define PRESENT        0x800   /* Bit 11 */
-
-       /*
-        * Do it really dirty by looking at raw addresses,
-         * raw offsets, no types. If we used pgtable/pgalloc
-        * macros/definitions we could hide potential bugs.
-        *
-        * Note that pointers are 32-bit for CDC.
-        */
-       long pgdt, pmdt, ptet;
-
-       pgdt = (long) &swapper_pg_dir;
-       printk("-->PGD (0x%08lx):\n", pgdt);
-       print_DIR(pgdt);
-       printk("\n");
-
-       /* VMALLOC pool is mapped at 0xc0000000, second (pointer) entry in PGD */
-       pgdt += 4;
-       pmdt = (long) (* (long *) pgdt);
-       if (!(pmdt & PRESENT)) {
-               printk("No PMD\n");
-               return;
-       } else pmdt &= 0xfffff000;
-
-       printk("-->PMD (0x%08lx):\n", pmdt);
-       print_DIR(pmdt);
-       printk("\n");
-
-       /* Get the pmdt displacement for 0xc0000000 */
-       pmdt += 2048;
-
-       /* just look at first two address ranges ... */
-        /* ... 0xc0000000 ... */
-       ptet = (long) (* (long *) pmdt);
-       if (!(ptet & PRESENT)) {
-               printk("No PTE0\n");
-               return;
-       } else ptet &= 0xfffff000;
-
-       printk("-->PTE0 (0x%08lx):\n", ptet);
-       print_PTE(ptet);
-       printk("\n");
-
-        /* ... 0xc0001000 ... */
-       ptet += 4;
-       if (!(ptet & PRESENT)) {
-               printk("No PTE1\n");
-               return;
-       } else ptet &= 0xfffff000;
-       printk("-->PTE1 (0x%08lx):\n", ptet);
-       print_PTE(ptet);
-       printk("\n");
-}
-#else
-#define print_vmalloc_first_tables()
-#endif /* VM_SHOW_TABLES */
-
-static void test_VM(void)
-{
-       void *a, *b, *c;
-
-#ifdef VM_SHOW_TABLES
-       printk("Initial PGD/PMD/PTE\n");
-#endif
-        print_vmalloc_first_tables();
-
-       printk("Allocating 2 bytes\n");
-       a = vmalloc(2);
-        print_vmalloc_first_tables();
-
-       printk("Allocating 4100 bytes\n");
-       b = vmalloc(4100);
-        print_vmalloc_first_tables();
-
-       printk("Allocating 20234 bytes\n");
-       c = vmalloc(20234);
-        print_vmalloc_first_tables();
-
-#ifdef VM_TEST_FAULT
-       /* Here you may want to fault ! */
-
-#ifdef VM_TEST_RTLBMISS
-       printk("Ready to fault upon read.\n");
-       if (* (char *) a) {
-               printk("RTLBMISSed on area a !\n");
-       }
-       printk("RTLBMISSed on area a !\n");
-#endif
-
-#ifdef VM_TEST_WTLBMISS
-       printk("Ready to fault upon write.\n");
-       *((char *) b) = 'L';
-       printk("WTLBMISSed on area b !\n");
-#endif
-
-#endif /* VM_TEST_FAULT */
-
-       printk("Deallocating the 4100 byte chunk\n");
-       vfree(b);
-        print_vmalloc_first_tables();
-
-       printk("Deallocating the 2 byte chunk\n");
-       vfree(a);
-        print_vmalloc_first_tables();
-
-       printk("Deallocating the last chunk\n");
-       vfree(c);
-        print_vmalloc_first_tables();
-}
-
-extern unsigned long volatile jiffies;
-int once = 0;
-unsigned long old_jiffies;
-int pid = -1, pgid = -1;
-
-void idle_trace(void)
-{
-
-       _syscall0(int, getpid)
-       _syscall1(int, getpgid, int, pid)
-
-       if (!once) {
-               /* VM allocation/deallocation simple test */
-               test_VM();
-               pid = getpid();
-
-               printk("Got all through to Idle !!\n");
-               printk("I'm now going to loop forever ...\n");
-               printk("Any ! below is a timer tick.\n");
-               printk("Any . below is a getpgid system call from pid = %d.\n", pid);
-
-
-               old_jiffies = jiffies;
-               once++;
-       }
-
-       if (old_jiffies != jiffies) {
-               old_jiffies = jiffies - old_jiffies;
-               switch (old_jiffies) {
-               case 1:
-                       printk("!");
-                       break;
-               case 2:
-                       printk("!!");
-                       break;
-               case 3:
-                       printk("!!!");
-                       break;
-               case 4:
-                       printk("!!!!");
-                       break;
-               default:
-                       printk("(%d!)", (int) old_jiffies);
-               }
-               old_jiffies = jiffies;
-       }
-       pgid = getpgid(pid);
-       printk(".");
-}
-#else
-#define idle_trace()   do { } while (0)
-#endif /* IDLE_TRACE */
-
 static int hlt_counter = 1;
 
 #define HARD_IDLE_TIMEOUT (HZ / 3)
@@ -323,7 +78,6 @@ void cpu_idle(void)
                        local_irq_disable();
                        while (!need_resched()) {
                                local_irq_enable();
-                               idle_trace();
                                hlt();
                                local_irq_disable();
                        }
@@ -622,6 +376,10 @@ void free_task_struct(struct task_struct *p)
 /*
  * Create a kernel thread
  */
+ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+{
+       do_exit(fn(arg));
+}
 
 /*
  * This is the mechanism for creating a new kernel thread.
@@ -633,19 +391,17 @@ void free_task_struct(struct task_struct *p)
  */
 int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
 {
-       /* A bit less processor dependent than older sh ... */
-       unsigned int reply;
+       struct pt_regs regs;
 
-static __inline__ _syscall2(int,clone,unsigned long,flags,unsigned long,newsp)
-static __inline__ _syscall1(int,exit,int,ret)
+       memset(&regs, 0, sizeof(regs));
+       regs.regs[2] = (unsigned long)arg;
+       regs.regs[3] = (unsigned long)fn;
 
-       reply = clone(flags | CLONE_VM, 0);
-       if (!reply) {
-               /* Child */
-               reply = exit(fn(arg));
-       }
+       regs.pc = (unsigned long)kernel_thread_helper;
+       regs.sr = (1 << 30);
 
-       return reply;
+       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+                      &regs, 0, NULL, NULL);
 }
 
 /*
index 58ff7d5..ad0fa4e 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
 #include <asm/ptrace.h>
+#include <asm/unistd.h>
 
 #define REG_3  3
 
@@ -279,7 +280,25 @@ asmlinkage int sys_uname(struct old_utsname * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve);
+       register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename;
+       register unsigned long __sc3 __asm__ ("r3") = (unsigned long) argv;
+       register unsigned long __sc4 __asm__ ("r4") = (unsigned long) envp;
+       __asm__ __volatile__ ("trapa    %1 !\t\t\t execve(%2,%3,%4)"
+       : "=r" (__sc0)
+       : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) );
+       __asm__ __volatile__ ("!dummy   %0 %1 %2 %3"
+       : : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) : "memory");
+       return __sc0;
+}
index 896863f..a954a0c 100644 (file)
@@ -24,6 +24,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
+#include <asm/unistd.h>
 
 /* #define DEBUG_UNIMP_SYSCALL */
 
@@ -475,16 +476,38 @@ asmlinkage int sys_getdomainname(char __user *name, int len)
 
        down_read(&uts_sem);
        
-       nlen = strlen(system_utsname.domainname) + 1;
+       nlen = strlen(utsname()->domainname) + 1;
        err = -EINVAL;
        if (nlen > len)
                goto out;
 
        err = -EFAULT;
-       if (!copy_to_user(name, system_utsname.domainname, nlen))
+       if (!copy_to_user(name, utsname()->domainname, nlen))
                err = 0;
 
 out:
        up_read(&uts_sem);
        return err;
 }
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       long __res;
+       register long __g1 __asm__ ("g1") = __NR_execve;
+       register long __o0 __asm__ ("o0") = (long)(filename);
+       register long __o1 __asm__ ("o1") = (long)(argv);
+       register long __o2 __asm__ ("o2") = (long)(envp);
+       asm volatile ("t 0x10\n\t"
+                     "bcc 1f\n\t"
+                     "mov %%o0, %0\n\t"
+                     "sub %%g0, %%o0, %0\n\t"
+                     "1:\n\t"
+                     : "=r" (__res), "=&r" (__o0)
+                     : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
+                     : "cc");
+       return __res;
+}
index aa0fb2e..9d2cd97 100644 (file)
@@ -483,13 +483,18 @@ asmlinkage int sunos_uname(struct sunos_utsname __user *name)
 {
        int ret;
        down_read(&uts_sem);
-       ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0], sizeof(name->sname) - 1);
+       ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
+                          sizeof(name->sname) - 1);
        if (!ret) {
-               ret |= __copy_to_user(&name->nname[0], &system_utsname.nodename[0], sizeof(name->nname) - 1);
+               ret |= __copy_to_user(&name->nname[0], &utsname()->nodename[0],
+                                     sizeof(name->nname) - 1);
                ret |= __put_user('\0', &name->nname[8]);
-               ret |= __copy_to_user(&name->rel[0], &system_utsname.release[0], sizeof(name->rel) - 1);
-               ret |= __copy_to_user(&name->ver[0], &system_utsname.version[0], sizeof(name->ver) - 1);
-               ret |= __copy_to_user(&name->mach[0], &system_utsname.machine[0], sizeof(name->mach) - 1);
+               ret |= __copy_to_user(&name->rel[0], &utsname()->release[0],
+                                     sizeof(name->rel) - 1);
+               ret |= __copy_to_user(&name->ver[0], &utsname()->version[0],
+                                     sizeof(name->ver) - 1);
+               ret |= __copy_to_user(&name->mach[0], &utsname()->machine[0],
+                                     sizeof(name->mach) - 1);
        }
        up_read(&uts_sem);
        return ret ? -EFAULT : 0;
index 8d8ca71..b627f8d 100644 (file)
@@ -420,7 +420,7 @@ source "arch/sparc64/oprofile/Kconfig"
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && MODULES
+       depends on KALLSYMS && EXPERIMENTAL && MODULES
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
index e55466c..0b9c706 100644 (file)
@@ -4,8 +4,6 @@
  * Copyright (C) 1999 David S. Miller (davem@redhat.com)
  */
 
-#define __KERNEL_SYSCALLS__
-
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -14,6 +12,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/pm.h>
+#include <linux/syscalls.h>
 
 #include <asm/system.h>
 #include <asm/auxio.h>
@@ -98,7 +97,7 @@ again:
 
        /* Ok, down we go... */
        button_pressed = 0;
-       if (execve("/sbin/shutdown", argv, envp) < 0) {
+       if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
                printk("powerd: shutdown execution failed\n");
                add_wait_queue(&powerd_wait, &wait);
                goto again;
index c608c94..a53d4ab 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/utrap.h>
 #include <asm/perfctr.h>
 #include <asm/a.out.h>
+#include <asm/unistd.h>
 
 /* #define DEBUG_UNIMP_SYSCALL */
 
@@ -712,13 +713,13 @@ asmlinkage long sys_getdomainname(char __user *name, int len)
 
        down_read(&uts_sem);
        
-       nlen = strlen(system_utsname.domainname) + 1;
+       nlen = strlen(utsname()->domainname) + 1;
        err = -EINVAL;
        if (nlen > len)
                goto out;
 
        err = -EFAULT;
-       if (!copy_to_user(name, system_utsname.domainname, nlen))
+       if (!copy_to_user(name, utsname()->domainname, nlen))
                err = 0;
 
 out:
@@ -963,3 +964,23 @@ asmlinkage long sys_perfctr(int opcode, unsigned long arg0, unsigned long arg1,
        };
        return err;
 }
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       long __res;
+       register long __g1 __asm__ ("g1") = __NR_execve;
+       register long __o0 __asm__ ("o0") = (long)(filename);
+       register long __o1 __asm__ ("o1") = (long)(argv);
+       register long __o2 __asm__ ("o2") = (long)(envp);
+       asm volatile ("t 0x6d\n\t"
+                     "sub %%g0, %%o0, %0\n\t"
+                     "movcc %%xcc, %%o0, %0\n\t"
+                     : "=r" (__res), "=&r" (__o0)
+                     : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
+                     : "cc");
+       return __res;
+}
index 87ebdf8..953296b 100644 (file)
@@ -439,16 +439,16 @@ asmlinkage int sunos_uname(struct sunos_utsname __user *name)
        int ret;
 
        down_read(&uts_sem);
-       ret = copy_to_user(&name->sname[0], &system_utsname.sysname[0],
+       ret = copy_to_user(&name->sname[0], &utsname()->sysname[0],
                           sizeof(name->sname) - 1);
-       ret |= copy_to_user(&name->nname[0], &system_utsname.nodename[0],
+       ret |= copy_to_user(&name->nname[0], &utsname()->nodename[0],
                            sizeof(name->nname) - 1);
        ret |= put_user('\0', &name->nname[8]);
-       ret |= copy_to_user(&name->rel[0], &system_utsname.release[0],
+       ret |= copy_to_user(&name->rel[0], &utsname()->release[0],
                            sizeof(name->rel) - 1);
-       ret |= copy_to_user(&name->ver[0], &system_utsname.version[0],
+       ret |= copy_to_user(&name->ver[0], &utsname()->version[0],
                            sizeof(name->ver) - 1);
-       ret |= copy_to_user(&name->mach[0], &system_utsname.machine[0],
+       ret |= copy_to_user(&name->mach[0], &utsname()->machine[0],
                            sizeof(name->mach) - 1);
        up_read(&uts_sem);
        return (ret ? -EFAULT : 0);
index 9c58132..9ed9979 100644 (file)
@@ -249,7 +249,7 @@ asmlinkage int solaris_utssys(u32 buf, u32 flags, int which, u32 buf2)
                /* Let's cheat */
                err  = set_utsfield(v->sysname, "SunOS", 1, 0);
                down_read(&uts_sem);
-               err |= set_utsfield(v->nodename, system_utsname.nodename,
+               err |= set_utsfield(v->nodename, utsname()->nodename,
                                    1, 1);
                up_read(&uts_sem);
                err |= set_utsfield(v->release, "2.6", 0, 0);
@@ -273,7 +273,7 @@ asmlinkage int solaris_utsname(u32 buf)
        /* Why should we not lie a bit? */
        down_read(&uts_sem);
        err  = set_utsfield(v->sysname, "SunOS", 0, 0);
-       err |= set_utsfield(v->nodename, system_utsname.nodename, 1, 1);
+       err |= set_utsfield(v->nodename, utsname()->nodename, 1, 1);
        err |= set_utsfield(v->release, "5.6", 0, 0);
        err |= set_utsfield(v->version, "Generic", 0, 0);
        err |= set_utsfield(v->machine, machine(), 0, 0);
@@ -305,7 +305,7 @@ asmlinkage int solaris_sysinfo(int cmd, u32 buf, s32 count)
        case SI_HOSTNAME:
                r = buffer + 256;
                down_read(&uts_sem);
-               for (p = system_utsname.nodename, q = buffer; 
+               for (p = utsname()->nodename, q = buffer;
                     q < r && *p && *p != '.'; *q++ = *p++);
                up_read(&uts_sem);
                *q = 0;
index 563ce76..24747a4 100644 (file)
@@ -642,9 +642,9 @@ int line_remove(struct line *lines, unsigned int num, int n)
 }
 
 struct tty_driver *line_register_devfs(struct lines *set,
-                        struct line_driver *line_driver,
-                        struct tty_operations *ops, struct line *lines,
-                        int nlines)
+                                      struct line_driver *line_driver,
+                                      const struct tty_operations *ops,
+                                      struct line *lines, int nlines)
 {
        int i;
        struct tty_driver *driver = alloc_tty_driver(nlines);
index 773a134..a67dcbd 100644 (file)
@@ -106,9 +106,9 @@ void mconsole_version(struct mc_request *req)
 {
        char version[256];
 
-       sprintf(version, "%s %s %s %s %s", system_utsname.sysname,
-               system_utsname.nodename, system_utsname.release,
-               system_utsname.version, system_utsname.machine);
+       sprintf(version, "%s %s %s %s %s", utsname()->sysname,
+               utsname()->nodename, utsname()->release,
+               utsname()->version, utsname()->machine);
        mconsole_reply(req, version, 0, 0);
 }
 
index 642c9a0..7be2481 100644 (file)
@@ -91,10 +91,9 @@ extern int line_setup_irq(int fd, int input, int output, struct line *line,
                          void *data);
 extern void line_close_chan(struct line *line);
 extern struct tty_driver * line_register_devfs(struct lines *set,
-                               struct line_driver *line_driver,
-                               struct tty_operations *driver,
-                               struct line *lines,
-                               int nlines);
+                                              struct line_driver *line_driver,
+                                              const struct tty_operations *driver,
+                                              struct line *lines, int nlines);
 extern void lines_init(struct line *lines, int nlines, struct chan_opts *opts);
 extern void close_lines(struct line *lines, int nlines);
 
index 48cf88d..f5ed862 100644 (file)
@@ -110,7 +110,7 @@ long sys_uname(struct old_utsname __user * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err = copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        return err?-EFAULT:0;
 }
@@ -126,21 +126,21 @@ long sys_olduname(struct oldold_utsname __user * name)
 
        down_read(&uts_sem);
 
-       error = __copy_to_user(&name->sysname,&system_utsname.sysname,
+       error = __copy_to_user(&name->sysname, &utsname()->sysname,
                               __OLD_UTS_LEN);
-       error |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+       error |= __put_user(0, name->sysname + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->nodename, &utsname()->nodename,
                                __OLD_UTS_LEN);
-       error |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->release,&system_utsname.release,
+       error |= __put_user(0, name->nodename + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->release, &utsname()->release,
                                __OLD_UTS_LEN);
-       error |= __put_user(0,name->release+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->version,&system_utsname.version,
+       error |= __put_user(0, name->release + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->version, &utsname()->version,
                                __OLD_UTS_LEN);
-       error |= __put_user(0,name->version+__OLD_UTS_LEN);
-       error |= __copy_to_user(&name->machine,&system_utsname.machine,
+       error |= __put_user(0, name->version + __OLD_UTS_LEN);
+       error |= __copy_to_user(&name->machine, &utsname()->machine,
                                __OLD_UTS_LEN);
-       error |= __put_user(0,name->machine+__OLD_UTS_LEN);
+       error |= __put_user(0, name->machine + __OLD_UTS_LEN);
 
        up_read(&uts_sem);
 
@@ -164,3 +164,16 @@ int next_syscall_index(int limit)
        spin_unlock(&syscall_lock);
        return(ret);
 }
+
+/* Call um_execve() with the address limit at KERNEL_DS, then restore it. */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       mm_segment_t saved_fs = get_fs();
+       int err;
+
+       set_fs(KERNEL_DS);
+       err = um_execve(filename, argv, envp);
+       set_fs(saved_fs);
+
+       return err;
+}
index 5500571..97d88e7 100644 (file)
@@ -167,7 +167,7 @@ static char *usage_string =
 
 static int __init uml_version_setup(char *line, int *add)
 {
-       printf("%s\n", system_utsname.release);
+       printf("%s\n", init_utsname()->release);
        exit(0);
 
        return 0;
@@ -278,7 +278,7 @@ static int __init Usage(char *line, int *add)
 {
        const char **p;
 
-       printf(usage_string, system_utsname.release);
+       printf(usage_string, init_utsname()->release);
        p = &__uml_help_start;
        while (p < &__uml_help_end) {
                printf("%s", *p);
@@ -403,7 +403,7 @@ int linux_main(int argc, char **argv)
        /* Reserve up to 4M after the current brk */
        uml_reserved = ROUND_4M(brk_start) + (1 << 22);
 
-       setup_machinename(system_utsname.machine);
+       setup_machinename(init_utsname()->machine);
 
 #ifdef CONFIG_CMDLINE_ON_HOST
        argv1_begin = argv[1];
index ff20362..51f0893 100644 (file)
@@ -11,6 +11,7 @@
 #include <sys/mman.h>
 #include <sys/wait.h>
 #include <sys/mman.h>
+#include <sys/syscall.h>
 #include "ptrace_user.h"
 #include "os.h"
 #include "user.h"
@@ -140,11 +141,9 @@ void os_usr1_process(int pid)
  * syscalls, and also breaks with clone(), which does not unshare the TLS.
  */
 
-inline _syscall0(pid_t, getpid)
-
 int os_getpid(void)
 {
-       return(getpid());
+       return(syscall(__NR_getpid));
 }
 
 int os_getpgrp(void)
index 120abbe..6e945ab 100644 (file)
@@ -1,10 +1,9 @@
 #include <errno.h>
 #include <linux/unistd.h>
+#include <sys/syscall.h>
 #include "sysdep/tls.h"
 #include "user_util.h"
 
-static _syscall1(int, get_thread_area, user_desc_t *, u_info);
-
 /* Checks whether host supports TLS, and sets *tls_min according to the value
  * valid on the host.
  * i386 host have it == 6; x86_64 host have it == 12, for i386 emulation. */
@@ -17,7 +16,7 @@ void check_host_supports_tls(int *supports_tls, int *tls_min) {
                user_desc_t info;
                info.entry_number = val[i];
 
-               if (get_thread_area(&info) == 0) {
+               if (syscall(__NR_get_thread_area, &info) == 0) {
                        *tls_min = val[i];
                        *supports_tls = 1;
                        return;
index 9cb09a4..a2de258 100644 (file)
@@ -1,5 +1,6 @@
 #include <errno.h>
 #include <sys/ptrace.h>
+#include <sys/syscall.h>
 #include <asm/ldt.h>
 #include "sysdep/tls.h"
 #include "uml-config.h"
@@ -48,14 +49,11 @@ int os_get_thread_area(user_desc_t *info, int pid)
 #ifdef UML_CONFIG_MODE_TT
 #include "linux/unistd.h"
 
-static _syscall1(int, get_thread_area, user_desc_t *, u_info);
-static _syscall1(int, set_thread_area, user_desc_t *, u_info);
-
 int do_set_thread_area_tt(user_desc_t *info)
 {
        int ret;
 
-       ret = set_thread_area(info);
+       ret = syscall(__NR_set_thread_area,info);
        if (ret < 0) {
                ret = -errno;
        }
@@ -66,7 +64,7 @@ int do_get_thread_area_tt(user_desc_t *info)
 {
        int ret;
 
-       ret = get_thread_area(info);
+       ret = syscall(__NR_get_thread_area,info);
        if (ret < 0) {
                ret = -errno;
        }
index 1b0ad0e..8e55cd5 100644 (file)
@@ -5,20 +5,17 @@
 
 #include <linux/mman.h>
 #include <asm/unistd.h>
+#include <sys/syscall.h>
 
-static int errno;
-
-static inline _syscall2(int,munmap,void *,start,size_t,len)
-static inline _syscall6(void *,mmap2,void *,addr,size_t,len,int,prot,int,flags,int,fd,off_t,offset)
 int switcheroo(int fd, int prot, void *from, void *to, int size)
 {
-       if(munmap(to, size) < 0){
+       if (syscall(__NR_munmap, to, size) < 0){
                return(-1);
        }
-       if(mmap2(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
+       if (syscall(__NR_mmap2, to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1 ){
                return(-1);
        }
-       if(munmap(from, size) < 0){
+       if (syscall(__NR_munmap, from, size) < 0){
                return(-1);
        }
        return(0);
index 6fce9f4..73ce446 100644 (file)
@@ -21,7 +21,7 @@ asmlinkage long sys_uname64(struct new_utsname __user * name)
 {
        int err;
        down_read(&uts_sem);
-       err = copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        if (personality(current->personality) == PER_LINUX32)
                err |= copy_to_user(&name->machine, "i686", 5);
index d0a25af..ce3e07f 100644 (file)
@@ -16,7 +16,7 @@ void __show_regs(struct pt_regs * regs)
        printk("\n");
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s\n",
-              current->pid, current->comm, print_tainted(), system_utsname.release);
+              current->pid, current->comm, print_tainted(), init_utsname()->release);
        printk("RIP: %04lx:[<%016lx>] ", PT_REGS_CS(regs) & 0xffff,
               PT_REGS_RIP(regs));
        printk("\nRSP: %016lx  EFLAGS: %08lx\n", PT_REGS_RSP(regs),
index f4a4bff..57c9286 100644 (file)
@@ -5,20 +5,17 @@
 
 #include <linux/mman.h>
 #include <asm/unistd.h>
+#include <sys/syscall.h>
 
-static int errno;
-
-static inline _syscall2(int,munmap,void *,start,size_t,len)
-static inline _syscall6(void *,mmap,void *,addr,size_t,len,int,prot,int,flags,int,fd,off_t,offset)
 int switcheroo(int fd, int prot, void *from, void *to, int size)
 {
-       if(munmap(to, size) < 0){
+       if (syscall(__NR_munmap, to, size) < 0){
                return(-1);
        }
-       if(mmap(to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
+       if (syscall(__NR_mmap, to, size, prot, MAP_SHARED | MAP_FIXED, fd, 0) == (void*) -1){
                return(-1);
        }
-       if(munmap(from, size) < 0){
+       if (syscall(__NR_munmap, from, size) < 0){
                return(-1);
        }
        return(0);
index 815f8a4..92f514f 100644 (file)
@@ -104,7 +104,7 @@ int memcons_tty_chars_in_buffer (struct tty_struct *tty)
        return 0;
 }
 
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
        .open = memcons_tty_open,
        .write = memcons_tty_write,
        .write_room = memcons_tty_write_room,
index 3975aa0..9973596 100644 (file)
@@ -77,7 +77,7 @@ int simcons_tty_chars_in_buffer (struct tty_struct *tty)
        return 0;
 }
 
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
        .open = simcons_tty_open,
        .write = simcons_tty_write,
        .write_room = simcons_tty_write_room,
index 2ec0700..d2b1fb1 100644 (file)
@@ -33,6 +33,7 @@
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
 #include <asm/semaphore.h>
+#include <asm/unistd.h>
 
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
@@ -194,3 +195,22 @@ unsigned long sys_mmap (unsigned long addr, size_t len,
 out:
        return err;
 }
+
+/*
+ * Trap into execve (rather than call sys_execve) to get proper pt_regs.
+ * "memory" clobber: the syscall reads what filename/argv/envp point at.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       register const char *__a __asm__ ("r6") = filename;     /* keep const */
+       register const void *__b __asm__ ("r7") = argv;
+       register const void *__c __asm__ ("r8") = envp;
+       register unsigned long __syscall __asm__ ("r12") = __NR_execve;
+       register unsigned long __ret __asm__ ("r10");   /* set by the trap */
+       __asm__ __volatile__ ("trap 0"
+                       : "=r" (__ret), "=r" (__syscall)
+                       : "1" (__syscall), "r" (__a), "r" (__b), "r" (__c)
+                       : "r1", "r5", "r11", "r13", "r14",
+                         "r15", "r16", "r17", "r18", "r19", "memory");
+       return __ret;
+}
index b87a19f..0a5d8e6 100644 (file)
@@ -690,7 +690,7 @@ source "arch/x86_64/oprofile/Kconfig"
 
 config KPROBES
        bool "Kprobes (EXPERIMENTAL)"
-       depends on EXPERIMENTAL && MODULES
+       depends on KALLSYMS && EXPERIMENTAL && MODULES
        help
          Kprobes allows you to trap at almost any kernel address and
          execute a callback function.  register_kprobe() establishes
index f280d36..26a0171 100644 (file)
@@ -784,36 +784,36 @@ asmlinkage long sys32_olduname(struct oldold_utsname __user * name)
 
        if (!name)
                return -EFAULT;
-       if (!access_ok(VERIFY_WRITE,name,sizeof(struct oldold_utsname)))
+       if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
                return -EFAULT;
   
        down_read(&uts_sem);
-       
-       err = __copy_to_user(&name->sysname,&system_utsname.sysname,
+
+       err = __copy_to_user(&name->sysname,&utsname()->sysname,
                                __OLD_UTS_LEN);
        err |= __put_user(0,name->sysname+__OLD_UTS_LEN);
-       err |= __copy_to_user(&name->nodename,&system_utsname.nodename,
+       err |= __copy_to_user(&name->nodename,&utsname()->nodename,
                                __OLD_UTS_LEN);
        err |= __put_user(0,name->nodename+__OLD_UTS_LEN);
-       err |= __copy_to_user(&name->release,&system_utsname.release,
+       err |= __copy_to_user(&name->release,&utsname()->release,
                                __OLD_UTS_LEN);
        err |= __put_user(0,name->release+__OLD_UTS_LEN);
-       err |= __copy_to_user(&name->version,&system_utsname.version,
+       err |= __copy_to_user(&name->version,&utsname()->version,
                                __OLD_UTS_LEN);
        err |= __put_user(0,name->version+__OLD_UTS_LEN);
-        { 
-                char *arch = "x86_64";
-                if (personality(current->personality) == PER_LINUX32)
-                        arch = "i686";
+       {
+               char *arch = "x86_64";
+               if (personality(current->personality) == PER_LINUX32)
+                       arch = "i686";
                 
-                err |= __copy_to_user(&name->machine,arch,strlen(arch)+1);
-        }
-       
-        up_read(&uts_sem);
-        
-        err = err ? -EFAULT : 0;
-        
-        return err;
+               err |= __copy_to_user(&name->machine, arch, strlen(arch)+1);
+       }
+
+       up_read(&uts_sem);
+
+       err = err ? -EFAULT : 0;
+
+       return err;
 }
 
 long sys32_uname(struct old_utsname __user * name)
@@ -822,7 +822,7 @@ long sys32_uname(struct old_utsname __user * name)
        if (!name)
                return -EFAULT;
        down_read(&uts_sem);
-       err=copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        if (personality(current->personality) == PER_LINUX32) 
                err |= copy_to_user(&name->machine, "i686", 5);
index 2802524..b8285cf 100644 (file)
@@ -1023,7 +1023,7 @@ ENDPROC(child_rip)
  * do_sys_execve asm fallback arguments:
  *     rdi: name, rsi: argv, rdx: envp, fake frame on the stack
  */
-ENTRY(execve)
+ENTRY(kernel_execve)
        CFI_STARTPROC
        FAKE_STACK_FRAME $0
        SAVE_ALL        
@@ -1036,7 +1036,7 @@ ENTRY(execve)
        UNFAKE_STACK_FRAME
        ret
        CFI_ENDPROC
-ENDPROC(execve)
+ENDPROC(kernel_execve)
 
 KPROBE_ENTRY(page_fault)
        errorentry do_page_fault
index ffc73ac..ac24156 100644 (file)
@@ -270,20 +270,19 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
                                      struct pt_regs *regs)
 {
        unsigned long *sara = (unsigned long *)regs->rsp;
-        struct kretprobe_instance *ri;
+       struct kretprobe_instance *ri;
 
-        if ((ri = get_free_rp_inst(rp)) != NULL) {
-                ri->rp = rp;
-                ri->task = current;
+       if ((ri = get_free_rp_inst(rp)) != NULL) {
+               ri->rp = rp;
+               ri->task = current;
                ri->ret_addr = (kprobe_opcode_t *) *sara;
 
                /* Replace the return addr with trampoline addr */
                *sara = (unsigned long) &kretprobe_trampoline;
-
-                add_rp_inst(ri);
-        } else {
-                rp->nmissed++;
-        }
+               add_rp_inst(ri);
+       } else {
+               rp->nmissed++;
+       }
 }
 
 int __kprobes kprobe_handler(struct pt_regs *regs)
@@ -405,14 +404,15 @@ no_kprobe:
  */
 int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
 {
-        struct kretprobe_instance *ri = NULL;
-        struct hlist_head *head;
-        struct hlist_node *node, *tmp;
+       struct kretprobe_instance *ri = NULL;
+       struct hlist_head *head, empty_rp;
+       struct hlist_node *node, *tmp;
        unsigned long flags, orig_ret_address = 0;
        unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
 
+       INIT_HLIST_HEAD(&empty_rp);
        spin_lock_irqsave(&kretprobe_lock, flags);
-        head = kretprobe_inst_table_head(current);
+       head = kretprobe_inst_table_head(current);
 
        /*
         * It is possible to have multiple instances associated with a given
@@ -423,20 +423,20 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
         * We can handle this because:
         *     - instances are always inserted at the head of the list
         *     - when multiple return probes are registered for the same
-         *       function, the first instance's ret_addr will point to the
+        *       function, the first instance's ret_addr will point to the
         *       real return address, and all the rest will point to
         *       kretprobe_trampoline
         */
        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-                if (ri->task != current)
+               if (ri->task != current)
                        /* another task is sharing our hash bucket */
-                        continue;
+                       continue;
 
                if (ri->rp && ri->rp->handler)
                        ri->rp->handler(ri, regs);
 
                orig_ret_address = (unsigned long)ri->ret_addr;
-               recycle_rp_inst(ri);
+               recycle_rp_inst(ri, &empty_rp);
 
                if (orig_ret_address != trampoline_address)
                        /*
@@ -454,12 +454,16 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
        spin_unlock_irqrestore(&kretprobe_lock, flags);
        preempt_enable_no_resched();
 
-        /*
-         * By returning a non-zero value, we are telling
-         * kprobe_handler() that we don't want the post_handler
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
+       /*
+        * By returning a non-zero value, we are telling
+        * kprobe_handler() that we don't want the post_handler
         * to run (and have re-enabled preemption)
-         */
-        return 1;
+        */
+       return 1;
 }
 
 /*
index 458006a..de10cb8 100644 (file)
@@ -294,9 +294,9 @@ void __show_regs(struct pt_regs * regs)
        print_modules();
        printk("Pid: %d, comm: %.20s %s %s %.*s\n",
                current->pid, current->comm, print_tainted(),
-               system_utsname.release,
-               (int)strcspn(system_utsname.version, " "),
-               system_utsname.version);
+               init_utsname()->release,
+               (int)strcspn(init_utsname()->version, " "),
+               init_utsname()->version);
        printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
        printk_address(regs->rip); 
        printk("RSP: %04lx:%016lx  EFLAGS: %08lx\n", regs->ss, regs->rsp,
index 6449ea8..76bf7c2 100644 (file)
@@ -148,7 +148,7 @@ asmlinkage long sys_uname(struct new_utsname __user * name)
 {
        int err;
        down_read(&uts_sem);
-       err = copy_to_user(name, &system_utsname, sizeof (*name));
+       err = copy_to_user(name, utsname(), sizeof (*name));
        up_read(&uts_sem);
        if (personality(current->personality) == PER_LINUX32) 
                err |= copy_to_user(&name->machine, "i686", 5);                 
index 4688ba2..d9285d4 100644 (file)
@@ -128,7 +128,7 @@ out:
 
 int sys_uname(struct old_utsname * name)
 {
-       if (name && !copy_to_user(name, &system_utsname, sizeof (*name)))
+       if (name && !copy_to_user(name, utsname(), sizeof (*name)))
                return 0;
        return -EFAULT;
 }
@@ -266,3 +266,23 @@ void system_call (struct pt_regs *regs)
        regs->areg[2] = res;
        do_syscall_trace();
 }
+
+/*
+ * Do a system call from kernel instead of calling sys_execve so we
+ * end up with proper pt_regs.  "memory": execve reads via the pointers.
+ */
+int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+{
+       long __res;
+       asm volatile (
+               "  mov   a5, %2 \n"     /* a5 = filename */
+               "  mov   a4, %4 \n"     /* a4 = envp */
+               "  mov   a3, %3 \n"     /* a3 = argv */
+               "  movi  a2, %1 \n"     /* a2 = __NR_execve */
+               "  syscall      \n"
+               "  mov   %0, a2 \n"     /* a2 is copied out as the result */
+               : "=a" (__res)
+               : "i" (__NR_execve), "a" (filename), "a" (argv), "a" (envp)
+               : "a2", "a3", "a4", "a5", "memory");
+       return __res;
+}
index 22d3c57..5c947ca 100644 (file)
@@ -191,7 +191,7 @@ static int rs_read_proc(char *page, char **start, off_t off, int count,
 }
 
 
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
        .open = rs_open,
        .close = rs_close,
        .write = rs_write,
index 9d6713a..d0e92ed 100644 (file)
@@ -1958,7 +1958,7 @@ static void show_serial_version(void)
 }
 
 
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
        .open = rs_open,
        .close = rs_close,
        .write = rs_write,
index c1c6728..f85b4eb 100644 (file)
@@ -5205,7 +5205,7 @@ done:
     extra ports are ignored.
  */
 
-static struct tty_operations cy_ops = {
+static const struct tty_operations cy_ops = {
     .open = cy_open,
     .close = cy_close,
     .write = cy_write,
index 86d290e..3baa2ab 100644 (file)
@@ -1125,7 +1125,7 @@ static void __exit epca_module_exit(void)
 
 module_exit(epca_module_exit);
 
-static struct tty_operations pc_ops = {
+static const struct tty_operations pc_ops = {
        .open = pc_open,
        .close = pc_close,
        .write = pc_write,
index afcd83d..05788c7 100644 (file)
@@ -2376,7 +2376,7 @@ static inline int autoconfig(struct esp_struct * info)
        return (port_detected);
 }
 
-static struct tty_operations esp_ops = {
+static const struct tty_operations esp_ops = {
        .open = esp_open,
        .close = rs_close,
        .write = rs_write,
index a76d2c4..4053d1c 100644 (file)
@@ -696,7 +696,7 @@ int khvcd(void *unused)
        return 0;
 }
 
-static struct tty_operations hvc_ops = {
+static const struct tty_operations hvc_ops = {
        .open = hvc_open,
        .close = hvc_close,
        .write = hvc_write,
index 4589ff3..0b89bcd 100644 (file)
@@ -1306,7 +1306,7 @@ static int hvcs_chars_in_buffer(struct tty_struct *tty)
        return hvcsd->chars_in_buffer;
 }
 
-static struct tty_operations hvcs_ops = {
+static const struct tty_operations hvcs_ops = {
        .open = hvcs_open,
        .close = hvcs_close,
        .hangup = hvcs_hangup,
index a89a95f..c07dc58 100644 (file)
@@ -1130,7 +1130,7 @@ static int hvsi_tiocmset(struct tty_struct *tty, struct file *file,
 }
 
 
-static struct tty_operations hvsi_ops = {
+static const struct tty_operations hvsi_ops = {
        .open = hvsi_open,
        .close = hvsi_close,
        .write = hvsi_write,
index 331f447..4828bc9 100644 (file)
@@ -458,7 +458,7 @@ cleanup_module(void)
 }
 #endif /* MODULE */
 
-static struct tty_operations ip2_ops = {
+static const struct tty_operations ip2_ops = {
        .open            = ip2_open,
        .close           = ip2_close,
        .write           = ip2_write,
index 2e1da63..ea2bbf8 100644 (file)
@@ -1550,7 +1550,7 @@ static void isicom_unregister_ioregion(struct pci_dev *pdev)
        board->base = 0;
 }
 
-static struct tty_operations isicom_ops = {
+static const struct tty_operations isicom_ops = {
        .open                   = isicom_open,
        .close                  = isicom_close,
        .write                  = isicom_write,
index 6b4d82a..d6e0315 100644 (file)
@@ -4636,7 +4636,7 @@ static int stli_memioctl(struct inode *ip, struct file *fp, unsigned int cmd, un
        return rc;
 }
 
-static struct tty_operations stli_ops = {
+static const struct tty_operations stli_ops = {
        .open = stli_open,
        .close = stli_close,
        .write = stli_write,
index 3e90aac..99fb070 100644 (file)
@@ -108,7 +108,11 @@ const int NR_TYPES = ARRAY_SIZE(max_vals);
 struct kbd_struct kbd_table[MAX_NR_CONSOLES];
 static struct kbd_struct *kbd = kbd_table;
 
-int spawnpid, spawnsig;
+struct vt_spawn_console vt_spawn_con = {
+       .lock = __SPIN_LOCK_UNLOCKED(vt_spawn_con.lock), /* named for lockdep */
+       .pid  = NULL,   /* fn_spawn_con() sends nothing while this is NULL */
+       .sig  = 0,
+};
 
 /*
  * Variables exported for vt.c
@@ -578,9 +582,13 @@ static void fn_compose(struct vc_data *vc, struct pt_regs *regs)
 
 static void fn_spawn_con(struct vc_data *vc, struct pt_regs *regs)
 {
-       if (spawnpid)
-               if (kill_proc(spawnpid, spawnsig, 1))
-                       spawnpid = 0;
+       spin_lock(&vt_spawn_con.lock);
+       if (vt_spawn_con.pid)
+               if (kill_pid(vt_spawn_con.pid, vt_spawn_con.sig, 1)) {
+                       put_pid(vt_spawn_con.pid);
+                       vt_spawn_con.pid = NULL;
+               }
+       spin_unlock(&vt_spawn_con.lock);
 }
 
 static void fn_SAK(struct vc_data *vc, struct pt_regs *regs)
index c1a6d3c..b401383 100644 (file)
@@ -281,7 +281,7 @@ static int moxa_get_serial_info(struct moxa_str *, struct serial_struct __user *
 static int moxa_set_serial_info(struct moxa_str *, struct serial_struct __user *);
 static void MoxaSetFifo(int port, int enable);
 
-static struct tty_operations moxa_ops = {
+static const struct tty_operations moxa_ops = {
        .open = moxa_open,
        .close = moxa_close,
        .write = moxa_write,
index 27a6537..8253fca 100644 (file)
@@ -453,7 +453,7 @@ static int CheckIsMoxaMust(int io)
 
 /* above is modified by Victor Yu. 08-15-2002 */
 
-static struct tty_operations mxser_ops = {
+static const struct tty_operations mxser_ops = {
        .open = mxser_open,
        .close = mxser_close,
        .write = mxser_write,
index 7c57ebf..ea1aa77 100644 (file)
@@ -127,9 +127,8 @@ static void button_consume_callbacks (int bpcount)
 static void button_sequence_finished (unsigned long parameters)
 {
 #ifdef CONFIG_NWBUTTON_REBOOT          /* Reboot using button is enabled */
-       if (button_press_count == reboot_count) {
-               kill_proc (1, SIGINT, 1);       /* Ask init to reboot us */
-       }
+       if (button_press_count == reboot_count)
+               kill_cad_pid(SIGINT, 1);        /* Ask init to reboot us */
 #endif /* CONFIG_NWBUTTON_REBOOT */
        button_consume_callbacks (button_press_count);
        bcount = sprintf (button_output_buffer, "%d\n", button_press_count);
index 00f574c..dd845cb 100644 (file)
@@ -3010,7 +3010,7 @@ static struct pcmcia_driver mgslpc_driver = {
        .resume         = mgslpc_resume,
 };
 
-static struct tty_operations mgslpc_ops = {
+static const struct tty_operations mgslpc_ops = {
        .open = mgslpc_open,
        .close = mgslpc_close,
        .write = mgslpc_write,
index 34dd4c3..80d3eed 100644 (file)
@@ -224,7 +224,7 @@ static void pty_set_termios(struct tty_struct *tty, struct termios *old_termios)
         tty->termios->c_cflag |= (CS8 | CREAD);
 }
 
-static struct tty_operations pty_ops = {
+static const struct tty_operations pty_ops = {
        .open = pty_open,
        .close = pty_close,
        .write = pty_write,
index b430a12..07f47a0 100644 (file)
@@ -889,8 +889,8 @@ static void init_std_data(struct entropy_store *r)
 
        do_gettimeofday(&tv);
        add_entropy_words(r, (__u32 *)&tv, sizeof(tv)/4);
-       add_entropy_words(r, (__u32 *)&system_utsname,
-                         sizeof(system_utsname)/4);
+       add_entropy_words(r, (__u32 *)utsname(),
+                         sizeof(*(utsname()))/4);
 }
 
 static int __init rand_initialize(void)
index 3fa80aa..202a3b0 100644 (file)
@@ -727,7 +727,7 @@ static struct vpd_prom *get_VPD_PROM(struct Host *hp)
        return &vpdp;
 }
 
-static struct tty_operations rio_ops = {
+static const struct tty_operations rio_ops = {
        .open = riotopen,
        .close = gs_close,
        .write = gs_write,
index 06b9f78..214d850 100644 (file)
@@ -1583,7 +1583,7 @@ static void do_softint(void *private_)
        }
 }
 
-static struct tty_operations riscom_ops = {
+static const struct tty_operations riscom_ops = {
        .open  = rc_open,
        .close = rc_close,
        .write = rc_write,
index 0ac1318..bac8005 100644 (file)
@@ -2334,7 +2334,7 @@ static int __init init_ISA(int i)
        return (1);
 }
 
-static struct tty_operations rocket_ops = {
+static const struct tty_operations rocket_ops = {
        .open = rp_open,
        .close = rp_close,
        .write = rp_write,
index 510bd3e..65c751d 100644 (file)
@@ -661,7 +661,7 @@ static void a2232_init_portstructs(void)
        }
 }
 
-static struct tty_operations a2232_ops = {
+static const struct tty_operations a2232_ops = {
        .open = a2232_open,
        .close = gs_close,
        .write = gs_write,
index 21a710c..b4ea126 100644 (file)
@@ -2158,7 +2158,7 @@ mvme167_serial_console_setup(int cflag)
                                        rcor >> 5, rbpr);
 } /* serial_console_init */
 
-static struct tty_operations cy_ops = {
+static const struct tty_operations cy_ops = {
        .open = cy_open,
        .close = cy_close,
        .write = cy_write,
index d12d4f6..864854c 100644 (file)
@@ -220,7 +220,7 @@ scdrv_dispatch_event(char *event, int len)
                               " Sending SIGPWR to init...\n");
 
                /* give a SIGPWR signal to init proc */
-               kill_proc(1, SIGPWR, 0);
+               kill_cad_pid(SIGPWR, 0);
        } else {
                /* print to system log */
                printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
index f52c7c3..902c48d 100644 (file)
@@ -2363,7 +2363,7 @@ static void do_softint(void *private_)
        func_exit();
 }
 
-static struct tty_operations sx_ops = {
+static const struct tty_operations sx_ops = {
        .open  = sx_open,
        .close = sx_close,
        .write = sx_write,
index 3beb220..bd71153 100644 (file)
@@ -2993,7 +2993,7 @@ static int stl_memioctl(struct inode *ip, struct file *fp, unsigned int cmd, uns
        return(rc);
 }
 
-static struct tty_operations stl_ops = {
+static const struct tty_operations stl_ops = {
        .open = stl_open,
        .close = stl_close,
        .write = stl_write,
index e1cd2bc..57e31e5 100644 (file)
@@ -2226,7 +2226,7 @@ static int probe_si (struct sx_board *board)
        return 1;
 }
 
-static struct tty_operations sx_ops = {
+static const struct tty_operations sx_ops = {
        .break_ctl = sx_break,
        .open   = sx_open,
        .close = gs_close,
index 78b1b1a..244dc30 100644 (file)
@@ -4360,7 +4360,7 @@ static struct mgsl_struct* mgsl_allocate_device(void)
 
 }      /* end of mgsl_allocate_device()*/
 
-static struct tty_operations mgsl_ops = {
+static const struct tty_operations mgsl_ops = {
        .open = mgsl_open,
        .close = mgsl_close,
        .write = mgsl_write,
index 78bc851..bdc7cb2 100644 (file)
@@ -3441,7 +3441,7 @@ static void __devexit remove_one(struct pci_dev *dev)
 {
 }
 
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
        .open = open,
        .close = close,
        .write = write,
index 66f3754..6eb75dc 100644 (file)
@@ -3929,7 +3929,7 @@ void device_init(int adapter_num, struct pci_dev *pdev)
        }
 }
 
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
        .open = open,
        .close = close,
        .write = write,
index 3337417..e90ea39 100644 (file)
@@ -3680,7 +3680,8 @@ void put_tty_driver(struct tty_driver *driver)
        kfree(driver);
 }
 
-void tty_set_operations(struct tty_driver *driver, struct tty_operations *op)
+void tty_set_operations(struct tty_driver *driver,
+                       const struct tty_operations *op)
 {
        driver->open = op->open;
        driver->close = op->close;
index f3efeaf..a362ee9 100644 (file)
@@ -1047,7 +1047,7 @@ static int send_open(HvLpIndex remoteLp, void *sem)
                        0, 0, 0, 0);
 }
 
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
        .open = viotty_open,
        .close = viotty_close,
        .write = viotty_write,
index bfe5ea9..c2ca31e 100644 (file)
@@ -113,7 +113,7 @@ static struct real_driver scc_real_driver = {
 };
 
 
-static struct tty_operations scc_ops = {
+static const struct tty_operations scc_ops = {
        .open   = scc_open,
        .close = gs_close,
        .write = gs_write,
index fb75da9..ec0c070 100644 (file)
@@ -903,6 +903,7 @@ void vc_deallocate(unsigned int currcons)
        if (vc_cons_allocated(currcons)) {
                struct vc_data *vc = vc_cons[currcons].d;
                vc->vc_sw->con_deinit(vc);
+               put_pid(vc->vt_pid);
                module_put(vc->vc_sw->owner);
                if (vc->vc_kmalloced)
                        kfree(vc->vc_screenbuf);
@@ -2674,7 +2675,7 @@ static int __init con_init(void)
 }
 console_initcall(con_init);
 
-static struct tty_operations con_ops = {
+static const struct tty_operations con_ops = {
        .open = con_open,
        .close = con_close,
        .write = con_write,
index a53e382..ac5d60e 100644 (file)
@@ -645,13 +645,16 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
         */
        case KDSIGACCEPT:
        {
-               extern int spawnpid, spawnsig;
                if (!perm || !capable(CAP_KILL))
                  return -EPERM;
                if (!valid_signal(arg) || arg < 1 || arg == SIGKILL)
                  return -EINVAL;
-               spawnpid = current->pid;
-               spawnsig = arg;
+
+               spin_lock_irq(&vt_spawn_con.lock);
+               put_pid(vt_spawn_con.pid);
+               vt_spawn_con.pid = get_pid(task_pid(current));
+               vt_spawn_con.sig = arg;
+               spin_unlock_irq(&vt_spawn_con.lock);
                return 0;
        }
 
@@ -669,7 +672,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
                vc->vt_mode = tmp;
                /* the frsig is ignored, so we set it to 0 */
                vc->vt_mode.frsig = 0;
-               vc->vt_pid = current->pid;
+               put_pid(xchg(&vc->vt_pid, get_pid(task_pid(current))));
                /* no switch is required -- saw@shade.msu.ru */
                vc->vt_newvt = -1;
                release_console_sem();
@@ -1060,7 +1063,7 @@ void reset_vc(struct vc_data *vc)
        vc->vt_mode.relsig = 0;
        vc->vt_mode.acqsig = 0;
        vc->vt_mode.frsig = 0;
-       vc->vt_pid = -1;
+       put_pid(xchg(&vc->vt_pid, NULL));
        vc->vt_newvt = -1;
        if (!in_interrupt())    /* Via keyboard.c:SAK() - akpm */
                reset_palette(vc);
@@ -1111,7 +1114,7 @@ static void complete_change_console(struct vc_data *vc)
                 * tell us if the process has gone or something else
                 * is awry
                 */
-               if (kill_proc(vc->vt_pid, vc->vt_mode.acqsig, 1) != 0) {
+               if (kill_pid(vc->vt_pid, vc->vt_mode.acqsig, 1) != 0) {
                /*
                 * The controlling process has died, so we revert back to
                 * normal operation. In this case, we'll also change back
@@ -1171,7 +1174,7 @@ void change_console(struct vc_data *new_vc)
                 * tell us if the process has gone or something else
                 * is awry
                 */
-               if (kill_proc(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) {
+               if (kill_pid(vc->vt_pid, vc->vt_mode.relsig, 1) == 0) {
                        /*
                         * It worked. Mark the vt to switch to and
                         * return. The process needs to send us a
index 42eaed8..a545610 100644 (file)
@@ -1601,7 +1601,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
        dev->mmap = ipath_mmap;
 
        snprintf(dev->node_desc, sizeof(dev->node_desc),
-                IPATH_IDSTR " %s", system_utsname.nodename);
+                IPATH_IDSTR " %s", init_utsname()->nodename);
 
        ret = ib_register_device(dev);
        if (ret)
index 669f763..11844bb 100644 (file)
@@ -1298,7 +1298,7 @@ static int capinc_tty_read_proc(char *page, char **start, off_t off,
 
 static struct tty_driver *capinc_tty_driver;
 
-static struct tty_operations capinc_ops = {
+static const struct tty_operations capinc_ops = {
        .open = capinc_tty_open,
        .close = capinc_tty_close,
        .write = capinc_tty_write,
index bd2e426..596f3ae 100644 (file)
@@ -134,7 +134,7 @@ static int  if_tiocmset(struct tty_struct *tty, struct file *file,
 static int  if_write(struct tty_struct *tty,
                     const unsigned char *buf, int count);
 
-static struct tty_operations if_ops = {
+static const struct tty_operations if_ops = {
        .open =                 if_open,
        .close =                if_close,
        .ioctl =                if_ioctl,
index 9ae3a7f..9ad840e 100644 (file)
@@ -83,5 +83,6 @@ void gigaset_init_dev_sysfs(struct cardstate *cs)
                return;
 
        gig_dbg(DEBUG_INIT, "setting up sysfs");
-       class_device_create_file(cs->class, &class_device_attr_cidmode);
+       if (class_device_create_file(cs->class, &class_device_attr_cidmode))
+               dev_err(cs->dev, "could not create sysfs attribute\n");
 }
index 75920aa..2f9d511 100644 (file)
@@ -1316,7 +1316,18 @@ void dlogframe(struct IsdnCardState *cs, struct sk_buff *skb, int dir);
 void iecpy(u_char * dest, u_char * iestart, int ieoffset);
 #endif /* __KERNEL__ */
 
-#define HZDELAY(jiffs) {int tout = jiffs; while (tout--) udelay(1000000/HZ);}
+/*
+ * Busy-wait for `jiffs' jiffies.
+ *
+ * Each jiffy is spent as USEC_PER_SEC / HZ back-to-back udelay(1) calls.
+ * NOTE(review): presumably split into 1us steps so that udelay() is never
+ * handed a large argument -- confirm against the arch udelay() limits.
+ *
+ * Expands to a single do { } while (0) statement; `jiffs' is evaluated once.
+ */
+#define HZDELAY(jiffs) do {                                    \
+               int tout = jiffs;                               \
+                                                               \
+               while (tout--) {                                \
+                       int loops = USEC_PER_SEC / HZ;          \
+                       while (loops--)                         \
+                               udelay(1);                      \
+               }                                               \
+       } while (0)
 
 int ll_run(struct IsdnCardState *cs, int addfeatures);
 int CallcNew(void);
index 9ab66e8..2b91bb0 100644 (file)
@@ -1860,7 +1860,7 @@ modem_write_profile(atemu * m)
                send_sig(SIGIO, dev->profd, 1);
 }
 
-static struct tty_operations modem_ops = {
+static const struct tty_operations modem_ops = {
         .open = isdn_tty_open,
        .close = isdn_tty_close,
        .write = isdn_tty_write,
index c972fe0..9878183 100644 (file)
@@ -26,7 +26,6 @@
 
 
 
-#define __KERNEL_SYSCALLS__
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
index 19c2b85..c1bf1fb 100644 (file)
@@ -3,5 +3,6 @@
 #
 obj- := misc.o # Dummy rule to force built-in.o to be made
 
-obj-$(CONFIG_IBM_ASM)  += ibmasm/
+obj-$(CONFIG_IBM_ASM)          += ibmasm/
 obj-$(CONFIG_HDPU_FEATURES)    += hdpuftrs/
+obj-$(CONFIG_LKDTM)            += lkdtm.o
diff --git a/drivers/misc/lkdtm.c b/drivers/misc/lkdtm.c
new file mode 100644 (file)
index 0000000..e689ee9
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * Kprobe module for testing crash dumps
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Ankita Garg <ankita@in.ibm.com>
+ *
+ * This module induces system failures at predefined crashpoints to
+ * evaluate the reliability of crash dumps obtained using different dumping
+ * solutions.
+ *
+ * It is adapted from the Linux Kernel Dump Test Tool by
+ * Fernando Luis Vazquez Cao <http://lkdtt.sourceforge.net>
+ *
+ * Usage :  insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
+ *                                                     [cpoint_count={>0}]
+ *
+ * recur_count : Recursion level for the stack overflow test. Default is 10.
+ *
+ * cpoint_name : Crash point where the kernel is to be crashed. It can be
+ *              one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
+ *              FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
+ *              IDE_CORE_CP
+ *
+ * cpoint_type : Indicates the action to be taken on hitting the crash point.
+ *              It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW
+ *
+ * cpoint_count : Indicates the number of times the crash point is to be hit
+ *               to trigger an action. The default is 10.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kprobes.h>
+#include <linux/kallsyms.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi_cmnd.h>
+
+#ifdef CONFIG_IDE
+#include <linux/ide.h>
+#endif
+
+/* Number of entries in cp_name[]; loop bound in lkdtm_parse_commandline(). */
+#define NUM_CPOINTS 8
+/* Number of entries in cp_type[]; loop bound in lkdtm_parse_commandline(). */
+#define NUM_CPOINT_TYPES 5
+/* Initial value of the cpoint_count module parameter. */
+#define DEFAULT_COUNT 10
+/* Initial value of the recur_count module parameter. */
+#define REC_NUM_DEFAULT 10
+
+/*
+ * Crash points selectable through the cpoint_name parameter.
+ *
+ * INVALID (0) means "nothing selected".  The remaining values must stay in
+ * the same order as cp_name[], because lkdtm_parse_commandline() maps
+ * cp_name[i] to the enum value i + 1.
+ */
+enum cname {
+       INVALID,
+       INT_HARDWARE_ENTRY,
+       INT_HW_IRQ_EN,
+       INT_TASKLET_ENTRY,
+       FS_DEVRW,
+       MEM_SWAPOUT,
+       TIMERADD,
+       SCSI_DISPATCH_CMD,
+       IDE_CORE_CP
+};
+
+/*
+ * Actions selectable through the cpoint_type parameter.
+ *
+ * NONE (0) means "nothing selected".  The remaining values must stay in the
+ * same order as cp_type[], because lkdtm_parse_commandline() maps
+ * cp_type[i] to the enum value i + 1.  lkdtm_handler() switches on this.
+ */
+enum ctype {
+       NONE,
+       PANIC,
+       BUG,
+       EXCEPTION,
+       LOOP,
+       OVERFLOW
+};
+
+/*
+ * Accepted cpoint_name strings.  Entry i names the enum cname value i + 1,
+ * so the order here must match the enum.
+ */
+static char* cp_name[] = {
+       "INT_HARDWARE_ENTRY",
+       "INT_HW_IRQ_EN",
+       "INT_TASKLET_ENTRY",
+       "FS_DEVRW",
+       "MEM_SWAPOUT",
+       "TIMERADD",
+       "SCSI_DISPATCH_CMD",
+       "IDE_CORE_CP"
+};
+
+/*
+ * Accepted cpoint_type strings.  Entry i names the enum ctype value i + 1,
+ * so the order here must match the enum.
+ */
+static char* cp_type[] = {
+       "PANIC",
+       "BUG",
+       "EXCEPTION",
+       "LOOP",
+       "OVERFLOW"
+};
+
+/* The one jprobe this module registers; filled in by lkdtm_module_init(). */
+static struct jprobe lkdtm;
+
+static int lkdtm_parse_commandline(void);
+static void lkdtm_handler(void);
+
+/*
+ * Raw module parameters.  The two strings are left NULL until the user
+ * supplies them; they used to be initialised with the enum constants
+ * INVALID/NONE, which only worked because both happen to be 0.
+ */
+static char *cpoint_name;
+static char *cpoint_type;
+static int cpoint_count = DEFAULT_COUNT;
+static int recur_count = REC_NUM_DEFAULT;
+
+/* Parsed form of the parameters, set by lkdtm_parse_commandline(). */
+static enum cname cpoint = INVALID;
+static enum ctype cptype = NONE;
+/* Hits left before the action fires; counted down by lkdtm_handler(). */
+static int count = DEFAULT_COUNT;
+
+/*
+ * Parameter registration.
+ *
+ * cpoint_type and cpoint_count used mode 06444, i.e. 0444 plus the setuid
+ * and setgid bits, which make no sense on a parameter file.  They are now
+ * plain 0444, keeping the read-only access the old value already granted.
+ *
+ * Descriptions are built from adjacent string literals: a backslash
+ * continuation inside a literal pulls the next line's indentation into the
+ * text.
+ */
+module_param(recur_count, int, 0644);
+MODULE_PARM_DESC(recur_count, "Recursion level for the stack overflow test, "
+                               "default is 10");
+module_param(cpoint_name, charp, 0644);
+MODULE_PARM_DESC(cpoint_name, "Crash Point, where kernel is to be crashed");
+module_param(cpoint_type, charp, 0444);
+MODULE_PARM_DESC(cpoint_type, "Crash Point Type, action to be taken on "
+                               "hitting the crash point");
+module_param(cpoint_count, int, 0444);
+MODULE_PARM_DESC(cpoint_count, "Crash Point Count, number of times the "
+                               "crash point is to be hit to trigger action");
+
+/*
+ * Jprobe handler for __do_IRQ, used for the INT_HARDWARE_ENTRY crash point
+ * (wired up in lkdtm_module_init()).  Runs lkdtm_handler() and then calls
+ * jprobe_return().  The irq/regs arguments are not used.
+ *
+ * NOTE(review): jprobe_return() presumably does not come back here, which
+ * would make the trailing "return 0" filler for the compiler -- confirm
+ * against the kprobes documentation.
+ */
+unsigned int jp_do_irq(unsigned int irq, struct pt_regs *regs)
+{
+       lkdtm_handler();
+       jprobe_return();
+       return 0;
+}
+
+/*
+ * Jprobe handler for handle_IRQ_event, used for the INT_HW_IRQ_EN crash
+ * point (wired up in lkdtm_module_init()).  Runs lkdtm_handler() and then
+ * calls jprobe_return().  None of the arguments are used.
+ */
+irqreturn_t jp_handle_irq_event(unsigned int irq, struct pt_regs *regs,
+                       struct irqaction *action)
+{
+       lkdtm_handler();
+       jprobe_return();
+       return 0;
+}
+
+/*
+ * Jprobe handler for tasklet_action, used for the INT_TASKLET_ENTRY crash
+ * point (wired up in lkdtm_module_init()).  Runs lkdtm_handler() and then
+ * calls jprobe_return().  The argument is not used.
+ */
+void jp_tasklet_action(struct softirq_action *a)
+{
+       lkdtm_handler();
+       jprobe_return();
+}
+
+/*
+ * Jprobe handler for ll_rw_block, used for the FS_DEVRW crash point (wired
+ * up in lkdtm_module_init()).  Runs lkdtm_handler() and then calls
+ * jprobe_return().  None of the arguments are used.
+ */
+void jp_ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
+{
+       lkdtm_handler();
+       jprobe_return();
+}
+
+struct scan_control;
+
+/*
+ * Jprobe handler for shrink_page_list, used for the MEM_SWAPOUT crash point
+ * (wired up in lkdtm_module_init()).  Runs lkdtm_handler() and then calls
+ * jprobe_return().  None of the arguments are used; struct scan_control is
+ * only forward-declared in this file.
+ */
+unsigned long jp_shrink_page_list(struct list_head *page_list,
+                                        struct scan_control *sc)
+{
+       lkdtm_handler();
+       jprobe_return();
+       return 0;
+}
+
+/*
+ * Jprobe handler for hrtimer_start, used for the TIMERADD crash point
+ * (wired up in lkdtm_module_init()).  Runs lkdtm_handler() and then calls
+ * jprobe_return().  None of the arguments are used.
+ */
+int jp_hrtimer_start(struct hrtimer *timer, ktime_t tim,
+                               const enum hrtimer_mode mode)
+{
+       lkdtm_handler();
+       jprobe_return();
+       return 0;
+}
+
+/*
+ * Jprobe handler for scsi_dispatch_cmd, used for the SCSI_DISPATCH_CMD
+ * crash point (wired up in lkdtm_module_init()).  Runs lkdtm_handler() and
+ * then calls jprobe_return().  The argument is not used.
+ */
+int jp_scsi_dispatch_cmd(struct scsi_cmnd *cmd)
+{
+       lkdtm_handler();
+       jprobe_return();
+       return 0;
+}
+
+#ifdef CONFIG_IDE
+/*
+ * Jprobe handler for generic_ide_ioctl, used for the IDE_CORE_CP crash
+ * point (wired up in lkdtm_module_init()).  Only compiled when CONFIG_IDE
+ * is set.  Runs lkdtm_handler() and then calls jprobe_return().  None of
+ * the arguments are used.
+ */
+int jp_generic_ide_ioctl(ide_drive_t *drive, struct file *file,
+                       struct block_device *bdev, unsigned int cmd,
+                       unsigned long arg)
+{
+       lkdtm_handler();
+       jprobe_return();
+       return 0;
+}
+#endif
+
+/*
+ * Validate the module parameters and translate the two name strings into
+ * their enum values.
+ *
+ * On success `cpoint' and `cptype' hold the selected crash point and action,
+ * and `count' is primed from cpoint_count.
+ *
+ * Returns 0 on success, or -EINVAL when a required string is missing, a
+ * counter is below 1, or a string matches no table entry.
+ */
+static int lkdtm_parse_commandline(void)
+{
+       int i;
+
+       /* Both strings are mandatory and both counters must be positive. */
+       if (!cpoint_name || !cpoint_type ||
+                                       cpoint_count < 1 || recur_count < 1)
+               return -EINVAL;
+
+       /* Table index i corresponds to enum value i + 1; 0 is INVALID. */
+       for (i = 0; i < ARRAY_SIZE(cp_name); ++i) {
+               if (!strcmp(cpoint_name, cp_name[i])) {
+                       cpoint = i + 1;
+                       break;
+               }
+       }
+
+       /* Same mapping for the action; 0 is NONE. */
+       for (i = 0; i < ARRAY_SIZE(cp_type); ++i) {
+               if (!strcmp(cpoint_type, cp_type[i])) {
+                       cptype = i + 1;
+                       break;
+               }
+       }
+
+       /* Either lookup found nothing: the string was not recognised. */
+       if (cpoint == INVALID || cptype == NONE)
+               return -EINVAL;
+
+       count = cpoint_count;
+
+       return 0;
+}
+
+/*
+ * Stack burner for the OVERFLOW action.  Every level puts a 1024-byte
+ * buffer on the stack, fills it with 0xFF, decrements the recur_count
+ * module parameter and recurses until that counter reaches zero.
+ *
+ * `a' is only passed along unchanged.  Returns 0 once the countdown ends.
+ */
+static int recursive_loop(int a)
+{
+       char buf[1024];
+
+       memset(buf, 0xFF, sizeof(buf));
+       if (--recur_count == 0)
+               return 0;
+
+       return recursive_loop(a);
+}
+
+/*
+ * Shared body of all jprobe handlers.  Logs the hit and counts it; when the
+ * countdown reaches zero the action selected by `cptype' is carried out and
+ * the countdown is re-armed from cpoint_count.
+ */
+void lkdtm_handler(void)
+{
+       printk(KERN_INFO "lkdtm : Crash point %s of type %s hit\n",
+                                        cpoint_name, cpoint_type);
+
+       /* Not yet at the configured number of hits: nothing more to do. */
+       if (--count != 0)
+               return;
+
+       switch (cptype) {
+       case PANIC:
+               printk(KERN_INFO "lkdtm : PANIC\n");
+               panic("dumptest");
+               break;
+       case BUG:
+               printk(KERN_INFO "lkdtm : BUG\n");
+               BUG();
+               break;
+       case EXCEPTION:
+               printk(KERN_INFO "lkdtm : EXCEPTION\n");
+               /* Deliberate store through a null pointer. */
+               *((int *) 0) = 0;
+               break;
+       case LOOP:
+               printk(KERN_INFO "lkdtm : LOOP\n");
+               /* Deliberate endless loop. */
+               for (;;)
+                       ;
+               break;
+       case OVERFLOW:
+               printk(KERN_INFO "lkdtm : OVERFLOW\n");
+               (void) recursive_loop(0);
+               break;
+       case NONE:
+       default:
+               break;
+       }
+
+       /* Re-arm the countdown for actions that return. */
+       count = cpoint_count;
+}
+
+int lkdtm_module_init(void)
+{
+       int ret;
+
+       if (lkdtm_parse_commandline() == -EINVAL) {
+               printk(KERN_INFO "lkdtm : Invalid command\n");
+               return -EINVAL;
+       }
+
+       switch (cpoint) {
+       case INT_HARDWARE_ENTRY:
+               lkdtm.kp.symbol_name = "__do_IRQ";
+               lkdtm.entry = (kprobe_opcode_t*) jp_do_irq;
+               break;
+       case INT_HW_IRQ_EN:
+               lkdtm.kp.symbol_name = "handle_IRQ_event";
+               lkdtm.entry = (kprobe_opcode_t*) jp_handle_irq_event;
+               break;
+       case INT_TASKLET_ENTRY:
+               lkdtm.kp.symbol_name = "tasklet_action";
+               lkdtm.entry = (kprobe_opcode_t*) jp_tasklet_action;
+               break;
+       case FS_DEVRW:
+               lkdtm.kp.symbol_name = "ll_rw_block";
+               lkdtm.entry = (kprobe_opcode_t*) jp_ll_rw_block;
+               break;
+       case MEM_SWAPOUT:
+               lkdtm.kp.symbol_name = "shrink_page_list";
+               lkdtm.entry = (kprobe_opcode_t*) jp_shrink_page_list;
+               break;
+       case TIMERADD:
+               lkdtm.kp.symbol_name = "hrtimer_start";
+               lkdtm.entry = (kprobe_opcode_t*) jp_hrtimer_start;
+               break;
+       case SCSI_DISPATCH_CMD:
+               lkdtm.kp.symbol_name = "scsi_dispatch_cmd";
+               lkdtm.entry = (kprobe_opcode_t*) jp_scsi_dispatch_cmd;
+               break;
+       case IDE_CORE_CP:
+#ifdef CONFIG_IDE
+               lkdtm.kp.symbol_name = "generic_ide_ioctl";
+               lkdtm.entry = (kprobe_opcode_t*) jp_generic_ide_ioctl;
+#else
+               printk(KERN_INFO "lkdtm : Crash point not available\n");
+#endif
+               break;
+       default:
+               printk(KERN_INFO "lkdtm : Invalid Crash Point\n");
+               break;
+       }
+
+       if ((ret = register_jprobe(&lkdtm)) < 0) {
+                printk(KERN_INFO "lkdtm : Couldn't register jprobe\n");
+                return ret;
+       }
+
+       printk(KERN_INFO "lkdtm : Crash point %s of type %s registered\n",
+                                               cpoint_name, cpoint_type);
+       return 0;
+}
+
+/*
+ * Module exit: unregister the jprobe set up by lkdtm_module_init() and log
+ * that the crash point is gone.
+ */
+void lkdtm_module_exit(void)
+{
+        unregister_jprobe(&lkdtm);
+        printk(KERN_INFO "lkdtm : Crash point unregistered\n");
+}
+
+module_init(lkdtm_module_init);
+module_exit(lkdtm_module_exit);
+
+MODULE_LICENSE("GPL");
index bc0face..151a2e1 100644 (file)
@@ -697,7 +697,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on)
                return ret;
 
        if (on) {
-               ret = f_setown(file, current->pid, 0);
+               ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0);
                if (ret)
                        return ret;
                tun->flags |= TUN_FASYNC;
index 9793780..eddfa87 100644 (file)
@@ -150,7 +150,6 @@ that only one external action is invoked at a time.
 #include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
-#define __KERNEL_SYSCALLS__
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
index bf00fa2..8dac2ba 100644 (file)
@@ -684,7 +684,7 @@ int __init led_init(void)
        int ret;
 
        snprintf(lcd_text_default, sizeof(lcd_text_default),
-               "Linux %s", system_utsname.release);
+               "Linux %s", init_utsname()->release);
 
        /* Work around the buggy PDC of KittyHawk-machines */
        switch (CPU_HVERSION) {
index fad5a33..4a9f025 100644 (file)
@@ -84,8 +84,7 @@
 
 static void deferred_poweroff(void *dummy)
 {
-       extern int cad_pid;     /* from kernel/sys.c */
-       if (kill_proc(cad_pid, SIGINT, 1)) {
+       if (kill_cad_pid(SIGINT, 1)) {
                /* just in case killing init process failed */
                machine_power_off();
        }
index 2fa566f..d7de175 100644 (file)
@@ -1103,7 +1103,7 @@ tty3215_start(struct tty_struct *tty)
        }
 }
 
-static struct tty_operations tty3215_ops = {
+static const struct tty_operations tty3215_ops = {
        .open = tty3215_open,
        .close = tty3215_close,
        .write = tty3215_write,
index b4557fa..78f8bda 100644 (file)
@@ -27,7 +27,7 @@ struct raw3270_fn fs3270_fn;
 
 struct fs3270 {
        struct raw3270_view view;
-       pid_t fs_pid;                   /* Pid of controlling program. */
+       struct pid *fs_pid;             /* Pid of controlling program. */
        int read_command;               /* ccw command to use for reads. */
        int write_command;              /* ccw command to use for writes. */
        int attention;                  /* Got attention. */
@@ -102,7 +102,7 @@ fs3270_restore_callback(struct raw3270_request *rq, void *data)
        fp = (struct fs3270 *) rq->view;
        if (rq->rc != 0 || rq->rescnt != 0) {
                if (fp->fs_pid)
-                       kill_proc(fp->fs_pid, SIGHUP, 1);
+                       kill_pid(fp->fs_pid, SIGHUP, 1);
        }
        fp->rdbuf_size = 0;
        raw3270_request_reset(rq);
@@ -173,7 +173,7 @@ fs3270_save_callback(struct raw3270_request *rq, void *data)
         */
        if (rq->rc != 0 || rq->rescnt == 0) {
                if (fp->fs_pid)
-                       kill_proc(fp->fs_pid, SIGHUP, 1);
+                       kill_pid(fp->fs_pid, SIGHUP, 1);
                fp->rdbuf_size = 0;
        } else
                fp->rdbuf_size = fp->rdbuf->size - rq->rescnt;
@@ -442,7 +442,7 @@ fs3270_open(struct inode *inode, struct file *filp)
                return PTR_ERR(fp);
 
        init_waitqueue_head(&fp->wait);
-       fp->fs_pid = current->pid;
+       fp->fs_pid = get_pid(task_pid(current));
        rc = raw3270_add_view(&fp->view, &fs3270_fn, minor);
        if (rc) {
                fs3270_free_view(&fp->view);
@@ -480,7 +480,8 @@ fs3270_close(struct inode *inode, struct file *filp)
        fp = filp->private_data;
        filp->private_data = NULL;
        if (fp) {
-               fp->fs_pid = 0;
+               put_pid(fp->fs_pid);
+               fp->fs_pid = NULL;
                raw3270_reset(&fp->view);
                raw3270_put_view(&fp->view);
                raw3270_del_view(&fp->view);
index f6cf902..6f43e04 100644 (file)
@@ -711,7 +711,7 @@ static struct sclp_register sclp_input_event =
        .receiver_fn = sclp_tty_receiver
 };
 
-static struct tty_operations sclp_ops = {
+static const struct tty_operations sclp_ops = {
        .open = sclp_tty_open,
        .close = sclp_tty_close,
        .write = sclp_tty_write,
index 54fba6f..723bf41 100644 (file)
@@ -655,7 +655,7 @@ __sclp_vt220_init(int early)
        return 0;
 }
 
-static struct tty_operations sclp_vt220_ops = {
+static const struct tty_operations sclp_vt220_ops = {
        .open = sclp_vt220_open,
        .close = sclp_vt220_close,
        .write = sclp_vt220_write,
index 06e2eee..4717c36 100644 (file)
@@ -1737,7 +1737,7 @@ tty3270_ioctl(struct tty_struct *tty, struct file *file,
        return kbd_ioctl(tp->kbd, file, cmd, arg);
 }
 
-static struct tty_operations tty3270_ops = {
+static const struct tty_operations tty3270_ops = {
        .open = tty3270_open,
        .close = tty3270_close,
        .write = tty3270_write,
index 479364d..e088b5e 100644 (file)
@@ -208,7 +208,7 @@ s390_handle_mcck(void)
                 */
                __ctl_clear_bit(14, 24);        /* Disable WARNING MCH */
                if (xchg(&mchchk_wng_posted, 1) == 0)
-                       kill_proc(1, SIGPWR, 1);
+                       kill_cad_pid(SIGPWR, 1);
        }
 #endif
 
index 4fdb2c9..a305d40 100644 (file)
@@ -2187,7 +2187,7 @@ static void do_softint(void *private_)
 #endif
 }
 
-static struct tty_operations aurora_ops = {
+static const struct tty_operations aurora_ops = {
        .open  = aurora_open,
        .close = aurora_close,
        .write = aurora_write,
index 1cc706e..d27e4f6 100644 (file)
@@ -4,9 +4,6 @@
  * Copyright (C) 2001 David S. Miller (davem@redhat.com)
  */
 
-#define __KERNEL_SYSCALLS__
-static int errno;
-
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
@@ -200,7 +197,7 @@ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
        printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
 
        shutting_down = 1;
-       if (execve("/sbin/shutdown", argv, envp) < 0)
+       if (kernel_execve("/sbin/shutdown", argv, envp) < 0)
                printk(KERN_CRIT "envctrl: shutdown execution failed\n");
 }
 
index 063e676..728a133 100644 (file)
@@ -19,9 +19,6 @@
  *              Daniele Bellucci <bellucda@tiscali.it>
  */
 
-#define __KERNEL_SYSCALLS__
-static int errno;
-
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
@@ -976,13 +973,15 @@ static void envctrl_do_shutdown(void)
                "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
        char *argv[] = { 
                "/sbin/shutdown", "-h", "now", NULL };  
+       int ret;
 
        if (inprog != 0)
                return;
 
        inprog = 1;
        printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
-       if (0 > execve("/sbin/shutdown", argv, envp)) {
+       ret = kernel_execve("/sbin/shutdown", argv, envp);
+       if (ret < 0) {
                printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); 
                inprog = 0;  /* unlikely to succeed, but we could try again */
        }
index ae41064..1b53afb 100644 (file)
@@ -933,8 +933,8 @@ lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
                        ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) rh + size);
                        ae->ad.bits.AttrType = be16_to_cpu(OS_NAME_VERSION);
                        sprintf(ae->un.OsNameVersion, "%s %s %s",
-                               system_utsname.sysname, system_utsname.release,
-                               system_utsname.version);
+                               init_utsname()->sysname, init_utsname()->release,
+                               init_utsname()->version);
                        len = strlen(ae->un.OsNameVersion);
                        len += (len & 3) ? (4 - (len & 3)) : 4;
                        ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + len);
@@ -1052,7 +1052,7 @@ lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
                                                          size);
                                ae->ad.bits.AttrType = be16_to_cpu(HOST_NAME);
                                sprintf(ae->un.HostName, "%s",
-                                       system_utsname.nodename);
+                                       init_utsname()->nodename);
                                len = strlen(ae->un.HostName);
                                len += (len & 3) ? (4 - (len & 3)) : 4;
                                ae->ad.bits.AttrLen =
@@ -1140,7 +1140,7 @@ lpfc_fdmi_tmo_handler(struct lpfc_hba *phba)
 
        ndlp = lpfc_findnode_did(phba, NLP_SEARCH_ALL, FDMI_DID);
        if (ndlp) {
-               if (system_utsname.nodename[0] != '\0') {
+               if (init_utsname()->nodename[0] != '\0') {
                        lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
                } else {
                        mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
index 993a702..bac853c 100644 (file)
@@ -1378,7 +1378,7 @@ void startup_console(void)
 #endif /* CONFIG_PM_LEGACY */
 
 
-static struct tty_operations rs_ops = {
+static const struct tty_operations rs_ops = {
        .open = rs_open,
        .close = rs_close,
        .write = rs_write,
index e80e70e..1b299e8 100644 (file)
@@ -2424,7 +2424,7 @@ long console_360_init(long kmem_start, long kmem_end)
 */
 static int     baud_idx;
 
-static struct tty_operations rs_360_ops = {
+static const struct tty_operations rs_360_ops = {
        .owner = THIS_MODULE,
        .open = rs_360_open,
        .close = rs_360_close,
index cabd048..9851d9e 100644 (file)
@@ -4825,7 +4825,7 @@ show_serial_version(void)
 
 /* rs_init inits the driver at boot (using the module_init chain) */
 
-static struct tty_operations rs_ops = {
+static const struct tty_operations rs_ops = {
        .open = rs_open,
        .close = rs_close,
        .write = rs_write,
index 832abd3..00d7859 100644 (file)
@@ -1666,7 +1666,7 @@ static void show_serial_version(void)
        printk(mcfrs_drivername);
 }
 
-static struct tty_operations mcfrs_ops = {
+static const struct tty_operations mcfrs_ops = {
        .open = mcfrs_open,
        .close = mcfrs_close,
        .write = mcfrs_write,
index 5f7ba1a..de5e893 100644 (file)
@@ -2111,7 +2111,7 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state,
        }
 }
 
-static struct tty_operations uart_ops = {
+static const struct tty_operations uart_ops = {
        .open           = uart_open,
        .close          = uart_close,
        .write          = uart_write,
index 5e8a276..622881f 100644 (file)
@@ -1701,7 +1701,7 @@ static void __init probe_sccs(void)
        spin_unlock_irqrestore(&zs_lock, flags);
 }
 
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
        .open = rs_open,
        .close = rs_close,
        .write = rs_write,
index ca90326..7128829 100644 (file)
@@ -1120,7 +1120,7 @@ static struct usb_driver acm_driver = {
  * TTY driver structures.
  */
 
-static struct tty_operations acm_ops = {
+static const struct tty_operations acm_ops = {
        .open =                 acm_tty_open,
        .close =                acm_tty_close,
        .write =                acm_tty_write,
index a94c63b..3f509be 100644 (file)
@@ -65,7 +65,7 @@ DEFINE_MUTEX(usbfs_mutex);
 struct async {
        struct list_head asynclist;
        struct dev_state *ps;
-       pid_t pid;
+       struct pid *pid;
        uid_t uid, euid;
        unsigned int signr;
        unsigned int ifnum;
@@ -225,6 +225,7 @@ static struct async *alloc_async(unsigned int numisoframes)
 
 static void free_async(struct async *as)
 {
+       put_pid(as->pid);
        kfree(as->urb->transfer_buffer);
        kfree(as->urb->setup_packet);
        usb_free_urb(as->urb);
@@ -317,7 +318,7 @@ static void async_completed(struct urb *urb, struct pt_regs *regs)
                sinfo.si_errno = as->urb->status;
                sinfo.si_code = SI_ASYNCIO;
                sinfo.si_addr = as->userurb;
-               kill_proc_info_as_uid(as->signr, &sinfo, as->pid, as->uid, 
+               kill_pid_info_as_uid(as->signr, &sinfo, as->pid, as->uid,
                                      as->euid, as->secid);
        }
        snoop(&urb->dev->dev, "urb complete\n");
@@ -573,7 +574,7 @@ static int usbdev_open(struct inode *inode, struct file *file)
        INIT_LIST_HEAD(&ps->async_completed);
        init_waitqueue_head(&ps->wait);
        ps->discsignr = 0;
-       ps->disc_pid = current->pid;
+       ps->disc_pid = get_pid(task_pid(current));
        ps->disc_uid = current->uid;
        ps->disc_euid = current->euid;
        ps->disccontext = NULL;
@@ -611,6 +612,7 @@ static int usbdev_release(struct inode *inode, struct file *file)
        usb_autosuspend_device(dev, 1);
        usb_unlock_device(dev);
        usb_put_dev(dev);
+       put_pid(ps->disc_pid);
        kfree(ps);
        return 0;
 }
@@ -1063,7 +1065,7 @@ static int proc_do_submiturb(struct dev_state *ps, struct usbdevfs_urb *uurb,
                as->userbuffer = NULL;
        as->signr = uurb->signr;
        as->ifnum = ifnum;
-       as->pid = current->pid;
+       as->pid = get_pid(task_pid(current));
        as->uid = current->uid;
        as->euid = current->euid;
        security_task_getsecid(current, &as->secid);
index 37f9f5e..e658089 100644 (file)
@@ -318,8 +318,8 @@ static int rh_string (
 
        // id 3 == vendor description
        } else if (id == 3) {
-               snprintf (buf, sizeof buf, "%s %s %s", system_utsname.sysname,
-                       system_utsname.release, hcd->driver->description);
+               snprintf (buf, sizeof buf, "%s %s %s", init_utsname()->sysname,
+                       init_utsname()->release, hcd->driver->description);
 
        // unsupported IDs --> "protocol stall"
        } else
index 7c77c2d..b5d6a79 100644 (file)
@@ -699,7 +699,7 @@ static void usbfs_remove_device(struct usb_device *dev)
                        sinfo.si_errno = EPIPE;
                        sinfo.si_code = SI_ASYNCIO;
                        sinfo.si_addr = ds->disccontext;
-                       kill_proc_info_as_uid(ds->discsignr, &sinfo, ds->disc_pid, ds->disc_uid, ds->disc_euid, ds->secid);
+                       kill_pid_info_as_uid(ds->discsignr, &sinfo, ds->disc_pid, ds->disc_uid, ds->disc_euid, ds->secid);
                }
        }
 }
index f69df13..13322e3 100644 (file)
@@ -139,7 +139,7 @@ struct dev_state {
        struct list_head async_completed;
        wait_queue_head_t wait;     /* wake up if a request completed */
        unsigned int discsignr;
-       pid_t disc_pid;
+       struct pid *disc_pid;
        uid_t disc_uid, disc_euid;
        void __user *disccontext;
        unsigned long ifclaimed;
index 366dc0a..1c17d26 100644 (file)
@@ -2260,7 +2260,7 @@ eth_bind (struct usb_gadget *gadget)
                return -ENODEV;
        }
        snprintf (manufacturer, sizeof manufacturer, "%s %s/%s",
-               system_utsname.sysname, system_utsname.release,
+               init_utsname()->sysname, init_utsname()->release,
                gadget->name);
 
        /* If there's an RNDIS configuration, that's what Windows wants to
index c83d3b6..8b975d1 100644 (file)
@@ -4001,7 +4001,7 @@ static int __init fsg_bind(struct usb_gadget *gadget)
        usb_gadget_set_selfpowered(gadget);
 
        snprintf(manufacturer, sizeof manufacturer, "%s %s with %s",
-                       system_utsname.sysname, system_utsname.release,
+                       init_utsname()->sysname, init_utsname()->release,
                        gadget->name);
 
        /* On a real device, serial[] would be loaded from permanent
index b68cecd..83601d4 100644 (file)
@@ -1189,7 +1189,7 @@ static int __devinit gmidi_bind(struct usb_gadget *gadget)
                strlcpy(manufacturer, iManufacturer, sizeof(manufacturer));
        } else {
                snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s",
-                       system_utsname.sysname, system_utsname.release,
+                       init_utsname()->sysname, init_utsname()->release,
                        gadget->name);
        }
        if (iProduct) {
index b893e31..208e55a 100644 (file)
@@ -271,7 +271,7 @@ static unsigned int use_acm = GS_DEFAULT_USE_ACM;
 
 
 /* tty driver struct */
-static struct tty_operations gs_tty_ops = {
+static const struct tty_operations gs_tty_ops = {
        .open =                 gs_open,
        .close =                gs_close,
        .write =                gs_write,
@@ -1434,7 +1434,7 @@ static int __init gs_bind(struct usb_gadget *gadget)
                return -ENOMEM;
 
        snprintf(manufacturer, sizeof(manufacturer), "%s %s with %s",
-               system_utsname.sysname, system_utsname.release,
+               init_utsname()->sysname, init_utsname()->release,
                gadget->name);
 
        memset(dev, 0, sizeof(struct gs_dev));
index b7018ee..0f809dd 100644 (file)
@@ -1242,7 +1242,7 @@ autoconf_fail:
                EP_OUT_NAME, EP_IN_NAME);
 
        snprintf (manufacturer, sizeof manufacturer, "%s %s with %s",
-               system_utsname.sysname, system_utsname.release,
+               init_utsname()->sysname, init_utsname()->release,
                gadget->name);
 
        return 0;
index 0222d92..8006e51 100644 (file)
@@ -1015,7 +1015,7 @@ void usb_serial_disconnect(struct usb_interface *interface)
        dev_info(dev, "device disconnected\n");
 }
 
-static struct tty_operations serial_ops = {
+static const struct tty_operations serial_ops = {
        .open =                 serial_open,
        .close =                serial_close,
        .write =                serial_write,
index 0e9ba0b..c787620 100644 (file)
@@ -772,12 +772,12 @@ cifs_parse_mount_options(char *options, const char *devname,struct smb_vol *vol)
        separator[1] = 0; 
 
        memset(vol->source_rfc1001_name,0x20,15);
-       for(i=0;i < strnlen(system_utsname.nodename,15);i++) {
+       for(i=0;i < strnlen(utsname()->nodename,15);i++) {
                /* does not have to be a perfect mapping since the field is
                informational, only used for servers that do not support
                port 445 and it can be overridden at mount time */
                vol->source_rfc1001_name[i] = 
-                       toupper(system_utsname.nodename[i]);
+                       toupper(utsname()->nodename[i]);
        }
        vol->source_rfc1001_name[15] = 0;
        /* null target name indicates to use *SMBSERVR default called name
@@ -2153,7 +2153,7 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                                  32, nls_codepage);
                bcc_ptr += 2 * bytes_returned;
                bytes_returned =
-                   cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
+                   cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release,
                                  32, nls_codepage);
                bcc_ptr += 2 * bytes_returned;
                bcc_ptr += 2;
@@ -2180,8 +2180,8 @@ CIFSSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                }
                strcpy(bcc_ptr, "Linux version ");
                bcc_ptr += strlen("Linux version ");
-               strcpy(bcc_ptr, system_utsname.release);
-               bcc_ptr += strlen(system_utsname.release) + 1;
+               strcpy(bcc_ptr, utsname()->release);
+               bcc_ptr += strlen(utsname()->release) + 1;
                strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
                bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
        }
@@ -2445,7 +2445,7 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
                                  32, nls_codepage);
                bcc_ptr += 2 * bytes_returned;
                bytes_returned =
-                   cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
+                   cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
                                  nls_codepage);
                bcc_ptr += 2 * bytes_returned;
                bcc_ptr += 2;   /* null terminate Linux version */
@@ -2462,8 +2462,8 @@ CIFSNTLMSSPNegotiateSessSetup(unsigned int xid,
        } else {                /* ASCII */
                strcpy(bcc_ptr, "Linux version ");
                bcc_ptr += strlen("Linux version ");
-               strcpy(bcc_ptr, system_utsname.release);
-               bcc_ptr += strlen(system_utsname.release) + 1;
+               strcpy(bcc_ptr, utsname()->release);
+               bcc_ptr += strlen(utsname()->release) + 1;
                strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
                bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
                bcc_ptr++;      /* empty domain field */
@@ -2836,7 +2836,7 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
                                  32, nls_codepage);
                bcc_ptr += 2 * bytes_returned;
                bytes_returned =
-                   cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release, 32,
+                   cifs_strtoUCS((__le16 *) bcc_ptr, utsname()->release, 32,
                                  nls_codepage);
                bcc_ptr += 2 * bytes_returned;
                bcc_ptr += 2;   /* null term version string */
@@ -2888,8 +2888,8 @@ CIFSNTLMSSPAuthSessSetup(unsigned int xid, struct cifsSesInfo *ses,
 
                strcpy(bcc_ptr, "Linux version ");
                bcc_ptr += strlen("Linux version ");
-               strcpy(bcc_ptr, system_utsname.release);
-               bcc_ptr += strlen(system_utsname.release) + 1;
+               strcpy(bcc_ptr, utsname()->release);
+               bcc_ptr += strlen(utsname()->release) + 1;
                strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
                bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
                bcc_ptr++;      /* null domain */
index d1705ab..22b4c35 100644 (file)
@@ -111,7 +111,7 @@ static void unicode_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
        bytes_ret = cifs_strtoUCS((__le16 *)bcc_ptr, "Linux version ", 32,
                                  nls_cp);
        bcc_ptr += 2 * bytes_ret;
-       bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, system_utsname.release,
+       bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, init_utsname()->release,
                                  32, nls_cp);
        bcc_ptr += 2 * bytes_ret;
        bcc_ptr += 2; /* trailing null */
@@ -158,8 +158,8 @@ static void ascii_ssetup_strings(char ** pbcc_area, struct cifsSesInfo *ses,
 
        strcpy(bcc_ptr, "Linux version ");
        bcc_ptr += strlen("Linux version ");
-       strcpy(bcc_ptr, system_utsname.release);
-       bcc_ptr += strlen(system_utsname.release) + 1;
+       strcpy(bcc_ptr, init_utsname()->release);
+       bcc_ptr += strlen(init_utsname()->release) + 1;
 
        strcpy(bcc_ptr, CIFS_NETWORK_OPSYS);
        bcc_ptr += strlen(CIFS_NETWORK_OPSYS) + 1;
index 13fb08d..d98c96f 100644 (file)
@@ -56,8 +56,6 @@
 
 int compat_log = 1;
 
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
-
 int compat_printk(const char *fmt, ...)
 {
        va_list ap;
index f932591..2b0442d 100644 (file)
@@ -92,7 +92,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
                prev = &odn->dn_next;
        }
 
-       error = f_setown(filp, current->pid, 0);
+       error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
        if (error)
                goto out_free;
 
index 6270f8f..d993ea1 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1318,7 +1318,7 @@ static void format_corename(char *corename, const char *pattern, long signr)
                        case 'h':
                                down_read(&uts_sem);
                                rc = snprintf(out_ptr, out_end - out_ptr,
-                                             "%s", system_utsname.nodename);
+                                             "%s", utsname()->nodename);
                                up_read(&uts_sem);
                                if (rc > out_end - out_ptr)
                                        goto out;
index d35cbc6..e4f2616 100644 (file)
@@ -250,19 +250,22 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
        return error;
 }
 
-static void f_modown(struct file *filp, unsigned long pid,
+static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
                      uid_t uid, uid_t euid, int force)
 {
        write_lock_irq(&filp->f_owner.lock);
        if (force || !filp->f_owner.pid) {
-               filp->f_owner.pid = pid;
+               put_pid(filp->f_owner.pid);
+               filp->f_owner.pid = get_pid(pid);
+               filp->f_owner.pid_type = type;
                filp->f_owner.uid = uid;
                filp->f_owner.euid = euid;
        }
        write_unlock_irq(&filp->f_owner.lock);
 }
 
-int f_setown(struct file *filp, unsigned long arg, int force)
+int __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
+               int force)
 {
        int err;
        
@@ -270,15 +273,44 @@ int f_setown(struct file *filp, unsigned long arg, int force)
        if (err)
                return err;
 
-       f_modown(filp, arg, current->uid, current->euid, force);
+       f_modown(filp, pid, type, current->uid, current->euid, force);
        return 0;
 }
+EXPORT_SYMBOL(__f_setown);
 
+int f_setown(struct file *filp, unsigned long arg, int force)
+{
+       enum pid_type type;
+       struct pid *pid;
+       int who = arg;
+       int result;
+       type = PIDTYPE_PID;
+       if (who < 0) {
+               type = PIDTYPE_PGID;
+               who = -who;
+       }
+       rcu_read_lock();
+       pid = find_pid(who);
+       result = __f_setown(filp, pid, type, force);
+       rcu_read_unlock();
+       return result;
+}
 EXPORT_SYMBOL(f_setown);
 
 void f_delown(struct file *filp)
 {
-       f_modown(filp, 0, 0, 0, 1);
+       f_modown(filp, NULL, PIDTYPE_PID, 0, 0, 1);
+}
+
+pid_t f_getown(struct file *filp)
+{
+       pid_t pid;
+       read_lock(&filp->f_owner.lock);
+       pid = pid_nr(filp->f_owner.pid);
+       if (filp->f_owner.pid_type == PIDTYPE_PGID)
+               pid = -pid;
+       read_unlock(&filp->f_owner.lock);
+       return pid;
 }
 
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -319,7 +351,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                 * current syscall conventions, the only way
                 * to fix this will be in libc.
                 */
-               err = filp->f_owner.pid;
+               err = f_getown(filp);
                force_successful_syscall_return();
                break;
        case F_SETOWN:
@@ -470,24 +502,19 @@ static void send_sigio_to_task(struct task_struct *p,
 void send_sigio(struct fown_struct *fown, int fd, int band)
 {
        struct task_struct *p;
-       int pid;
+       enum pid_type type;
+       struct pid *pid;
        
        read_lock(&fown->lock);
+       type = fown->pid_type;
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
        
        read_lock(&tasklist_lock);
-       if (pid > 0) {
-               p = find_task_by_pid(pid);
-               if (p) {
-                       send_sigio_to_task(p, fown, fd, band);
-               }
-       } else {
-               do_each_task_pid(-pid, PIDTYPE_PGID, p) {
-                       send_sigio_to_task(p, fown, fd, band);
-               } while_each_task_pid(-pid, PIDTYPE_PGID, p);
-       }
+       do_each_pid_task(pid, type, p) {
+               send_sigio_to_task(p, fown, fd, band);
+       } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
  out_unlock_fown:
        read_unlock(&fown->lock);
@@ -503,9 +530,12 @@ static void send_sigurg_to_task(struct task_struct *p,
 int send_sigurg(struct fown_struct *fown)
 {
        struct task_struct *p;
-       int pid, ret = 0;
+       enum pid_type type;
+       struct pid *pid;
+       int ret = 0;
        
        read_lock(&fown->lock);
+       type = fown->pid_type;
        pid = fown->pid;
        if (!pid)
                goto out_unlock_fown;
@@ -513,16 +543,9 @@ int send_sigurg(struct fown_struct *fown)
        ret = 1;
        
        read_lock(&tasklist_lock);
-       if (pid > 0) {
-               p = find_task_by_pid(pid);
-               if (p) {
-                       send_sigurg_to_task(p, fown);
-               }
-       } else {
-               do_each_task_pid(-pid, PIDTYPE_PGID, p) {
-                       send_sigurg_to_task(p, fown);
-               } while_each_task_pid(-pid, PIDTYPE_PGID, p);
-       }
+       do_each_pid_task(pid, type, p) {
+               send_sigurg_to_task(p, fown);
+       } while_each_pid_task(pid, type, p);
        read_unlock(&tasklist_lock);
  out_unlock_fown:
        read_unlock(&fown->lock);
index bc35a40..24f25a0 100644 (file)
@@ -174,6 +174,7 @@ void fastcall __fput(struct file *file)
        fops_put(file->f_op);
        if (file->f_mode & FMODE_WRITE)
                put_write_access(inode);
+       put_pid(file->f_owner.pid);
        file_kill(file);
        file->f_dentry = NULL;
        file->f_vfsmnt = NULL;
index ada7643..bf6bec4 100644 (file)
@@ -657,7 +657,7 @@ static struct inode * get_new_inode_fast(struct super_block *sb, struct hlist_he
        return inode;
 }
 
-static inline unsigned long hash(struct super_block *sb, unsigned long hashval)
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
 {
        unsigned long tmp;
 
@@ -1003,7 +1003,7 @@ void generic_delete_inode(struct inode *inode)
 
        list_del_init(&inode->i_list);
        list_del_init(&inode->i_sb_list);
-       inode->i_state|=I_FREEING;
+       inode->i_state |= I_FREEING;
        inodes_stat.nr_inodes--;
        spin_unlock(&inode_lock);
 
@@ -1210,13 +1210,15 @@ void file_update_time(struct file *file)
                return;
 
        now = current_fs_time(inode->i_sb);
-       if (!timespec_equal(&inode->i_mtime, &now))
+       if (!timespec_equal(&inode->i_mtime, &now)) {
+               inode->i_mtime = now;
                sync_it = 1;
-       inode->i_mtime = now;
+       }
 
-       if (!timespec_equal(&inode->i_ctime, &now))
+       if (!timespec_equal(&inode->i_ctime, &now)) {
+               inode->i_ctime = now;
                sync_it = 1;
-       inode->i_ctime = now;
+       }
 
        if (sync_it)
                mark_inode_dirty_sync(inode);
index f95cc3f..87e1d03 100644 (file)
@@ -202,7 +202,7 @@ reclaimer(void *ptr)
        /* This one ensures that our parent doesn't terminate while the
         * reclaim is in progress */
        lock_kernel();
-       lockd_up();
+       lockd_up(0); /* note: this cannot fail as lockd is already running */
 
        nlmclnt_prepare_reclaim(host);
        /* First, reclaim all locks that have been marked. */
index 271e216..0116729 100644 (file)
@@ -129,11 +129,11 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
        nlmclnt_next_cookie(&argp->cookie);
        argp->state   = nsm_local_state;
        memcpy(&lock->fh, NFS_FH(fl->fl_file->f_dentry->d_inode), sizeof(struct nfs_fh));
-       lock->caller  = system_utsname.nodename;
+       lock->caller  = utsname()->nodename;
        lock->oh.data = req->a_owner;
        lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
                                (unsigned int)fl->fl_u.nfs_fl.owner->pid,
-                               system_utsname.nodename);
+                               utsname()->nodename);
        lock->svid = fl->fl_u.nfs_fl.owner->pid;
        lock->fl.fl_start = fl->fl_start;
        lock->fl.fl_end = fl->fl_end;
index 5954dcb..a816b92 100644 (file)
@@ -145,7 +145,7 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp)
         */
        sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
        if (!(p = xdr_encode_string(p, buffer))
-        || !(p = xdr_encode_string(p, system_utsname.nodename)))
+        || !(p = xdr_encode_string(p, utsname()->nodename)))
                return ERR_PTR(-EIO);
        *p++ = htonl(argp->prog);
        *p++ = htonl(argp->vers);
index 9a991b5..3cc369e 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
+#include <net/ip.h>
 #include <linux/lockd/lockd.h>
 #include <linux/nfs.h>
 
@@ -46,6 +47,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
 static DEFINE_MUTEX(nlmsvc_mutex);
 static unsigned int            nlmsvc_users;
 static pid_t                   nlmsvc_pid;
+static struct svc_serv         *nlmsvc_serv;
 int                            nlmsvc_grace_period;
 unsigned long                  nlmsvc_timeout;
 
@@ -96,7 +98,6 @@ static inline void clear_grace_period(void)
 static void
 lockd(struct svc_rqst *rqstp)
 {
-       struct svc_serv *serv = rqstp->rq_server;
        int             err = 0;
        unsigned long grace_period_expire;
 
@@ -112,6 +113,7 @@ lockd(struct svc_rqst *rqstp)
         * Let our maker know we're running.
         */
        nlmsvc_pid = current->pid;
+       nlmsvc_serv = rqstp->rq_server;
        complete(&lockd_start_done);
 
        daemonize("lockd");
@@ -161,7 +163,7 @@ lockd(struct svc_rqst *rqstp)
                 * Find a socket with data available and call its
                 * recvfrom routine.
                 */
-               err = svc_recv(serv, rqstp, timeout);
+               err = svc_recv(rqstp, timeout);
                if (err == -EAGAIN || err == -EINTR)
                        continue;
                if (err < 0) {
@@ -174,7 +176,7 @@ lockd(struct svc_rqst *rqstp)
                dprintk("lockd: request from %08x\n",
                        (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr));
 
-               svc_process(serv, rqstp);
+               svc_process(rqstp);
 
        }
 
@@ -189,6 +191,7 @@ lockd(struct svc_rqst *rqstp)
                        nlmsvc_invalidate_all();
                nlm_shutdown_hosts();
                nlmsvc_pid = 0;
+               nlmsvc_serv = NULL;
        } else
                printk(KERN_DEBUG
                        "lockd: new process, skipping host shutdown\n");
@@ -205,54 +208,77 @@ lockd(struct svc_rqst *rqstp)
        module_put_and_exit(0);
 }
 
+
+static int find_socket(struct svc_serv *serv, int proto)
+{
+       struct svc_sock *svsk;
+       int found = 0;
+       list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+               if (svsk->sk_sk->sk_protocol == proto) {
+                       found = 1;
+                       break;
+               }
+       return found;
+}
+
+static int make_socks(struct svc_serv *serv, int proto)
+{
+       /* Make any sockets that are needed but not present.
+        * If nlm_udpport or nlm_tcpport were set as module
+        * options, make those sockets unconditionally
+        */
+       static int              warned;
+       int err = 0;
+       if (proto == IPPROTO_UDP || nlm_udpport)
+               if (!find_socket(serv, IPPROTO_UDP))
+                       err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport);
+       if (err == 0 && (proto == IPPROTO_TCP || nlm_tcpport))
+               if (!find_socket(serv, IPPROTO_TCP))
+                       err= svc_makesock(serv, IPPROTO_TCP, nlm_tcpport);
+       if (!err)
+               warned = 0;
+       else if (warned++ == 0)
+               printk(KERN_WARNING
+                      "lockd_up: makesock failed, error=%d\n", err);
+       return err;
+}
+
 /*
  * Bring up the lockd process if it's not already up.
  */
 int
-lockd_up(void)
+lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
 {
-       static int              warned;
        struct svc_serv *       serv;
        int                     error = 0;
 
        mutex_lock(&nlmsvc_mutex);
-       /*
-        * Unconditionally increment the user count ... this is
-        * the number of clients who _want_ a lockd process.
-        */
-       nlmsvc_users++; 
        /*
         * Check whether we're already up and running.
         */
-       if (nlmsvc_pid)
+       if (nlmsvc_pid) {
+               if (proto)
+                       error = make_socks(nlmsvc_serv, proto);
                goto out;
+       }
 
        /*
         * Sanity check: if there's no pid,
         * we should be the first user ...
         */
-       if (nlmsvc_users > 1)
+       if (nlmsvc_users)
                printk(KERN_WARNING
                        "lockd_up: no pid, %d users??\n", nlmsvc_users);
 
        error = -ENOMEM;
-       serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE);
+       serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
        if (!serv) {
                printk(KERN_WARNING "lockd_up: create service failed\n");
                goto out;
        }
 
-       if ((error = svc_makesock(serv, IPPROTO_UDP, nlm_udpport)) < 0 
-#ifdef CONFIG_NFSD_TCP
-        || (error = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport)) < 0
-#endif
-               ) {
-               if (warned++ == 0) 
-                       printk(KERN_WARNING
-                               "lockd_up: makesock failed, error=%d\n", error);
+       if ((error = make_socks(serv, proto)) < 0)
                goto destroy_and_out;
-       } 
-       warned = 0;
 
        /*
         * Create the kernel thread and wait for it to start.
@@ -272,6 +298,8 @@ lockd_up(void)
 destroy_and_out:
        svc_destroy(serv);
 out:
+       if (!error)
+               nlmsvc_users++;
        mutex_unlock(&nlmsvc_mutex);
        return error;
 }
index c9d4197..93c00ee 100644 (file)
@@ -325,7 +325,7 @@ static int nlmsvc_setgrantargs(struct nlm_rqst *call, struct nlm_lock *lock)
 {
        locks_copy_lock(&call->a_args.lock.fl, &lock->fl);
        memcpy(&call->a_args.lock.fh, &lock->fh, sizeof(call->a_args.lock.fh));
-       call->a_args.lock.caller = system_utsname.nodename;
+       call->a_args.lock.caller = utsname()->nodename;
        call->a_args.lock.oh.len = lock->oh.len;
 
        /* set default data area */
index 033ea4a..61c46fa 100644 (file)
@@ -515,7 +515,7 @@ nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp)
  */
 #define NLM_void_sz            0
 #define NLM_cookie_sz          1+XDR_QUADLEN(NLM_MAXCOOKIELEN)
-#define NLM_caller_sz          1+XDR_QUADLEN(sizeof(system_utsname.nodename))
+#define NLM_caller_sz          1+XDR_QUADLEN(sizeof(utsname()->nodename))
 #define NLM_netobj_sz          1+XDR_QUADLEN(XDR_MAX_NETOBJ)
 /* #define NLM_owner_sz                1+XDR_QUADLEN(NLM_MAXOWNER) */
 #define NLM_fhandle_sz         1+XDR_QUADLEN(NFS2_FHSIZE)
index 21dfadf..e0b6a80 100644 (file)
@@ -1514,7 +1514,7 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
                goto out_unlock;
        }
 
-       error = f_setown(filp, current->pid, 0);
+       error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
 out_unlock:
        unlock_kernel();
        return error;
index 66d921e..55442a6 100644 (file)
@@ -133,7 +133,7 @@ struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 
 static inline int check_mnt(struct vfsmount *mnt)
 {
-       return mnt->mnt_namespace == current->namespace;
+       return mnt->mnt_namespace == current->nsproxy->namespace;
 }
 
 static void touch_namespace(struct namespace *ns)
@@ -830,7 +830,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
        if (parent_nd) {
                detach_mnt(source_mnt, parent_nd);
                attach_mnt(source_mnt, nd);
-               touch_namespace(current->namespace);
+               touch_namespace(current->nsproxy->namespace);
        } else {
                mnt_set_mountpoint(dest_mnt, dest_dentry, source_mnt);
                commit_tree(source_mnt);
@@ -1441,7 +1441,7 @@ dput_out:
  */
 struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 {
-       struct namespace *namespace = tsk->namespace;
+       struct namespace *namespace = tsk->nsproxy->namespace;
        struct namespace *new_ns;
        struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL;
        struct vfsmount *p, *q;
@@ -1508,7 +1508,7 @@ struct namespace *dup_namespace(struct task_struct *tsk, struct fs_struct *fs)
 
 int copy_namespace(int flags, struct task_struct *tsk)
 {
-       struct namespace *namespace = tsk->namespace;
+       struct namespace *namespace = tsk->nsproxy->namespace;
        struct namespace *new_ns;
        int err = 0;
 
@@ -1531,7 +1531,7 @@ int copy_namespace(int flags, struct task_struct *tsk)
                goto out;
        }
 
-       tsk->namespace = new_ns;
+       tsk->nsproxy->namespace = new_ns;
 
 out:
        put_namespace(namespace);
@@ -1754,7 +1754,7 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
        detach_mnt(user_nd.mnt, &root_parent);
        attach_mnt(user_nd.mnt, &old_nd);     /* mount old root on put_old */
        attach_mnt(new_nd.mnt, &root_parent); /* mount new_root on / */
-       touch_namespace(current->namespace);
+       touch_namespace(current->nsproxy->namespace);
        spin_unlock(&vfsmount_lock);
        chroot_fs_refs(&user_nd, &new_nd);
        security_sb_post_pivotroot(&user_nd, &new_nd);
@@ -1780,7 +1780,6 @@ static void __init init_mount_tree(void)
 {
        struct vfsmount *mnt;
        struct namespace *namespace;
-       struct task_struct *g, *p;
 
        mnt = do_kern_mount("rootfs", 0, "rootfs", NULL);
        if (IS_ERR(mnt))
@@ -1796,13 +1795,8 @@ static void __init init_mount_tree(void)
        namespace->root = mnt;
        mnt->mnt_namespace = namespace;
 
-       init_task.namespace = namespace;
-       read_lock(&tasklist_lock);
-       do_each_thread(g, p) {
-               get_namespace(namespace);
-               p->namespace = namespace;
-       } while_each_thread(g, p);
-       read_unlock(&tasklist_lock);
+       init_task.nsproxy->namespace = namespace;
+       get_namespace(namespace);
 
        set_fs_pwd(current->fs, namespace->root, namespace->root->mnt_root);
        set_fs_root(current->fs, namespace->root, namespace->root->mnt_root);
index a3ee113..7933e2e 100644 (file)
@@ -58,7 +58,6 @@ module_param_call(callback_tcpport, param_set_port, param_get_int,
  */
 static void nfs_callback_svc(struct svc_rqst *rqstp)
 {
-       struct svc_serv *serv = rqstp->rq_server;
        int err;
 
        __module_get(THIS_MODULE);
@@ -80,7 +79,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
                /*
                 * Listen for a request on the socket
                 */
-               err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT);
+               err = svc_recv(rqstp, MAX_SCHEDULE_TIMEOUT);
                if (err == -EAGAIN || err == -EINTR)
                        continue;
                if (err < 0) {
@@ -91,7 +90,7 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
                }
                dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
                                NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
-               svc_process(serv, rqstp);
+               svc_process(rqstp);
        }
 
        svc_exit_thread(rqstp);
@@ -116,7 +115,7 @@ int nfs_callback_up(void)
                goto out;
        init_completion(&nfs_callback_info.started);
        init_completion(&nfs_callback_info.stopped);
-       serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE);
+       serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
        ret = -ENOMEM;
        if (!serv)
                goto out_err;
index ec1938d..8106f3b 100644 (file)
@@ -460,7 +460,8 @@ static int nfs_start_lockd(struct nfs_server *server)
                goto out;
        if (server->flags & NFS_MOUNT_NONLM)
                goto out;
-       error = lockd_up();
+       error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
+                       IPPROTO_TCP : IPPROTO_UDP);
        if (error < 0)
                server->flags |= NFS_MOUNT_NONLM;
        else
index c0a754e..1d656a6 100644 (file)
@@ -312,7 +312,7 @@ static int __init root_nfs_name(char *name)
        /* Override them by options set on kernel command-line */
        root_nfs_parse(name, buf);
 
-       cp = system_utsname.nodename;
+       cp = utsname()->nodename;
        if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) {
                printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n");
                return -1;
index 01bc68c..cfe141e 100644 (file)
@@ -370,7 +370,7 @@ static int check_export(struct inode *inode, int flags)
         */
        if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
            !(flags & NFSEXP_FSID)) {
-               dprintk("exp_export: export of non-dev fs without fsid");
+               dprintk("exp_export: export of non-dev fs without fsid\n");
                return -EINVAL;
        }
        if (!inode->i_sb->s_export_op) {
@@ -1078,6 +1078,7 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
 /* Iterator */
 
 static void *e_start(struct seq_file *m, loff_t *pos)
+       __acquires(svc_export_cache.hash_lock)
 {
        loff_t n = *pos;
        unsigned hash, export;
@@ -1086,7 +1087,7 @@ static void *e_start(struct seq_file *m, loff_t *pos)
        exp_readlock();
        read_lock(&svc_export_cache.hash_lock);
        if (!n--)
-               return (void *)1;
+               return SEQ_START_TOKEN;
        hash = n >> 32;
        export = n & ((1LL<<32) - 1);
 
@@ -1110,7 +1111,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
        struct cache_head *ch = p;
        int hash = (*pos >> 32);
 
-       if (p == (void *)1)
+       if (p == SEQ_START_TOKEN)
                hash = 0;
        else if (ch->next == NULL) {
                hash++;
@@ -1131,6 +1132,7 @@ static void *e_next(struct seq_file *m, void *p, loff_t *pos)
 }
 
 static void e_stop(struct seq_file *m, void *p)
+       __releases(svc_export_cache.hash_lock)
 {
        read_unlock(&svc_export_cache.hash_lock);
        exp_readunlock();
@@ -1178,15 +1180,13 @@ static int e_show(struct seq_file *m, void *p)
 {
        struct cache_head *cp = p;
        struct svc_export *exp = container_of(cp, struct svc_export, h);
-       svc_client *clp;
 
-       if (p == (void *)1) {
+       if (p == SEQ_START_TOKEN) {
                seq_puts(m, "# Version 1.1\n");
                seq_puts(m, "# Path Client(Flags) # IPs\n");
                return 0;
        }
 
-       clp = exp->ex_client;
        cache_get(&exp->h);
        if (cache_check(&svc_export_cache, &exp->h, NULL))
                return 0;
index 8583d99..f6ca9fb 100644 (file)
@@ -131,7 +131,7 @@ xdr_error:                                      \
 #define READ_BUF(nbytes)  do { \
        p = xdr_inline_decode(xdr, nbytes); \
        if (!p) { \
-               dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
+               dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
                        __FUNCTION__, __LINE__); \
                return -EIO; \
        } \
index ee4eff2..15ded7a 100644 (file)
@@ -600,7 +600,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se
                        &setattr->sa_stateid, CHECK_FH | WR_STATE, NULL);
                nfs4_unlock_state();
                if (status) {
-                       dprintk("NFSD: nfsd4_setattr: couldn't process stateid!");
+                       dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
                        return status;
                }
        }
index 7046ac9..5c6a477 100644 (file)
 #include <linux/pagemap.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/ctype.h>
 
 #include <linux/nfs.h>
 #include <linux/nfsd_idmap.h>
+#include <linux/lockd/bind.h>
 #include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
 #include <linux/nfsd/nfsd.h>
 #include <linux/nfsd/cache.h>
 #include <linux/nfsd/xdr.h>
@@ -35,8 +39,6 @@
 
 #include <asm/uaccess.h>
 
-unsigned int nfsd_versbits = ~0;
-
 /*
  *     We have a single directory with 9 nodes in it.
  */
@@ -52,7 +54,9 @@ enum {
        NFSD_List,
        NFSD_Fh,
        NFSD_Threads,
+       NFSD_Pool_Threads,
        NFSD_Versions,
+       NFSD_Ports,
        /*
         * The below MUST come last.  Otherwise we leave a hole in nfsd_files[]
         * with !CONFIG_NFSD_V4 and simple_fill_super() goes oops
@@ -75,7 +79,9 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size);
 static ssize_t write_getfs(struct file *file, char *buf, size_t size);
 static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
 static ssize_t write_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size);
 static ssize_t write_versions(struct file *file, char *buf, size_t size);
+static ssize_t write_ports(struct file *file, char *buf, size_t size);
 #ifdef CONFIG_NFSD_V4
 static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
 static ssize_t write_recoverydir(struct file *file, char *buf, size_t size);
@@ -91,7 +97,9 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = {
        [NFSD_Getfs] = write_getfs,
        [NFSD_Fh] = write_filehandle,
        [NFSD_Threads] = write_threads,
+       [NFSD_Pool_Threads] = write_pool_threads,
        [NFSD_Versions] = write_versions,
+       [NFSD_Ports] = write_ports,
 #ifdef CONFIG_NFSD_V4
        [NFSD_Leasetime] = write_leasetime,
        [NFSD_RecoveryDir] = write_recoverydir,
@@ -358,6 +366,72 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size)
        return strlen(buf);
 }
 
+extern int nfsd_nrpools(void);
+extern int nfsd_get_nrthreads(int n, int *);
+extern int nfsd_set_nrthreads(int n, int *);
+
+static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
+{
+       /* if size > 0, look for an array of number of threads per node
+        * and apply them, then write out number of threads per node as reply.
+        *
+        * Returns the length of the reply placed in buf, or a negative
+        * errno.  The temporary nthreads array is freed on every exit
+        * path taken after it has been allocated.
+        */
+       char *mesg = buf;
+       int i;
+       int rv;
+       int len;
+       int npools = nfsd_nrpools();
+       int *nthreads;
+
+       if (npools == 0) {
+               /*
+                * NFS is shut down.  The admin can start it by
+                * writing to the threads file but NOT the pool_threads
+                * file, sorry.  Report zero threads.
+                */
+               strcpy(buf, "0\n");
+               return strlen(buf);
+       }
+
+       nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
+       if (nthreads == NULL)
+               return -ENOMEM;
+
+       if (size > 0) {
+               for (i = 0; i < npools; i++) {
+                       rv = get_int(&mesg, &nthreads[i]);
+                       if (rv == -ENOENT)
+                               break;          /* fewer numbers than pools */
+                       if (rv)
+                               goto out_free;  /* syntax error */
+                       rv = -EINVAL;
+                       if (nthreads[i] < 0)
+                               goto out_free;
+               }
+               rv = nfsd_set_nrthreads(i, nthreads);
+               if (rv)
+                       goto out_free;
+       }
+
+       rv = nfsd_get_nrthreads(npools, nthreads);
+       if (rv)
+               goto out_free;
+
+       /* Format one number per pool, space separated, newline after
+        * the last one; snprintf() keeps each write inside the buffer.
+        */
+       mesg = buf;
+       size = SIMPLE_TRANSACTION_LIMIT;
+       for (i = 0; i < npools && size > 0; i++) {
+               snprintf(mesg, size, "%d%c", nthreads[i], (i == npools-1 ? '\n' : ' '));
+               len = strlen(mesg);
+               size -= len;
+               mesg += len;
+       }
+
+       /* Success: the reply length becomes the return value.  Fall
+        * through to out_free so nthreads is released here too; returning
+        * directly would leak the array on every successful call.
+        */
+       rv = mesg - buf;
+
+out_free:
+       kfree(nthreads);
+       return rv;
+}
+
 static ssize_t write_versions(struct file *file, char *buf, size_t size)
 {
        /*
@@ -372,6 +446,10 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
 
        if (size>0) {
                if (nfsd_serv)
+                       /* Cannot change versions without updating
+                        * nfsd_serv->sv_xdrsize, and reallocing
+                        * rq_argp and rq_resp
+                        */
                        return -EBUSY;
                if (buf[size-1] != '\n')
                        return -EINVAL;
@@ -390,10 +468,7 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
                        case 2:
                        case 3:
                        case 4:
-                               if (sign != '-')
-                                       NFSCTL_VERSET(nfsd_versbits, num);
-                               else
-                                       NFSCTL_VERUNSET(nfsd_versbits, num);
+                               nfsd_vers(num, sign == '-' ? NFSD_CLEAR : NFSD_SET);
                                break;
                        default:
                                return -EINVAL;
@@ -404,16 +479,15 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
                /* If all get turned off, turn them back on, as
                 * having no versions is BAD
                 */
-               if ((nfsd_versbits & NFSCTL_VERALL)==0)
-                       nfsd_versbits = NFSCTL_VERALL;
+               nfsd_reset_versions();
        }
        /* Now write current state into reply buffer */
        len = 0;
        sep = "";
        for (num=2 ; num <= 4 ; num++)
-               if (NFSCTL_VERISSET(NFSCTL_VERALL, num)) {
+               if (nfsd_vers(num, NFSD_AVAIL)) {
                        len += sprintf(buf+len, "%s%c%d", sep,
-                                      NFSCTL_VERISSET(nfsd_versbits, num)?'+':'-',
+                                      nfsd_vers(num, NFSD_TEST)?'+':'-',
                                       num);
                        sep = " ";
                }
@@ -421,6 +495,62 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
        return len;
 }
 
+static ssize_t write_ports(struct file *file, char *buf, size_t size)
+{      /* Transaction handler behind the "portlist" file.
+        * An empty write (size == 0) only reports: buf is filled in by
+        * svc_sock_names() and its result is returned, or 0 when no
+        * nfsd service exists.  Other input is handled as described
+        * further down; anything unrecognised yields -EINVAL.
+        */
+       if (size == 0) {
+               int len = 0;
+               lock_kernel();
+               if (nfsd_serv)
+                       len = svc_sock_names(buf, nfsd_serv, NULL);
+               unlock_kernel();
+               return len;
+       }
+       /* Either a single 'fd' number is written, in which
+        * case it must be for a socket of a supported family/protocol,
+        * and we use it as an nfsd socket, or
+        * A '-' followed by the 'name' of a socket in which case
+        * we close the socket.
+        */
+       if (isdigit(buf[0])) {
+               char *mesg = buf;
+               int fd;
+               int err;
+               err = get_int(&mesg, &fd);
+               if (err)
+                       return -EINVAL;
+               if (fd < 0)
+                       return -EINVAL;
+               err = nfsd_create_serv();
+               if (!err) {
+                       int proto = 0;  /* passed to svc_addsock() by address */
+                       err = lockd_up(proto);  /* NOTE(review): proto is still 0
+                                                * here; svc_addsock() only gets
+                                                * &proto afterwards - confirm
+                                                * that this order is intended */
+                       if (!err) {
+                               err = svc_addsock(nfsd_serv, fd, buf, &proto);
+                               if (err)
+                                       lockd_down();   /* undo the lockd_up() above */
+                       }
+                       /* Decrease the count, but don't shutdown
+                        * the service.  Presumably this balances the
+                        * count taken through nfsd_create_serv() -
+                        * TODO confirm.
+                        */
+                       nfsd_serv->sv_nrthreads--;
+               }
+               return err;
+       }
+       if (buf[0] == '-') {
+               char *toclose = kstrdup(buf+1, GFP_KERNEL);     /* private copy of the text after '-' */
+               int len = 0;
+               if (!toclose)
+                       return -ENOMEM;
+               lock_kernel();
+               if (nfsd_serv)
+                       len = svc_sock_names(buf, nfsd_serv, toclose);  /* buf doubles as the reply buffer */
+               unlock_kernel();
+               kfree(toclose);
+               return len;
+       }
+       return -EINVAL;
+}
+
 #ifdef CONFIG_NFSD_V4
 extern time_t nfs4_leasetime(void);
 
@@ -483,7 +613,9 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
                [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
                [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+               [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
+               [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
 #ifdef CONFIG_NFSD_V4
                [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, S_IWUSR|S_IRUSR},
index ec1decf..1944305 100644 (file)
@@ -57,12 +57,6 @@ static atomic_t                      nfsd_busy;
 static unsigned long           nfsd_last_call;
 static DEFINE_SPINLOCK(nfsd_call_lock);
 
-struct nfsd_list {
-       struct list_head        list;
-       struct task_struct      *task;
-};
-static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
-
 #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
 static struct svc_stat nfsd_acl_svcstats;
 static struct svc_version *    nfsd_acl_version[] = {
@@ -117,6 +111,32 @@ struct svc_program         nfsd_program = {
 
 };
 
+int nfsd_vers(int vers, enum vers_op change)
+{
+       if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
+               return -1;
+       switch(change) {
+       case NFSD_SET:
+               nfsd_versions[vers] = nfsd_version[vers];
+               break;
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+               if (vers < NFSD_ACL_NRVERS)
+                       nfsd_acl_version[vers] = nfsd_acl_version[vers];
+#endif
+       case NFSD_CLEAR:
+               nfsd_versions[vers] = NULL;
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+               if (vers < NFSD_ACL_NRVERS)
+                       nfsd_acl_version[vers] = NULL;
+#endif
+               break;
+       case NFSD_TEST:
+               return nfsd_versions[vers] != NULL;
+       case NFSD_AVAIL:
+               return nfsd_version[vers] != NULL;
+       }
+       return 0;
+}
 /*
  * Maximum number of nfsd processes
  */
@@ -130,16 +150,175 @@ int nfsd_nrthreads(void)
                return nfsd_serv->sv_nrthreads;
 }
 
+static int killsig;    /* signal that was used to kill last nfsd; written by
+                        * nfsd() and read by nfsd_last_thread() */
+static void nfsd_last_thread(struct svc_serv *serv)
+{
+       /* When last nfsd thread exits we need to do some clean-up.
+        * This function is handed to svc_create_pooled() by
+        * nfsd_create_serv().
+        */
+       struct svc_sock *svsk;
+       list_for_each_entry(svsk, &serv->sv_permsocks, sk_list)
+               lockd_down();   /* one call per entry on sv_permsocks */
+       nfsd_serv = NULL;
+       nfsd_racache_shutdown();
+       nfs4_state_shutdown();
+
+       printk(KERN_WARNING "nfsd: last server has exited\n");
+       if (killsig != SIG_NOCLEAN) {   /* exports are kept only after a SIG_NOCLEAN exit */
+               printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+               nfsd_export_flush();
+       }
+}
+
+void nfsd_reset_versions(void)
+{
+       int found_one = 0;
+       int i;
+
+       for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
+               if (nfsd_program.pg_vers[i])
+                       found_one = 1;
+       }
+
+       if (!found_one) {
+               for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
+                       nfsd_program.pg_vers[i] = nfsd_version[i];
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+               for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
+                       nfsd_acl_program.pg_vers[i] =
+                               nfsd_acl_version[i];
+#endif
+       }
+}
+
+int nfsd_create_serv(void)
+{      /* Make sure the nfsd service exists.  If nfsd_serv is already
+        * set, svc_get() is called on it and 0 is returned; otherwise a
+        * pooled service is created.  Returns 0, or -ENOMEM when
+        * svc_create_pooled() returns NULL.
+        */
+       int err = 0;
+       lock_kernel();
+       if (nfsd_serv) {
+               svc_get(nfsd_serv);
+               unlock_kernel();
+               return 0;
+       }
+
+       atomic_set(&nfsd_busy, 0);
+       nfsd_serv = svc_create_pooled(&nfsd_program, NFSD_BUFSIZE,
+                                     nfsd_last_thread,
+                                     nfsd, SIG_NOCLEAN, THIS_MODULE);
+       if (nfsd_serv == NULL)
+               err = -ENOMEM;
+       unlock_kernel();
+       do_gettimeofday(&nfssvc_boot);          /* record boot time; also runs when creation failed */
+       return err;
+}
+
+static int nfsd_init_socks(int port)
+{
+       int error;
+       if (!list_empty(&nfsd_serv->sv_permsocks))
+               return 0;
+
+       error = lockd_up(IPPROTO_UDP);
+       if (error >= 0) {
+               error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+               if (error < 0)
+                       lockd_down();
+       }
+       if (error < 0)
+               return error;
+
+#ifdef CONFIG_NFSD_TCP
+       error = lockd_up(IPPROTO_TCP);
+       if (error >= 0) {
+               error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+               if (error < 0)
+                       lockd_down();
+       }
+       if (error < 0)
+               return error;
+#endif
+       return 0;
+}
+
+/* Number of pools in the current nfsd service; 0 when there is none. */
+int nfsd_nrpools(void)
+{
+       return nfsd_serv != NULL ? nfsd_serv->sv_nrpools : 0;
+}
+
+/*
+ * Copy the per-pool thread counts of the current nfsd service into
+ * nthreads[], filling at most n entries and at most one per pool.
+ * The array is left untouched when no service exists.  Always
+ * returns 0.
+ */
+int nfsd_get_nrthreads(int n, int *nthreads)
+{
+       int pool;
+
+       if (nfsd_serv == NULL)
+               return 0;
+
+       for (pool = 0; pool < n && pool < nfsd_serv->sv_nrpools; pool++)
+               nthreads[pool] = nfsd_serv->sv_pools[pool].sp_nrthreads;
+
+       return 0;
+}
+
+int nfsd_set_nrthreads(int n, int *nthreads)
+{      /* Apply per-pool thread counts: nthreads[i] is the requested
+        * count for pool i, for the first n pools.  The array is
+        * adjusted in place (capped, scaled) before it is applied.
+        * Returns 0 when there is no service or n <= 0, otherwise the
+        * result of the last svc_set_num_threads() call made.
+        */
+       int i = 0;
+       int tot = 0;
+       int err = 0;
+
+       if (nfsd_serv == NULL || n <= 0)
+               return 0;
+
+       if (n > nfsd_serv->sv_nrpools)
+               n = nfsd_serv->sv_nrpools;      /* ignore entries beyond the last pool */
+
+       /* enforce a global maximum number of threads:
+        * first cap every single entry, summing as we go
+        */
+       tot = 0;
+       for (i = 0; i < n; i++) {
+               if (nthreads[i] > NFSD_MAXSERVS)
+                       nthreads[i] = NFSD_MAXSERVS;
+               tot += nthreads[i];
+       }
+       if (tot > NFSD_MAXSERVS) {
+               /* total too large: scale down requested numbers;
+                * tot is reduced by the amount removed from each entry
+                */
+               for (i = 0; i < n && tot > 0; i++) {
+                       int new = nthreads[i] * NFSD_MAXSERVS / tot;
+                       tot -= (nthreads[i] - new);
+                       nthreads[i] = new;
+               }
+               for (i = 0; i < n && tot > 0; i++) {    /* NOTE(review): lowers each entry by
+                                                        * one while tot stays positive -
+                                                        * confirm this is the intended
+                                                        * correction after scaling */
+                       nthreads[i]--;
+                       tot--;
+               }
+       }
+
+       /*
+        * There must always be a thread in pool 0; the admin
+        * can't shut down NFS completely using pool_threads.
+        */
+       if (nthreads[0] == 0)
+               nthreads[0] = 1;
+
+       /* apply the new numbers; svc_get()/svc_destroy() bracket the
+        * loop, all under the big kernel lock
+        */
+       lock_kernel();
+       svc_get(nfsd_serv);
+       for (i = 0; i < n; i++) {
+               err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
+                                         nthreads[i]);
+               if (err)
+                       break;  /* stop at the first pool that fails */
+       }
+       svc_destroy(nfsd_serv);
+       unlock_kernel();
+
+       return err;
+}
+
 int
 nfsd_svc(unsigned short port, int nrservs)
 {
        int     error;
-       int     none_left, found_one, i;
-       struct list_head *victim;
        
        lock_kernel();
-       dprintk("nfsd: creating service: vers 0x%x\n",
-               nfsd_versbits);
+       dprintk("nfsd: creating service\n");
        error = -EINVAL;
        if (nrservs <= 0)
                nrservs = 0;
@@ -153,91 +332,20 @@ nfsd_svc(unsigned short port, int nrservs)
        error = nfs4_state_start();
        if (error<0)
                goto out;
-       if (!nfsd_serv) {
-               /*
-                * Use the nfsd_ctlbits to define which
-                * versions that will be advertised.
-                * If nfsd_ctlbits doesn't list any version,
-                * export them all.
-                */
-               found_one = 0;
-
-               for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++) {
-                       if (NFSCTL_VERISSET(nfsd_versbits, i)) {
-                               nfsd_program.pg_vers[i] = nfsd_version[i];
-                               found_one = 1;
-                       } else
-                               nfsd_program.pg_vers[i] = NULL;
-               }
 
-               if (!found_one) {
-                       for (i = NFSD_MINVERS; i < NFSD_NRVERS; i++)
-                               nfsd_program.pg_vers[i] = nfsd_version[i];
-               }
+       nfsd_reset_versions();
 
+       error = nfsd_create_serv();
 
-#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-               found_one = 0;
-
-               for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++) {
-                       if (NFSCTL_VERISSET(nfsd_versbits, i)) {
-                               nfsd_acl_program.pg_vers[i] =
-                                       nfsd_acl_version[i];
-                               found_one = 1;
-                       } else
-                               nfsd_acl_program.pg_vers[i] = NULL;
-               }
-
-               if (!found_one) {
-                       for (i = NFSD_ACL_MINVERS; i < NFSD_ACL_NRVERS; i++)
-                               nfsd_acl_program.pg_vers[i] =
-                                       nfsd_acl_version[i];
-               }
-#endif
-
-               atomic_set(&nfsd_busy, 0);
-               error = -ENOMEM;
-               nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
-               if (nfsd_serv == NULL)
-                       goto out;
-               error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
-               if (error < 0)
-                       goto failure;
+       if (error)
+               goto out;
+       error = nfsd_init_socks(port);
+       if (error)
+               goto failure;
 
-#ifdef CONFIG_NFSD_TCP
-               error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
-               if (error < 0)
-                       goto failure;
-#endif
-               do_gettimeofday(&nfssvc_boot);          /* record boot time */
-       } else
-               nfsd_serv->sv_nrthreads++;
-       nrservs -= (nfsd_serv->sv_nrthreads-1);
-       while (nrservs > 0) {
-               nrservs--;
-               __module_get(THIS_MODULE);
-               error = svc_create_thread(nfsd, nfsd_serv);
-               if (error < 0) {
-                       module_put(THIS_MODULE);
-                       break;
-               }
-       }
-       victim = nfsd_list.next;
-       while (nrservs < 0 && victim != &nfsd_list) {
-               struct nfsd_list *nl =
-                       list_entry(victim,struct nfsd_list, list);
-               victim = victim->next;
-               send_sig(SIG_NOCLEAN, nl->task, 1);
-               nrservs++;
-       }
+       error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
  failure:
-       none_left = (nfsd_serv->sv_nrthreads == 1);
        svc_destroy(nfsd_serv);         /* Release server */
-       if (none_left) {
-               nfsd_serv = NULL;
-               nfsd_racache_shutdown();
-               nfs4_state_shutdown();
-       }
  out:
        unlock_kernel();
        return error;
@@ -270,10 +378,8 @@ update_thread_usage(int busy_threads)
 static void
 nfsd(struct svc_rqst *rqstp)
 {
-       struct svc_serv *serv = rqstp->rq_server;
        struct fs_struct *fsp;
        int             err;
-       struct nfsd_list me;
        sigset_t shutdown_mask, allowed_mask;
 
        /* Lock module and set up kernel thread */
@@ -297,10 +403,7 @@ nfsd(struct svc_rqst *rqstp)
 
        nfsdstats.th_cnt++;
 
-       lockd_up();                             /* start lockd */
-
-       me.task = current;
-       list_add(&me.list, &nfsd_list);
+       rqstp->rq_task = current;
 
        unlock_kernel();
 
@@ -322,8 +425,7 @@ nfsd(struct svc_rqst *rqstp)
                 * Find a socket with data available and call its
                 * recvfrom routine.
                 */
-               while ((err = svc_recv(serv, rqstp,
-                                      60*60*HZ)) == -EAGAIN)
+               while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
                        ;
                if (err < 0)
                        break;
@@ -336,7 +438,7 @@ nfsd(struct svc_rqst *rqstp)
                /* Process request with signals blocked.  */
                sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
 
-               svc_process(serv, rqstp);
+               svc_process(rqstp);
 
                /* Unlock export hash tables */
                exp_readunlock();
@@ -353,29 +455,13 @@ nfsd(struct svc_rqst *rqstp)
                        if (sigismember(&current->pending.signal, signo) &&
                            !sigismember(&current->blocked, signo))
                                break;
-               err = signo;
+               killsig = signo;
        }
-       /* Clear signals before calling lockd_down() and svc_exit_thread() */
+       /* Clear signals before calling svc_exit_thread() */
        flush_signals(current);
 
        lock_kernel();
 
-       /* Release lockd */
-       lockd_down();
-
-       /* Check if this is last thread */
-       if (serv->sv_nrthreads==1) {
-               
-               printk(KERN_WARNING "nfsd: last server has exited\n");
-               if (err != SIG_NOCLEAN) {
-                       printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
-                       nfsd_export_flush();
-               }
-               nfsd_serv = NULL;
-               nfsd_racache_shutdown();        /* release read-ahead cache */
-               nfs4_state_shutdown();
-       }
-       list_del(&me.list);
        nfsdstats.th_cnt --;
 
 out:
index c9e3b5a..443ebc5 100644 (file)
@@ -1114,7 +1114,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
         */
        if (!resfhp->fh_dentry) {
                /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
-               fh_lock(fhp);
+               fh_lock_nested(fhp, I_MUTEX_PARENT);
                dchild = lookup_one_len(fname, dentry, flen);
                err = PTR_ERR(dchild);
                if (IS_ERR(dchild))
@@ -1240,7 +1240,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
        err = nfserr_notdir;
        if(!dirp->i_op || !dirp->i_op->lookup)
                goto out;
-       fh_lock(fhp);
+       fh_lock_nested(fhp, I_MUTEX_PARENT);
 
        /*
         * Compose the response file handle.
@@ -1494,7 +1494,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        if (isdotent(name, len))
                goto out;
 
-       fh_lock(ffhp);
+       fh_lock_nested(ffhp, I_MUTEX_PARENT);
        ddir = ffhp->fh_dentry;
        dirp = ddir->d_inode;
 
@@ -1644,7 +1644,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (err)
                goto out;
 
-       fh_lock(fhp);
+       fh_lock_nested(fhp, I_MUTEX_PARENT);
        dentry = fhp->fh_dentry;
        dirp = dentry->d_inode;
 
index 9de6b49..b1317ad 100644 (file)
@@ -163,8 +163,6 @@ int register_nls(struct nls_table * nls)
 {
        struct nls_table ** tmp = &tables;
 
-       if (!nls)
-               return -EINVAL;
        if (nls->next)
                return -EBUSY;
 
index c0e5549..25e917f 100644 (file)
@@ -162,7 +162,7 @@ static inline char * task_state(struct task_struct *p, char *buffer)
        int g;
        struct fdtable *fdt = NULL;
 
-       read_lock(&tasklist_lock);
+       rcu_read_lock();
        buffer += sprintf(buffer,
                "State:\t%s\n"
                "SleepAVG:\t%lu%%\n"
@@ -174,14 +174,13 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                "Gid:\t%d\t%d\t%d\t%d\n",
                get_task_state(p),
                (p->sleep_avg/1024)*100/(1020000000/1024),
-               p->tgid,
-               p->pid, pid_alive(p) ? p->group_leader->real_parent->tgid : 0,
-               pid_alive(p) && p->ptrace ? p->parent->pid : 0,
+               p->tgid, p->pid,
+               pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0,
+               pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0,
                p->uid, p->euid, p->suid, p->fsuid,
                p->gid, p->egid, p->sgid, p->fsgid);
-       read_unlock(&tasklist_lock);
+
        task_lock(p);
-       rcu_read_lock();
        if (p->files)
                fdt = files_fdtable(p->files);
        buffer += sprintf(buffer,
@@ -244,6 +243,7 @@ static void collect_sigign_sigcatch(struct task_struct *p, sigset_t *ign,
 
 static inline char * task_sig(struct task_struct *p, char *buffer)
 {
+       unsigned long flags;
        sigset_t pending, shpending, blocked, ignored, caught;
        int num_threads = 0;
        unsigned long qsize = 0;
@@ -255,10 +255,8 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
        sigemptyset(&ignored);
        sigemptyset(&caught);
 
-       /* Gather all the data with the appropriate locks held */
-       read_lock(&tasklist_lock);
-       if (p->sighand) {
-               spin_lock_irq(&p->sighand->siglock);
+       rcu_read_lock();
+       if (lock_task_sighand(p, &flags)) {
                pending = p->pending.signal;
                shpending = p->signal->shared_pending.signal;
                blocked = p->blocked;
@@ -266,9 +264,9 @@ static inline char * task_sig(struct task_struct *p, char *buffer)
                num_threads = atomic_read(&p->signal->count);
                qsize = atomic_read(&p->user->sigpending);
                qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur;
-               spin_unlock_irq(&p->sighand->siglock);
+               unlock_task_sighand(p, &flags);
        }
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
 
        buffer += sprintf(buffer, "Threads:\t%d\n", num_threads);
        buffer += sprintf(buffer, "SigQ:\t%lu/%lu\n", qsize, qlim);
@@ -322,7 +320,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        sigset_t sigign, sigcatch;
        char state;
        int res;
-       pid_t ppid, pgid = -1, sid = -1;
+       pid_t ppid = 0, pgid = -1, sid = -1;
        int num_threads = 0;
        struct mm_struct *mm;
        unsigned long long start_time;
@@ -330,8 +328,8 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        unsigned long  min_flt = 0,  maj_flt = 0;
        cputime_t cutime, cstime, utime, stime;
        unsigned long rsslim = 0;
-       struct task_struct *t;
        char tcomm[sizeof(task->comm)];
+       unsigned long flags;
 
        state = *get_task_state(task);
        vsize = eip = esp = 0;
@@ -349,15 +347,33 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
        cutime = cstime = utime = stime = cputime_zero;
 
        mutex_lock(&tty_mutex);
-       read_lock(&tasklist_lock);
-       if (task->sighand) {
-               spin_lock_irq(&task->sighand->siglock);
-               num_threads = atomic_read(&task->signal->count);
+       rcu_read_lock();
+       if (lock_task_sighand(task, &flags)) {
+               struct signal_struct *sig = task->signal;
+               struct tty_struct *tty = sig->tty;
+
+               if (tty) {
+                       /*
+                        * sig->tty is not stable, but tty_mutex
+                        * protects us from release_dev(tty)
+                        */
+                       barrier();
+                       tty_pgrp = tty->pgrp;
+                       tty_nr = new_encode_dev(tty_devnum(tty));
+               }
+
+               num_threads = atomic_read(&sig->count);
                collect_sigign_sigcatch(task, &sigign, &sigcatch);
 
+               cmin_flt = sig->cmin_flt;
+               cmaj_flt = sig->cmaj_flt;
+               cutime = sig->cutime;
+               cstime = sig->cstime;
+               rsslim = sig->rlim[RLIMIT_RSS].rlim_cur;
+
                /* add up live thread stats at the group level */
                if (whole) {
-                       t = task;
+                       struct task_struct *t = task;
                        do {
                                min_flt += t->min_flt;
                                maj_flt += t->maj_flt;
@@ -365,31 +381,20 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
                                stime = cputime_add(stime, t->stime);
                                t = next_thread(t);
                        } while (t != task);
-               }
 
-               spin_unlock_irq(&task->sighand->siglock);
-       }
-       if (task->signal) {
-               if (task->signal->tty) {
-                       tty_pgrp = task->signal->tty->pgrp;
-                       tty_nr = new_encode_dev(tty_devnum(task->signal->tty));
+                       min_flt += sig->min_flt;
+                       maj_flt += sig->maj_flt;
+                       utime = cputime_add(utime, sig->utime);
+                       stime = cputime_add(stime, sig->stime);
                }
+
+               sid = sig->session;
                pgid = process_group(task);
-               sid = task->signal->session;
-               cmin_flt = task->signal->cmin_flt;
-               cmaj_flt = task->signal->cmaj_flt;
-               cutime = task->signal->cutime;
-               cstime = task->signal->cstime;
-               rsslim = task->signal->rlim[RLIMIT_RSS].rlim_cur;
-               if (whole) {
-                       min_flt += task->signal->min_flt;
-                       maj_flt += task->signal->maj_flt;
-                       utime = cputime_add(utime, task->signal->utime);
-                       stime = cputime_add(stime, task->signal->stime);
-               }
+               ppid = rcu_dereference(task->real_parent)->tgid;
+
+               unlock_task_sighand(task, &flags);
        }
-       ppid = pid_alive(task) ? task->group_leader->real_parent->tgid : 0;
-       read_unlock(&tasklist_lock);
+       rcu_read_unlock();
        mutex_unlock(&tty_mutex);
 
        if (!whole || num_threads<2)
index 89c20d9..82da55b 100644 (file)
@@ -71,6 +71,7 @@
 #include <linux/cpuset.h>
 #include <linux/audit.h>
 #include <linux/poll.h>
+#include <linux/nsproxy.h>
 #include "internal.h"
 
 /* NOTE:
  *     in /proc for a task before it execs a suid executable.
  */
 
-/*
- * For hysterical raisins we keep the same inumbers as in the old procfs.
- * Feel free to change the macro below - just keep the range distinct from
- * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
- * As soon as we'll get a separate superblock we will be able to forget
- * about magical ranges too.
- */
-
-#define fake_ino(pid,ino) (((pid)<<16)|(ino))
-
-enum pid_directory_inos {
-       PROC_TGID_INO = 2,
-       PROC_TGID_TASK,
-       PROC_TGID_STATUS,
-       PROC_TGID_MEM,
-#ifdef CONFIG_SECCOMP
-       PROC_TGID_SECCOMP,
-#endif
-       PROC_TGID_CWD,
-       PROC_TGID_ROOT,
-       PROC_TGID_EXE,
-       PROC_TGID_FD,
-       PROC_TGID_ENVIRON,
-       PROC_TGID_AUXV,
-       PROC_TGID_CMDLINE,
-       PROC_TGID_STAT,
-       PROC_TGID_STATM,
-       PROC_TGID_MAPS,
-       PROC_TGID_NUMA_MAPS,
-       PROC_TGID_MOUNTS,
-       PROC_TGID_MOUNTSTATS,
-       PROC_TGID_WCHAN,
-#ifdef CONFIG_MMU
-       PROC_TGID_SMAPS,
-#endif
-#ifdef CONFIG_SCHEDSTATS
-       PROC_TGID_SCHEDSTAT,
-#endif
-#ifdef CONFIG_CPUSETS
-       PROC_TGID_CPUSET,
-#endif
-#ifdef CONFIG_SECURITY
-       PROC_TGID_ATTR,
-       PROC_TGID_ATTR_CURRENT,
-       PROC_TGID_ATTR_PREV,
-       PROC_TGID_ATTR_EXEC,
-       PROC_TGID_ATTR_FSCREATE,
-       PROC_TGID_ATTR_KEYCREATE,
-       PROC_TGID_ATTR_SOCKCREATE,
-#endif
-#ifdef CONFIG_AUDITSYSCALL
-       PROC_TGID_LOGINUID,
-#endif
-       PROC_TGID_OOM_SCORE,
-       PROC_TGID_OOM_ADJUST,
-       PROC_TID_INO,
-       PROC_TID_STATUS,
-       PROC_TID_MEM,
-#ifdef CONFIG_SECCOMP
-       PROC_TID_SECCOMP,
-#endif
-       PROC_TID_CWD,
-       PROC_TID_ROOT,
-       PROC_TID_EXE,
-       PROC_TID_FD,
-       PROC_TID_ENVIRON,
-       PROC_TID_AUXV,
-       PROC_TID_CMDLINE,
-       PROC_TID_STAT,
-       PROC_TID_STATM,
-       PROC_TID_MAPS,
-       PROC_TID_NUMA_MAPS,
-       PROC_TID_MOUNTS,
-       PROC_TID_MOUNTSTATS,
-       PROC_TID_WCHAN,
-#ifdef CONFIG_MMU
-       PROC_TID_SMAPS,
-#endif
-#ifdef CONFIG_SCHEDSTATS
-       PROC_TID_SCHEDSTAT,
-#endif
-#ifdef CONFIG_CPUSETS
-       PROC_TID_CPUSET,
-#endif
-#ifdef CONFIG_SECURITY
-       PROC_TID_ATTR,
-       PROC_TID_ATTR_CURRENT,
-       PROC_TID_ATTR_PREV,
-       PROC_TID_ATTR_EXEC,
-       PROC_TID_ATTR_FSCREATE,
-       PROC_TID_ATTR_KEYCREATE,
-       PROC_TID_ATTR_SOCKCREATE,
-#endif
-#ifdef CONFIG_AUDITSYSCALL
-       PROC_TID_LOGINUID,
-#endif
-       PROC_TID_OOM_SCORE,
-       PROC_TID_OOM_ADJUST,
-
-       /* Add new entries before this */
-       PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
-};
 
 /* Worst case buffer size needed for holding an integer. */
 #define PROC_NUMBUF 10
 
 struct pid_entry {
-       int type;
        int len;
        char *name;
        mode_t mode;
+       struct inode_operations *iop;
+       struct file_operations *fop;
+       union proc_op op;
 };
 
-#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
-
-static struct pid_entry tgid_base_stuff[] = {
-       E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO),
-       E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
-       E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR),
-       E(PROC_TGID_AUXV,      "auxv",    S_IFREG|S_IRUSR),
-       E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO),
-       E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
-       E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
-       E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
-       E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
-#ifdef CONFIG_NUMA
-       E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
-#endif
-       E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
-#ifdef CONFIG_SECCOMP
-       E(PROC_TGID_SECCOMP,   "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
-#endif
-       E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
-       E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
-       E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
-       E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
-       E(PROC_TGID_MOUNTSTATS, "mountstats", S_IFREG|S_IRUSR),
-#ifdef CONFIG_MMU
-       E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SECURITY
-       E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
-#endif
-#ifdef CONFIG_KALLSYMS
-       E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SCHEDSTATS
-       E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_CPUSETS
-       E(PROC_TGID_CPUSET,    "cpuset",  S_IFREG|S_IRUGO),
-#endif
-       E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
-       E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
-#ifdef CONFIG_AUDITSYSCALL
-       E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
-#endif
-       {0,0,NULL,0}
-};
-static struct pid_entry tid_base_stuff[] = {
-       E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
-       E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR),
-       E(PROC_TID_AUXV,       "auxv",    S_IFREG|S_IRUSR),
-       E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO),
-       E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
-       E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
-       E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
-       E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
-#ifdef CONFIG_NUMA
-       E(PROC_TID_NUMA_MAPS,  "numa_maps",    S_IFREG|S_IRUGO),
-#endif
-       E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
-#ifdef CONFIG_SECCOMP
-       E(PROC_TID_SECCOMP,    "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
-#endif
-       E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
-       E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
-       E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
-       E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
-#ifdef CONFIG_MMU
-       E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SECURITY
-       E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
-#endif
-#ifdef CONFIG_KALLSYMS
-       E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_SCHEDSTATS
-       E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
-#endif
-#ifdef CONFIG_CPUSETS
-       E(PROC_TID_CPUSET,     "cpuset",  S_IFREG|S_IRUGO),
-#endif
-       E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO),
-       E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
-#ifdef CONFIG_AUDITSYSCALL
-       E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
-#endif
-       {0,0,NULL,0}
-};
-
-#ifdef CONFIG_SECURITY
-static struct pid_entry tgid_attr_stuff[] = {
-       E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG|S_IRUGO),
-       E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TGID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TGID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
-       {0,0,NULL,0}
-};
-static struct pid_entry tid_attr_stuff[] = {
-       E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG|S_IRUGO),
-       E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TID_ATTR_KEYCREATE, "keycreate", S_IFREG|S_IRUGO|S_IWUGO),
-       E(PROC_TID_ATTR_SOCKCREATE, "sockcreate", S_IFREG|S_IRUGO|S_IWUGO),
-       {0,0,NULL,0}
-};
-#endif
-
-#undef E
-
-static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
-{
-       struct task_struct *task = get_proc_task(inode);
-       struct files_struct *files = NULL;
-       struct file *file;
-       int fd = proc_fd(inode);
-
-       if (task) {
-               files = get_files_struct(task);
-               put_task_struct(task);
-       }
-       if (files) {
-               /*
-                * We are not taking a ref to the file structure, so we must
-                * hold ->file_lock.
-                */
-               spin_lock(&files->file_lock);
-               file = fcheck_files(files, fd);
-               if (file) {
-                       *mnt = mntget(file->f_vfsmnt);
-                       *dentry = dget(file->f_dentry);
-                       spin_unlock(&files->file_lock);
-                       put_files_struct(files);
-                       return 0;
-               }
-               spin_unlock(&files->file_lock);
-               put_files_struct(files);
-       }
-       return -ENOENT;
+#define NOD(NAME, MODE, IOP, FOP, OP) {                        \
+       .len  = sizeof(NAME) - 1,                       \
+       .name = (NAME),                                 \
+       .mode = MODE,                                   \
+       .iop  = IOP,                                    \
+       .fop  = FOP,                                    \
+       .op   = OP,                                     \
 }
 
+#define DIR(NAME, MODE, OTYPE)                                                 \
+       NOD(NAME, (S_IFDIR|(MODE)),                                             \
+               &proc_##OTYPE##_inode_operations, &proc_##OTYPE##_operations,   \
+               {} )
+#define LNK(NAME, OTYPE)                                       \
+       NOD(NAME, (S_IFLNK|S_IRWXUGO),                          \
+               &proc_pid_link_inode_operations, NULL,          \
+               { .proc_get_link = &proc_##OTYPE##_link } )
+#define REG(NAME, MODE, OTYPE)                         \
+       NOD(NAME, (S_IFREG|(MODE)), NULL,               \
+               &proc_##OTYPE##_operations, {})
+#define INF(NAME, MODE, OTYPE)                         \
+       NOD(NAME, (S_IFREG|(MODE)),                     \
+               NULL, &proc_info_file_operations,       \
+               { .proc_read = &proc_##OTYPE } )
+
 static struct fs_struct *get_fs_struct(struct task_struct *task)
 {
        struct fs_struct *fs;
@@ -587,7 +370,7 @@ static int mounts_open(struct inode *inode, struct file *file)
 
        if (task) {
                task_lock(task);
-               namespace = task->namespace;
+               namespace = task->nsproxy->namespace;
                if (namespace)
                        get_namespace(namespace);
                task_unlock(task);
@@ -658,7 +441,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
 
                if (task) {
                        task_lock(task);
-                       namespace = task->namespace;
+                       namespace = task->nsproxy->namespace;
                        if (namespace)
                                get_namespace(namespace);
                        task_unlock(task);
@@ -1137,143 +920,6 @@ static struct inode_operations proc_pid_link_inode_operations = {
        .setattr        = proc_setattr,
 };
 
-static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
-{
-       struct dentry *dentry = filp->f_dentry;
-       struct inode *inode = dentry->d_inode;
-       struct task_struct *p = get_proc_task(inode);
-       unsigned int fd, tid, ino;
-       int retval;
-       char buf[PROC_NUMBUF];
-       struct files_struct * files;
-       struct fdtable *fdt;
-
-       retval = -ENOENT;
-       if (!p)
-               goto out_no_task;
-       retval = 0;
-       tid = p->pid;
-
-       fd = filp->f_pos;
-       switch (fd) {
-               case 0:
-                       if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
-                               goto out;
-                       filp->f_pos++;
-               case 1:
-                       ino = parent_ino(dentry);
-                       if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
-                               goto out;
-                       filp->f_pos++;
-               default:
-                       files = get_files_struct(p);
-                       if (!files)
-                               goto out;
-                       rcu_read_lock();
-                       fdt = files_fdtable(files);
-                       for (fd = filp->f_pos-2;
-                            fd < fdt->max_fds;
-                            fd++, filp->f_pos++) {
-                               unsigned int i,j;
-
-                               if (!fcheck_files(files, fd))
-                                       continue;
-                               rcu_read_unlock();
-
-                               j = PROC_NUMBUF;
-                               i = fd;
-                               do {
-                                       j--;
-                                       buf[j] = '0' + (i % 10);
-                                       i /= 10;
-                               } while (i);
-
-                               ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
-                               if (filldir(dirent, buf+j, PROC_NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
-                                       rcu_read_lock();
-                                       break;
-                               }
-                               rcu_read_lock();
-                       }
-                       rcu_read_unlock();
-                       put_files_struct(files);
-       }
-out:
-       put_task_struct(p);
-out_no_task:
-       return retval;
-}
-
-static int proc_pident_readdir(struct file *filp,
-               void *dirent, filldir_t filldir,
-               struct pid_entry *ents, unsigned int nents)
-{
-       int i;
-       int pid;
-       struct dentry *dentry = filp->f_dentry;
-       struct inode *inode = dentry->d_inode;
-       struct task_struct *task = get_proc_task(inode);
-       struct pid_entry *p;
-       ino_t ino;
-       int ret;
-
-       ret = -ENOENT;
-       if (!task)
-               goto out;
-
-       ret = 0;
-       pid = task->pid;
-       put_task_struct(task);
-       i = filp->f_pos;
-       switch (i) {
-       case 0:
-               ino = inode->i_ino;
-               if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
-                       goto out;
-               i++;
-               filp->f_pos++;
-               /* fall through */
-       case 1:
-               ino = parent_ino(dentry);
-               if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
-                       goto out;
-               i++;
-               filp->f_pos++;
-               /* fall through */
-       default:
-               i -= 2;
-               if (i >= nents) {
-                       ret = 1;
-                       goto out;
-               }
-               p = ents + i;
-               while (p->name) {
-                       if (filldir(dirent, p->name, p->len, filp->f_pos,
-                                   fake_ino(pid, p->type), p->mode >> 12) < 0)
-                               goto out;
-                       filp->f_pos++;
-                       p++;
-               }
-       }
-
-       ret = 1;
-out:
-       return ret;
-}
-
-static int proc_tgid_base_readdir(struct file * filp,
-                            void * dirent, filldir_t filldir)
-{
-       return proc_pident_readdir(filp,dirent,filldir,
-                                  tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
-}
-
-static int proc_tid_base_readdir(struct file * filp,
-                            void * dirent, filldir_t filldir)
-{
-       return proc_pident_readdir(filp,dirent,filldir,
-                                  tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
-}
 
 /* building an inode */
 
@@ -1293,13 +939,13 @@ static int task_dumpable(struct task_struct *task)
 }
 
 
-static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
+static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task)
 {
        struct inode * inode;
        struct proc_inode *ei;
 
        /* We need a new inode */
-       
+
        inode = new_inode(sb);
        if (!inode)
                goto out;
@@ -1307,13 +953,12 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
        /* Common stuff */
        ei = PROC_I(inode);
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-       inode->i_ino = fake_ino(task->pid, ino);
        inode->i_op = &proc_def_inode_operations;
 
        /*
         * grab the reference to task.
         */
-       ei->pid = get_pid(task->pids[PIDTYPE_PID].pid);
+       ei->pid = get_task_pid(task, PIDTYPE_PID);
        if (!ei->pid)
                goto out_unlock;
 
@@ -1333,6 +978,27 @@ out_unlock:
        return NULL;
 }
 
+static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+{
+       struct inode *inode = dentry->d_inode;
+       struct task_struct *task;
+       generic_fillattr(inode, stat);
+
+       rcu_read_lock();
+       stat->uid = 0;
+       stat->gid = 0;
+       task = pid_task(proc_pid(inode), PIDTYPE_PID);
+       if (task) {
+               if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
+                   task_dumpable(task)) {
+                       stat->uid = task->euid;
+                       stat->gid = task->egid;
+               }
+       }
+       rcu_read_unlock();
+       return 0;
+}
+
 /* dentry stuff */
 
 /*
@@ -1372,25 +1038,130 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
        return 0;
 }
 
-static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+static int pid_delete_dentry(struct dentry * dentry)
 {
-       struct inode *inode = dentry->d_inode;
-       struct task_struct *task;
-       generic_fillattr(inode, stat);
-
-       rcu_read_lock();
-       stat->uid = 0;
-       stat->gid = 0;
-       task = pid_task(proc_pid(inode), PIDTYPE_PID);
+       /* Is the task we represent dead?
+        * If so, then don't put the dentry on the lru list,
+        * kill it immediately.
+        */
+       return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+}
+
+static struct dentry_operations pid_dentry_operations =
+{
+       .d_revalidate   = pid_revalidate,
+       .d_delete       = pid_delete_dentry,
+};
+
+/* Lookups */
+
+typedef struct dentry *instantiate_t(struct inode *, struct dentry *, struct task_struct *, void *);
+
+/*
+ * Fill a directory entry.
+ *
+ * If possible create the dcache entry and derive our inode number and
+ * file type from dcache entry.
+ *
+ * Since all of the proc inode numbers are dynamically generated, the inode
+ * numbers do not exist until the inode is cached.  This means creating
+ * the dcache entry in readdir is necessary to keep the inode numbers
+ * reported by readdir in sync with the inode numbers reported
+ * by stat.
+ */
+static int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+       char *name, int len,
+       instantiate_t instantiate, struct task_struct *task, void *ptr)
+{
+       struct dentry *child, *dir = filp->f_dentry;
+       struct inode *inode;
+       struct qstr qname;
+       ino_t ino = 0;
+       unsigned type = DT_UNKNOWN;
+
+       qname.name = name;
+       qname.len  = len;
+       qname.hash = full_name_hash(name, len);
+
+       child = d_lookup(dir, &qname);
+       if (!child) {
+               struct dentry *new;
+               new = d_alloc(dir, &qname);
+               if (new) {
+                       child = instantiate(dir->d_inode, new, task, ptr);
+                       if (child)
+                               dput(new);
+                       else
+                               child = new;
+               }
+       }
+       if (!child || IS_ERR(child) || !child->d_inode)
+               goto end_instantiate;
+       inode = child->d_inode;
+       if (inode) {
+               ino = inode->i_ino;
+               type = inode->i_mode >> 12;
+       }
+       dput(child);
+end_instantiate:
+       if (!ino)
+               ino = find_inode_number(dir, &qname);
+       if (!ino)
+               ino = 1;
+       return filldir(dirent, name, len, filp->f_pos, ino, type);
+}
+
+static unsigned name_to_int(struct dentry *dentry)
+{
+       const char *name = dentry->d_name.name;
+       int len = dentry->d_name.len;
+       unsigned n = 0;
+
+       if (len > 1 && *name == '0')
+               goto out;
+       while (len-- > 0) {
+               unsigned c = *name++ - '0';
+               if (c > 9)
+                       goto out;
+               if (n >= (~0U-9)/10)
+                       goto out;
+               n *= 10;
+               n += c;
+       }
+       return n;
+out:
+       return ~0U;
+}
+
+static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+{
+       struct task_struct *task = get_proc_task(inode);
+       struct files_struct *files = NULL;
+       struct file *file;
+       int fd = proc_fd(inode);
+
        if (task) {
-               if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) ||
-                   task_dumpable(task)) {
-                       stat->uid = task->euid;
-                       stat->gid = task->egid;
+               files = get_files_struct(task);
+               put_task_struct(task);
+       }
+       if (files) {
+               /*
+                * We are not taking a ref to the file structure, so we must
+                * hold ->file_lock.
+                */
+               spin_lock(&files->file_lock);
+               file = fcheck_files(files, fd);
+               if (file) {
+                       *mnt = mntget(file->f_vfsmnt);
+                       *dentry = dget(file->f_dentry);
+                       spin_unlock(&files->file_lock);
+                       put_files_struct(files);
+                       return 0;
                }
+               spin_unlock(&files->file_lock);
+               put_files_struct(files);
        }
-       rcu_read_unlock();
-       return 0;
+       return -ENOENT;
 }
 
 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
@@ -1428,75 +1199,30 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
        return 0;
 }
 
-static int pid_delete_dentry(struct dentry * dentry)
-{
-       /* Is the task we represent dead?
-        * If so, then don't put the dentry on the lru list,
-        * kill it immediately.
-        */
-       return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
-}
-
 static struct dentry_operations tid_fd_dentry_operations =
 {
        .d_revalidate   = tid_fd_revalidate,
        .d_delete       = pid_delete_dentry,
 };
 
-static struct dentry_operations pid_dentry_operations =
-{
-       .d_revalidate   = pid_revalidate,
-       .d_delete       = pid_delete_dentry,
-};
-
-/* Lookups */
-
-static unsigned name_to_int(struct dentry *dentry)
-{
-       const char *name = dentry->d_name.name;
-       int len = dentry->d_name.len;
-       unsigned n = 0;
-
-       if (len > 1 && *name == '0')
-               goto out;
-       while (len-- > 0) {
-               unsigned c = *name++ - '0';
-               if (c > 9)
-                       goto out;
-               if (n >= (~0U-9)/10)
-                       goto out;
-               n *= 10;
-               n += c;
-       }
-       return n;
-out:
-       return ~0U;
-}
-
-/* SMP-safe */
-static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+static struct dentry *proc_fd_instantiate(struct inode *dir,
+       struct dentry *dentry, struct task_struct *task, void *ptr)
 {
-       struct task_struct *task = get_proc_task(dir);
-       unsigned fd = name_to_int(dentry);
-       struct dentry *result = ERR_PTR(-ENOENT);
-       struct file * file;
-       struct files_struct * files;
-       struct inode *inode;
-       struct proc_inode *ei;
-
-       if (!task)
-               goto out_no_task;
-       if (fd == ~0U)
-               goto out;
+       unsigned fd = *(unsigned *)ptr;
+       struct file *file;
+       struct files_struct *files;
+       struct inode *inode;
+       struct proc_inode *ei;
+       struct dentry *error = ERR_PTR(-ENOENT);
 
-       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
+       inode = proc_pid_make_inode(dir->i_sb, task);
        if (!inode)
                goto out;
        ei = PROC_I(inode);
        ei->fd = fd;
        files = get_files_struct(task);
        if (!files)
-               goto out_unlock;
+               goto out_iput;
        inode->i_mode = S_IFLNK;
 
        /*
@@ -1506,13 +1232,14 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
        spin_lock(&files->file_lock);
        file = fcheck_files(files, fd);
        if (!file)
-               goto out_unlock2;
+               goto out_unlock;
        if (file->f_mode & 1)
                inode->i_mode |= S_IRUSR | S_IXUSR;
        if (file->f_mode & 2)
                inode->i_mode |= S_IWUSR | S_IXUSR;
        spin_unlock(&files->file_lock);
        put_files_struct(files);
+
        inode->i_op = &proc_pid_link_inode_operations;
        inode->i_size = 64;
        ei->op.proc_get_link = proc_fd_link;
@@ -1520,34 +1247,106 @@ static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry,
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
        if (tid_fd_revalidate(dentry, NULL))
-               result = NULL;
-out:
-       put_task_struct(task);
-out_no_task:
-       return result;
+               error = NULL;
 
-out_unlock2:
+ out:
+       return error;
+out_unlock:
        spin_unlock(&files->file_lock);
        put_files_struct(files);
-out_unlock:
+out_iput:
        iput(inode);
        goto out;
 }
 
-static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
-static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
-static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
+static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
+{
+       struct task_struct *task = get_proc_task(dir);
+       unsigned fd = name_to_int(dentry);
+       struct dentry *result = ERR_PTR(-ENOENT);
+
+       if (!task)
+               goto out_no_task;
+       if (fd == ~0U)
+               goto out;
+
+       result = proc_fd_instantiate(dir, dentry, task, &fd);
+out:
+       put_task_struct(task);
+out_no_task:
+       return result;
+}
+
+static int proc_fd_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+       struct task_struct *task, int fd)
+{
+       char name[PROC_NUMBUF];
+       int len = snprintf(name, sizeof(name), "%d", fd);
+       return proc_fill_cache(filp, dirent, filldir, name, len,
+                               proc_fd_instantiate, task, &fd);
+}
+
+static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
+{
+       struct dentry *dentry = filp->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       struct task_struct *p = get_proc_task(inode);
+       unsigned int fd, tid, ino;
+       int retval;
+       struct files_struct * files;
+       struct fdtable *fdt;
+
+       retval = -ENOENT;
+       if (!p)
+               goto out_no_task;
+       retval = 0;
+       tid = p->pid;
+
+       fd = filp->f_pos;
+       switch (fd) {
+               case 0:
+                       if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
+                               goto out;
+                       filp->f_pos++;
+               case 1:
+                       ino = parent_ino(dentry);
+                       if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
+                               goto out;
+                       filp->f_pos++;
+               default:
+                       files = get_files_struct(p);
+                       if (!files)
+                               goto out;
+                       rcu_read_lock();
+                       fdt = files_fdtable(files);
+                       for (fd = filp->f_pos-2;
+                            fd < fdt->max_fds;
+                            fd++, filp->f_pos++) {
+
+                               if (!fcheck_files(files, fd))
+                                       continue;
+                               rcu_read_unlock();
+
+                               if (proc_fd_fill_cache(filp, dirent, filldir, p, fd) < 0) {
+                                       rcu_read_lock();
+                                       break;
+                               }
+                               rcu_read_lock();
+                       }
+                       rcu_read_unlock();
+                       put_files_struct(files);
+       }
+out:
+       put_task_struct(p);
+out_no_task:
+       return retval;
+}
 
 static struct file_operations proc_fd_operations = {
        .read           = generic_read_dir,
        .readdir        = proc_readfd,
 };
 
-static struct file_operations proc_task_operations = {
-       .read           = generic_read_dir,
-       .readdir        = proc_task_readdir,
-};
-
 /*
  * proc directories can do almost nothing..
  */
@@ -1556,36 +1355,162 @@ static struct inode_operations proc_fd_inode_operations = {
        .setattr        = proc_setattr,
 };
 
-static struct inode_operations proc_task_inode_operations = {
-       .lookup         = proc_task_lookup,
-       .getattr        = proc_task_getattr,
-       .setattr        = proc_setattr,
-};
-
-#ifdef CONFIG_SECURITY
-static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
-                                 size_t count, loff_t *ppos)
+static struct dentry *proc_pident_instantiate(struct inode *dir,
+       struct dentry *dentry, struct task_struct *task, void *ptr)
 {
-       struct inode * inode = file->f_dentry->d_inode;
-       unsigned long page;
-       ssize_t length;
-       struct task_struct *task = get_proc_task(inode);
-
-       length = -ESRCH;
-       if (!task)
-               goto out_no_task;
+       struct pid_entry *p = ptr;
+       struct inode *inode;
+       struct proc_inode *ei;
+       struct dentry *error = ERR_PTR(-EINVAL);
 
-       if (count > PAGE_SIZE)
-               count = PAGE_SIZE;
-       length = -ENOMEM;
-       if (!(page = __get_free_page(GFP_KERNEL)))
+       inode = proc_pid_make_inode(dir->i_sb, task);
+       if (!inode)
                goto out;
 
-       length = security_getprocattr(task, 
-                                     (char*)file->f_dentry->d_name.name, 
-                                     (void*)page, count);
-       if (length >= 0)
-               length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
+       ei = PROC_I(inode);
+       inode->i_mode = p->mode;
+       if (S_ISDIR(inode->i_mode))
+               inode->i_nlink = 2;     /* Use getattr to fix if necessary */
+       if (p->iop)
+               inode->i_op = p->iop;
+       if (p->fop)
+               inode->i_fop = p->fop;
+       ei->op = p->op;
+       dentry->d_op = &pid_dentry_operations;
+       d_add(dentry, inode);
+       /* Close the race of the process dying before we return the dentry */
+       if (pid_revalidate(dentry, NULL))
+               error = NULL;
+out:
+       return error;
+}
+
+static struct dentry *proc_pident_lookup(struct inode *dir, 
+                                        struct dentry *dentry,
+                                        struct pid_entry *ents,
+                                        unsigned int nents)     /* ents[] is scanned as nents entries */
+{      /* Find dentry's name in ents[] and instantiate the matching entry. */
+       struct inode *inode;    /* only set to NULL below; not used otherwise in this function */
+       struct dentry *error;
+       struct task_struct *task = get_proc_task(dir);  /* paired with put_task_struct() at out: */
+       struct pid_entry *p, *last;
+
+       error = ERR_PTR(-ENOENT);       /* result when there is no task or no name matches */
+       inode = NULL;
+
+       if (!task)
+               goto out_no_task;
+
+       /*
+        * Yes, it does not scale. And it should not. Don't add
+        * new entries into /proc/<tgid>/ without very good reasons.
+        */
+       last = &ents[nents - 1];
+       for (p = ents; p <= last; p++) {
+               if (p->len != dentry->d_name.len)       /* lengths must agree before comparing bytes */
+                       continue;
+               if (!memcmp(dentry->d_name.name, p->name, p->len))
+                       break;
+       }
+       if (p > last)   /* loop ran past the last entry: nothing matched */
+               goto out;
+
+       error = proc_pident_instantiate(dir, dentry, task, p);
+out:
+       put_task_struct(task);
+out_no_task:
+       return error;
+}
+
+static int proc_pident_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+       struct task_struct *task, struct pid_entry *p)
+{      /* Hand entry p (name, len) to proc_fill_cache() with proc_pident_instantiate as the callback. */
+       return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
+                               proc_pident_instantiate, task, p);
+}
+
+static int proc_pident_readdir(struct file *filp,
+               void *dirent, filldir_t filldir,
+               struct pid_entry *ents, unsigned int nents)
+{      /* Emit ".", ".." and then the ents[] table, resuming from filp->f_pos. */
+       int i;
+       int pid;        /* assigned below but not read again in this function */
+       struct dentry *dentry = filp->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       struct task_struct *task = get_proc_task(inode);        /* paired with put_task_struct() at out: */
+       struct pid_entry *p, *last;
+       ino_t ino;
+       int ret;
+
+       ret = -ENOENT;  /* result when get_proc_task() yields no task */
+       if (!task)
+               goto out_no_task;
+
+       ret = 0;        /* result if filldir or the fill-cache helper reports failure below */
+       pid = task->pid;
+       i = filp->f_pos;
+       switch (i) {
+       case 0:         /* position 0 is "." */
+               ino = inode->i_ino;
+               if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
+                       goto out;
+               i++;
+               filp->f_pos++;
+               /* fall through */
+       case 1:         /* position 1 is ".." */
+               ino = parent_ino(dentry);
+               if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+                       goto out;
+               i++;
+               filp->f_pos++;
+               /* fall through */
+       default:
+               i -= 2;         /* positions from 2 upward index into ents[] */
+               if (i >= nents) {       /* position is past the table: nothing left to emit */
+                       ret = 1;
+                       goto out;
+               }
+               p = ents + i;
+               last = &ents[nents - 1];
+               while (p <= last) {
+                       if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
+                               goto out;       /* f_pos is left at this entry; ret is still 0 */
+                       filp->f_pos++;
+                       p++;
+               }
+       }
+
+       ret = 1;        /* every remaining entry was passed to the fill-cache helper */
+out:
+       put_task_struct(task);
+out_no_task:
+       return ret;
+}
+
+#ifdef CONFIG_SECURITY
+static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
+                                 size_t count, loff_t *ppos)
+{
+       struct inode * inode = file->f_dentry->d_inode;
+       unsigned long page;
+       ssize_t length;
+       struct task_struct *task = get_proc_task(inode);
+
+       length = -ESRCH;
+       if (!task)
+               goto out_no_task;
+
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+       length = -ENOMEM;
+       if (!(page = __get_free_page(GFP_KERNEL)))
+               goto out;
+
+       length = security_getprocattr(task,
+                                     (char*)file->f_dentry->d_name.name,
+                                     (void*)page, count);
+       if (length >= 0)
+               length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
        free_page(page);
 out:
        put_task_struct(task);
@@ -1595,17 +1520,17 @@ out_no_task:
 
 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
                                   size_t count, loff_t *ppos)
-{ 
+{
        struct inode * inode = file->f_dentry->d_inode;
-       char *page; 
-       ssize_t length; 
+       char *page;
+       ssize_t length;
        struct task_struct *task = get_proc_task(inode);
 
        length = -ESRCH;
        if (!task)
                goto out_no_task;
-       if (count > PAGE_SIZE) 
-               count = PAGE_SIZE; 
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
 
        /* No partial writes. */
        length = -EINVAL;
@@ -1613,16 +1538,16 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
                goto out;
 
        length = -ENOMEM;
-       page = (char*)__get_free_page(GFP_USER); 
-       if (!page) 
+       page = (char*)__get_free_page(GFP_USER);
+       if (!page)
                goto out;
 
-       length = -EFAULT; 
-       if (copy_from_user(page, buf, count)) 
+       length = -EFAULT;
+       if (copy_from_user(page, buf, count))
                goto out_free;
 
-       length = security_setprocattr(task, 
-                                     (char*)file->f_dentry->d_name.name, 
+       length = security_setprocattr(task,
+                                     (char*)file->f_dentry->d_name.name,
                                      (void*)page, count);
 out_free:
        free_page((unsigned long) page);
@@ -1630,329 +1555,262 @@ out:
        put_task_struct(task);
 out_no_task:
        return length;
-} 
+}
 
 static struct file_operations proc_pid_attr_operations = {
        .read           = proc_pid_attr_read,
        .write          = proc_pid_attr_write,
 };
 
-static struct file_operations proc_tid_attr_operations;
-static struct inode_operations proc_tid_attr_inode_operations;
-static struct file_operations proc_tgid_attr_operations;
-static struct inode_operations proc_tgid_attr_inode_operations;
+static struct pid_entry attr_dir_stuff[] = {   /* table used by proc_attr_dir_readdir() and proc_attr_dir_lookup() */
+       REG("current",    S_IRUGO|S_IWUGO, pid_attr),
+       REG("prev",       S_IRUGO,         pid_attr),   /* the only entry here without a write bit */
+       REG("exec",       S_IRUGO|S_IWUGO, pid_attr),
+       REG("fscreate",   S_IRUGO|S_IWUGO, pid_attr),
+       REG("keycreate",  S_IRUGO|S_IWUGO, pid_attr),
+       REG("sockcreate", S_IRUGO|S_IWUGO, pid_attr),
+};
+
+static int proc_attr_dir_readdir(struct file * filp,
+                            void * dirent, filldir_t filldir)
+{      /* readdir handler: list the attr_dir_stuff table via proc_pident_readdir(). */
+       return proc_pident_readdir(filp,dirent,filldir,
+                                  attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
+}
+
+static struct file_operations proc_attr_dir_operations = {     /* file operations whose readdir lists attr_dir_stuff */
+       .read           = generic_read_dir,
+       .readdir        = proc_attr_dir_readdir,
+};
+
+static struct dentry *proc_attr_dir_lookup(struct inode *dir,
+                               struct dentry *dentry, struct nameidata *nd)
+{      /* lookup handler: resolve dentry's name against attr_dir_stuff; nd is not used. */
+       return proc_pident_lookup(dir, dentry,
+                                 attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
+}
+
+static struct inode_operations proc_attr_dir_inode_operations = {      /* inode operations whose lookup searches attr_dir_stuff */
+       .lookup         = proc_attr_dir_lookup,
+       .getattr        = pid_getattr,
+       .setattr        = proc_setattr,
+};
+
 #endif
 
-/* SMP-safe */
-static struct dentry *proc_pident_lookup(struct inode *dir, 
-                                        struct dentry *dentry,
-                                        struct pid_entry *ents)
+/*
+ * /proc/self:
+ */
+static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
+                             int buflen)
 {
+       char tmp[PROC_NUMBUF];
+       sprintf(tmp, "%d", current->tgid);      /* link text is current->tgid in decimal */
+       return vfs_readlink(dentry,buffer,buflen,tmp);  /* pass that text on together with buffer/buflen */
+}
+
+static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
+{      /* dentry is not used; the target depends only on the calling task. */
+       char tmp[PROC_NUMBUF];
+       sprintf(tmp, "%d", current->tgid);      /* link text is current->tgid in decimal */
+       return ERR_PTR(vfs_follow_link(nd,tmp));        /* vfs_follow_link()'s result is returned wrapped by ERR_PTR() */
+}
+
+static struct inode_operations proc_self_inode_operations = {  /* referenced by the "self" entry in proc_base_stuff */
+       .readlink       = proc_self_readlink,
+       .follow_link    = proc_self_follow_link,
+};
+
+/*
+ * proc base
+ *
+ * These are the directory entries in the root directory of /proc
+ * that properly belong to the /proc filesystem, as they
+ * describe something that is process related.
+ */
+static struct pid_entry proc_base_stuff[] = {  /* table searched by proc_base_lookup() */
+       NOD("self", S_IFLNK|S_IRWXUGO,          /* "self" with a symlink mode (S_IFLNK) */
+               &proc_self_inode_operations, NULL, {}), /* NOTE(review): NOD argument order is not visible here; presumably iop, fop, op */
+};
+
+/*
+ *     Exceptional case: normally we are not allowed to unhash a busy
+ * directory. In this case, however, we can do it - no aliasing problems
+ * due to the way we treat inodes.
+ */
+static int proc_base_revalidate(struct dentry *dentry, struct nameidata *nd)
+{      /* Return 1 while get_proc_task() still finds a task; otherwise d_drop() the dentry and return 0. */
+       struct inode *inode = dentry->d_inode;
+       struct task_struct *task = get_proc_task(inode);
+       if (task) {
+               put_task_struct(task);  /* the task was only needed for the existence check */
+               return 1;
+       }
+       d_drop(dentry);
+       return 0;
+}
+
+static struct dentry_operations proc_base_dentry_operations =  /* installed as dentry->d_op by proc_base_instantiate() */
+{
+       .d_revalidate   = proc_base_revalidate,
+       .d_delete       = pid_delete_dentry,
+};
+
+static struct dentry *proc_base_instantiate(struct inode *dir,
+       struct dentry *dentry, struct task_struct *task, void *ptr)
+{      /* Build an inode for the pid_entry passed in ptr and attach it to dentry. */
+       struct pid_entry *p = ptr;
         struct inode *inode;
+       struct proc_inode *ei;
+       struct dentry *error = ERR_PTR(-EINVAL);        /* overwritten with -ENOMEM just below */
+
+       /* Allocate the inode */
+       error = ERR_PTR(-ENOMEM);       /* also the value returned if get_task_pid() fails */
+       inode = new_inode(dir->i_sb);
+       if (!inode)
+               goto out;
+
+       /* Initialize the inode */
+       ei = PROC_I(inode);
+       inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+
+       /*
+        * grab the reference to the task.
+        */
+       ei->pid = get_task_pid(task, PIDTYPE_PID);
+       if (!ei->pid)
+               goto out_iput;
+
+       inode->i_uid = 0;
+       inode->i_gid = 0;
+       inode->i_mode = p->mode;
+       if (S_ISDIR(inode->i_mode))
+               inode->i_nlink = 2;
+       if (S_ISLNK(inode->i_mode))
+               inode->i_size = 64;
+       if (p->iop)     /* iop and fop are applied only when the entry supplies them */
+               inode->i_op = p->iop;
+       if (p->fop)
+               inode->i_fop = p->fop;
+       ei->op = p->op;
+       dentry->d_op = &proc_base_dentry_operations;
+       d_add(dentry, inode);
+       error = NULL;   /* success: NULL is returned */
+out:
+       return error;
+out_iput:      /* failure after new_inode(): release the inode, then return error via out */
+       iput(inode);
+       goto out;
+}
+
+static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry)
+{
        struct dentry *error;
        struct task_struct *task = get_proc_task(dir);
-       struct pid_entry *p;
-       struct proc_inode *ei;
+       struct pid_entry *p, *last;
 
        error = ERR_PTR(-ENOENT);
-       inode = NULL;
 
        if (!task)
                goto out_no_task;
 
-       for (p = ents; p->name; p++) {
+       /* Lookup the directory entry */
+       last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1];
+       for (p = proc_base_stuff; p <= last; p++) {
                if (p->len != dentry->d_name.len)
                        continue;
                if (!memcmp(dentry->d_name.name, p->name, p->len))
                        break;
        }
-       if (!p->name)
+       if (p > last)
                goto out;
 
-       error = ERR_PTR(-EINVAL);
-       inode = proc_pid_make_inode(dir->i_sb, task, p->type);
-       if (!inode)
-               goto out;
+       error = proc_base_instantiate(dir, dentry, task, p);
 
-       ei = PROC_I(inode);
-       inode->i_mode = p->mode;
-       /*
-        * Yes, it does not scale. And it should not. Don't add
-        * new entries into /proc/<tgid>/ without very good reasons.
-        */
-       switch(p->type) {
-               case PROC_TGID_TASK:
-                       inode->i_nlink = 2;
-                       inode->i_op = &proc_task_inode_operations;
-                       inode->i_fop = &proc_task_operations;
-                       break;
-               case PROC_TID_FD:
-               case PROC_TGID_FD:
-                       inode->i_nlink = 2;
-                       inode->i_op = &proc_fd_inode_operations;
-                       inode->i_fop = &proc_fd_operations;
-                       break;
-               case PROC_TID_EXE:
-               case PROC_TGID_EXE:
-                       inode->i_op = &proc_pid_link_inode_operations;
-                       ei->op.proc_get_link = proc_exe_link;
-                       break;
-               case PROC_TID_CWD:
-               case PROC_TGID_CWD:
-                       inode->i_op = &proc_pid_link_inode_operations;
-                       ei->op.proc_get_link = proc_cwd_link;
-                       break;
-               case PROC_TID_ROOT:
-               case PROC_TGID_ROOT:
-                       inode->i_op = &proc_pid_link_inode_operations;
-                       ei->op.proc_get_link = proc_root_link;
-                       break;
-               case PROC_TID_ENVIRON:
-               case PROC_TGID_ENVIRON:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_environ;
-                       break;
-               case PROC_TID_AUXV:
-               case PROC_TGID_AUXV:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_auxv;
-                       break;
-               case PROC_TID_STATUS:
-               case PROC_TGID_STATUS:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_status;
-                       break;
-               case PROC_TID_STAT:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_tid_stat;
-                       break;
-               case PROC_TGID_STAT:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_tgid_stat;
-                       break;
-               case PROC_TID_CMDLINE:
-               case PROC_TGID_CMDLINE:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_cmdline;
-                       break;
-               case PROC_TID_STATM:
-               case PROC_TGID_STATM:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_statm;
-                       break;
-               case PROC_TID_MAPS:
-               case PROC_TGID_MAPS:
-                       inode->i_fop = &proc_maps_operations;
-                       break;
+out:
+       put_task_struct(task);
+out_no_task:
+       return error;
+}
+
+static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+       struct task_struct *task, struct pid_entry *p)
+{      /* Hand entry p (name, len) to proc_fill_cache() with proc_base_instantiate as the callback. */
+       return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
+                               proc_base_instantiate, task, p);
+}
+
+/*
+ * Thread groups
+ */
+static struct file_operations proc_task_operations;
+static struct inode_operations proc_task_inode_operations;
+
+static struct pid_entry tgid_base_stuff[] = {
+       DIR("task",       S_IRUGO|S_IXUGO, task),
+       DIR("fd",         S_IRUSR|S_IXUSR, fd),
+       INF("environ",    S_IRUSR, pid_environ),
+       INF("auxv",       S_IRUSR, pid_auxv),
+       INF("status",     S_IRUGO, pid_status),
+       INF("cmdline",    S_IRUGO, pid_cmdline),
+       INF("stat",       S_IRUGO, tgid_stat),
+       INF("statm",      S_IRUGO, pid_statm),
+       REG("maps",       S_IRUGO, maps),
 #ifdef CONFIG_NUMA
-               case PROC_TID_NUMA_MAPS:
-               case PROC_TGID_NUMA_MAPS:
-                       inode->i_fop = &proc_numa_maps_operations;
-                       break;
+       REG("numa_maps",  S_IRUGO, numa_maps),
 #endif
-               case PROC_TID_MEM:
-               case PROC_TGID_MEM:
-                       inode->i_fop = &proc_mem_operations;
-                       break;
+       REG("mem",        S_IRUSR|S_IWUSR, mem),
 #ifdef CONFIG_SECCOMP
-               case PROC_TID_SECCOMP:
-               case PROC_TGID_SECCOMP:
-                       inode->i_fop = &proc_seccomp_operations;
-                       break;
-#endif /* CONFIG_SECCOMP */
-               case PROC_TID_MOUNTS:
-               case PROC_TGID_MOUNTS:
-                       inode->i_fop = &proc_mounts_operations;
-                       break;
+       REG("seccomp",    S_IRUSR|S_IWUSR, seccomp),
+#endif
+       LNK("cwd",        cwd),
+       LNK("root",       root),
+       LNK("exe",        exe),
+       REG("mounts",     S_IRUGO, mounts),
+       REG("mountstats", S_IRUSR, mountstats),
 #ifdef CONFIG_MMU
-               case PROC_TID_SMAPS:
-               case PROC_TGID_SMAPS:
-                       inode->i_fop = &proc_smaps_operations;
-                       break;
+       REG("smaps",      S_IRUGO, smaps),
 #endif
-               case PROC_TID_MOUNTSTATS:
-               case PROC_TGID_MOUNTSTATS:
-                       inode->i_fop = &proc_mountstats_operations;
-                       break;
 #ifdef CONFIG_SECURITY
-               case PROC_TID_ATTR:
-                       inode->i_nlink = 2;
-                       inode->i_op = &proc_tid_attr_inode_operations;
-                       inode->i_fop = &proc_tid_attr_operations;
-                       break;
-               case PROC_TGID_ATTR:
-                       inode->i_nlink = 2;
-                       inode->i_op = &proc_tgid_attr_inode_operations;
-                       inode->i_fop = &proc_tgid_attr_operations;
-                       break;
-               case PROC_TID_ATTR_CURRENT:
-               case PROC_TGID_ATTR_CURRENT:
-               case PROC_TID_ATTR_PREV:
-               case PROC_TGID_ATTR_PREV:
-               case PROC_TID_ATTR_EXEC:
-               case PROC_TGID_ATTR_EXEC:
-               case PROC_TID_ATTR_FSCREATE:
-               case PROC_TGID_ATTR_FSCREATE:
-               case PROC_TID_ATTR_KEYCREATE:
-               case PROC_TGID_ATTR_KEYCREATE:
-               case PROC_TID_ATTR_SOCKCREATE:
-               case PROC_TGID_ATTR_SOCKCREATE:
-                       inode->i_fop = &proc_pid_attr_operations;
-                       break;
+       DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
 #endif
 #ifdef CONFIG_KALLSYMS
-               case PROC_TID_WCHAN:
-               case PROC_TGID_WCHAN:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_wchan;
-                       break;
+       INF("wchan",      S_IRUGO, pid_wchan),
 #endif
 #ifdef CONFIG_SCHEDSTATS
-               case PROC_TID_SCHEDSTAT:
-               case PROC_TGID_SCHEDSTAT:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_pid_schedstat;
-                       break;
+       INF("schedstat",  S_IRUGO, pid_schedstat),
 #endif
 #ifdef CONFIG_CPUSETS
-               case PROC_TID_CPUSET:
-               case PROC_TGID_CPUSET:
-                       inode->i_fop = &proc_cpuset_operations;
-                       break;
+       REG("cpuset",     S_IRUGO, cpuset),
 #endif
-               case PROC_TID_OOM_SCORE:
-               case PROC_TGID_OOM_SCORE:
-                       inode->i_fop = &proc_info_file_operations;
-                       ei->op.proc_read = proc_oom_score;
-                       break;
-               case PROC_TID_OOM_ADJUST:
-               case PROC_TGID_OOM_ADJUST:
-                       inode->i_fop = &proc_oom_adjust_operations;
-                       break;
+       INF("oom_score",  S_IRUGO, oom_score),
+       REG("oom_adj",    S_IRUGO|S_IWUSR, oom_adjust),
 #ifdef CONFIG_AUDITSYSCALL
-               case PROC_TID_LOGINUID:
-               case PROC_TGID_LOGINUID:
-                       inode->i_fop = &proc_loginuid_operations;
-                       break;
+       REG("loginuid",   S_IWUSR|S_IRUGO, loginuid),
 #endif
-               default:
-                       printk("procfs: impossible type (%d)",p->type);
-                       iput(inode);
-                       error = ERR_PTR(-EINVAL);
-                       goto out;
-       }
-       dentry->d_op = &pid_dentry_operations;
-       d_add(dentry, inode);
-       /* Close the race of the process dying before we return the dentry */
-       if (pid_revalidate(dentry, NULL))
-               error = NULL;
-out:
-       put_task_struct(task);
-out_no_task:
-       return error;
-}
-
-static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
-       return proc_pident_lookup(dir, dentry, tgid_base_stuff);
-}
-
-static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
-       return proc_pident_lookup(dir, dentry, tid_base_stuff);
-}
-
-static struct file_operations proc_tgid_base_operations = {
-       .read           = generic_read_dir,
-       .readdir        = proc_tgid_base_readdir,
 };
 
-static struct file_operations proc_tid_base_operations = {
-       .read           = generic_read_dir,
-       .readdir        = proc_tid_base_readdir,
-};
-
-static struct inode_operations proc_tgid_base_inode_operations = {
-       .lookup         = proc_tgid_base_lookup,
-       .getattr        = pid_getattr,
-       .setattr        = proc_setattr,
-};
-
-static struct inode_operations proc_tid_base_inode_operations = {
-       .lookup         = proc_tid_base_lookup,
-       .getattr        = pid_getattr,
-       .setattr        = proc_setattr,
-};
-
-#ifdef CONFIG_SECURITY
-static int proc_tgid_attr_readdir(struct file * filp,
-                            void * dirent, filldir_t filldir)
-{
-       return proc_pident_readdir(filp,dirent,filldir,
-                                  tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
-}
-
-static int proc_tid_attr_readdir(struct file * filp,
-                            void * dirent, filldir_t filldir)
-{
-       return proc_pident_readdir(filp,dirent,filldir,
-                                  tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
-}
-
-static struct file_operations proc_tgid_attr_operations = {
-       .read           = generic_read_dir,
-       .readdir        = proc_tgid_attr_readdir,
-};
-
-static struct file_operations proc_tid_attr_operations = {
-       .read           = generic_read_dir,
-       .readdir        = proc_tid_attr_readdir,
-};
-
-static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
-                               struct dentry *dentry, struct nameidata *nd)
-{
-       return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
-}
-
-static struct dentry *proc_tid_attr_lookup(struct inode *dir,
-                               struct dentry *dentry, struct nameidata *nd)
-{
-       return proc_pident_lookup(dir, dentry, tid_attr_stuff);
-}
-
-static struct inode_operations proc_tgid_attr_inode_operations = {
-       .lookup         = proc_tgid_attr_lookup,
-       .getattr        = pid_getattr,
-       .setattr        = proc_setattr,
-};
-
-static struct inode_operations proc_tid_attr_inode_operations = {
-       .lookup         = proc_tid_attr_lookup,
-       .getattr        = pid_getattr,
-       .setattr        = proc_setattr,
-};
-#endif
-
-/*
- * /proc/self:
- */
-static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
-                             int buflen)
+static int proc_tgid_base_readdir(struct file * filp,
+                            void * dirent, filldir_t filldir)
 {
-       char tmp[PROC_NUMBUF];
-       sprintf(tmp, "%d", current->tgid);
-       return vfs_readlink(dentry,buffer,buflen,tmp);
+       return proc_pident_readdir(filp,dirent,filldir,
+                                  tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
 }
 
-static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-       char tmp[PROC_NUMBUF];
-       sprintf(tmp, "%d", current->tgid);
-       return ERR_PTR(vfs_follow_link(nd,tmp));
-}      
+static struct file_operations proc_tgid_base_operations = {    /* assigned to inode->i_fop in proc_pid_instantiate() */
+       .read           = generic_read_dir,
+       .readdir        = proc_tgid_base_readdir,
+};
 
-static struct inode_operations proc_self_inode_operations = {
-       .readlink       = proc_self_readlink,
-       .follow_link    = proc_self_follow_link,
+static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+       return proc_pident_lookup(dir, dentry,  /* resolve the name against tgid_base_stuff; nd is not used */
+                                 tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
+}
+
+static struct inode_operations proc_tgid_base_inode_operations = {     /* assigned to inode->i_op in proc_pid_instantiate() */
+       .lookup         = proc_tgid_base_lookup,
+       .getattr        = pid_getattr,
+       .setattr        = proc_setattr,
+};
 
 /**
@@ -2022,54 +1880,23 @@ out:
        return;
 }
 
-/* SMP-safe */
-struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *proc_pid_instantiate(struct inode *dir,
+       struct dentry * dentry, struct task_struct *task, void *ptr)
 {
-       struct dentry *result = ERR_PTR(-ENOENT);
-       struct task_struct *task;
+       struct dentry *error = ERR_PTR(-ENOENT);
        struct inode *inode;
-       struct proc_inode *ei;
-       unsigned tgid;
-
-       if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
-               inode = new_inode(dir->i_sb);
-               if (!inode)
-                       return ERR_PTR(-ENOMEM);
-               ei = PROC_I(inode);
-               inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-               inode->i_ino = fake_ino(0, PROC_TGID_INO);
-               ei->pde = NULL;
-               inode->i_mode = S_IFLNK|S_IRWXUGO;
-               inode->i_uid = inode->i_gid = 0;
-               inode->i_size = 64;
-               inode->i_op = &proc_self_inode_operations;
-               d_add(dentry, inode);
-               return NULL;
-       }
-       tgid = name_to_int(dentry);
-       if (tgid == ~0U)
-               goto out;
-
-       rcu_read_lock();
-       task = find_task_by_pid(tgid);
-       if (task)
-               get_task_struct(task);
-       rcu_read_unlock();
-       if (!task)
-               goto out;
 
-       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
+       inode = proc_pid_make_inode(dir->i_sb, task);
        if (!inode)
-               goto out_put_task;
+               goto out;
 
        inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
        inode->i_op = &proc_tgid_base_inode_operations;
        inode->i_fop = &proc_tgid_base_operations;
        inode->i_flags|=S_IMMUTABLE;
-#ifdef CONFIG_SECURITY
-       inode->i_nlink = 5;
-#else
        inode->i_nlink = 4;
+#ifdef CONFIG_SECURITY
+       inode->i_nlink += 1;
 #endif
 
        dentry->d_op = &pid_dentry_operations;
@@ -2077,178 +1904,250 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
        d_add(dentry, inode);
        /* Close the race of the process dying before we return the dentry */
        if (pid_revalidate(dentry, NULL))
-               result = NULL;
-
-out_put_task:
-       put_task_struct(task);
+               error = NULL;
 out:
-       return result;
+       return error;
 }
 
-/* SMP-safe */
-static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
 {
        struct dentry *result = ERR_PTR(-ENOENT);
        struct task_struct *task;
-       struct task_struct *leader = get_proc_task(dir);
-       struct inode *inode;
-       unsigned tid;
+       unsigned tgid;
 
-       if (!leader)
-               goto out_no_task;
+       result = proc_base_lookup(dir, dentry);
+       if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT)
+               goto out;
 
-       tid = name_to_int(dentry);
-       if (tid == ~0U)
+       tgid = name_to_int(dentry);
+       if (tgid == ~0U)
                goto out;
 
        rcu_read_lock();
-       task = find_task_by_pid(tid);
+       task = find_task_by_pid(tgid);
        if (task)
                get_task_struct(task);
        rcu_read_unlock();
        if (!task)
                goto out;
-       if (leader->tgid != task->tgid)
-               goto out_drop_task;
-
-       inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
-
-
-       if (!inode)
-               goto out_drop_task;
-       inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
-       inode->i_op = &proc_tid_base_inode_operations;
-       inode->i_fop = &proc_tid_base_operations;
-       inode->i_flags|=S_IMMUTABLE;
-#ifdef CONFIG_SECURITY
-       inode->i_nlink = 4;
-#else
-       inode->i_nlink = 3;
-#endif
 
-       dentry->d_op = &pid_dentry_operations;
-
-       d_add(dentry, inode);
-       /* Close the race of the process dying before we return the dentry */
-       if (pid_revalidate(dentry, NULL))
-               result = NULL;
-
-out_drop_task:
+       result = proc_pid_instantiate(dir, dentry, task, NULL);
        put_task_struct(task);
 out:
-       put_task_struct(leader);
-out_no_task:
        return result;
 }
 
 /*
- * Find the first tgid to return to user space.
- *
- * Usually this is just whatever follows &init_task, but if the users
- * buffer was too small to hold the full list or there was a seek into
- * the middle of the directory we have more work to do.
- *
- * In the case of a short read we start with find_task_by_pid.
+ * Find the first task whose pid is a thread group leader's pid >= tgid
  *
- * In the case of a seek we start with &init_task and walk nr
- * threads past it.
  */
-static struct task_struct *first_tgid(int tgid, unsigned int nr)
+static struct task_struct *next_tgid(unsigned int tgid)
 {
-       struct task_struct *pos;
-       rcu_read_lock();
-       if (tgid && nr) {
-               pos = find_task_by_pid(tgid);
-               if (pos && thread_group_leader(pos))
-                       goto found;
-       }
-       /* If nr exceeds the number of processes get out quickly */
-       pos = NULL;
-       if (nr && nr >= nr_processes())
-               goto done;
+       struct task_struct *task;
+       struct pid *pid;
 
-       /* If we haven't found our starting place yet start with
-        * the init_task and walk nr tasks forward.
-        */
-       for (pos = next_task(&init_task); nr > 0; --nr) {
-               pos = next_task(pos);
-               if (pos == &init_task) {
-                       pos = NULL;
-                       goto done;
-               }
+       rcu_read_lock();
+retry:
+       task = NULL;
+       pid = find_ge_pid(tgid);
+       if (pid) {
+               tgid = pid->nr + 1;
+               task = pid_task(pid, PIDTYPE_PID);
+               /* What we want to know is if the pid we have found is
+                * the pid of a thread_group_leader.  Testing for task
+                * being a thread_group_leader is the obvious thing
+                * to do, but there is a window when it fails, due to
+                * the pid transfer logic in de_thread.
+                *
+                * So we perform the straightforward test of seeing
+                * if the pid we have found is the pid of a thread
+                * group leader, and don't worry if the task we have
+                * found doesn't happen to be a thread group leader,
+                * as we don't care in the case of readdir.
+                */
+               if (!task || !has_group_leader_pid(task))
+                       goto retry;
+               get_task_struct(task);
        }
-found:
-       get_task_struct(pos);
-done:
        rcu_read_unlock();
-       return pos;
+       return task;
 }
 
-/*
- * Find the next task in the task list.
- * Return NULL if we loop or there is any error.
- *
- * The reference to the input task_struct is released.
- */
-static struct task_struct *next_tgid(struct task_struct *start)
+#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff))
+
+static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+       struct task_struct *task, int tgid)
 {
-       struct task_struct *pos;
-       rcu_read_lock();
-       pos = start;
-       if (pid_alive(start))
-               pos = next_task(start);
-       if (pid_alive(pos) && (pos != &init_task)) {
-               get_task_struct(pos);
-               goto done;
-       }
-       pos = NULL;
-done:
-       rcu_read_unlock();
-       put_task_struct(start);
-       return pos;
+       char name[PROC_NUMBUF];
+       int len = snprintf(name, sizeof(name), "%d", tgid);
+       return proc_fill_cache(filp, dirent, filldir, name, len,
+                               proc_pid_instantiate, task, NULL);
 }
 
 /* for the /proc/ directory itself, after non-process stuff has been done */
 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-       char buf[PROC_NUMBUF];
        unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
+       struct task_struct *reaper = get_proc_task(filp->f_dentry->d_inode);
        struct task_struct *task;
        int tgid;
 
-       if (!nr) {
-               ino_t ino = fake_ino(0,PROC_TGID_INO);
-               if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
-                       return 0;
-               filp->f_pos++;
-               nr++;
+       if (!reaper)
+               goto out_no_task;
+
+       for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) {
+               struct pid_entry *p = &proc_base_stuff[nr];
+               if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0)
+                       goto out;
        }
-       nr -= 1;
 
-       /* f_version caches the tgid value that the last readdir call couldn't
-        * return. lseek aka telldir automagically resets f_version to 0.
-        */
-       tgid = filp->f_version;
-       filp->f_version = 0;
-       for (task = first_tgid(tgid, nr);
+       tgid = filp->f_pos - TGID_OFFSET;
+       for (task = next_tgid(tgid);
             task;
-            task = next_tgid(task), filp->f_pos++) {
-               int len;
-               ino_t ino;
+            put_task_struct(task), task = next_tgid(tgid + 1)) {
                tgid = task->pid;
-               len = snprintf(buf, sizeof(buf), "%d", tgid);
-               ino = fake_ino(tgid, PROC_TGID_INO);
-               if (filldir(dirent, buf, len, filp->f_pos, ino, DT_DIR) < 0) {
-                       /* returning this tgid failed, save it as the first
-                        * pid for the next readir call */
-                       filp->f_version = tgid;
+               filp->f_pos = tgid + TGID_OFFSET;
+               if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) {
                        put_task_struct(task);
-                       break;
+                       goto out;
                }
        }
+       filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
+out:
+       put_task_struct(reaper);
+out_no_task:
        return 0;
 }
 
+/*
+ * Tasks
+ */
+static struct pid_entry tid_base_stuff[] = {
+       DIR("fd",        S_IRUSR|S_IXUSR, fd),
+       INF("environ",   S_IRUSR, pid_environ),
+       INF("auxv",      S_IRUSR, pid_auxv),
+       INF("status",    S_IRUGO, pid_status),
+       INF("cmdline",   S_IRUGO, pid_cmdline),
+       INF("stat",      S_IRUGO, tid_stat),
+       INF("statm",     S_IRUGO, pid_statm),
+       REG("maps",      S_IRUGO, maps),
+#ifdef CONFIG_NUMA
+       REG("numa_maps", S_IRUGO, numa_maps),
+#endif
+       REG("mem",       S_IRUSR|S_IWUSR, mem),
+#ifdef CONFIG_SECCOMP
+       REG("seccomp",   S_IRUSR|S_IWUSR, seccomp),
+#endif
+       LNK("cwd",       cwd),
+       LNK("root",      root),
+       LNK("exe",       exe),
+       REG("mounts",    S_IRUGO, mounts),
+#ifdef CONFIG_MMU
+       REG("smaps",     S_IRUGO, smaps),
+#endif
+#ifdef CONFIG_SECURITY
+       DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
+#endif
+#ifdef CONFIG_KALLSYMS
+       INF("wchan",     S_IRUGO, pid_wchan),
+#endif
+#ifdef CONFIG_SCHEDSTATS
+       INF("schedstat", S_IRUGO, pid_schedstat),
+#endif
+#ifdef CONFIG_CPUSETS
+       REG("cpuset",    S_IRUGO, cpuset),
+#endif
+       INF("oom_score", S_IRUGO, oom_score),
+       REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
+#ifdef CONFIG_AUDITSYSCALL
+       REG("loginuid",  S_IWUSR|S_IRUGO, loginuid),
+#endif
+};
+
+static int proc_tid_base_readdir(struct file * filp,
+                            void * dirent, filldir_t filldir)
+{
+       return proc_pident_readdir(filp,dirent,filldir,
+                                  tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
+}
+
+static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
+       return proc_pident_lookup(dir, dentry,
+                                 tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
+}
+
+static struct file_operations proc_tid_base_operations = {
+       .read           = generic_read_dir,
+       .readdir        = proc_tid_base_readdir,
+};
+
+static struct inode_operations proc_tid_base_inode_operations = {
+       .lookup         = proc_tid_base_lookup,
+       .getattr        = pid_getattr,
+       .setattr        = proc_setattr,
+};
+
+static struct dentry *proc_task_instantiate(struct inode *dir,
+       struct dentry *dentry, struct task_struct *task, void *ptr)
+{
+       struct dentry *error = ERR_PTR(-ENOENT);
+       struct inode *inode;
+       inode = proc_pid_make_inode(dir->i_sb, task);
+
+       if (!inode)
+               goto out;
+       inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
+       inode->i_op = &proc_tid_base_inode_operations;
+       inode->i_fop = &proc_tid_base_operations;
+       inode->i_flags|=S_IMMUTABLE;
+       inode->i_nlink = 3;
+#ifdef CONFIG_SECURITY
+       inode->i_nlink += 1;
+#endif
+
+       dentry->d_op = &pid_dentry_operations;
+
+       d_add(dentry, inode);
+       /* Close the race of the process dying before we return the dentry */
+       if (pid_revalidate(dentry, NULL))
+               error = NULL;
+out:
+       return error;
+}
+
+static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+{
+       struct dentry *result = ERR_PTR(-ENOENT);
+       struct task_struct *task;
+       struct task_struct *leader = get_proc_task(dir);
+       unsigned tid;
+
+       if (!leader)
+               goto out_no_task;
+
+       tid = name_to_int(dentry);
+       if (tid == ~0U)
+               goto out;
+
+       rcu_read_lock();
+       task = find_task_by_pid(tid);
+       if (task)
+               get_task_struct(task);
+       rcu_read_unlock();
+       if (!task)
+               goto out;
+       if (leader->tgid != task->tgid)
+               goto out_drop_task;
+
+       result = proc_task_instantiate(dir, dentry, task, NULL);
+out_drop_task:
+       put_task_struct(task);
+out:
+       put_task_struct(leader);
+out_no_task:
+       return result;
+}
+
 /*
  * Find the first tid of a thread group to return to user space.
  *
@@ -2318,10 +2217,18 @@ static struct task_struct *next_tid(struct task_struct *start)
        return pos;
 }
 
+static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+       struct task_struct *task, int tid)
+{
+       char name[PROC_NUMBUF];
+       int len = snprintf(name, sizeof(name), "%d", tid);
+       return proc_fill_cache(filp, dirent, filldir, name, len,
+                               proc_task_instantiate, task, NULL);
+}
+
 /* for the /proc/TGID/task/ directories */
 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
 {
-       char buf[PROC_NUMBUF];
        struct dentry *dentry = filp->f_dentry;
        struct inode *inode = dentry->d_inode;
        struct task_struct *leader = get_proc_task(inode);
@@ -2358,11 +2265,8 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
        for (task = first_tid(leader, tid, pos - 2);
             task;
             task = next_tid(task), pos++) {
-               int len;
                tid = task->pid;
-               len = snprintf(buf, sizeof(buf), "%d", tid);
-               ino = fake_ino(tid, PROC_TID_INO);
-               if (filldir(dirent, buf, len, pos, ino, DT_DIR < 0)) {
+               if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
                        /* returning this tgid failed, save it as the first
                         * pid for the next readir call */
                        filp->f_version = tid;
@@ -2392,3 +2296,14 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
 
        return 0;
 }
+
+static struct inode_operations proc_task_inode_operations = {
+       .lookup         = proc_task_lookup,
+       .getattr        = proc_task_getattr,
+       .setattr        = proc_setattr,
+};
+
+static struct file_operations proc_task_operations = {
+       .read           = generic_read_dir,
+       .readdir        = proc_task_readdir,
+};
index 66bc425..8d88e58 100644 (file)
@@ -45,6 +45,7 @@
 #include <linux/sysrq.h>
 #include <linux/vmalloc.h>
 #include <linux/crash_dump.h>
+#include <linux/pspace.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/io.h>
@@ -91,7 +92,7 @@ static int loadavg_read_proc(char *page, char **start, off_t off,
                LOAD_INT(a), LOAD_FRAC(a),
                LOAD_INT(b), LOAD_FRAC(b),
                LOAD_INT(c), LOAD_FRAC(c),
-               nr_running(), nr_threads, last_pid);
+               nr_running(), nr_threads, init_pspace.last_pid);
        return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
index 8901c65..ffe66c3 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/smp_lock.h>
+#include <linux/mount.h>
 
 #include "internal.h"
 
@@ -28,6 +29,17 @@ struct proc_dir_entry *proc_sys_root;
 static int proc_get_sb(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
+       if (proc_mnt) {
+               /* Seed the root directory with a pid so it doesn't need
+                * to be special in base.c.  I would do this earlier but
+                * the only task alive when /proc is mounted the first time
+                * is the init_task and it doesn't have any pids.
+                */
+               struct proc_inode *ei;
+               ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode);
+               if (!ei->pid)
+                       ei->pid = find_get_pid(1);
+       }
        return get_sb_single(fs_type, flags, data, proc_fill_super, mnt);
 }
 
index bc6e6a9..2cabbd4 100644 (file)
@@ -580,75 +580,6 @@ type name (type1 arg1,type2 arg2,type3 arg3,type4 arg4,type5 arg5, type6 arg6)\
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/signal.h>
-#include <linux/syscalls.h>
-#include <asm/ptrace.h>
-
-static inline long open(const char * name, int mode, int flags)
-{
-       return sys_open(name, mode, flags);
-}
-
-static inline long dup(int fd)
-{
-       return sys_dup(fd);
-}
-
-static inline long close(int fd)
-{
-       return sys_close(fd);
-}
-
-static inline off_t lseek(int fd, off_t off, int whence)
-{
-       return sys_lseek(fd, off, whence);
-}
-
-static inline void _exit(int value)
-{
-       sys_exit(value);
-}
-
-#define exit(x) _exit(x)
-
-static inline long write(int fd, const char * buf, size_t nr)
-{
-       return sys_write(fd, buf, nr);
-}
-
-static inline long read(int fd, char * buf, size_t nr)
-{
-       return sys_read(fd, buf, nr);
-}
-
-extern int execve(char *, char **, char **);
-
-static inline long setsid(void)
-{
-       return sys_setsid();
-}
-
-static inline pid_t waitpid(int pid, int * wait_stat, int flags)
-{
-       return sys_wait4(pid, wait_stat, flags, NULL);
-}
-
-asmlinkage int sys_execve(char *ufilename, char **argv, char **envp,
-                       unsigned long a3, unsigned long a4, unsigned long a5,
-                       struct pt_regs regs);
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize,
-                               void *restorer);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /* "Conditional" syscalls.  What we want is
 
        __attribute__((weak,alias("sys_ni_syscall")))
index 2ab4078..14a87ee 100644 (file)
@@ -549,30 +549,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
 #define __ARCH_WANT_SYS_SOCKETCALL
 #endif
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/syscalls.h>
-
-extern long execve(const char *file, char **argv, char **envp);
-
-struct pt_regs;
-asmlinkage int sys_execve(char *filenamei, char **argv, char **envp,
-                       struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-                       struct pt_regs *regs);
-asmlinkage int sys_fork(struct pt_regs *regs);
-asmlinkage int sys_vfork(struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index c6d2436..25a5eea 100644 (file)
@@ -464,30 +464,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/syscalls.h>
-
-extern long execve(const char *file, char **argv, char **envp);
-
-struct pt_regs;
-asmlinkage int sys_execve(char *filenamei, char **argv, char **envp,
-                       struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-                       struct pt_regs *regs);
-asmlinkage int sys_fork(struct pt_regs *regs);
-asmlinkage int sys_vfork(struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index 1f528f9..a50e500 100644 (file)
 #define __NR_tee               263
 #define __NR_vmsplice          264
 
+#ifdef __KERNEL__
 #define NR_syscalls            265
 
 
-/*
- * AVR32 calling convention for system calls:
- *   - System call number in r8
- *   - Parameters in r12 and downwards to r9 as well as r6 and r5.
- *   - Return value in r12
- */
-
-/*
- * user-visible error numbers are in the range -1 - -124: see
- * <asm-generic/errno.h>
- */
-
-#define __syscall_return(type, res) do {                               \
-               if ((unsigned long)(res) >= (unsigned long)(-125)) {    \
-                       errno = -(res);                                 \
-                       res = -1;                                       \
-               }                                                       \
-               return (type) (res);                                    \
-       } while (0)
-
-#ifdef __KERNEL__
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_SYS_ALARM
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
-#endif
-
-#if defined(__KERNEL_SYSCALLS__) || defined(__CHECKER__)
-
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/signal.h>
-
-struct pt_regs;
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static inline int execve(const char *file, char **argv, char **envp)
-{
-       register long scno asm("r8") = __NR_execve;
-       register long sc1 asm("r12") = (long)file;
-       register long sc2 asm("r11") = (long)argv;
-       register long sc3 asm("r10") = (long)envp;
-       int res;
-
-       asm volatile("scall"
-                    : "=r"(sc1)
-                    : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
-                    : "lr", "memory");
-       res = sc1;
-       __syscall_return(int, res);
-}
-
-asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize);
-asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
-                              struct pt_regs *regs);
-asmlinkage int sys_rt_sigreturn(struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long __user *filedes);
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-                         unsigned long prot, unsigned long flags,
-                         unsigned long fd, off_t offset);
-asmlinkage int sys_cacheflush(int operation, void __user *addr, size_t len);
-asmlinkage int sys_fork(struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-                        unsigned long parent_tidptr,
-                        unsigned long child_tidptr, struct pt_regs *regs);
-asmlinkage int sys_vfork(struct pt_regs *regs);
-asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
-                         char __user *__user *uenvp, struct pt_regs *regs);
-
-#endif
 
 /*
  * "Conditional" syscalls
@@ -384,4 +308,6 @@ asmlinkage int sys_execve(char __user *ufilename, char __user *__user *uargv,
  */
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall");
 
+#endif /* __KERNEL__ */
+
 #endif /* __ASM_AVR32_UNISTD_H */
index 7372efa..7c90fa9 100644 (file)
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-
-struct pt_regs;
-asmlinkage long sys_mmap2(
-                       unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(const char *fname, char **argv, char **envp,
-                       long r13, long mof, long srp, struct pt_regs *regs);
-asmlinkage int sys_clone(unsigned long newusp, unsigned long flags,
-                       int* parent_tid, int* child_tid, long mof, long srp,
-                       struct pt_regs *regs);
-asmlinkage int sys_fork(long r10, long r11, long r12, long r13,
-                       long mof, long srp, struct pt_regs *regs);
-asmlinkage int sys_vfork(long r10, long r11, long r12, long r13,
-                       long mof, long srp, struct pt_regs *regs);
-asmlinkage int sys_pipe(unsigned long __user *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-/*
- * Since we define it "external", it collides with the built-in
- * definition, which has the "noreturn" attribute and will cause
- * complaints.  We don't want to use -fno-builtin, so just use a
- * different name when in the kernel.
- */
-#define _exit kernel_syscall_exit
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-#endif /* __KERNEL_SYSCALLS__ */
-
-
 /*
  * "Conditional" syscalls
  *
index d104d1b..725e854 100644 (file)
@@ -440,31 +440,6 @@ type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg
        __syscall_return(type, __sc0);                                                           \
 }
 
-
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 #define __ARCH_WANT_IPC_PARSE_VERSION
 /* #define __ARCH_WANT_OLD_READDIR */
 #define __ARCH_WANT_OLD_STAT
index a2dd904..747788d 100644 (file)
@@ -485,57 +485,6 @@ type name(atype a, btype b, ctype c, dtype d, etype e, ftype f)    \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(int,pause)
-static inline _syscall0(int,sync)
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static inline _syscall1(int,delete_module,const char *,name)
-
-static inline pid_t wait(int * wait_stat)
-{
-       return waitpid(-1,wait_stat,0);
-}
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(char *name, char **argv, char **envp,
-                       int dummy, ...);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  */
index 2a9e4ee..592ffee 100644 (file)
@@ -189,6 +189,6 @@ static void __init check_bugs(void)
        check_fpu();
        check_hlt();
        check_popad();
-       system_utsname.machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
+       init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
        alternative_instructions(); 
 }
index db4344d..3a05436 100644 (file)
@@ -112,7 +112,7 @@ typedef struct user_fxsr_struct elf_fpxregset_t;
    For the moment, we have only optimizations for the Intel generations,
    but that could change... */
 
-#define ELF_PLATFORM  (system_utsname.machine)
+#define ELF_PLATFORM  (utsname()->machine)
 
 #define SET_PERSONALITY(ex, ibcs2) do { } while (0)
 
index a4a0e52..d505f50 100644 (file)
@@ -47,7 +47,10 @@ static inline int user_mode_vm(struct pt_regs *regs)
 {
        return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL;
 }
+
 #define instruction_pointer(regs) ((regs)->eip)
+#define regs_return_value(regs) ((regs)->eax)
+
 extern unsigned long profile_pc(struct pt_regs *regs);
 #endif /* __KERNEL__ */
 
index bd99870..3ca7ab9 100644 (file)
@@ -451,45 +451,6 @@ __syscall_return(type,__res); \
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-asmlinkage int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount);
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(struct pt_regs regs);
-asmlinkage int sys_clone(struct pt_regs regs);
-asmlinkage int sys_fork(struct pt_regs regs);
-asmlinkage int sys_vfork(struct pt_regs regs);
-asmlinkage int sys_pipe(unsigned long __user *fildes);
-asmlinkage long sys_iopl(unsigned long unused);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index 1414316..f4ef87a 100644 (file)
@@ -241,6 +241,9 @@ struct switch_stack {
  * the canonical representation by adding to instruction pointer.
  */
 # define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
+
+#define regs_return_value(regs) ((regs)->r8)
+
 /* Conserve space in histogram by encoding slot bits in address
  * bits 2 and 3 rather than bits 0 and 1.
  */
index bb0eb72..53c5c0e 100644 (file)
 
 extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/string.h>
-#include <linux/signal.h>
-#include <asm/ptrace.h>
-#include <linux/stringify.h>
-#include <linux/syscalls.h>
-
-static inline long
-open (const char * name, int mode, int flags)
-{
-       return sys_open(name, mode, flags);
-}
-
-static inline long
-dup (int fd)
-{
-       return sys_dup(fd);
-}
-
-static inline long
-close (int fd)
-{
-       return sys_close(fd);
-}
-
-static inline off_t
-lseek (int fd, off_t off, int whence)
-{
-       return sys_lseek(fd, off, whence);
-}
-
-static inline void
-_exit (int value)
-{
-       sys_exit(value);
-}
-
-#define exit(x) _exit(x)
-
-static inline long
-write (int fd, const char * buf, size_t nr)
-{
-       return sys_write(fd, buf, nr);
-}
-
-static inline long
-read (int fd, char * buf, size_t nr)
-{
-       return sys_read(fd, buf, nr);
-}
-
-
-static inline long
-setsid (void)
-{
-       return sys_setsid();
-}
-
-static inline pid_t
-waitpid (int pid, int * wait_stat, int flags)
-{
-       return sys_wait4(pid, wait_stat, flags, NULL);
-}
-
-
-extern int execve (const char *filename, char *const av[], char *const ep[]);
-extern pid_t clone (unsigned long flags, void *sp);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 asmlinkage unsigned long sys_mmap(
                                unsigned long addr, unsigned long len,
                                int prot, int flags,
index 5c6a9ac..95aa342 100644 (file)
@@ -424,43 +424,6 @@ __syscall_return(type,__res); \
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static __inline__ _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-                         unsigned long prot, unsigned long flags,
-                         unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(struct pt_regs regs);
-asmlinkage int sys_clone(struct pt_regs regs);
-asmlinkage int sys_fork(struct pt_regs regs);
-asmlinkage int sys_vfork(struct pt_regs regs);
-asmlinkage int sys_pipe(unsigned long __user *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                                const struct sigaction __user *act,
-                                struct sigaction __user *oact,
-                                size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index 751632b..3ab716f 100644 (file)
@@ -409,12 +409,6 @@ __syscall_return(type,__res); \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index 21fdc37..daafb5d 100644 (file)
@@ -463,61 +463,6 @@ type name(atype a, btype b, ctype c, dtype d, etype e)                             \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/interrupt.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(int,pause)
-static inline _syscall0(int,sync)
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static inline _syscall1(int,delete_module,const char *,name)
-
-static inline pid_t wait(int * wait_stat)
-{
-       return waitpid(-1,wait_stat,0);
-}
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(char *name, char **argv, char **envp);
-asmlinkage int sys_pipe(unsigned long *fildes);
-struct pt_regs;
-int sys_request_irq(unsigned int,
-                       irqreturn_t (*)(int, void *, struct pt_regs *),
-                       unsigned long, const char *, void *);
-void sys_free_irq(unsigned int, void *);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index c391429..685c914 100644 (file)
@@ -1212,45 +1212,6 @@ type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \
 #  define __ARCH_WANT_COMPAT_SYS_TIME
 # endif
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-#include <asm/sim.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-
-asmlinkage unsigned long sys_mmap(
-                               unsigned long addr, size_t len,
-                               int prot, int flags,
-                               int fd, off_t offset);
-asmlinkage long sys_mmap2(
-                       unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs);
-asmlinkage int sys_pipe(nabi_no_regargs struct pt_regs regs);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
 #endif /* !__ASSEMBLY__ */
 
 /*
index 27bcfad..53b0f5d 100644 (file)
@@ -952,92 +952,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)      \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-/* mmap & mmap2 take 6 arguments */
-#define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \
-type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \
-{                                                                              \
-    return K_INLINE_SYSCALL(name, 6, arg1, arg2, arg3, arg4, arg5, arg6);      \
-}
-
-#ifdef __KERNEL_SYSCALLS__
-
-#include <asm/current.h>
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/syscalls.h>
-
-static inline pid_t setsid(void)
-{
-       return sys_setsid();
-}
-
-static inline int write(int fd, const char *buf, off_t count)
-{
-       return sys_write(fd, buf, count);
-}
-
-static inline int read(int fd, char *buf, off_t count)
-{
-       return sys_read(fd, buf, count);
-}
-
-static inline off_t lseek(int fd, off_t offset, int count)
-{
-       return sys_lseek(fd, offset, count);
-}
-
-static inline int dup(int fd)
-{
-       return sys_dup(fd);
-}
-
-static inline int execve(char *filename, char * argv [],
-       char * envp[])
-{
-       extern int __execve(char *, char **, char **, struct task_struct *);
-       return __execve(filename, argv, envp, current);
-}
-
-static inline int open(const char *file, int flag, int mode)
-{
-       return sys_open(file, flag, mode);
-}
-
-static inline int close(int fd)
-{
-       return sys_close(fd);
-}
-
-static inline void _exit(int exitcode)
-{
-       sys_exit(exitcode);
-}
-
-static inline pid_t waitpid(pid_t pid, int *wait_stat, int options)
-{
-       return sys_wait4(pid, wait_stat, options, NULL);
-}
-
-asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
-                               unsigned long prot, unsigned long flags,
-                               unsigned long fd, unsigned long offset);
-asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len,
-                               unsigned long prot, unsigned long flags,
-                               unsigned long fd, unsigned long pgoff);
-struct pt_regs;
-asmlinkage int sys_execve(struct pt_regs *regs);
-int sys_clone(unsigned long clone_flags, unsigned long usp,
-               struct pt_regs *regs);
-int sys_vfork(struct pt_regs *regs);
-int sys_pipe(int *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 #endif /* __ASSEMBLY__ */
 
 #undef STR
index 34e1f89..2dafa37 100644 (file)
@@ -44,6 +44,28 @@ typedef unsigned int kprobe_opcode_t;
 #define IS_TDI(instr)          (((instr) & 0xfc000000) == 0x08000000)
 #define IS_TWI(instr)          (((instr) & 0xfc000000) == 0x0c000000)
 
+/*
+ * 64bit powerpc uses function descriptors.
+ * Handle cases where:
+ *             - User passes a <.symbol> or <module:.symbol>
+ *             - User passes a <symbol> or <module:symbol>
+ *             - User passes a non-existent symbol, kallsyms_lookup_name
+ *               returns 0. Don't deref the NULL pointer in that case
+ */
+#define kprobe_lookup_name(name, addr)                                 \
+{                                                                      \
+       addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);           \
+       if (addr) {                                                     \
+               char *colon;                                            \
+               if ((colon = strchr(name, ':')) != NULL) {              \
+                       colon++;                                        \
+                       if (*colon != '\0' && *colon != '.')            \
+                               addr = *(kprobe_opcode_t **)addr;       \
+               } else if (name[0] != '.')                              \
+                       addr = *(kprobe_opcode_t **)addr;               \
+       }                                                               \
+}
+
 #define JPROBE_ENTRY(pentry)   (kprobe_opcode_t *)((func_descr_t *)pentry)
 
 #define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \
index 4435efe..4ad77a1 100644 (file)
@@ -73,6 +73,8 @@ struct pt_regs {
 #ifndef __ASSEMBLY__
 
 #define instruction_pointer(regs) ((regs)->nip)
+#define regs_return_value(regs) ((regs)->gpr[3])
+
 #ifdef CONFIG_SMP
 extern unsigned long profile_pc(struct pt_regs *regs);
 #else
index eb66eae..464a48c 100644 (file)
@@ -478,13 +478,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #endif
 
-/*
- * System call prototypes.
- */
-#ifdef __KERNEL_SYSCALLS__
-extern int execve(const char *file, char **argv, char **envp);
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index 8d2bf65..7b768c5 100644 (file)
@@ -472,6 +472,7 @@ struct user_regs_struct
 
 #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
 #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+#define regs_return_value(regs) ((regs)->gprs[2])
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs * regs);
 #endif
index 0361ac5..0cccfd8 100644 (file)
@@ -523,57 +523,6 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4,  \
 #   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/ptrace.h>
-#include <asm/stat.h>
-#include <linux/syscalls.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall2(long,stat,char *,filename,struct stat *,statbuf)
-
-static inline pid_t waitpid(int pid, int *wait_stat, int flags)
-{
-       return sys_wait4(pid, wait_stat, flags, NULL);
-}
-struct mmap_arg_struct;
-asmlinkage long sys_mmap2(struct mmap_arg_struct __user *arg);
-
-asmlinkage long sys_execve(struct pt_regs regs);
-asmlinkage long sys_clone(struct pt_regs regs);
-asmlinkage long sys_fork(struct pt_regs regs);
-asmlinkage long sys_vfork(struct pt_regs regs);
-asmlinkage long sys_pipe(unsigned long __user *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index b4000c8..beeea40 100644 (file)
@@ -18,7 +18,7 @@ static void __init check_bugs(void)
 {
        extern char *get_cpu_subtype(void);
        extern unsigned long loops_per_jiffy;
-       char *p= &system_utsname.machine[2]; /* "sh" */
+       char *p= &init_utsname()->machine[2]; /* "sh" */
 
        cpu_data->loops_per_jiffy = loops_per_jiffy;
 
index 5d5e9f9..f1a0cbc 100644 (file)
@@ -472,76 +472,6 @@ __syscall_return(type,__sc0); \
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/linkage.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static __inline__ _syscall0(int,pause)
-static __inline__ _syscall0(int,sync)
-static __inline__ _syscall0(pid_t,setsid)
-static __inline__ _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static __inline__ _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static __inline__ _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static __inline__ _syscall1(int,dup,int,fd)
-static __inline__ _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static __inline__ _syscall3(int,open,const char *,file,int,flag,int,mode)
-static __inline__ _syscall1(int,close,int,fd)
-static __inline__ _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static __inline__ _syscall1(int,delete_module,const char *,name)
-
-static __inline__ pid_t wait(int * wait_stat)
-{
-       return waitpid(-1,wait_stat,0);
-}
-
-asmlinkage long sys_mmap2(
-                       unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_execve(char *ufilename, char **uargv,
-                       char **uenvp, unsigned long r7,
-                       struct pt_regs regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-                       unsigned long parent_tidptr,
-                       unsigned long child_tidptr,
-                       struct pt_regs regs);
-asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
-                       unsigned long r6, unsigned long r7,
-                       struct pt_regs regs);
-asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
-                       unsigned long r6, unsigned long r7,
-                       struct pt_regs regs);
-asmlinkage int sys_pipe(unsigned long r4, unsigned long r5,
-                       unsigned long r6, unsigned long r7,
-                       struct pt_regs regs);
-asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char *buf,
-                               size_t count, long dummy, loff_t pos);
-asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char *buf,
-                               size_t count, long dummy, loff_t pos);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index c113566..ee7828b 100644 (file)
@@ -513,47 +513,6 @@ __syscall_return(type,__sc0);                                                  \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-/* Copy from sh */
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static inline _syscall0(int,pause)
-static inline _syscall1(int,setup,int,magic)
-static inline _syscall0(int,sync)
-static inline _syscall0(pid_t,setsid)
-static inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-static inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static inline _syscall1(int,dup,int,fd)
-static inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-static inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-static inline _syscall1(int,close,int,fd)
-static inline _syscall1(int,_exit,int,exitcode)
-static inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-static inline _syscall1(int,delete_module,const char *,name)
-
-static inline pid_t wait(int * wait_stat)
-{
-       return waitpid(-1,wait_stat,0);
-}
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index 2553762..c7a495a 100644 (file)
@@ -478,53 +478,6 @@ return -1; \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static __inline__ _syscall0(pid_t,setsid)
-static __inline__ _syscall3(int,write,int,fd,__const__ char *,buf,off_t,count)
-static __inline__ _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static __inline__ _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static __inline__ _syscall1(int,dup,int,fd)
-static __inline__ _syscall3(int,execve,__const__ char *,file,char **,argv,char **,envp)
-static __inline__ _syscall3(int,open,__const__ char *,file,int,flag,int,mode)
-static __inline__ _syscall1(int,close,int,fd)
-static __inline__ _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-
-#include <linux/linkage.h>
-
-asmlinkage unsigned long sys_mmap(
-                               unsigned long addr, unsigned long len,
-                               unsigned long prot, unsigned long flags,
-                               unsigned long fd, unsigned long off);
-asmlinkage unsigned long sys_mmap2(
-                               unsigned long addr, unsigned long len,
-                               unsigned long prot, unsigned long flags,
-                               unsigned long fd, unsigned long pgoff);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               void __user *restorer,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  *
index badc73f..124cf07 100644 (file)
@@ -445,48 +445,6 @@ if (__res>=0) \
 errno = -__res; \
 return -1; \
 }
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-static __inline__ _syscall0(pid_t,setsid)
-static __inline__ _syscall3(int,write,int,fd,__const__ char *,buf,off_t,count)
-static __inline__ _syscall3(int,read,int,fd,char *,buf,off_t,count)
-static __inline__ _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-static __inline__ _syscall1(int,dup,int,fd)
-static __inline__ _syscall3(int,execve,__const__ char *,file,char **,argv,char **,envp)
-static __inline__ _syscall3(int,open,__const__ char *,file,int,flag,int,mode)
-static __inline__ _syscall1(int,close,int,fd)
-static __inline__ _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-
-#include <linux/linkage.h>
-
-asmlinkage unsigned long sys_mmap(
-                               unsigned long addr, unsigned long len,
-                               unsigned long prot, unsigned long flags,
-                               unsigned long fd, unsigned long off);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               void __user *restorer,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
 
 /* sysconf options, for SunOS compatibility */
 #define   _SC_ARG_MAX             1
index afccfca..732c83f 100644 (file)
@@ -37,34 +37,6 @@ extern int um_execve(const char *file, char *const argv[], char *const env[]);
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #endif
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-static inline int execve(const char *filename, char *const argv[],
-                        char *const envp[])
-{
-       mm_segment_t fs;
-       int ret;
-
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-       ret = um_execve(filename, argv, envp);
-       set_fs(fs);
-
-       if (ret >= 0)
-               return ret;
-
-       errno = -(long)ret;
-       return -1;
-}
-
-int sys_execve(char *file, char **argv, char **env);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
-#undef __KERNEL_SYSCALLS__
 #include "asm/arch/unistd.h"
 
 #endif /* _UM_UNISTD_H_*/
index 552b7c8..737401e 100644 (file)
@@ -387,57 +387,6 @@ type name (atype a, btype b, ctype c, dtype d, etype e, ftype f)         \
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 
-#ifdef __KERNEL_SYSCALLS__
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-extern inline _syscall0(pid_t,setsid)
-extern inline _syscall3(int,write,int,fd,const char *,buf,off_t,count)
-extern inline _syscall3(int,read,int,fd,char *,buf,off_t,count)
-extern inline _syscall3(off_t,lseek,int,fd,off_t,offset,int,count)
-extern inline _syscall1(int,dup,int,fd)
-extern inline _syscall3(int,execve,const char *,file,char **,argv,char **,envp)
-extern inline _syscall3(int,open,const char *,file,int,flag,int,mode)
-extern inline _syscall1(int,close,int,fd)
-extern inline _syscall1(int,_exit,int,exitcode)
-extern inline _syscall3(pid_t,waitpid,pid_t,pid,int *,wait_stat,int,options)
-
-extern inline pid_t wait(int * wait_stat)
-{
-       return waitpid (-1, wait_stat, 0);
-}
-
-unsigned long sys_mmap(unsigned long addr, size_t len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, off_t offset);
-unsigned long sys_mmap2(unsigned long addr, size_t len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long pgoff);
-struct pt_regs;
-int sys_execve (char *name, char **argv, char **envp, struct pt_regs *regs);
-int sys_pipe (int *fildes);
-struct sigaction;
-asmlinkage long sys_rt_sigaction(int sig,
-                               const struct sigaction __user *act,
-                               struct sigaction __user *oact,
-                               size_t sigsetsize);
-
-#endif /* __KERNEL_SYSCALLS__ */
-
 /*
  * "Conditional" syscalls
  */
index ab827dc..5ea84db 100644 (file)
@@ -39,6 +39,8 @@ struct pt_regs {
 #define user_mode(regs) (!!((regs)->cs & 3))
 #define user_mode_vm(regs) user_mode(regs)
 #define instruction_pointer(regs) ((regs)->rip)
+#define regs_return_value(regs) ((regs)->rax)
+
 extern unsigned long profile_pc(struct pt_regs *regs);
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
 
index 6137146..777288e 100644 (file)
@@ -620,10 +620,11 @@ __SYSCALL(__NR_vmsplice, sys_vmsplice)
 #define __NR_move_pages                279
 __SYSCALL(__NR_move_pages, sys_move_pages)
 
-#ifdef __KERNEL__
-
 #define __NR_syscall_max __NR_move_pages
+
+#ifdef __KERNEL__
 #include <linux/err.h>
+#endif
 
 #ifndef __NO_STUBS
 
@@ -663,8 +664,6 @@ do { \
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_TIME
 
-#ifndef __KERNEL_SYSCALLS__
-
 #define __syscall "syscall"
 
 #define _syscall0(type,name) \
@@ -746,83 +745,7 @@ __asm__ volatile ("movq %5,%%r10 ; movq %6,%%r8 ; movq %7,%%r9 ; " __syscall \
 __syscall_return(type,__res); \
 }
 
-#else /* __KERNEL_SYSCALLS__ */
-
-#include <linux/syscalls.h>
-#include <asm/ptrace.h>
-
-/*
- * we need this inline - forking from kernel space will result
- * in NO COPY ON WRITE (!!!), until an execve is executed. This
- * is no problem, but for the stack. This is handled by not letting
- * main() use the stack at all after fork(). Thus, no function
- * calls - which means inline code for fork too, as otherwise we
- * would use the stack upon exit from 'fork()'.
- *
- * Actually only pause and fork are needed inline, so that there
- * won't be any messing with the stack from main(), but we define
- * some others too.
- */
-#define __NR__exit __NR_exit
-
-static inline pid_t setsid(void)
-{
-       return sys_setsid();
-}
-
-static inline ssize_t write(unsigned int fd, char * buf, size_t count)
-{
-       return sys_write(fd, buf, count);
-}
-
-static inline ssize_t read(unsigned int fd, char * buf, size_t count)
-{
-       return sys_read(fd, buf, count);
-}
-
-static inline off_t lseek(unsigned int fd, off_t offset, unsigned int origin)
-{
-       return sys_lseek(fd, offset, origin);
-}
-
-static inline long dup(unsigned int fd)
-{
-       return sys_dup(fd);
-}
-
-/* implemented in asm in arch/x86_64/kernel/entry.S */
-extern int execve(const char *, char * const *, char * const *);
-
-static inline long open(const char * filename, int flags, int mode)
-{
-       return sys_open(filename, flags, mode);
-}
-
-static inline long close(unsigned int fd)
-{
-       return sys_close(fd);
-}
-
-static inline pid_t waitpid(int pid, int * wait_stat, int flags)
-{
-       return sys_wait4(pid, wait_stat, flags, NULL);
-}
-
-extern long sys_mmap(unsigned long addr, unsigned long len,
-                       unsigned long prot, unsigned long flags,
-                       unsigned long fd, unsigned long off);
-
-extern int sys_modify_ldt(int func, void *ptr, unsigned long bytecount);
-
-asmlinkage long sys_execve(char *name, char **argv, char **envp,
-                       struct pt_regs regs);
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
-                       void *parent_tid, void *child_tid,
-                       struct pt_regs regs);
-asmlinkage long sys_fork(struct pt_regs regs);
-asmlinkage long sys_vfork(struct pt_regs regs);
-asmlinkage long sys_pipe(int *fildes);
-
+#ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
 #include <linux/linkage.h>
@@ -839,8 +762,8 @@ asmlinkage long sys_rt_sigaction(int sig,
                                size_t sigsetsize);
 
 #endif  /* __ASSEMBLY__ */
-
-#endif /* __KERNEL_SYSCALLS__ */
+#endif /* __KERNEL__ */
+#endif /* __NO_STUBS */
 
 /*
  * "Conditional" syscalls
@@ -850,8 +773,4 @@ asmlinkage long sys_rt_sigaction(int sig,
  */
 #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
 
-#endif /* __NO_STUBS */
-
-#endif /* __KERNEL__ */
-
 #endif /* _ASM_X86_64_UNISTD_H_ */
index 5e1b99d..411f810 100644 (file)
@@ -402,11 +402,6 @@ __asm__ __volatile__ ( \
 __syscall_return(type,__res); \
 }
 
-
-#ifdef __KERNEL_SYSCALLS__
-static __inline__ _syscall3(int,execve,const char*,file,char**,argv,char**,envp)
-#endif
-
 /*
  * "Conditional" syscalls
  *
index 9760753..6f11095 100644 (file)
@@ -13,6 +13,7 @@
 
 #include <asm/compat.h>
 #include <asm/siginfo.h>
+#include <asm/signal.h>
 
 #define compat_jiffies_to_clock_t(x)   \
                (((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
index 25423f7..ed6c0fe 100644 (file)
@@ -54,7 +54,7 @@ struct vc_data {
        struct tty_struct *vc_tty;              /* TTY we are attached to */
        /* data for manual vt switching */
        struct vt_mode  vt_mode;
-       int             vt_pid;
+       struct pid      *vt_pid;
        int             vt_newvt;
        wait_queue_head_t paste_wait;
        /* mode flags */
index 2e29a2e..91c0b2a 100644 (file)
@@ -684,7 +684,8 @@ extern struct block_device *I_BDEV(struct inode *inode);
 
 struct fown_struct {
        rwlock_t lock;          /* protects pid, uid, euid fields */
-       int pid;                /* pid or -pgrp where SIGIO should be sent */
+       struct pid *pid;        /* pid or pgrp (per pid_type) where SIGIO should be sent */
+       enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */
        uid_t uid, euid;        /* uid/euid of process setting the owner */
        int signum;             /* posix.1b rt signal to be delivered on IO */
 };
@@ -880,8 +881,10 @@ extern void kill_fasync(struct fasync_struct **, int, int);
 /* only for net: no internal synchronization */
 extern void __kill_fasync(struct fasync_struct *, int, int);
 
+extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);
 extern void f_delown(struct file *filp);
+extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
 
 /*
index 690c428..9869ef3 100644 (file)
@@ -31,5 +31,6 @@ struct gen_pool_chunk {
 
 extern struct gen_pool *gen_pool_create(int, int);
 extern int gen_pool_add(struct gen_pool *, unsigned long, size_t, int);
+extern void gen_pool_destroy(struct gen_pool *);
 extern unsigned long gen_pool_alloc(struct gen_pool *, size_t);
 extern void gen_pool_free(struct gen_pool *, unsigned long, size_t);
index 60aac2c..33c5daa 100644 (file)
@@ -4,7 +4,9 @@
 #include <linux/file.h>
 #include <linux/rcupdate.h>
 #include <linux/irqflags.h>
+#include <linux/utsname.h>
 #include <linux/lockdep.h>
+#include <linux/ipc.h>
 
 #define INIT_FDTABLE \
 {                                                      \
        .session        = 1,                                            \
 }
 
+extern struct nsproxy init_nsproxy;
+#define INIT_NSPROXY(nsproxy) {                                                \
+       .count          = ATOMIC_INIT(1),                               \
+       .nslock         = SPIN_LOCK_UNLOCKED,                           \
+       .uts_ns         = &init_uts_ns,                                 \
+       .namespace      = NULL,                                         \
+       INIT_IPC_NS(ipc_ns)                                             \
+}
+
 #define INIT_SIGHAND(sighand) {                                                \
        .count          = ATOMIC_INIT(1),                               \
        .action         = { { { .sa_handler = NULL, } }, },             \
@@ -117,6 +128,7 @@ extern struct group_info init_groups;
        .files          = &init_files,                                  \
        .signal         = &init_signals,                                \
        .sighand        = &init_sighand,                                \
+       .nsproxy        = &init_nsproxy,                                \
        .pending        = {                                             \
                .list = LIST_HEAD_INIT(tsk.pending.list),               \
                .signal = {{0}}},                                       \
index b291189..d9e2b3f 100644 (file)
@@ -2,6 +2,7 @@
 #define _LINUX_IPC_H
 
 #include <linux/types.h>
+#include <linux/kref.h>
 
 #define IPC_PRIVATE ((__kernel_key_t) 0)  
 
@@ -68,6 +69,59 @@ struct kern_ipc_perm
        void            *security;
 };
 
+struct ipc_ids;
+struct ipc_namespace {
+       struct kref     kref;
+       struct ipc_ids  *ids[3];
+
+       int             sem_ctls[4];
+       int             used_sems;
+
+       int             msg_ctlmax;
+       int             msg_ctlmnb;
+       int             msg_ctlmni;
+
+       size_t          shm_ctlmax;
+       size_t          shm_ctlall;
+       int             shm_ctlmni;
+       int             shm_tot;
+};
+
+extern struct ipc_namespace init_ipc_ns;
+
+#ifdef CONFIG_SYSVIPC
+#define INIT_IPC_NS(ns)                .ns             = &init_ipc_ns,
+#else
+#define INIT_IPC_NS(ns)
+#endif
+
+#ifdef CONFIG_IPC_NS
+extern void free_ipc_ns(struct kref *kref);
+extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
+extern int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns);
+#else
+static inline int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+       return 0;
+}
+#endif
+
+static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+{
+#ifdef CONFIG_IPC_NS
+       if (ns)
+               kref_get(&ns->kref);
+#endif
+       return ns;
+}
+
+static inline void put_ipc_ns(struct ipc_namespace *ns)
+{
+#ifdef CONFIG_IPC_NS
+       kref_put(&ns->kref, free_ipc_ns);
+#endif
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_IPC_H */
index 8bf6702..ac4c055 100644 (file)
@@ -77,6 +77,12 @@ struct kprobe {
        /* location of the probe point */
        kprobe_opcode_t *addr;
 
+       /* Allow user to indicate symbol name of the probe point */
+       char *symbol_name;
+
+       /* Offset into the symbol */
+       unsigned int offset;
+
        /* Called before addr is executed. */
        kprobe_pre_handler_t pre_handler;
 
@@ -196,7 +202,7 @@ void unregister_kretprobe(struct kretprobe *rp);
 struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp);
 void add_rp_inst(struct kretprobe_instance *ri);
 void kprobe_flush_task(struct task_struct *tk);
-void recycle_rp_inst(struct kretprobe_instance *ri);
+void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head);
 #else /* CONFIG_KPROBES */
 
 #define __kprobes      /**/
index b054deb..81e3a18 100644 (file)
@@ -30,7 +30,7 @@ extern struct nlmsvc_binding *        nlmsvc_ops;
  * Functions exported by the lockd module
  */
 extern int     nlmclnt_proc(struct inode *, int, struct file_lock *);
-extern int     lockd_up(void);
+extern int     lockd_up(int proto);
 extern void    lockd_down(void);
 
 #endif /* LINUX_LOCKD_BIND_H */
index 0d92c46..47b7dbd 100644 (file)
@@ -80,7 +80,7 @@ struct nlm_wait;
 /*
  * Memory chunk for NLM client RPC request.
  */
-#define NLMCLNT_OHSIZE         (sizeof(system_utsname.nodename)+10)
+#define NLMCLNT_OHSIZE         (sizeof(utsname()->nodename)+10)
 struct nlm_rqst {
        unsigned int            a_flags;        /* initial RPC task flags */
        struct nlm_host *       a_host;         /* host handle */
index 2c59917..4b2d809 100644 (file)
@@ -320,6 +320,8 @@ struct module
        /* Am I GPL-compatible */
        int license_gplok;
 
+       unsigned int taints;    /* same bits as kernel:tainted */
+
 #ifdef CONFIG_MODULE_UNLOAD
        /* Reference counts */
        struct module_ref ref[NR_CPUS];
index 3abc8e3..d137009 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/mount.h>
 #include <linux/sched.h>
+#include <linux/nsproxy.h>
 
 struct namespace {
        atomic_t                count;
@@ -26,11 +27,8 @@ static inline void put_namespace(struct namespace *namespace)
 
 static inline void exit_namespace(struct task_struct *p)
 {
-       struct namespace *namespace = p->namespace;
+       struct namespace *namespace = p->nsproxy->namespace;
        if (namespace) {
-               task_lock(p);
-               p->namespace = NULL;
-               task_unlock(p);
                put_namespace(namespace);
        }
 }
index 2dcad29..e1dbc86 100644 (file)
@@ -140,6 +140,11 @@ struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
 int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
 #endif
 
+enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
+int nfsd_vers(int vers, enum vers_op change);
+void nfsd_reset_versions(void);
+int nfsd_create_serv(void);
+
 
 /* 
  * NFSv4 State
index 31a3cb6..069257e 100644 (file)
@@ -290,8 +290,9 @@ fill_post_wcc(struct svc_fh *fhp)
  * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
  * so, any changes here should be reflected there.
  */
+
 static inline void
-fh_lock(struct svc_fh *fhp)
+fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
 {
        struct dentry   *dentry = fhp->fh_dentry;
        struct inode    *inode;
@@ -310,11 +311,17 @@ fh_lock(struct svc_fh *fhp)
        }
 
        inode = dentry->d_inode;
-       mutex_lock(&inode->i_mutex);
+       mutex_lock_nested(&inode->i_mutex, subclass);
        fill_pre_wcc(fhp);
        fhp->fh_locked = 1;
 }
 
+static inline void
+fh_lock(struct svc_fh *fhp)
+{
+       fh_lock_nested(fhp, I_MUTEX_NORMAL);
+}
+
 /*
  * Unlock a file handle/inode
  */
index dae0fae..8bcddcc 100644 (file)
 #define NFSCTL_GETFD           7       /* get an fh by path (used by mountd) */
 #define        NFSCTL_GETFS            8       /* get an fh by path with max FH len */
 
-/*
- * Macros used to set version
- */
-#define NFSCTL_VERSET(_cltbits, _v)   ((_cltbits) |=  (1 << (_v)))
-#define NFSCTL_VERUNSET(_cltbits, _v) ((_cltbits) &= ~(1 << (_v)))
-#define NFSCTL_VERISSET(_cltbits, _v) ((_cltbits) & (1 << (_v)))
-
-#if defined(CONFIG_NFSD_V4)
-#define        NFSCTL_VERALL   (0x1c /* 0b011100 */)
-#elif defined(CONFIG_NFSD_V3)
-#define        NFSCTL_VERALL   (0x0c /* 0b001100 */)
-#else
-#define        NFSCTL_VERALL   (0x04 /* 0b000100 */)
-#endif
-
 /* SVC */
 struct nfsctl_svc {
        unsigned short          svc_port;
@@ -134,8 +119,6 @@ extern int          exp_delclient(struct nfsctl_client *ncp);
 extern int             exp_export(struct nfsctl_export *nxp);
 extern int             exp_unexport(struct nfsctl_export *nxp);
 
-extern unsigned int nfsd_versbits;
-
 #endif /* __KERNEL__ */
 
 #endif /* NFSD_SYSCALL_H */
index 1a9ef3e..5dce5c2 100644 (file)
@@ -352,6 +352,7 @@ extern nodemask_t node_possible_map;
 #define node_possible(node)    node_isset((node), node_possible_map)
 #define first_online_node      first_node(node_online_map)
 #define next_online_node(nid)  next_node((nid), node_online_map)
+int highest_possible_node_id(void);
 #else
 #define num_online_nodes()     1
 #define num_possible_nodes()   1
@@ -359,6 +360,7 @@ extern nodemask_t node_possible_map;
 #define node_possible(node)    ((node) == 0)
 #define first_online_node      0
 #define next_online_node(nid)  (MAX_NUMNODES)
+#define highest_possible_node_id()     0
 #endif
 
 #define any_online_node(mask)                  \
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
new file mode 100644 (file)
index 0000000..f6baecd
--- /dev/null
@@ -0,0 +1,52 @@
+#ifndef _LINUX_NSPROXY_H
+#define _LINUX_NSPROXY_H
+
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+
+struct namespace;
+struct uts_namespace;
+struct ipc_namespace;
+
+/*
+ * A structure to contain pointers to all per-process
+ * namespaces - fs (mount), uts, network, sysvipc, etc.
+ *
+ * 'count' is the number of tasks holding a reference.
+ * The count for each namespace, then, will be the number
+ * of nsproxies pointing to it, not the number of tasks.
+ *
+ * The nsproxy is shared by tasks which share all namespaces.
+ * As soon as a single namespace is cloned or unshared, the
+ * nsproxy is copied.
+ */
+struct nsproxy {
+       atomic_t count;
+       spinlock_t nslock;
+       struct uts_namespace *uts_ns;
+       struct ipc_namespace *ipc_ns;
+       struct namespace *namespace;
+};
+extern struct nsproxy init_nsproxy;
+
+struct nsproxy *dup_namespaces(struct nsproxy *orig);
+int copy_namespaces(int flags, struct task_struct *tsk);
+void get_task_namespaces(struct task_struct *tsk);
+void free_nsproxy(struct nsproxy *ns);
+
+static inline void put_nsproxy(struct nsproxy *ns)
+{
+       if (atomic_dec_and_test(&ns->count)) {
+               free_nsproxy(ns);
+       }
+}
+
+static inline void exit_task_namespaces(struct task_struct *p)
+{
+       struct nsproxy *ns = p->nsproxy;
+       if (ns) {
+               put_nsproxy(ns);
+               p->nsproxy = NULL;
+       }
+}
+#endif
index 93da7e2..17b9e04 100644 (file)
@@ -68,6 +68,8 @@ extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type));
 extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid,
                                                enum pid_type));
 
+extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type);
+
 /*
  * attach_pid() and detach_pid() must be called with the tasklist_lock
  * write-held.
@@ -89,33 +91,42 @@ extern struct pid *FASTCALL(find_pid(int nr));
  * Lookup a PID in the hash table, and return with it's count elevated.
  */
 extern struct pid *find_get_pid(int nr);
+extern struct pid *find_ge_pid(int nr);
 
 extern struct pid *alloc_pid(void);
 extern void FASTCALL(free_pid(struct pid *pid));
 
-#define pid_next(task, type)                                   \
-       ((task)->pids[(type)].node.next)
+static inline pid_t pid_nr(struct pid *pid)
+{
+       pid_t nr = 0;
+       if (pid)
+               nr = pid->nr;
+       return nr;
+}
+
 
-#define pid_next_task(task, type)                              \
-       hlist_entry(pid_next(task, type), struct task_struct,   \
-                       pids[(type)].node)
+#define do_each_task_pid(who, type, task)                                      \
+       do {                                                                    \
+               struct hlist_node *pos___;                                      \
+               struct pid *pid___ = find_pid(who);                             \
+               if (pid___ != NULL)                                             \
+                       hlist_for_each_entry_rcu((task), pos___,                \
+                               &pid___->tasks[type], pids[type].node) {
 
+#define while_each_task_pid(who, type, task)                                   \
+                       }                                                       \
+       } while (0)
 
-/* We could use hlist_for_each_entry_rcu here but it takes more arguments
- * than the do_each_task_pid/while_each_task_pid.  So we roll our own
- * to preserve the existing interface.
- */
-#define do_each_task_pid(who, type, task)                              \
-       if ((task = find_task_by_pid_type(type, who))) {                \
-               prefetch(pid_next(task, type));                         \
-               do {
-
-#define while_each_task_pid(who, type, task)                           \
-               } while (pid_next(task, type) &&  ({                    \
-                               task = pid_next_task(task, type);       \
-                               rcu_dereference(task);                  \
-                               prefetch(pid_next(task, type));         \
-                               1; }) );                                \
-       }
+/* Visit each task on (pid)->tasks[type]; a NULL pid visits none. Close with while_each_pid_task(). */
+#define do_each_pid_task(pid, type, task)                                      \
+       do {                                                                    \
+               struct hlist_node *pos___;                                      \
+               if ((pid) != NULL)                                              \
+                       hlist_for_each_entry_rcu((task), pos___,                \
+                               &(pid)->tasks[type], pids[type].node) {
+
+#define while_each_pid_task(pid, type, task)                                   \
+                       }                                                       \
+       } while (0)
 
 #endif /* _LINUX_PID_H */
index 57f70bc..87dec8f 100644 (file)
@@ -244,13 +244,15 @@ static inline void kclist_add(struct kcore_list *new, void *addr, size_t size)
 extern void kclist_add(struct kcore_list *, void *, size_t);
 #endif
 
+union proc_op {
+       int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
+       int (*proc_read)(struct task_struct *task, char *page);
+};
+
 struct proc_inode {
        struct pid *pid;
        int fd;
-       union {
-               int (*proc_get_link)(struct inode *, struct dentry **, struct vfsmount **);
-               int (*proc_read)(struct task_struct *task, char *page);
-       } op;
+       union proc_op op;
        struct proc_dir_entry *pde;
        struct inode vfs_inode;
 };
diff --git a/include/linux/pspace.h b/include/linux/pspace.h
new file mode 100644 (file)
index 0000000..91d48b8
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef _LINUX_PSPACE_H
+#define _LINUX_PSPACE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/threads.h>
+#include <linux/pid.h>
+
+struct pidmap {
+       atomic_t nr_free;
+       void *page;
+};
+
+#define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+
+struct pspace {
+       struct pidmap pidmap[PIDMAP_ENTRIES];
+       int last_pid;
+};
+
+extern struct pspace init_pspace;
+
+#endif /* _LINUX_PSPACE_H */
index 7ef899c..3853023 100644 (file)
@@ -24,6 +24,8 @@
 #define CLONE_UNTRACED         0x00800000      /* set if the tracing process can't force CLONE_PTRACE on this clone */
 #define CLONE_CHILD_SETTID     0x01000000      /* set the TID in the child */
 #define CLONE_STOPPED          0x02000000      /* Start in stopped state */
+#define CLONE_NEWUTS           0x04000000      /* New utsname group? */
+#define CLONE_NEWIPC           0x08000000      /* New ipcs */
 
 /*
  * Scheduling policies
@@ -118,7 +120,6 @@ extern unsigned long avenrun[];             /* Load averages */
 
 extern unsigned long total_forks;
 extern int nr_threads;
-extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
@@ -239,7 +240,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
 
-struct namespace;
+struct nsproxy;
 
 /* Maximum number of active map areas.. This is a random (large) number */
 #define DEFAULT_MAX_MAP_COUNT  65536
@@ -754,6 +755,7 @@ static inline void prefetch_stack(struct task_struct *t) { }
 struct audit_context;          /* See audit.c */
 struct mempolicy;
 struct pipe_inode_info;
+struct uts_namespace;
 
 enum sleep_type {
        SLEEP_NORMAL,
@@ -897,8 +899,8 @@ struct task_struct {
        struct fs_struct *fs;
 /* open file information */
        struct files_struct *files;
-/* namespace */
-       struct namespace *namespace;
+/* namespaces */
+       struct nsproxy *nsproxy;
 /* signal handlers */
        struct signal_struct *signal;
        struct sighand_struct *sighand;
@@ -1020,6 +1022,26 @@ static inline pid_t process_group(struct task_struct *tsk)
        return tsk->signal->pgrp;
 }
 
+static inline struct pid *task_pid(struct task_struct *task)
+{
+       return task->pids[PIDTYPE_PID].pid;
+}
+
+static inline struct pid *task_tgid(struct task_struct *task)
+{
+       return task->group_leader->pids[PIDTYPE_PID].pid;
+}
+
+static inline struct pid *task_pgrp(struct task_struct *task)
+{
+       return task->group_leader->pids[PIDTYPE_PGID].pid;
+}
+
+static inline struct pid *task_session(struct task_struct *task)
+{
+       return task->group_leader->pids[PIDTYPE_SID].pid;
+}
+
 /**
  * pid_alive - check that a task structure is not stale
  * @p: Task structure to be checked.
@@ -1043,6 +1065,8 @@ static inline int is_init(struct task_struct *tsk)
        return tsk->pid == 1;
 }
 
+extern struct pid *cad_pid;
+
 extern void free_task(struct task_struct *tsk);
 #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
 
@@ -1247,10 +1271,15 @@ extern int send_sig_info(int, struct siginfo *, struct task_struct *);
 extern int send_group_sig_info(int, struct siginfo *, struct task_struct *);
 extern int force_sigsegv(int, struct task_struct *);
 extern int force_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
+extern int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
+extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
+extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32);
+extern int kill_pgrp(struct pid *pid, int sig, int priv);
+extern int kill_pid(struct pid *pid, int sig, int priv);
 extern int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp);
 extern int kill_pg_info(int, struct siginfo *, pid_t);
 extern int kill_proc_info(int, struct siginfo *, pid_t);
-extern int kill_proc_info_as_uid(int, struct siginfo *, pid_t, uid_t, uid_t, u32);
 extern void do_notify_parent(struct task_struct *, int);
 extern void force_sig(int, struct task_struct *);
 extern void force_sig_specific(int, struct task_struct *);
@@ -1265,6 +1294,11 @@ extern int send_group_sigqueue(int, struct sigqueue *,  struct task_struct *);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
 
+static inline int kill_cad_pid(int sig, int priv)
+{
+       return kill_pid(cad_pid, sig, priv);
+}
+
 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
 #define SEND_SIG_NOINFO ((struct siginfo *) 0)
 #define SEND_SIG_PRIV  ((struct siginfo *) 1)
@@ -1358,6 +1392,17 @@ extern void wait_task_inactive(struct task_struct * p);
 /* de_thread depends on thread_group_leader not being a pid based check */
 #define thread_group_leader(p) (p == p->group_leader)
 
+/* Due to the insanities of de_thread it is possible for a process
+ * to have the pid of the thread group leader without actually being
+ * the thread group leader.  For iteration through the pids in proc
+ * all we care about is that we have a task with the appropriate
+ * pid, we don't actually care if we have the right task.
+ */
+static inline int has_group_leader_pid(struct task_struct *p)
+{
+       return p->pid == p->tgid;
+}
+
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
        return list_entry(rcu_dereference(p->thread_group.next),
index 73140ee..4ebcdf9 100644 (file)
 #include <linux/wait.h>
 #include <linux/mm.h>
 
+/*
+ * This is the RPC server thread function prototype
+ */
+typedef void           (*svc_thread_fn)(struct svc_rqst *);
+
+/*
+ *
+ * RPC service thread pool.
+ *
+ * Pool of threads and temporary sockets.  Generally there is only
+ * a single one of these per RPC service, but on NUMA machines those
+ * services that can benefit from it (i.e. nfs but not lockd) will
+ * have one pool per NUMA node.  This optimisation reduces cross-
+ * node traffic on multi-node NUMA NFS servers.
+ */
+struct svc_pool {
+       unsigned int            sp_id;          /* pool id; also node id on NUMA */
+       spinlock_t              sp_lock;        /* protects all fields */
+       struct list_head        sp_threads;     /* idle server threads */
+       struct list_head        sp_sockets;     /* pending sockets */
+       unsigned int            sp_nrthreads;   /* # of threads in pool */
+       struct list_head        sp_all_threads; /* all server threads */
+} ____cacheline_aligned_in_smp;
+
 /*
  * RPC service.
  *
@@ -28,8 +52,6 @@
  * We currently do not support more than one RPC program per daemon.
  */
 struct svc_serv {
-       struct list_head        sv_threads;     /* idle server threads */
-       struct list_head        sv_sockets;     /* pending sockets */
        struct svc_program *    sv_program;     /* RPC program */
        struct svc_stat *       sv_stats;       /* RPC statistics */
        spinlock_t              sv_lock;
@@ -40,10 +62,35 @@ struct svc_serv {
        struct list_head        sv_permsocks;   /* all permanent sockets */
        struct list_head        sv_tempsocks;   /* all temporary sockets */
        int                     sv_tmpcnt;      /* count of temporary sockets */
+       struct timer_list       sv_temptimer;   /* timer for aging temporary sockets */
 
        char *                  sv_name;        /* service name */
+
+       unsigned int            sv_nrpools;     /* number of thread pools */
+       struct svc_pool *       sv_pools;       /* array of thread pools */
+
+       void                    (*sv_shutdown)(struct svc_serv *serv);
+                                               /* Callback to use when last thread
+                                                * exits.
+                                                */
+
+       struct module *         sv_module;      /* optional module to count when
+                                                * adding threads */
+       svc_thread_fn           sv_function;    /* main function for threads */
+       int                     sv_kill_signal; /* signal to kill threads */
 };
 
+/*
+ * We use sv_nrthreads as a reference count.  svc_destroy() drops
+ * this refcount, so we need to bump it up around operations that
+ * change the number of threads.  Horrible, but there it is.
+ * Should be called with the BKL held.
+ */
+static inline void svc_get(struct svc_serv *serv)
+{
+       serv->sv_nrthreads++;
+}
+
 /*
  * Maximum payload size supported by a kernel RPC server.
  * This is use to determine the max number of pages nfsd is
@@ -127,11 +174,13 @@ static inline void svc_putu32(struct kvec *iov, __be32 val)
  */
 struct svc_rqst {
        struct list_head        rq_list;        /* idle list */
+       struct list_head        rq_all;         /* all threads list */
        struct svc_sock *       rq_sock;        /* socket */
        struct sockaddr_in      rq_addr;        /* peer address */
        int                     rq_addrlen;
 
        struct svc_serv *       rq_server;      /* RPC service definition */
+       struct svc_pool *       rq_pool;        /* thread pool */
        struct svc_procedure *  rq_procinfo;    /* procedure info */
        struct auth_ops *       rq_authop;      /* authentication flavour */
        struct svc_cred         rq_cred;        /* auth info */
@@ -180,6 +229,7 @@ struct svc_rqst {
                                                 * to prevent encrypting page
                                                 * cache pages */
        wait_queue_head_t       rq_wait;        /* synchronization */
+       struct task_struct      *rq_task;       /* service thread */
 };
 
 /*
@@ -320,21 +370,22 @@ struct svc_procedure {
        unsigned int            pc_xdrressize;  /* maximum size of XDR reply */
 };
 
-/*
- * This is the RPC server thread function prototype
- */
-typedef void           (*svc_thread_fn)(struct svc_rqst *);
-
 /*
  * Function prototypes.
  */
-struct svc_serv *  svc_create(struct svc_program *, unsigned int);
+struct svc_serv *  svc_create(struct svc_program *, unsigned int,
+                             void (*shutdown)(struct svc_serv*));
 int               svc_create_thread(svc_thread_fn, struct svc_serv *);
 void              svc_exit_thread(struct svc_rqst *);
+struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
+                       void (*shutdown)(struct svc_serv*),
+                       svc_thread_fn, int sig, struct module *);
+int               svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
 void              svc_destroy(struct svc_serv *);
-int               svc_process(struct svc_serv *, struct svc_rqst *);
+int               svc_process(struct svc_rqst *);
 int               svc_register(struct svc_serv *, int, unsigned short);
 void              svc_wake_up(struct svc_serv *);
 void              svc_reserve(struct svc_rqst *rqstp, int space);
+struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
 
 #endif /* SUNRPC_SVC_H */
index b4acb3d..4c29615 100644 (file)
@@ -20,8 +20,9 @@ struct svc_sock {
        struct socket *         sk_sock;        /* berkeley socket layer */
        struct sock *           sk_sk;          /* INET layer */
 
+       struct svc_pool *       sk_pool;        /* current pool iff queued */
        struct svc_serv *       sk_server;      /* service for this socket */
-       unsigned int            sk_inuse;       /* use count */
+       atomic_t                sk_inuse;       /* use count */
        unsigned long           sk_flags;
 #define        SK_BUSY         0                       /* enqueued/receiving */
 #define        SK_CONN         1                       /* conn pending */
@@ -31,9 +32,12 @@ struct svc_sock {
 #define        SK_DEAD         6                       /* socket closed */
 #define        SK_CHNGBUF      7                       /* need to change snd/rcv buffer sizes */
 #define        SK_DEFERRED     8                       /* request on sk_deferred */
+#define        SK_OLD          9                       /* used for temp socket aging mark+sweep */
+#define        SK_DETACHED     10                      /* detached from tempsocks list */
 
-       int                     sk_reserved;    /* space on outq that is reserved */
+       atomic_t                sk_reserved;    /* space on outq that is reserved */
 
+       spinlock_t              sk_defer_lock;  /* protects sk_deferred */
        struct list_head        sk_deferred;    /* deferred requests that need to
                                                 * be revisted */
        struct mutex            sk_mutex;       /* to serialize sending data */
@@ -57,9 +61,14 @@ struct svc_sock {
  */
 int            svc_makesock(struct svc_serv *, int, unsigned short);
 void           svc_delete_socket(struct svc_sock *);
-int            svc_recv(struct svc_serv *, struct svc_rqst *, long);
+int            svc_recv(struct svc_rqst *, long);
 int            svc_send(struct svc_rqst *);
 void           svc_drop(struct svc_rqst *);
 void           svc_sock_update_bufs(struct svc_serv *serv);
+int            svc_sock_names(char *buf, struct svc_serv *serv, char *toclose);
+int            svc_addsock(struct svc_serv *serv,
+                           int fd,
+                           char *name_return,
+                           int *proto);
 
 #endif /* SUNRPC_SVCSOCK_H */
index 2d1c3d5..3efcfc7 100644 (file)
@@ -599,4 +599,6 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
                                    size_t len);
 asmlinkage long sys_getcpu(unsigned __user *cpu, unsigned __user *node, struct getcpu_cache __user *cache);
 
+int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+
 #endif
index 58c961c..5c8473b 100644 (file)
@@ -219,7 +219,8 @@ extern struct list_head tty_drivers;
 
 struct tty_driver *alloc_tty_driver(int lines);
 void put_tty_driver(struct tty_driver *driver);
-void tty_set_operations(struct tty_driver *driver, struct tty_operations *op);
+void tty_set_operations(struct tty_driver *driver,
+                       const struct tty_operations *op);
 
 /* tty driver magic number */
 #define TTY_DRIVER_MAGIC               0x5402
index c18c60f..aa8d5b5 100644 (file)
@@ -1,12 +1,8 @@
 #ifndef _LINUX_UNISTD_H_
 #define _LINUX_UNISTD_H_
 
-#ifdef __KERNEL__
-extern int errno;
-#endif
-
 /*
- * Include machine specific syscallX macros
+ * Include machine specific syscall numbers
  */
 #include <asm/unistd.h>
 
index 13e1da0..02e4b69 100644 (file)
@@ -1,6 +1,11 @@
 #ifndef _LINUX_UTSNAME_H
 #define _LINUX_UTSNAME_H
 
+#include <linux/sched.h>
+#include <linux/kref.h>
+#include <linux/nsproxy.h>
+#include <asm/atomic.h>
+
 #define __OLD_UTS_LEN 8
 
 struct oldold_utsname {
@@ -30,7 +35,55 @@ struct new_utsname {
        char domainname[65];
 };
 
-extern struct new_utsname system_utsname;
+/*
+ * A reference-counted UTS namespace: a kref plus the new_utsname data
+ * that belongs to the namespace.
+ */
+struct uts_namespace {
+       struct kref kref;
+       struct new_utsname name;
+};
+extern struct uts_namespace init_uts_ns;
+
+/* Take an additional reference on @ns via its embedded kref. */
+static inline void get_uts_ns(struct uts_namespace *ns)
+{
+       kref_get(&ns->kref);
+}
+
+/*
+ * With CONFIG_UTS_NS the unshare/copy/free operations are real functions
+ * defined elsewhere; without it they collapse to the inline stubs below.
+ */
+#ifdef CONFIG_UTS_NS
+extern int unshare_utsname(unsigned long unshare_flags,
+                               struct uts_namespace **new_uts);
+extern int copy_utsname(int flags, struct task_struct *tsk);
+extern void free_uts_ns(struct kref *kref);
+
+/* Drop a reference on @ns; free_uts_ns is passed as the kref release hook. */
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+       kref_put(&ns->kref, free_uts_ns);
+}
+#else
+/*
+ * Stub: a request for a new UTS namespace (CLONE_NEWUTS) is rejected with
+ * -EINVAL; any other flag combination succeeds and @new_uts is not touched.
+ */
+static inline int unshare_utsname(unsigned long unshare_flags,
+                       struct uts_namespace **new_uts)
+{
+       if (unshare_flags & CLONE_NEWUTS)
+               return -EINVAL;
+
+       return 0;
+}
+
+/* Stub: nothing to copy, always reports success. */
+static inline int copy_utsname(int flags, struct task_struct *tsk)
+{
+       return 0;
+}
+/* Stub: intentionally empty, no reference is dropped. */
+static inline void put_uts_ns(struct uts_namespace *ns)
+{
+}
+#endif
+
+/*
+ * Return the new_utsname of the calling task's UTS namespace, reached
+ * through current->nsproxy->uts_ns.
+ */
+static inline struct new_utsname *utsname(void)
+{
+       return &current->nsproxy->uts_ns->name;
+}
+
+/*
+ * Return the new_utsname stored in init_uts_ns, independent of which
+ * namespace the calling task uses.
+ */
+static inline struct new_utsname *init_utsname(void)
+{
+       return &init_uts_ns.name;
+}
 
 extern struct rw_semaphore uts_sem;
 #endif
index 1009d3f..37a1a41 100644 (file)
@@ -84,4 +84,11 @@ void reset_vc(struct vc_data *vc);
 extern char con_buf[CON_BUF_SIZE];
 extern struct semaphore con_buf_sem;
 
+/*
+ * State behind the global vt_spawn_con: a spinlock together with a
+ * struct pid reference and a signal number.
+ * NOTE(review): presumably the process and signal used when a console
+ * spawn is requested, with @lock guarding both - confirm in the vt driver.
+ */
+struct vt_spawn_console {
+       spinlock_t lock;
+       struct pid *pid;
+       int sig;
+};
+extern struct vt_spawn_console vt_spawn_con;
+
 #endif /* _VT_KERN_H */
index f7a04d0..1038293 100644 (file)
@@ -115,6 +115,15 @@ config SYSVIPC
          section 6.4 of the Linux Programmer's Guide, available from
          <http://www.tldp.org/guides.html>.
 
+config IPC_NS
+       bool "IPC Namespaces"
+       depends on SYSVIPC
+       default n
+       help
+         Support ipc namespaces.  This allows containers, i.e. virtual
+         environments, to use ipc namespaces to provide different ipc
+         objects for different servers.  If unsure, say N.
+
 config POSIX_MQUEUE
        bool "POSIX Message Queues"
        depends on NET && EXPERIMENTAL
@@ -182,6 +191,14 @@ config TASK_DELAY_ACCT
 
          Say N if unsure.
 
+config UTS_NS
+       bool "UTS Namespaces"
+       default n
+       help
+         Support uts namespaces.  This allows containers, i.e.
+         vservers, to use uts namespaces to provide different
+         uts info for different servers.  If unsure, say N.
+
 config AUDIT
        bool "Auditing support"
        depends on NET
index a06f037..919a80c 100644 (file)
@@ -1,4 +1,3 @@
-#define __KERNEL_SYSCALLS__
 #include <linux/unistd.h>
 #include <linux/kernel.h>
 #include <linux/fs.h>
@@ -35,7 +34,7 @@ static int __init do_linuxrc(void * shell)
        (void) sys_open("/dev/console",O_RDWR,0);
        (void) sys_dup(0);
        (void) sys_dup(0);
-       return execve(shell, argv, envp_init);
+       return kernel_execve(shell, argv, envp_init);
 }
 
 static void __init handle_initrd(void)
index 0766e69..ee12324 100644 (file)
@@ -9,8 +9,6 @@
  *  Simplified starting of init:  Michael A. Griffith <grif@acm.org> 
  */
 
-#define __KERNEL_SYSCALLS__
-
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/proc_fs.h>
@@ -703,7 +701,7 @@ static void do_pre_smp_initcalls(void)
 static void run_init_process(char *init_filename)
 {
        argv_init[0] = init_filename;
-       execve(init_filename, argv_init, envp_init);
+       kernel_execve(init_filename, argv_init, envp_init);
 }
 
 static int init(void * unused)
@@ -723,6 +721,8 @@ static int init(void * unused)
         */
        child_reaper = current;
 
+       cad_pid = task_pid(current);
+
        smp_prepare_cpus(max_cpus);
 
        do_pre_smp_initcalls();
index e290802..8f28344 100644 (file)
 #include <linux/utsname.h>
 #include <linux/utsrelease.h>
 #include <linux/version.h>
+#include <linux/sched.h>
 
 #define version(a) Version_ ## a
 #define version_string(a) version(a)
 
 int version_string(LINUX_VERSION_CODE);
 
-struct new_utsname system_utsname = {
-       .sysname        = UTS_SYSNAME,
-       .nodename       = UTS_NODENAME,
-       .release        = UTS_RELEASE,
-       .version        = UTS_VERSION,
-       .machine        = UTS_MACHINE,
-       .domainname     = UTS_DOMAINNAME,
+/*
+ * Statically initialised UTS namespace, filled from the build-time UTS_*
+ * values.
+ */
+struct uts_namespace init_uts_ns = {
+       .kref = {
+               /*
+                * NOTE(review): starts at 2 rather than 1 - presumably so this
+                * static object's count never drops to zero; confirm.
+                */
+               .refcount       = ATOMIC_INIT(2),
+       },
+       .name = {
+               .sysname        = UTS_SYSNAME,
+               .nodename       = UTS_NODENAME,
+               .release        = UTS_RELEASE,
+               .version        = UTS_VERSION,
+               .machine        = UTS_MACHINE,
+               .domainname     = UTS_DOMAINNAME,
+       },
 };
-
-EXPORT_SYMBOL(system_utsname);
+EXPORT_SYMBOL_GPL(init_uts_ns);
 
 const char linux_banner[] =
        "Linux version " UTS_RELEASE " (" LINUX_COMPILE_BY "@"
index d75d0ba..c45ae86 100644 (file)
@@ -73,7 +73,7 @@ struct mqueue_inode_info {
        struct mq_attr attr;
 
        struct sigevent notify;
-       pid_t notify_owner;
+       struct pid* notify_owner;
        struct user_struct *user;       /* user who created, for accounting */
        struct sock *notify_sock;
        struct sk_buff *notify_cookie;
@@ -134,7 +134,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode,
                        INIT_LIST_HEAD(&info->e_wait_q[0].list);
                        INIT_LIST_HEAD(&info->e_wait_q[1].list);
                        info->messages = NULL;
-                       info->notify_owner = 0;
+                       info->notify_owner = NULL;
                        info->qsize = 0;
                        info->user = NULL;      /* set when all is ok */
                        memset(&info->attr, 0, sizeof(info->attr));
@@ -338,7 +338,7 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
                        (info->notify_owner &&
                         info->notify.sigev_notify == SIGEV_SIGNAL) ?
                                info->notify.sigev_signo : 0,
-                       info->notify_owner);
+                       pid_nr(info->notify_owner));
        spin_unlock(&info->lock);
        buffer[sizeof(buffer)-1] = '\0';
        slen = strlen(buffer)+1;
@@ -363,7 +363,7 @@ static int mqueue_flush_file(struct file *filp, fl_owner_t id)
        struct mqueue_inode_info *info = MQUEUE_I(filp->f_dentry->d_inode);
 
        spin_lock(&info->lock);
-       if (current->tgid == info->notify_owner)
+       if (task_tgid(current) == info->notify_owner)
                remove_notification(info);
 
        spin_unlock(&info->lock);
@@ -518,8 +518,8 @@ static void __do_notify(struct mqueue_inode_info *info)
                        sig_i.si_pid = current->tgid;
                        sig_i.si_uid = current->uid;
 
-                       kill_proc_info(info->notify.sigev_signo,
-                                      &sig_i, info->notify_owner);
+                       kill_pid_info(info->notify.sigev_signo,
+                                     &sig_i, info->notify_owner);
                        break;
                case SIGEV_THREAD:
                        set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
@@ -528,7 +528,8 @@ static void __do_notify(struct mqueue_inode_info *info)
                        break;
                }
                /* after notification unregisters process */
-               info->notify_owner = 0;
+               put_pid(info->notify_owner);
+               info->notify_owner = NULL;
        }
        wake_up(&info->wait_q);
 }
@@ -566,12 +567,13 @@ static long prepare_timeout(const struct timespec __user *u_arg)
 
 static void remove_notification(struct mqueue_inode_info *info)
 {
-       if (info->notify_owner != 0 &&
+       if (info->notify_owner != NULL &&
            info->notify.sigev_notify == SIGEV_THREAD) {
                set_cookie(info->notify_cookie, NOTIFY_REMOVED);
                netlink_sendskb(info->notify_sock, info->notify_cookie, 0);
        }
-       info->notify_owner = 0;
+       put_pid(info->notify_owner);
+       info->notify_owner = NULL;
 }
 
 static int mq_attr_ok(struct mq_attr *attr)
@@ -1062,11 +1064,11 @@ retry:
        ret = 0;
        spin_lock(&info->lock);
        if (u_notification == NULL) {
-               if (info->notify_owner == current->tgid) {
+               if (info->notify_owner == task_tgid(current)) {
                        remove_notification(info);
                        inode->i_atime = inode->i_ctime = CURRENT_TIME;
                }
-       } else if (info->notify_owner != 0) {
+       } else if (info->notify_owner != NULL) {
                ret = -EBUSY;
        } else {
                switch (notification.sigev_notify) {
@@ -1086,7 +1088,8 @@ retry:
                        info->notify.sigev_notify = SIGEV_SIGNAL;
                        break;
                }
-               info->notify_owner = current->tgid;
+
+               info->notify_owner = get_pid(task_tgid(current));
                inode->i_atime = inode->i_ctime = CURRENT_TIME;
        }
        spin_unlock(&info->lock);
index 2b4fccf..5b213d9 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/capability.h>
 #include <linux/audit.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
 
 #include <asm/current.h>
 #include <asm/uaccess.h>
 #include "util.h"
 
-/* sysctl: */
-int msg_ctlmax = MSGMAX;
-int msg_ctlmnb = MSGMNB;
-int msg_ctlmni = MSGMNI;
-
 /*
  * one msg_receiver structure for each sleeping receiver:
  */
@@ -69,30 +69,75 @@ struct msg_sender {
 static atomic_t msg_bytes =    ATOMIC_INIT(0);
 static atomic_t msg_hdrs =     ATOMIC_INIT(0);
 
-static struct ipc_ids msg_ids;
+static struct ipc_ids init_msg_ids;
 
-#define msg_lock(id)           ((struct msg_queue *)ipc_lock(&msg_ids, id))
-#define msg_unlock(msq)                ipc_unlock(&(msq)->q_perm)
-#define msg_rmid(id)           ((struct msg_queue *)ipc_rmid(&msg_ids, id))
-#define msg_checkid(msq, msgid)        ipc_checkid(&msg_ids, &msq->q_perm, msgid)
-#define msg_buildid(id, seq)   ipc_buildid(&msg_ids, id, seq)
+#define msg_ids(ns)    (*((ns)->ids[IPC_MSG_IDS]))
 
-static void freeque(struct msg_queue *msq, int id);
-static int newque(key_t key, int msgflg);
+/*
+ * Per-namespace wrappers around the generic ipc_* helpers: each one resolves
+ * the message-queue id table of @ns through msg_ids(ns).  Macro arguments
+ * that are dereferenced are parenthesised so that any pointer expression can
+ * be passed safely.
+ */
+#define msg_lock(ns, id)       ((struct msg_queue*)ipc_lock(&msg_ids(ns), id))
+#define msg_unlock(msq)                ipc_unlock(&(msq)->q_perm)
+#define msg_rmid(ns, id)       ((struct msg_queue*)ipc_rmid(&msg_ids(ns), id))
+#define msg_checkid(ns, msq, msgid)    \
+       ipc_checkid(&msg_ids(ns), &(msq)->q_perm, msgid)
+#define msg_buildid(ns, id, seq) \
+       ipc_buildid(&msg_ids(ns), id, seq)
+
+static void freeque (struct ipc_namespace *ns, struct msg_queue *msq, int id);
+static int newque (struct ipc_namespace *ns, key_t key, int msgflg);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
 #endif
 
+/*
+ * Attach @ids to @ns as its message-queue id table, set the namespace's
+ * msg_ctlmax/msg_ctlmnb/msg_ctlmni limits to the compile-time defaults
+ * (MSGMAX, MSGMNB, MSGMNI) and initialise the table with ipc_init_ids(),
+ * sized by msg_ctlmni.
+ */
+static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+       ns->ids[IPC_MSG_IDS] = ids;
+       ns->msg_ctlmax = MSGMAX;
+       ns->msg_ctlmnb = MSGMNB;
+       ns->msg_ctlmni = MSGMNI;
+       ipc_init_ids(ids, ns->msg_ctlmni);
+}
+
+#ifdef CONFIG_IPC_NS
+/*
+ * Allocate a message-queue id table for @ns and set it up through
+ * __msg_init_ns().
+ *
+ * Returns 0 on success, or -ENOMEM if the table cannot be allocated (in
+ * which case @ns is left untouched).
+ */
+int msg_init_ns(struct ipc_namespace *ns)
+{
+       struct ipc_ids *ids;
+
+       ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+       if (ids == NULL)
+               return -ENOMEM;
+
+       __msg_init_ns(ns, ids);
+       return 0;
+}
+
+/*
+ * Tear down the message queues of @ns: with the id-table mutex held, try to
+ * lock every slot from 0 to max_id and hand each queue found to freeque(),
+ * then free the id table and clear the namespace's pointer to it.
+ */
+void msg_exit_ns(struct ipc_namespace *ns)
+{
+       int i;
+       struct msg_queue *msq;
+
+       mutex_lock(&msg_ids(ns).mutex);
+       for (i = 0; i <= msg_ids(ns).max_id; i++) {
+               msq = msg_lock(ns, i);
+               /* nothing could be locked at this index, move on */
+               if (msq == NULL)
+                       continue;
+
+               /* freeque() removes the id and drops the lock taken above */
+               freeque(ns, msq, i);
+       }
+       mutex_unlock(&msg_ids(ns).mutex);
+
+       /*
+        * NOTE(review): only the ipc_ids structure itself is freed here;
+        * confirm that ipc_init_ids() leaves nothing else to release.
+        */
+       kfree(ns->ids[IPC_MSG_IDS]);
+       ns->ids[IPC_MSG_IDS] = NULL;
+}
+#endif
+
 void __init msg_init(void)
 {
-       ipc_init_ids(&msg_ids, msg_ctlmni);
+       __msg_init_ns(&init_ipc_ns, &init_msg_ids);
        ipc_init_proc_interface("sysvipc/msg",
                                "       key      msqid perms      cbytes       qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n",
-                               &msg_ids,
-                               sysvipc_msg_proc_show);
+                               IPC_MSG_IDS, sysvipc_msg_proc_show);
 }
 
-static int newque(key_t key, int msgflg)
+static int newque (struct ipc_namespace *ns, key_t key, int msgflg)
 {
        struct msg_queue *msq;
        int id, retval;
@@ -111,18 +156,18 @@ static int newque(key_t key, int msgflg)
                return retval;
        }
 
-       id = ipc_addid(&msg_ids, &msq->q_perm, msg_ctlmni);
+       id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
        if (id == -1) {
                security_msg_queue_free(msq);
                ipc_rcu_putref(msq);
                return -ENOSPC;
        }
 
-       msq->q_id = msg_buildid(id, msq->q_perm.seq);
+       msq->q_id = msg_buildid(ns, id, msq->q_perm.seq);
        msq->q_stime = msq->q_rtime = 0;
        msq->q_ctime = get_seconds();
        msq->q_cbytes = msq->q_qnum = 0;
-       msq->q_qbytes = msg_ctlmnb;
+       msq->q_qbytes = ns->msg_ctlmnb;
        msq->q_lspid = msq->q_lrpid = 0;
        INIT_LIST_HEAD(&msq->q_messages);
        INIT_LIST_HEAD(&msq->q_receivers);
@@ -186,13 +231,13 @@ static void expunge_all(struct msg_queue *msq, int res)
  * msg_ids.mutex and the spinlock for this message queue is hold
  * before freeque() is called. msg_ids.mutex remains locked on exit.
  */
-static void freeque(struct msg_queue *msq, int id)
+static void freeque(struct ipc_namespace *ns, struct msg_queue *msq, int id)
 {
        struct list_head *tmp;
 
        expunge_all(msq, -EIDRM);
        ss_wakeup(&msq->q_senders, 1);
-       msq = msg_rmid(id);
+       msq = msg_rmid(ns, id);
        msg_unlock(msq);
 
        tmp = msq->q_messages.next;
@@ -212,24 +257,27 @@ asmlinkage long sys_msgget(key_t key, int msgflg)
 {
        struct msg_queue *msq;
        int id, ret = -EPERM;
+       struct ipc_namespace *ns;
+
+       ns = current->nsproxy->ipc_ns;
        
-       mutex_lock(&msg_ids.mutex);
+       mutex_lock(&msg_ids(ns).mutex);
        if (key == IPC_PRIVATE) 
-               ret = newque(key, msgflg);
-       else if ((id = ipc_findkey(&msg_ids, key)) == -1) { /* key not used */
+               ret = newque(ns, key, msgflg);
+       else if ((id = ipc_findkey(&msg_ids(ns), key)) == -1) { /* key not used */
                if (!(msgflg & IPC_CREAT))
                        ret = -ENOENT;
                else
-                       ret = newque(key, msgflg);
+                       ret = newque(ns, key, msgflg);
        } else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
                ret = -EEXIST;
        } else {
-               msq = msg_lock(id);
+               msq = msg_lock(ns, id);
                BUG_ON(msq == NULL);
                if (ipcperms(&msq->q_perm, msgflg))
                        ret = -EACCES;
                else {
-                       int qid = msg_buildid(id, msq->q_perm.seq);
+                       int qid = msg_buildid(ns, id, msq->q_perm.seq);
 
                        ret = security_msg_queue_associate(msq, msgflg);
                        if (!ret)
@@ -237,7 +285,7 @@ asmlinkage long sys_msgget(key_t key, int msgflg)
                }
                msg_unlock(msq);
        }
-       mutex_unlock(&msg_ids.mutex);
+       mutex_unlock(&msg_ids(ns).mutex);
 
        return ret;
 }
@@ -341,11 +389,13 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
        struct msq_setbuf setbuf;
        struct msg_queue *msq;
        int err, version;
+       struct ipc_namespace *ns;
 
        if (msqid < 0 || cmd < 0)
                return -EINVAL;
 
        version = ipc_parse_version(&cmd);
+       ns = current->nsproxy->ipc_ns;
 
        switch (cmd) {
        case IPC_INFO:
@@ -366,14 +416,14 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                        return err;
 
                memset(&msginfo, 0, sizeof(msginfo));
-               msginfo.msgmni = msg_ctlmni;
-               msginfo.msgmax = msg_ctlmax;
-               msginfo.msgmnb = msg_ctlmnb;
+               msginfo.msgmni = ns->msg_ctlmni;
+               msginfo.msgmax = ns->msg_ctlmax;
+               msginfo.msgmnb = ns->msg_ctlmnb;
                msginfo.msgssz = MSGSSZ;
                msginfo.msgseg = MSGSEG;
-               mutex_lock(&msg_ids.mutex);
+               mutex_lock(&msg_ids(ns).mutex);
                if (cmd == MSG_INFO) {
-                       msginfo.msgpool = msg_ids.in_use;
+                       msginfo.msgpool = msg_ids(ns).in_use;
                        msginfo.msgmap = atomic_read(&msg_hdrs);
                        msginfo.msgtql = atomic_read(&msg_bytes);
                } else {
@@ -381,8 +431,8 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                        msginfo.msgpool = MSGPOOL;
                        msginfo.msgtql = MSGTQL;
                }
-               max_id = msg_ids.max_id;
-               mutex_unlock(&msg_ids.mutex);
+               max_id = msg_ids(ns).max_id;
+               mutex_unlock(&msg_ids(ns).mutex);
                if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
                        return -EFAULT;
                return (max_id < 0) ? 0 : max_id;
@@ -395,20 +445,20 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
 
                if (!buf)
                        return -EFAULT;
-               if (cmd == MSG_STAT && msqid >= msg_ids.entries->size)
+               if (cmd == MSG_STAT && msqid >= msg_ids(ns).entries->size)
                        return -EINVAL;
 
                memset(&tbuf, 0, sizeof(tbuf));
 
-               msq = msg_lock(msqid);
+               msq = msg_lock(ns, msqid);
                if (msq == NULL)
                        return -EINVAL;
 
                if (cmd == MSG_STAT) {
-                       success_return = msg_buildid(msqid, msq->q_perm.seq);
+                       success_return = msg_buildid(ns, msqid, msq->q_perm.seq);
                } else {
                        err = -EIDRM;
-                       if (msg_checkid(msq, msqid))
+                       if (msg_checkid(ns, msq, msqid))
                                goto out_unlock;
                        success_return = 0;
                }
@@ -446,14 +496,14 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                return  -EINVAL;
        }
 
-       mutex_lock(&msg_ids.mutex);
-       msq = msg_lock(msqid);
+       mutex_lock(&msg_ids(ns).mutex);
+       msq = msg_lock(ns, msqid);
        err = -EINVAL;
        if (msq == NULL)
                goto out_up;
 
        err = -EIDRM;
-       if (msg_checkid(msq, msqid))
+       if (msg_checkid(ns, msq, msqid))
                goto out_unlock_up;
        ipcp = &msq->q_perm;
 
@@ -481,7 +531,7 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
        case IPC_SET:
        {
                err = -EPERM;
-               if (setbuf.qbytes > msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
+               if (setbuf.qbytes > ns->msg_ctlmnb && !capable(CAP_SYS_RESOURCE))
                        goto out_unlock_up;
 
                msq->q_qbytes = setbuf.qbytes;
@@ -503,12 +553,12 @@ asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf)
                break;
        }
        case IPC_RMID:
-               freeque(msq, msqid);
+               freeque(ns, msq, msqid);
                break;
        }
        err = 0;
 out_up:
-       mutex_unlock(&msg_ids.mutex);
+       mutex_unlock(&msg_ids(ns).mutex);
        return err;
 out_unlock_up:
        msg_unlock(msq);
@@ -582,8 +632,11 @@ sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
        struct msg_msg *msg;
        long mtype;
        int err;
+       struct ipc_namespace *ns;
+
+       ns = current->nsproxy->ipc_ns;
 
-       if (msgsz > msg_ctlmax || (long) msgsz < 0 || msqid < 0)
+       if (msgsz > ns->msg_ctlmax || (long) msgsz < 0 || msqid < 0)
                return -EINVAL;
        if (get_user(mtype, &msgp->mtype))
                return -EFAULT;
@@ -597,13 +650,13 @@ sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg)
        msg->m_type = mtype;
        msg->m_ts = msgsz;
 
-       msq = msg_lock(msqid);
+       msq = msg_lock(ns, msqid);
        err = -EINVAL;
        if (msq == NULL)
                goto out_free;
 
        err= -EIDRM;
-       if (msg_checkid(msq, msqid))
+       if (msg_checkid(ns, msq, msqid))
                goto out_unlock_free;
 
        for (;;) {
@@ -694,17 +747,19 @@ asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz,
        struct msg_queue *msq;
        struct msg_msg *msg;
        int mode;
+       struct ipc_namespace *ns;
 
        if (msqid < 0 || (long) msgsz < 0)
                return -EINVAL;
        mode = convert_mode(&msgtyp, msgflg);
+       ns = current->nsproxy->ipc_ns;
 
-       msq = msg_lock(msqid);
+       msq = msg_lock(ns, msqid);
        if (msq == NULL)
                return -EINVAL;
 
        msg = ERR_PTR(-EIDRM);
-       if (msg_checkid(msq, msqid))
+       if (msg_checkid(ns, msq, msqid))
                goto out_unlock;
 
        for (;;) {
index 6013c75..0dafcc4 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/slab.h>
 #include <linux/capability.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 #include "util.h"
 
+#define sem_ids(ns)    (*((ns)->ids[IPC_SEM_IDS]))
+
+/*
+ * Per-namespace wrappers around the generic ipc_* helpers: each one resolves
+ * the semaphore id table of @ns through sem_ids(ns).  Macro arguments that
+ * are dereferenced are parenthesised so that any pointer expression can be
+ * passed safely.
+ */
+#define sem_lock(ns, id)       ((struct sem_array*)ipc_lock(&sem_ids(ns), id))
+#define sem_unlock(sma)                ipc_unlock(&(sma)->sem_perm)
+#define sem_rmid(ns, id)       ((struct sem_array*)ipc_rmid(&sem_ids(ns), id))
+#define sem_checkid(ns, sma, semid)    \
+       ipc_checkid(&sem_ids(ns),&(sma)->sem_perm,semid)
+#define sem_buildid(ns, id, seq) \
+       ipc_buildid(&sem_ids(ns), id, seq)
 
-#define sem_lock(id)   ((struct sem_array*)ipc_lock(&sem_ids,id))
-#define sem_unlock(sma)        ipc_unlock(&(sma)->sem_perm)
-#define sem_rmid(id)   ((struct sem_array*)ipc_rmid(&sem_ids,id))
-#define sem_checkid(sma, semid)        \
-       ipc_checkid(&sem_ids,&sma->sem_perm,semid)
-#define sem_buildid(id, seq) \
-       ipc_buildid(&sem_ids, id, seq)
-static struct ipc_ids sem_ids;
+static struct ipc_ids init_sem_ids;
 
-static int newary (key_t, int, int);
-static void freeary (struct sem_array *sma, int id);
+static int newary(struct ipc_namespace *, key_t, int, int);
+static void freeary(struct ipc_namespace *ns, struct sem_array *sma, int id);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
 #endif
@@ -110,22 +117,61 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
  *     
  */
 
-int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
-#define sc_semmsl      (sem_ctls[0])
-#define sc_semmns      (sem_ctls[1])
-#define sc_semopm      (sem_ctls[2])
-#define sc_semmni      (sem_ctls[3])
+/*
+ * These names are used as member selectors (ns->sc_semmsl expands to
+ * ns->sem_ctls[0]), so the expansions must stay unparenthesised:
+ * wrapping them in parentheses would break every ns->sc_* access.
+ */
+#define sc_semmsl      sem_ctls[0]
+#define sc_semmns      sem_ctls[1]
+#define sc_semopm      sem_ctls[2]
+#define sc_semmni      sem_ctls[3]
 
-static int used_sems;
+/*
+ * Attach @ids to @ns as its semaphore id table, set the namespace's
+ * semmsl/semmns/semopm/semmni limits to the compile-time defaults (SEMMSL,
+ * SEMMNS, SEMOPM, SEMMNI), zero its used_sems counter and initialise the
+ * table with ipc_init_ids(), sized by sc_semmni.
+ */
+static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{
+       ns->ids[IPC_SEM_IDS] = ids;
+       ns->sc_semmsl = SEMMSL;
+       ns->sc_semmns = SEMMNS;
+       ns->sc_semopm = SEMOPM;
+       ns->sc_semmni = SEMMNI;
+       ns->used_sems = 0;
+       ipc_init_ids(ids, ns->sc_semmni);
+}
+
+#ifdef CONFIG_IPC_NS
+/*
+ * Allocate a semaphore id table for @ns and set it up through
+ * __sem_init_ns().
+ *
+ * Returns 0 on success, or -ENOMEM if the table cannot be allocated (in
+ * which case @ns is left untouched).
+ */
+int sem_init_ns(struct ipc_namespace *ns)
+{
+       struct ipc_ids *ids;
+
+       ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);
+       if (ids == NULL)
+               return -ENOMEM;
+
+       __sem_init_ns(ns, ids);
+       return 0;
+}
+
+/*
+ * Tear down the semaphore sets of @ns: with the id-table mutex held, try to
+ * lock every slot from 0 to max_id and hand each set found to freeary(),
+ * then free the id table and clear the namespace's pointer to it.
+ */
+void sem_exit_ns(struct ipc_namespace *ns)
+{
+       int i;
+       struct sem_array *sma;
+
+       mutex_lock(&sem_ids(ns).mutex);
+       for (i = 0; i <= sem_ids(ns).max_id; i++) {
+               sma = sem_lock(ns, i);
+               /* nothing could be locked at this index, move on */
+               if (sma == NULL)
+                       continue;
+
+               /* freeary() removes the id and drops the lock taken above */
+               freeary(ns, sma, i);
+       }
+       mutex_unlock(&sem_ids(ns).mutex);
+
+       /*
+        * NOTE(review): only the ipc_ids structure itself is freed here;
+        * confirm that ipc_init_ids() leaves nothing else to release.
+        */
+       kfree(ns->ids[IPC_SEM_IDS]);
+       ns->ids[IPC_SEM_IDS] = NULL;
+}
+#endif
 
 void __init sem_init (void)
 {
-       used_sems = 0;
-       ipc_init_ids(&sem_ids,sc_semmni);
+       __sem_init_ns(&init_ipc_ns, &init_sem_ids);
        ipc_init_proc_interface("sysvipc/sem",
                                "       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n",
-                               &sem_ids,
-                               sysvipc_sem_proc_show);
+                               IPC_SEM_IDS, sysvipc_sem_proc_show);
 }
 
 /*
@@ -162,7 +208,7 @@ void __init sem_init (void)
  */
 #define IN_WAKEUP      1
 
-static int newary (key_t key, int nsems, int semflg)
+static int newary (struct ipc_namespace *ns, key_t key, int nsems, int semflg)
 {
        int id;
        int retval;
@@ -171,7 +217,7 @@ static int newary (key_t key, int nsems, int semflg)
 
        if (!nsems)
                return -EINVAL;
-       if (used_sems + nsems > sc_semmns)
+       if (ns->used_sems + nsems > ns->sc_semmns)
                return -ENOSPC;
 
        size = sizeof (*sma) + nsems * sizeof (struct sem);
@@ -191,15 +237,15 @@ static int newary (key_t key, int nsems, int semflg)
                return retval;
        }
 
-       id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
+       id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
        if(id == -1) {
                security_sem_free(sma);
                ipc_rcu_putref(sma);
                return -ENOSPC;
        }
-       used_sems += nsems;
+       ns->used_sems += nsems;
 
-       sma->sem_id = sem_buildid(id, sma->sem_perm.seq);
+       sma->sem_id = sem_buildid(ns, id, sma->sem_perm.seq);
        sma->sem_base = (struct sem *) &sma[1];
        /* sma->sem_pending = NULL; */
        sma->sem_pending_last = &sma->sem_pending;
@@ -215,29 +261,32 @@ asmlinkage long sys_semget (key_t key, int nsems, int semflg)
 {
        int id, err = -EINVAL;
        struct sem_array *sma;
+       struct ipc_namespace *ns;
 
-       if (nsems < 0 || nsems > sc_semmsl)
+       ns = current->nsproxy->ipc_ns;
+
+       if (nsems < 0 || nsems > ns->sc_semmsl)
                return -EINVAL;
-       mutex_lock(&sem_ids.mutex);
+       mutex_lock(&sem_ids(ns).mutex);
        
        if (key == IPC_PRIVATE) {
-               err = newary(key, nsems, semflg);
-       } else if ((id = ipc_findkey(&sem_ids, key)) == -1) {  /* key not used */
+               err = newary(ns, key, nsems, semflg);
+       } else if ((id = ipc_findkey(&sem_ids(ns), key)) == -1) {  /* key not used */
                if (!(semflg & IPC_CREAT))
                        err = -ENOENT;
                else
-                       err = newary(key, nsems, semflg);
+                       err = newary(ns, key, nsems, semflg);
        } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
                err = -EEXIST;
        } else {
-               sma = sem_lock(id);
+               sma = sem_lock(ns, id);
                BUG_ON(sma==NULL);
                if (nsems > sma->sem_nsems)
                        err = -EINVAL;
                else if (ipcperms(&sma->sem_perm, semflg))
                        err = -EACCES;
                else {
-                       int semid = sem_buildid(id, sma->sem_perm.seq);
+                       int semid = sem_buildid(ns, id, sma->sem_perm.seq);
                        err = security_sem_associate(sma, semflg);
                        if (!err)
                                err = semid;
@@ -245,7 +294,7 @@ asmlinkage long sys_semget (key_t key, int nsems, int semflg)
                sem_unlock(sma);
        }
 
-       mutex_unlock(&sem_ids.mutex);
+       mutex_unlock(&sem_ids(ns).mutex);
        return err;
 }
 
@@ -444,7 +493,7 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
  * the spinlock for this semaphore set hold. sem_ids.mutex remains locked
  * on exit.
  */
-static void freeary (struct sem_array *sma, int id)
+static void freeary (struct ipc_namespace *ns, struct sem_array *sma, int id)
 {
        struct sem_undo *un;
        struct sem_queue *q;
@@ -472,10 +521,10 @@ static void freeary (struct sem_array *sma, int id)
        }
 
        /* Remove the semaphore set from the ID array*/
-       sma = sem_rmid(id);
+       sma = sem_rmid(ns, id);
        sem_unlock(sma);
 
-       used_sems -= sma->sem_nsems;
+       ns->used_sems -= sma->sem_nsems;
        size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
        security_sem_free(sma);
        ipc_rcu_putref(sma);
@@ -503,7 +552,8 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in,
        }
 }
 
-static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_nolock(struct ipc_namespace *ns, int semid, int semnum,
+               int cmd, int version, union semun arg)
 {
        int err = -EINVAL;
        struct sem_array *sma;
@@ -520,24 +570,24 @@ static int semctl_nolock(int semid, int semnum, int cmd, int version, union semu
                        return err;
                
                memset(&seminfo,0,sizeof(seminfo));
-               seminfo.semmni = sc_semmni;
-               seminfo.semmns = sc_semmns;
-               seminfo.semmsl = sc_semmsl;
-               seminfo.semopm = sc_semopm;
+               seminfo.semmni = ns->sc_semmni;
+               seminfo.semmns = ns->sc_semmns;
+               seminfo.semmsl = ns->sc_semmsl;
+               seminfo.semopm = ns->sc_semopm;
                seminfo.semvmx = SEMVMX;
                seminfo.semmnu = SEMMNU;
                seminfo.semmap = SEMMAP;
                seminfo.semume = SEMUME;
-               mutex_lock(&sem_ids.mutex);
+               mutex_lock(&sem_ids(ns).mutex);
                if (cmd == SEM_INFO) {
-                       seminfo.semusz = sem_ids.in_use;
-                       seminfo.semaem = used_sems;
+                       seminfo.semusz = sem_ids(ns).in_use;
+                       seminfo.semaem = ns->used_sems;
                } else {
                        seminfo.semusz = SEMUSZ;
                        seminfo.semaem = SEMAEM;
                }
-               max_id = sem_ids.max_id;
-               mutex_unlock(&sem_ids.mutex);
+               max_id = sem_ids(ns).max_id;
+               mutex_unlock(&sem_ids(ns).mutex);
                if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo))) 
                        return -EFAULT;
                return (max_id < 0) ? 0: max_id;
@@ -547,12 +597,12 @@ static int semctl_nolock(int semid, int semnum, int cmd, int version, union semu
                struct semid64_ds tbuf;
                int id;
 
-               if(semid >= sem_ids.entries->size)
+               if(semid >= sem_ids(ns).entries->size)
                        return -EINVAL;
 
                memset(&tbuf,0,sizeof(tbuf));
 
-               sma = sem_lock(semid);
+               sma = sem_lock(ns, semid);
                if(sma == NULL)
                        return -EINVAL;
 
@@ -564,7 +614,7 @@ static int semctl_nolock(int semid, int semnum, int cmd, int version, union semu
                if (err)
                        goto out_unlock;
 
-               id = sem_buildid(semid, sma->sem_perm.seq);
+               id = sem_buildid(ns, semid, sma->sem_perm.seq);
 
                kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
                tbuf.sem_otime  = sma->sem_otime;
@@ -584,7 +634,8 @@ out_unlock:
        return err;
 }
 
-static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
+               int cmd, int version, union semun arg)
 {
        struct sem_array *sma;
        struct sem* curr;
@@ -593,14 +644,14 @@ static int semctl_main(int semid, int semnum, int cmd, int version, union semun
        ushort* sem_io = fast_sem_io;
        int nsems;
 
-       sma = sem_lock(semid);
+       sma = sem_lock(ns, semid);
        if(sma==NULL)
                return -EINVAL;
 
        nsems = sma->sem_nsems;
 
        err=-EIDRM;
-       if (sem_checkid(sma,semid))
+       if (sem_checkid(ns,sma,semid))
                goto out_unlock;
 
        err = -EACCES;
@@ -802,7 +853,8 @@ static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void __
        }
 }
 
-static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
+static int semctl_down(struct ipc_namespace *ns, int semid, int semnum,
+               int cmd, int version, union semun arg)
 {
        struct sem_array *sma;
        int err;
@@ -813,11 +865,11 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
                if(copy_semid_from_user (&setbuf, arg.buf, version))
                        return -EFAULT;
        }
-       sma = sem_lock(semid);
+       sma = sem_lock(ns, semid);
        if(sma==NULL)
                return -EINVAL;
 
-       if (sem_checkid(sma,semid)) {
+       if (sem_checkid(ns,sma,semid)) {
                err=-EIDRM;
                goto out_unlock;
        }       
@@ -844,7 +896,7 @@ static int semctl_down(int semid, int semnum, int cmd, int version, union semun
 
        switch(cmd){
        case IPC_RMID:
-               freeary(sma, semid);
+               freeary(ns, sma, semid);
                err = 0;
                break;
        case IPC_SET:
@@ -872,17 +924,19 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
 {
        int err = -EINVAL;
        int version;
+       struct ipc_namespace *ns;
 
        if (semid < 0)
                return -EINVAL;
 
        version = ipc_parse_version(&cmd);
+       ns = current->nsproxy->ipc_ns;
 
        switch(cmd) {
        case IPC_INFO:
        case SEM_INFO:
        case SEM_STAT:
-               err = semctl_nolock(semid,semnum,cmd,version,arg);
+               err = semctl_nolock(ns,semid,semnum,cmd,version,arg);
                return err;
        case GETALL:
        case GETVAL:
@@ -892,13 +946,13 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
        case IPC_STAT:
        case SETVAL:
        case SETALL:
-               err = semctl_main(semid,semnum,cmd,version,arg);
+               err = semctl_main(ns,semid,semnum,cmd,version,arg);
                return err;
        case IPC_RMID:
        case IPC_SET:
-               mutex_lock(&sem_ids.mutex);
-               err = semctl_down(semid,semnum,cmd,version,arg);
-               mutex_unlock(&sem_ids.mutex);
+               mutex_lock(&sem_ids(ns).mutex);
+               err = semctl_down(ns,semid,semnum,cmd,version,arg);
+               mutex_unlock(&sem_ids(ns).mutex);
                return err;
        default:
                return -EINVAL;
@@ -949,15 +1003,12 @@ static inline void unlock_semundo(void)
 static inline int get_undo_list(struct sem_undo_list **undo_listp)
 {
        struct sem_undo_list *undo_list;
-       int size;
 
        undo_list = current->sysvsem.undo_list;
        if (!undo_list) {
-               size = sizeof(struct sem_undo_list);
-               undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
+               undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
                if (undo_list == NULL)
                        return -ENOMEM;
-               memset(undo_list, 0, size);
                spin_lock_init(&undo_list->lock);
                atomic_set(&undo_list->refcnt, 1);
                current->sysvsem.undo_list = undo_list;
@@ -986,7 +1037,7 @@ static struct sem_undo *lookup_undo(struct sem_undo_list *ulp, int semid)
        return un;
 }
 
-static struct sem_undo *find_undo(int semid)
+static struct sem_undo *find_undo(struct ipc_namespace *ns, int semid)
 {
        struct sem_array *sma;
        struct sem_undo_list *ulp;
@@ -1005,12 +1056,12 @@ static struct sem_undo *find_undo(int semid)
                goto out;
 
        /* no undo structure around - allocate one. */
-       sma = sem_lock(semid);
+       sma = sem_lock(ns, semid);
        un = ERR_PTR(-EINVAL);
        if(sma==NULL)
                goto out;
        un = ERR_PTR(-EIDRM);
-       if (sem_checkid(sma,semid)) {
+       if (sem_checkid(ns,sma,semid)) {
                sem_unlock(sma);
                goto out;
        }
@@ -1070,10 +1121,13 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
        int undos = 0, alter = 0, max;
        struct sem_queue queue;
        unsigned long jiffies_left = 0;
+       struct ipc_namespace *ns;
+
+       ns = current->nsproxy->ipc_ns;
 
        if (nsops < 1 || semid < 0)
                return -EINVAL;
-       if (nsops > sc_semopm)
+       if (nsops > ns->sc_semopm)
                return -E2BIG;
        if(nsops > SEMOPM_FAST) {
                sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
@@ -1109,7 +1163,7 @@ asmlinkage long sys_semtimedop(int semid, struct sembuf __user *tsops,
 
 retry_undos:
        if (undos) {
-               un = find_undo(semid);
+               un = find_undo(ns, semid);
                if (IS_ERR(un)) {
                        error = PTR_ERR(un);
                        goto out_free;
@@ -1117,12 +1171,12 @@ retry_undos:
        } else
                un = NULL;
 
-       sma = sem_lock(semid);
+       sma = sem_lock(ns, semid);
        error=-EINVAL;
        if(sma==NULL)
                goto out_free;
        error = -EIDRM;
-       if (sem_checkid(sma,semid))
+       if (sem_checkid(ns,sma,semid))
                goto out_unlock_free;
        /*
         * semid identifies are not unique - find_undo may have
@@ -1190,7 +1244,7 @@ retry_undos:
                goto out_free;
        }
 
-       sma = sem_lock(semid);
+       sma = sem_lock(ns, semid);
        if(sma==NULL) {
                BUG_ON(queue.prev != NULL);
                error = -EIDRM;
@@ -1267,6 +1321,7 @@ void exit_sem(struct task_struct *tsk)
 {
        struct sem_undo_list *undo_list;
        struct sem_undo *u, **up;
+       struct ipc_namespace *ns;
 
        undo_list = tsk->sysvsem.undo_list;
        if (!undo_list)
@@ -1275,6 +1330,7 @@ void exit_sem(struct task_struct *tsk)
        if (!atomic_dec_and_test(&undo_list->refcnt))
                return;
 
+       ns = tsk->nsproxy->ipc_ns;
        /* There's no need to hold the semundo list lock, as current
          * is the last task exiting for this undo list.
         */
@@ -1288,14 +1344,14 @@ void exit_sem(struct task_struct *tsk)
 
                if(semid == -1)
                        continue;
-               sma = sem_lock(semid);
+               sma = sem_lock(ns, semid);
                if (sma == NULL)
                        continue;
 
                if (u->semid == -1)
                        goto next_entry;
 
-               BUG_ON(sem_checkid(sma,u->semid));
+               BUG_ON(sem_checkid(ns,sma,u->semid));
 
                /* remove u from the sma->undo list */
                for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
index 940b0c9..bfbd317 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
  *
  * support for audit of ipc object properties and permission changes
  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ *
+ * namespaces support
+ * OpenVZ, SWsoft Inc.
+ * Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/slab.h>
@@ -32,6 +36,7 @@
 #include <linux/ptrace.h>
 #include <linux/seq_file.h>
 #include <linux/mutex.h>
+#include <linux/nsproxy.h>
 
 #include <asm/uaccess.h>
 
 static struct file_operations shm_file_operations;
 static struct vm_operations_struct shm_vm_ops;
 
-static struct ipc_ids shm_ids;
+static struct ipc_ids init_shm_ids;
+
+#define shm_ids(ns)    (*((ns)->ids[IPC_SHM_IDS]))     /* the shm id table of namespace ns, usable as an lvalue */
 
-#define shm_lock(id)   ((struct shmid_kernel*)ipc_lock(&shm_ids,id))
-#define shm_unlock(shp)        ipc_unlock(&(shp)->shm_perm)
-#define shm_get(id)    ((struct shmid_kernel*)ipc_get(&shm_ids,id))
-#define shm_buildid(id, seq) \
-       ipc_buildid(&shm_ids, id, seq)
+#define shm_lock(ns, id)               \
+       ((struct shmid_kernel*)ipc_lock(&shm_ids(ns),id))
+#define shm_unlock(shp)                        \
+       ipc_unlock(&(shp)->shm_perm)
+#define shm_get(ns, id)                        \
+       ((struct shmid_kernel*)ipc_get(&shm_ids(ns),id))
+#define shm_buildid(ns, id, seq)       \
+       ipc_buildid(&shm_ids(ns), id, seq)
 
-static int newseg (key_t key, int shmflg, size_t size);
+static int newseg (struct ipc_namespace *ns, key_t key,
+               int shmflg, size_t size);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
+static void shm_destroy (struct ipc_namespace *ns, struct shmid_kernel *shp);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
 #endif
 
-size_t shm_ctlmax = SHMMAX;
-size_t         shm_ctlall = SHMALL;
-int    shm_ctlmni = SHMMNI;
+static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+{      /* Attach ids to ns as its shm id table and set the namespace's shm limits to the compile-time defaults. */
+       ns->ids[IPC_SHM_IDS] = ids;
+       ns->shm_ctlmax = SHMMAX;
+       ns->shm_ctlall = SHMALL;
+       ns->shm_ctlmni = SHMMNI;
+       ns->shm_tot = 0;        /* per-namespace count of shared memory pages starts empty */
+       ipc_init_ids(ids, 1);   /* NOTE(review): second argument is presumably the initial table size -- confirm in ipc/util.c */
+}
+
+static void do_shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *shp)
+{      /* Remove segment shp: defer while it is attached, destroy otherwise. shp is unlocked on both paths. */
+       if (shp->shm_nattch){   /* still attached somewhere: only mark it */
+               shp->shm_perm.mode |= SHM_DEST; /* shm_close() destroys it once shm_nattch reaches 0 */
+               /* Do not find it any more */
+               shp->shm_perm.key = IPC_PRIVATE;
+               shm_unlock(shp);
+       } else
+               shm_destroy(ns, shp);   /* no attaches left; shm_destroy() also unlocks shp */
+}
+
+#ifdef CONFIG_IPC_NS
+int shm_init_ns(struct ipc_namespace *ns)
+{      /* Allocate and initialise the shm id table for namespace ns. Returns 0, or -ENOMEM if allocation fails. */
+       struct ipc_ids *ids;
+
+       ids = kmalloc(sizeof(struct ipc_ids), GFP_KERNEL);      /* released by shm_exit_ns() */
+       if (ids == NULL)
+               return -ENOMEM;
 
-static int shm_tot; /* total number of shared memory pages */
+       __shm_init_ns(ns, ids);
+       return 0;
+}
+
+void shm_exit_ns(struct ipc_namespace *ns)
+{      /* Tear down the shm state of ns: run removal on every segment, then free the id table. */
+       int i;
+       struct shmid_kernel *shp;
+
+       mutex_lock(&shm_ids(ns).mutex);
+       for (i = 0; i <= shm_ids(ns).max_id; i++) {
+               shp = shm_lock(ns, i);
+               if (shp == NULL)        /* shm_lock() found nothing at this index */
+                       continue;
+
+               do_shm_rmid(ns, shp);   /* returns with shp unlocked */
+       }
+       mutex_unlock(&shm_ids(ns).mutex);
+
+       kfree(ns->ids[IPC_SHM_IDS]);    /* table allocated in shm_init_ns() */
+       ns->ids[IPC_SHM_IDS] = NULL;    /* do not leave a dangling pointer behind */
+}
+#endif
 
 void __init shm_init (void)
 {
-       ipc_init_ids(&shm_ids, 1);
+       __shm_init_ns(&init_ipc_ns, &init_shm_ids);     /* the initial namespace uses the static id table */
        ipc_init_proc_interface("sysvipc/shm",
                                "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime\n",
-                               &shm_ids,
-                               sysvipc_shm_proc_show);
+                               IPC_SHM_IDS, sysvipc_shm_proc_show);    /* pass the ids[] index instead of a table pointer */
 }
 
-static inline int shm_checkid(struct shmid_kernel *s, int id)
+static inline int shm_checkid(struct ipc_namespace *ns,
+               struct shmid_kernel *s, int id)
 {
-       if (ipc_checkid(&shm_ids,&s->shm_perm,id))
+       if (ipc_checkid(&shm_ids(ns), &s->shm_perm, id))        /* check id against s within ns's shm table */
                return -EIDRM;
        return 0;
 }
 
-static inline struct shmid_kernel *shm_rmid(int id)
+static inline struct shmid_kernel *shm_rmid(struct ipc_namespace *ns, int id)
 {
-       return (struct shmid_kernel *)ipc_rmid(&shm_ids,id);
+       return (struct shmid_kernel *)ipc_rmid(&shm_ids(ns), id);       /* remove id from ns's shm table; typed wrapper around ipc_rmid() */
 }
 
-static inline int shm_addid(struct shmid_kernel *shp)
+static inline int shm_addid(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
-       return ipc_addid(&shm_ids, &shp->shm_perm, shm_ctlmni);
+       return ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni); /* add to ns's table, passing the per-namespace shm_ctlmni */
 }
 
 
 
-static inline void shm_inc (int id) {
+static inline void shm_inc(struct ipc_namespace *ns, int id)
+{
        struct shmid_kernel *shp;
 
-       shp = shm_lock(id);
+       shp = shm_lock(ns, id);
        BUG_ON(!shp);
        shp->shm_atim = get_seconds();
        shp->shm_lprid = current->tgid;
@@ -100,10 +161,13 @@ static inline void shm_inc (int id) {
        shm_unlock(shp);
 }
 
+#define shm_file_ns(file) (*((struct ipc_namespace **)&(file)->private_data))  /* file->private_data viewed as an ipc_namespace pointer lvalue */
+
 /* This is called by fork, once for every shm attach. */
-static void shm_open (struct vm_area_struct *shmd)
+static void shm_open(struct vm_area_struct *shmd)
 {
-       shm_inc (shmd->vm_file->f_dentry->d_inode->i_ino);
+       shm_inc(shm_file_ns(shmd->vm_file),     /* namespace stored in the file by newseg() */
+                       shmd->vm_file->f_dentry->d_inode->i_ino);       /* newseg() stores the shm id in i_ino */
 }
 
 /*
@@ -114,10 +178,10 @@ static void shm_open (struct vm_area_struct *shmd)
  * It has to be called with shp and shm_ids.mutex locked,
  * but returns with shp unlocked and freed.
  */
-static void shm_destroy (struct shmid_kernel *shp)
+static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
 {
-       shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
-       shm_rmid (shp->id);
+       ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       shm_rmid(ns, shp->id);
        shm_unlock(shp);
        if (!is_file_hugepages(shp->shm_file))
                shmem_lock(shp->shm_file, 0, shp->mlock_user);
@@ -140,20 +204,23 @@ static void shm_close (struct vm_area_struct *shmd)
        struct file * file = shmd->vm_file;
        int id = file->f_dentry->d_inode->i_ino;
        struct shmid_kernel *shp;
+       struct ipc_namespace *ns;
 
-       mutex_lock(&shm_ids.mutex);
+       ns = shm_file_ns(file);
+
+       mutex_lock(&shm_ids(ns).mutex);
        /* remove from the list of attaches of the shm segment */
-       shp = shm_lock(id);
+       shp = shm_lock(ns, id);
        BUG_ON(!shp);
        shp->shm_lprid = current->tgid;
        shp->shm_dtim = get_seconds();
        shp->shm_nattch--;
        if(shp->shm_nattch == 0 &&
           shp->shm_perm.mode & SHM_DEST)
-               shm_destroy (shp);
+               shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       mutex_unlock(&shm_ids.mutex);
+       mutex_unlock(&shm_ids(ns).mutex);
 }
 
 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
@@ -165,14 +232,25 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma)
                vma->vm_ops = &shm_vm_ops;
                if (!(vma->vm_flags & VM_WRITE))
                        vma->vm_flags &= ~VM_MAYWRITE;
-               shm_inc(file->f_dentry->d_inode->i_ino);
+               shm_inc(shm_file_ns(file), file->f_dentry->d_inode->i_ino);
        }
 
        return ret;
 }
 
+static int shm_release(struct inode *ino, struct file *file)
+{      /* .release handler of shm_file_operations: drop the namespace reference stored in the file. Always returns 0. */
+       struct ipc_namespace *ns;
+
+       ns = shm_file_ns(file);
+       put_ipc_ns(ns);         /* pairs with get_ipc_ns() in newseg() */
+       shm_file_ns(file) = NULL;       /* clear the stored pointer now that the reference is gone */
+       return 0;
+}
+
 static struct file_operations shm_file_operations = {
-       .mmap   = shm_mmap,
+       .mmap           = shm_mmap,
+       .release        = shm_release,
 #ifndef CONFIG_MMU
        .get_unmapped_area = shmem_get_unmapped_area,
 #endif
@@ -188,7 +266,7 @@ static struct vm_operations_struct shm_vm_ops = {
 #endif
 };
 
-static int newseg (key_t key, int shmflg, size_t size)
+static int newseg (struct ipc_namespace *ns, key_t key, int shmflg, size_t size)
 {
        int error;
        struct shmid_kernel *shp;
@@ -197,10 +275,10 @@ static int newseg (key_t key, int shmflg, size_t size)
        char name[13];
        int id;
 
-       if (size < SHMMIN || size > shm_ctlmax)
+       if (size < SHMMIN || size > ns->shm_ctlmax)
                return -EINVAL;
 
-       if (shm_tot + numpages >= shm_ctlall)
+       if (ns->shm_tot + numpages >= ns->shm_ctlall)
                return -ENOSPC;
 
        shp = ipc_rcu_alloc(sizeof(*shp));
@@ -239,7 +317,7 @@ static int newseg (key_t key, int shmflg, size_t size)
                goto no_file;
 
        error = -ENOSPC;
-       id = shm_addid(shp);
+       id = shm_addid(ns, shp);
        if(id == -1) 
                goto no_id;
 
@@ -249,15 +327,17 @@ static int newseg (key_t key, int shmflg, size_t size)
        shp->shm_ctim = get_seconds();
        shp->shm_segsz = size;
        shp->shm_nattch = 0;
-       shp->id = shm_buildid(id,shp->shm_perm.seq);
+       shp->id = shm_buildid(ns, id, shp->shm_perm.seq);
        shp->shm_file = file;
        file->f_dentry->d_inode->i_ino = shp->id;
 
+       shm_file_ns(file) = get_ipc_ns(ns);
+
        /* Hugetlb ops would have already been assigned. */
        if (!(shmflg & SHM_HUGETLB))
                file->f_op = &shm_file_operations;
 
-       shm_tot += numpages;
+       ns->shm_tot += numpages;
        shm_unlock(shp);
        return shp->id;
 
@@ -273,33 +353,36 @@ asmlinkage long sys_shmget (key_t key, size_t size, int shmflg)
 {
        struct shmid_kernel *shp;
        int err, id = 0;
+       struct ipc_namespace *ns;
+
+       ns = current->nsproxy->ipc_ns;
 
-       mutex_lock(&shm_ids.mutex);
+       mutex_lock(&shm_ids(ns).mutex);
        if (key == IPC_PRIVATE) {
-               err = newseg(key, shmflg, size);
-       } else if ((id = ipc_findkey(&shm_ids, key)) == -1) {
+               err = newseg(ns, key, shmflg, size);
+       } else if ((id = ipc_findkey(&shm_ids(ns), key)) == -1) {
                if (!(shmflg & IPC_CREAT))
                        err = -ENOENT;
                else
-                       err = newseg(key, shmflg, size);
+                       err = newseg(ns, key, shmflg, size);
        } else if ((shmflg & IPC_CREAT) && (shmflg & IPC_EXCL)) {
                err = -EEXIST;
        } else {
-               shp = shm_lock(id);
+               shp = shm_lock(ns, id);
                BUG_ON(shp==NULL);
                if (shp->shm_segsz < size)
                        err = -EINVAL;
                else if (ipcperms(&shp->shm_perm, shmflg))
                        err = -EACCES;
                else {
-                       int shmid = shm_buildid(id, shp->shm_perm.seq);
+                       int shmid = shm_buildid(ns, id, shp->shm_perm.seq);
                        err = security_shm_associate(shp, shmflg);
                        if (!err)
                                err = shmid;
                }
                shm_unlock(shp);
        }
-       mutex_unlock(&shm_ids.mutex);
+       mutex_unlock(&shm_ids(ns).mutex);
 
        return err;
 }
@@ -395,18 +478,19 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
        }
 }
 
-static void shm_get_stat(unsigned long *rss, unsigned long *swp) 
+static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
+               unsigned long *swp)
 {
        int i;
 
        *rss = 0;
        *swp = 0;
 
-       for (i = 0; i <= shm_ids.max_id; i++) {
+       for (i = 0; i <= shm_ids(ns).max_id; i++) {
                struct shmid_kernel *shp;
                struct inode *inode;
 
-               shp = shm_get(i);
+               shp = shm_get(ns, i);
                if(!shp)
                        continue;
 
@@ -430,6 +514,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
        struct shm_setbuf setbuf;
        struct shmid_kernel *shp;
        int err, version;
+       struct ipc_namespace *ns;
 
        if (cmd < 0 || shmid < 0) {
                err = -EINVAL;
@@ -437,6 +522,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
        }
 
        version = ipc_parse_version(&cmd);
+       ns = current->nsproxy->ipc_ns;
 
        switch (cmd) { /* replace with proc interface ? */
        case IPC_INFO:
@@ -448,15 +534,15 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        return err;
 
                memset(&shminfo,0,sizeof(shminfo));
-               shminfo.shmmni = shminfo.shmseg = shm_ctlmni;
-               shminfo.shmmax = shm_ctlmax;
-               shminfo.shmall = shm_ctlall;
+               shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
+               shminfo.shmmax = ns->shm_ctlmax;
+               shminfo.shmall = ns->shm_ctlall;
 
                shminfo.shmmin = SHMMIN;
                if(copy_shminfo_to_user (buf, &shminfo, version))
                        return -EFAULT;
                /* reading a integer is always atomic */
-               err= shm_ids.max_id;
+               err= shm_ids(ns).max_id;
                if(err<0)
                        err = 0;
                goto out;
@@ -470,14 +556,14 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        return err;
 
                memset(&shm_info,0,sizeof(shm_info));
-               mutex_lock(&shm_ids.mutex);
-               shm_info.used_ids = shm_ids.in_use;
-               shm_get_stat (&shm_info.shm_rss, &shm_info.shm_swp);
-               shm_info.shm_tot = shm_tot;
+               mutex_lock(&shm_ids(ns).mutex);
+               shm_info.used_ids = shm_ids(ns).in_use;
+               shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
+               shm_info.shm_tot = ns->shm_tot;
                shm_info.swap_attempts = 0;
                shm_info.swap_successes = 0;
-               err = shm_ids.max_id;
-               mutex_unlock(&shm_ids.mutex);
+               err = shm_ids(ns).max_id;
+               mutex_unlock(&shm_ids(ns).mutex);
                if(copy_to_user (buf, &shm_info, sizeof(shm_info))) {
                        err = -EFAULT;
                        goto out;
@@ -492,17 +578,17 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                struct shmid64_ds tbuf;
                int result;
                memset(&tbuf, 0, sizeof(tbuf));
-               shp = shm_lock(shmid);
+               shp = shm_lock(ns, shmid);
                if(shp==NULL) {
                        err = -EINVAL;
                        goto out;
                } else if(cmd==SHM_STAT) {
                        err = -EINVAL;
-                       if (shmid > shm_ids.max_id)
+                       if (shmid > shm_ids(ns).max_id)
                                goto out_unlock;
-                       result = shm_buildid(shmid, shp->shm_perm.seq);
+                       result = shm_buildid(ns, shmid, shp->shm_perm.seq);
                } else {
-                       err = shm_checkid(shp,shmid);
+                       err = shm_checkid(ns, shp,shmid);
                        if(err)
                                goto out_unlock;
                        result = 0;
@@ -534,12 +620,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
        case SHM_LOCK:
        case SHM_UNLOCK:
        {
-               shp = shm_lock(shmid);
+               shp = shm_lock(ns, shmid);
                if(shp==NULL) {
                        err = -EINVAL;
                        goto out;
                }
-               err = shm_checkid(shp,shmid);
+               err = shm_checkid(ns, shp,shmid);
                if(err)
                        goto out_unlock;
 
@@ -590,12 +676,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                 *      Instead we set a destroyed flag, and then blow
                 *      the name away when the usage hits zero.
                 */
-               mutex_lock(&shm_ids.mutex);
-               shp = shm_lock(shmid);
+               mutex_lock(&shm_ids(ns).mutex);
+               shp = shm_lock(ns, shmid);
                err = -EINVAL;
                if (shp == NULL) 
                        goto out_up;
-               err = shm_checkid(shp, shmid);
+               err = shm_checkid(ns, shp, shmid);
                if(err)
                        goto out_unlock_up;
 
@@ -614,14 +700,8 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                if (err)
                        goto out_unlock_up;
 
-               if (shp->shm_nattch){
-                       shp->shm_perm.mode |= SHM_DEST;
-                       /* Do not find it any more */
-                       shp->shm_perm.key = IPC_PRIVATE;
-                       shm_unlock(shp);
-               } else
-                       shm_destroy (shp);
-               mutex_unlock(&shm_ids.mutex);
+               do_shm_rmid(ns, shp);
+               mutex_unlock(&shm_ids(ns).mutex);
                goto out;
        }
 
@@ -631,12 +711,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
                        err = -EFAULT;
                        goto out;
                }
-               mutex_lock(&shm_ids.mutex);
-               shp = shm_lock(shmid);
+               mutex_lock(&shm_ids(ns).mutex);
+               shp = shm_lock(ns, shmid);
                err=-EINVAL;
                if(shp==NULL)
                        goto out_up;
-               err = shm_checkid(shp,shmid);
+               err = shm_checkid(ns, shp,shmid);
                if(err)
                        goto out_unlock_up;
                err = audit_ipc_obj(&(shp->shm_perm));
@@ -673,7 +753,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds __user *buf)
 out_unlock_up:
        shm_unlock(shp);
 out_up:
-       mutex_unlock(&shm_ids.mutex);
+       mutex_unlock(&shm_ids(ns).mutex);
        goto out;
 out_unlock:
        shm_unlock(shp);
@@ -699,6 +779,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
        unsigned long prot;
        int acc_mode;
        void *user_addr;
+       struct ipc_namespace *ns;
 
        if (shmid < 0) {
                err = -EINVAL;
@@ -737,12 +818,13 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
         * We cannot rely on the fs check since SYSV IPC does have an
         * additional creator id...
         */
-       shp = shm_lock(shmid);
+       ns = current->nsproxy->ipc_ns;
+       shp = shm_lock(ns, shmid);
        if(shp == NULL) {
                err = -EINVAL;
                goto out;
        }
-       err = shm_checkid(shp,shmid);
+       err = shm_checkid(ns, shp,shmid);
        if (err) {
                shm_unlock(shp);
                goto out;
@@ -783,16 +865,16 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 invalid:
        up_write(&current->mm->mmap_sem);
 
-       mutex_lock(&shm_ids.mutex);
-       shp = shm_lock(shmid);
+       mutex_lock(&shm_ids(ns).mutex);
+       shp = shm_lock(ns, shmid);
        BUG_ON(!shp);
        shp->shm_nattch--;
        if(shp->shm_nattch == 0 &&
           shp->shm_perm.mode & SHM_DEST)
-               shm_destroy (shp);
+               shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       mutex_unlock(&shm_ids.mutex);
+       mutex_unlock(&shm_ids(ns).mutex);
 
        *raddr = (unsigned long) user_addr;
        err = 0;
index 67b6d17..42479e4 100644 (file)
@@ -12,6 +12,9 @@
  *            Mingming Cao <cmm@us.ibm.com>
  * Mar 2006 - support for audit of ipc object properties
  *            Dustin Kirkland <dustin.kirkland@us.ibm.com>
+ * Jun 2006 - namespaces support
+ *            OpenVZ, SWsoft Inc.
+ *            Pavel Emelianov <xemul@openvz.org>
  */
 
 #include <linux/mm.h>
@@ -29,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
 #include <linux/audit.h>
+#include <linux/nsproxy.h>
 
 #include <asm/unistd.h>
 
 struct ipc_proc_iface {
        const char *path;
        const char *header;
-       struct ipc_ids *ids;
+       int ids;
        int (*show)(struct seq_file *, void *);
 };
 
+struct ipc_namespace init_ipc_ns = {
+       .kref = {
+               .refcount       = ATOMIC_INIT(2),       /* NOTE(review): starts at 2, not 1 - presumably so the initial namespace is never released; confirm */
+       },
+};
+
+#ifdef CONFIG_IPC_NS
+/*
+ * Allocate a fresh ipc namespace and initialise its sem, msg and shm
+ * state, in that order.  If a later stage fails, the stages already
+ * set up are torn down again in reverse order and the memory is freed.
+ *
+ * Returns the new namespace with its kref initialised, or an ERR_PTR()
+ * encoded errno (never NULL).  @old_ns is not consulted.
+ */
+static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+{
+       struct ipc_namespace *new_ns;
+       int ret;
+
+       new_ns = kmalloc(sizeof(*new_ns), GFP_KERNEL);
+       if (!new_ns)
+               return ERR_PTR(-ENOMEM);
+
+       ret = sem_init_ns(new_ns);
+       if (ret)
+               goto fail_free;
+
+       ret = msg_init_ns(new_ns);
+       if (ret)
+               goto fail_sem;
+
+       ret = shm_init_ns(new_ns);
+       if (ret)
+               goto fail_msg;
+
+       kref_init(&new_ns->kref);
+       return new_ns;
+
+fail_msg:
+       msg_exit_ns(new_ns);
+fail_sem:
+       sem_exit_ns(new_ns);
+fail_free:
+       kfree(new_ns);
+       return ERR_PTR(ret);
+}
+
+/*
+ * unshare() helper: when CLONE_NEWIPC is requested, build a new ipc
+ * namespace and hand it back through @new_ipc.  The caller needs
+ * CAP_SYS_ADMIN (-EPERM otherwise); a failed clone returns its errno.
+ * Without CLONE_NEWIPC nothing is done, @new_ipc is left untouched and
+ * 0 is returned.
+ */
+int unshare_ipcs(unsigned long unshare_flags, struct ipc_namespace **new_ipc)
+{
+       struct ipc_namespace *ns;
+
+       if (!(unshare_flags & CLONE_NEWIPC))
+               return 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       ns = clone_ipc_ns(current->nsproxy->ipc_ns);
+       if (IS_ERR(ns))
+               return PTR_ERR(ns);
+
+       *new_ipc = ns;
+       return 0;
+}
+
+/*
+ * clone() helper for the ipc namespace reachable through @tsk->nsproxy.
+ *
+ * If the task has no ipc namespace there is nothing to do.  Otherwise a
+ * reference is taken on the current namespace.  Without CLONE_NEWIPC
+ * that reference is kept and 0 is returned.  With CLONE_NEWIPC the
+ * caller needs CAP_SYS_ADMIN; a new namespace is created and installed
+ * in @tsk->nsproxy, and the reference on the old namespace is dropped
+ * again (on success and on failure alike).
+ *
+ * Returns 0 on success, -EPERM without the capability, or the errno
+ * reported by clone_ipc_ns().
+ */
+int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+       struct ipc_namespace *old_ns = tsk->nsproxy->ipc_ns;
+       struct ipc_namespace *new_ns;
+       int err = 0;
+
+       if (!old_ns)
+               return 0;
+
+       get_ipc_ns(old_ns);
+
+       if (!(flags & CLONE_NEWIPC))
+               return 0;
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               err = -EPERM;
+               goto out;
+       }
+
+       /*
+        * clone_ipc_ns() reports failure with ERR_PTR(), never NULL, so
+        * a NULL test would let an error pointer be installed below.
+        */
+       new_ns = clone_ipc_ns(old_ns);
+       if (IS_ERR(new_ns)) {
+               err = PTR_ERR(new_ns);
+               goto out;
+       }
+
+       tsk->nsproxy->ipc_ns = new_ns;
+out:
+       put_ipc_ns(old_ns);
+       return err;
+}
+
+/*
+ * Release routine for an ipc namespace: recover the namespace that
+ * embeds @kref, tear down its sem, msg and shm state and free it.
+ * NOTE(review): presumably the kref release callback used by
+ * put_ipc_ns() - the caller is not visible here.
+ */
+void free_ipc_ns(struct kref *kref)
+{
+       struct ipc_namespace *ns = container_of(kref, struct ipc_namespace,
+                                               kref);
+
+       sem_exit_ns(ns);
+       msg_exit_ns(ns);
+       shm_exit_ns(ns);
+       kfree(ns);
+}
+#endif
+
 /**
  *     ipc_init        -       initialise IPC subsystem
  *
@@ -67,7 +172,7 @@ __initcall(ipc_init);
  *     array itself. 
  */
  
-void __init ipc_init_ids(struct ipc_ids* ids, int size)
+void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
 {
        int i;
 
@@ -110,8 +215,7 @@ static struct file_operations sysvipc_proc_fops;
  *     @show: show routine.
  */
 void __init ipc_init_proc_interface(const char *path, const char *header,
-                                   struct ipc_ids *ids,
-                                   int (*show)(struct seq_file *, void *))
+               int ids, int (*show)(struct seq_file *, void *))
 {
        struct proc_dir_entry *pde;
        struct ipc_proc_iface *iface;
@@ -635,6 +739,9 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
        struct ipc_proc_iface *iface = s->private;
        struct kern_ipc_perm *ipc = it;
        loff_t p;
+       struct ipc_ids *ids;
+
+       ids = current->nsproxy->ipc_ns->ids[iface->ids];
 
        /* If we had an ipc id locked before, unlock it */
        if (ipc && ipc != SEQ_START_TOKEN)
@@ -644,8 +751,8 @@ static void *sysvipc_proc_next(struct seq_file *s, void *it, loff_t *pos)
         * p = *pos - 1 (because id 0 starts at position 1)
         *          + 1 (because we increment the position by one)
         */
-       for (p = *pos; p <= iface->ids->max_id; p++) {
-               if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+       for (p = *pos; p <= ids->max_id; p++) {
+               if ((ipc = ipc_lock(ids, p)) != NULL) {
                        *pos = p + 1;
                        return ipc;
                }
@@ -664,12 +771,15 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
        struct ipc_proc_iface *iface = s->private;
        struct kern_ipc_perm *ipc;
        loff_t p;
+       struct ipc_ids *ids;
+
+       ids = current->nsproxy->ipc_ns->ids[iface->ids];
 
        /*
         * Take the lock - this will be released by the corresponding
         * call to stop().
         */
-       mutex_lock(&iface->ids->mutex);
+       mutex_lock(&ids->mutex);
 
        /* pos < 0 is invalid */
        if (*pos < 0)
@@ -680,8 +790,8 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
                return SEQ_START_TOKEN;
 
        /* Find the (pos-1)th ipc */
-       for (p = *pos - 1; p <= iface->ids->max_id; p++) {
-               if ((ipc = ipc_lock(iface->ids, p)) != NULL) {
+       for (p = *pos - 1; p <= ids->max_id; p++) {
+               if ((ipc = ipc_lock(ids, p)) != NULL) {
                        *pos = p + 1;
                        return ipc;
                }
@@ -693,13 +803,15 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
 {
        struct kern_ipc_perm *ipc = it;
        struct ipc_proc_iface *iface = s->private;
+       struct ipc_ids *ids;
 
        /* If we had a locked segment, release it */
        if (ipc && ipc != SEQ_START_TOKEN)
                ipc_unlock(ipc);
 
+       ids = current->nsproxy->ipc_ns->ids[iface->ids];
        /* Release the lock we took in start() */
-       mutex_unlock(&iface->ids->mutex);
+       mutex_unlock(&ids->mutex);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
index 0181553..c8fd6b9 100644 (file)
@@ -3,6 +3,8 @@
  * Copyright (C) 1999 Christoph Rohland
  *
  * ipc helper functions (c) 1999 Manfred Spraul <manfred@colorfullife.com>
+ * namespaces support.      2006 OpenVZ, SWsoft Inc.
+ *                               Pavel Emelianov <xemul@openvz.org>
  */
 
 #ifndef _IPC_UTIL_H
@@ -15,6 +17,14 @@ void sem_init (void);
 void msg_init (void);
 void shm_init (void);
 
+int sem_init_ns(struct ipc_namespace *ns);
+int msg_init_ns(struct ipc_namespace *ns);
+int shm_init_ns(struct ipc_namespace *ns);
+
+void sem_exit_ns(struct ipc_namespace *ns);
+void msg_exit_ns(struct ipc_namespace *ns);
+void shm_exit_ns(struct ipc_namespace *ns);
+
 struct ipc_id_ary {
        int size;
        struct kern_ipc_perm *p[0];
@@ -31,15 +41,23 @@ struct ipc_ids {
 };
 
 struct seq_file;
-void __init ipc_init_ids(struct ipc_ids* ids, int size);
+#ifdef CONFIG_IPC_NS
+#define __ipc_init
+#else
+#define __ipc_init     __init
+#endif
+void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size);
 #ifdef CONFIG_PROC_FS
 void __init ipc_init_proc_interface(const char *path, const char *header,
-                                   struct ipc_ids *ids,
-                                   int (*show)(struct seq_file *, void *));
+               int ids, int (*show)(struct seq_file *, void *));
 #else
 #define ipc_init_proc_interface(path, header, ids, show) do {} while (0)
 #endif
 
+#define IPC_SEM_IDS    0
+#define IPC_MSG_IDS    1
+#define IPC_SHM_IDS    2
+
 /* must be called with ids->mutex acquired.*/
 int ipc_findkey(struct ipc_ids* ids, key_t key);
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size);
index aacaafb..d948ca1 100644 (file)
@@ -8,7 +8,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
            signal.o sys.o kmod.o workqueue.o pid.o \
            rcupdate.o extable.o params.o posix-timers.o \
            kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
-           hrtimer.o rwsem.o latency.o
+           hrtimer.o rwsem.o latency.o nsproxy.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-y += time/
@@ -48,6 +48,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_RELAY) += relay.o
+obj-$(CONFIG_UTS_NS) += utsname.o
 obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
 obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
 
index b4fbd83..75573e5 100644 (file)
@@ -26,8 +26,6 @@
 
 #include <asm/uaccess.h>
 
-extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat);
-
 int get_compat_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
 {
        return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
index 3b47f26..f250a5e 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/file.h>
 #include <linux/binfmts.h>
+#include <linux/nsproxy.h>
 #include <linux/ptrace.h>
 #include <linux/profile.h>
 #include <linux/mount.h>
@@ -397,9 +398,11 @@ void daemonize(const char *name, ...)
        fs = init_task.fs;
        current->fs = fs;
        atomic_inc(&fs->count);
-       exit_namespace(current);
-       current->namespace = init_task.namespace;
-       get_namespace(current->namespace);
+
+       exit_task_namespaces(current);
+       current->nsproxy = init_task.nsproxy;
+       get_task_namespaces(current);
+
        exit_files(current);
        current->files = init_task.files;
        atomic_inc(&current->files->count);
@@ -917,7 +920,6 @@ fastcall NORET_TYPE void do_exit(long code)
        exit_sem(tsk);
        __exit_files(tsk);
        __exit_fs(tsk);
-       exit_namespace(tsk);
        exit_thread();
        cpuset_exit(tsk);
        exit_keys(tsk);
@@ -932,6 +934,7 @@ fastcall NORET_TYPE void do_exit(long code)
        tsk->exit_code = code;
        proc_exit_connector(tsk);
        exit_notify(tsk);
+       exit_task_namespaces(tsk);
 #ifdef CONFIG_NUMA
        mpol_free(tsk->mempolicy);
        tsk->mempolicy = NULL;
index 89f6664..7dc6140 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/binfmts.h>
 #include <linux/mman.h>
 #include <linux/fs.h>
+#include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
@@ -1116,11 +1117,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto bad_fork_cleanup_signal;
        if ((retval = copy_keys(clone_flags, p)))
                goto bad_fork_cleanup_mm;
-       if ((retval = copy_namespace(clone_flags, p)))
+       if ((retval = copy_namespaces(clone_flags, p)))
                goto bad_fork_cleanup_keys;
        retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
        if (retval)
-               goto bad_fork_cleanup_namespace;
+               goto bad_fork_cleanup_namespaces;
 
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
        /*
@@ -1212,7 +1213,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_cleanup_namespace;
+               goto bad_fork_cleanup_namespaces;
        }
 
        if (clone_flags & CLONE_THREAD) {
@@ -1260,8 +1261,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        proc_fork_connector(p);
        return p;
 
-bad_fork_cleanup_namespace:
-       exit_namespace(p);
+bad_fork_cleanup_namespaces:
+       exit_task_namespaces(p);
 bad_fork_cleanup_keys:
        exit_keys(p);
 bad_fork_cleanup_mm:
@@ -1514,10 +1515,9 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
  */
 static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs)
 {
-       struct namespace *ns = current->namespace;
+       struct namespace *ns = current->nsproxy->namespace;
 
-       if ((unshare_flags & CLONE_NEWNS) &&
-           (ns && atomic_read(&ns->count) > 1)) {
+       if ((unshare_flags & CLONE_NEWNS) && ns) {
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
 
@@ -1589,6 +1589,16 @@ static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **n
        return 0;
 }
 
+#ifndef CONFIG_IPC_NS
+/*
+ * Stub used when CONFIG_IPC_NS is not set: a request for CLONE_NEWIPC
+ * is rejected with -EINVAL, anything else is a no-op returning 0.
+ */
+static inline int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns)
+{
+       return (flags & CLONE_NEWIPC) ? -EINVAL : 0;
+}
+#endif
+
 /*
  * unshare allows a process to 'unshare' part of the process
  * context which was originally shared using clone.  copy_*
@@ -1606,13 +1616,17 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
        struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL;
        struct files_struct *fd, *new_fd = NULL;
        struct sem_undo_list *new_ulist = NULL;
+       struct nsproxy *new_nsproxy = NULL, *old_nsproxy = NULL;
+       struct uts_namespace *uts, *new_uts = NULL;
+       struct ipc_namespace *ipc, *new_ipc = NULL;
 
        check_unshare_flags(&unshare_flags);
 
        /* Return -EINVAL for all unsupported flags */
        err = -EINVAL;
        if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
-                               CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+                               CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+                               CLONE_NEWUTS|CLONE_NEWIPC))
                goto bad_unshare_out;
 
        if ((err = unshare_thread(unshare_flags)))
@@ -1629,11 +1643,30 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
                goto bad_unshare_cleanup_vm;
        if ((err = unshare_semundo(unshare_flags, &new_ulist)))
                goto bad_unshare_cleanup_fd;
+       if ((err = unshare_utsname(unshare_flags, &new_uts)))
+               goto bad_unshare_cleanup_semundo;
+       if ((err = unshare_ipcs(unshare_flags, &new_ipc)))
+               goto bad_unshare_cleanup_uts;
+
+       if (new_ns || new_uts || new_ipc) {
+               old_nsproxy = current->nsproxy;
+               new_nsproxy = dup_namespaces(old_nsproxy);
+               if (!new_nsproxy) {
+                       err = -ENOMEM;
+                       goto bad_unshare_cleanup_ipc;
+               }
+       }
 
-       if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) {
+       if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist ||
+                               new_uts || new_ipc) {
 
                task_lock(current);
 
+               if (new_nsproxy) {
+                       current->nsproxy = new_nsproxy;
+                       new_nsproxy = old_nsproxy;
+               }
+
                if (new_fs) {
                        fs = current->fs;
                        current->fs = new_fs;
@@ -1641,8 +1674,8 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
                }
 
                if (new_ns) {
-                       ns = current->namespace;
-                       current->namespace = new_ns;
+                       ns = current->nsproxy->namespace;
+                       current->nsproxy->namespace = new_ns;
                        new_ns = ns;
                }
 
@@ -1667,9 +1700,33 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
                        new_fd = fd;
                }
 
+               if (new_uts) {
+                       uts = current->nsproxy->uts_ns;
+                       current->nsproxy->uts_ns = new_uts;
+                       new_uts = uts;
+               }
+
+               if (new_ipc) {
+                       ipc = current->nsproxy->ipc_ns;
+                       current->nsproxy->ipc_ns = new_ipc;
+                       new_ipc = ipc;
+               }
+
                task_unlock(current);
        }
 
+       if (new_nsproxy)
+               put_nsproxy(new_nsproxy);
+
+bad_unshare_cleanup_ipc:
+       if (new_ipc)
+               put_ipc_ns(new_ipc);
+
+bad_unshare_cleanup_uts:
+       if (new_uts)
+               put_uts_ns(new_uts);
+
+bad_unshare_cleanup_semundo:
 bad_unshare_cleanup_fd:
        if (new_fd)
                put_files_struct(new_fd);
index 4b6770e..4aaf919 100644 (file)
@@ -1527,7 +1527,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
        filp->f_mapping = filp->f_dentry->d_inode->i_mapping;
 
        if (signal) {
-               err = f_setown(filp, current->pid, 1);
+               err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
                if (err < 0) {
                        goto error;
                }
index ab16a5a..342bca6 100644 (file)
@@ -154,7 +154,6 @@ unsigned long kallsyms_lookup_name(const char *name)
        }
        return module_kallsyms_lookup_name(name);
 }
-EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
 
 /*
  * Lookup an address
index f8121b9..bb4e29d 100644 (file)
@@ -18,8 +18,6 @@
        call_usermodehelper wait flag, and remove exec_usermodehelper.
        Rusty Russell <rusty@rustcorp.com.au>  Jan 2003
 */
-#define __KERNEL_SYSCALLS__
-
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/syscalls.h>
@@ -169,7 +167,8 @@ static int ____call_usermodehelper(void *data)
 
        retval = -EPERM;
        if (current->fs->root)
-               retval = execve(sub_info->path, sub_info->argv, sub_info->envp);
+               retval = kernel_execve(sub_info->path,
+                               sub_info->argv, sub_info->envp);
 
        /* Exec failed? */
        sub_info->retval = retval;
index 3f57dfd..610c837 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/kallsyms.h>
 #include <asm-generic/sections.h>
 #include <asm/cacheflush.h>
 #include <asm/errno.h>
 #define KPROBE_HASH_BITS 6
 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS)
 
+
+/*
+ * Resolve a symbol name to a probe address: the default below assigns the
+ * result of kallsyms_lookup_name(name), cast to kprobe_opcode_t *, to addr.
+ * Note that it expands to a bare assignment, so addr must be an lvalue.
+ *
+ * Some oddball architectures like 64bit powerpc have function descriptors
+ * so this must be overridable: a definition of kprobe_lookup_name made
+ * before this point takes precedence over the default.
+ */
+#ifndef kprobe_lookup_name
+#define kprobe_lookup_name(name, addr) \
+       addr = ((kprobe_opcode_t *)(kallsyms_lookup_name(name)))
+#endif
+
 static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 static atomic_t kprobe_count;
@@ -308,7 +319,8 @@ void __kprobes add_rp_inst(struct kretprobe_instance *ri)
 }
 
 /* Called with kretprobe_lock held */
-void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
+void __kprobes recycle_rp_inst(struct kretprobe_instance *ri,
+                               struct hlist_head *head)
 {
        /* remove rp inst off the rprobe_inst_table */
        hlist_del(&ri->hlist);
@@ -320,7 +332,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri)
                hlist_add_head(&ri->uflist, &ri->rp->free_instances);
        } else
                /* Unregistering */
-               kfree(ri);
+               hlist_add_head(&ri->hlist, head);
 }
 
 struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
@@ -336,18 +348,24 @@ struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk)
  */
 void __kprobes kprobe_flush_task(struct task_struct *tk)
 {
-        struct kretprobe_instance *ri;
-        struct hlist_head *head;
+       struct kretprobe_instance *ri;
+       struct hlist_head *head, empty_rp;
        struct hlist_node *node, *tmp;
        unsigned long flags = 0;
 
+       INIT_HLIST_HEAD(&empty_rp);
        spin_lock_irqsave(&kretprobe_lock, flags);
-        head = kretprobe_inst_table_head(tk);
-        hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
-                if (ri->task == tk)
-                        recycle_rp_inst(ri);
-        }
+       head = kretprobe_inst_table_head(tk);
+       hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+               if (ri->task == tk)
+                       recycle_rp_inst(ri, &empty_rp);
+       }
        spin_unlock_irqrestore(&kretprobe_lock, flags);
+
+       hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
+               hlist_del(&ri->hlist);
+               kfree(ri);
+       }
 }
 
 static inline void free_rp_inst(struct kretprobe *rp)
@@ -447,6 +465,21 @@ static int __kprobes __register_kprobe(struct kprobe *p,
        struct kprobe *old_p;
        struct module *probed_mod;
 
+       /*
+        * If we have a symbol_name argument look it up,
+        * and add it to the address.  That way the addr
+        * field can either be global or relative to a symbol.
+        */
+       if (p->symbol_name) {
+               if (p->addr)
+                       return -EINVAL;
+               kprobe_lookup_name(p->symbol_name, p->addr);
+       }
+
+       if (!p->addr)
+               return -EINVAL;
+       p->addr = (kprobe_opcode_t *)(((char *)p->addr)+ p->offset);
+
        if ((!kernel_text_address((unsigned long) p->addr)) ||
                in_kprobes_functions((unsigned long) p->addr))
                return -EINVAL;
@@ -488,7 +521,7 @@ static int __kprobes __register_kprobe(struct kprobe *p,
                                (ARCH_INACTIVE_KPROBE_COUNT + 1))
                register_page_fault_notifier(&kprobe_page_fault_nb);
 
-       arch_arm_kprobe(p);
+       arch_arm_kprobe(p);
 
 out:
        mutex_unlock(&kprobe_mutex);
index e596525..4c05534 100644 (file)
@@ -518,9 +518,9 @@ print_circular_bug_entry(struct lock_list *target, unsigned int depth)
 
 static void print_kernel_version(void)
 {
-       printk("%s %.*s\n", system_utsname.release,
-               (int)strcspn(system_utsname.version, " "),
-               system_utsname.version);
+       printk("%s %.*s\n", init_utsname()->release,
+               (int)strcspn(init_utsname()->version, " "),
+               init_utsname()->version);
 }
 
 /*
index 05625d5..7c77a0a 100644 (file)
@@ -851,6 +851,7 @@ static int check_version(Elf_Shdr *sechdrs,
                printk("%s: no version for \"%s\" found: kernel tainted.\n",
                       mod->name, symname);
                add_taint(TAINT_FORCED_MODULE);
+               mod->taints |= TAINT_FORCED_MODULE;
        }
        return 1;
 }
@@ -1339,6 +1340,7 @@ static void set_license(struct module *mod, const char *license)
                printk(KERN_WARNING "%s: module license '%s' taints kernel.\n",
                       mod->name, license);
                add_taint(TAINT_PROPRIETARY_MODULE);
+               mod->taints |= TAINT_PROPRIETARY_MODULE;
        }
 }
 
@@ -1618,6 +1620,7 @@ static struct module *load_module(void __user *umod,
        /* This is allowed: modprobe --force will invalidate it. */
        if (!modmagic) {
                add_taint(TAINT_FORCED_MODULE);
+               mod->taints |= TAINT_FORCED_MODULE;
                printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
                       mod->name);
        } else if (!same_magic(modmagic, vermagic)) {
@@ -1711,10 +1714,14 @@ static struct module *load_module(void __user *umod,
        /* Set up license info based on the info section */
        set_license(mod, get_modinfo(sechdrs, infoindex, "license"));
 
-       if (strcmp(mod->name, "ndiswrapper") == 0)
+       if (strcmp(mod->name, "ndiswrapper") == 0) {
                add_taint(TAINT_PROPRIETARY_MODULE);
-       if (strcmp(mod->name, "driverloader") == 0)
+               mod->taints |= TAINT_PROPRIETARY_MODULE;
+       }
+       if (strcmp(mod->name, "driverloader") == 0) {
                add_taint(TAINT_PROPRIETARY_MODULE);
+               mod->taints |= TAINT_PROPRIETARY_MODULE;
+       }
 
        /* Set up MODINFO_ATTR fields */
        setup_modinfo(mod, sechdrs, infoindex);
@@ -1760,6 +1767,7 @@ static struct module *load_module(void __user *umod,
                printk(KERN_WARNING "%s: No versions for exported symbols."
                       " Tainting kernel.\n", mod->name);
                add_taint(TAINT_FORCED_MODULE);
+               mod->taints |= TAINT_FORCED_MODULE;
        }
 #endif
 
@@ -2226,14 +2234,37 @@ struct module *module_text_address(unsigned long addr)
        return mod;
 }
 
+/*
+ * Format the taint bits in @taints into @buf as a short NUL-terminated
+ * tag: '(' followed by 'P' for TAINT_PROPRIETARY_MODULE and/or 'F' for
+ * TAINT_FORCED_MODULE, then ')'.  With no bits set the result is the
+ * empty string.  @buf needs room for at least 5 bytes.  Returns @buf.
+ */
+static char *taint_flags(unsigned int taints, char *buf)
+{
+       int bx = 0;
+
+       if (taints) {
+               buf[bx++] = '(';
+               if (taints & TAINT_PROPRIETARY_MODULE)
+                       buf[bx++] = 'P';
+               if (taints & TAINT_FORCED_MODULE)
+                       buf[bx++] = 'F';
+               /*
+                * TAINT_FORCED_RMMOD: could be added.
+                * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't
+                * apply to modules.
+                */
+               buf[bx++] = ')';
+       }
+       /*
+        * Always terminate after the last character written: the buffer
+        * is handed to a "%s" conversion and is not pre-initialised.
+        */
+       buf[bx] = '\0';
+       return buf;
+}
+
 /* Don't grab lock, we're oopsing. */
 void print_modules(void)
 {
        struct module *mod;
+       char buf[8];
 
        printk("Modules linked in:");
        list_for_each_entry(mod, &modules, list)
-               printk(" %s", mod->name);
+               printk(" %s%s", mod->name, taint_flags(mod->taints, buf));
        printk("\n");
 }
 
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
new file mode 100644 (file)
index 0000000..6ebdb82
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ *  Copyright (C) 2006 IBM Corporation
+ *
+ *  Author: Serge Hallyn <serue@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ *
+ *  Jun 2006 - namespaces support
+ *             OpenVZ, SWsoft Inc.
+ *             Pavel Emelianov <xemul@openvz.org>
+ */
+
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/nsproxy.h>
+#include <linux/init_task.h>
+#include <linux/namespace.h>
+#include <linux/utsname.h>
+
+struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
+
+static inline void get_nsproxy(struct nsproxy *ns)
+{
+       atomic_inc(&ns->count);         /* take one more reference on the nsproxy itself */
+}
+
+/*
+ * Take a reference on the nsproxy @tsk currently points at; a task
+ * without an nsproxy is left alone.
+ */
+void get_task_namespaces(struct task_struct *tsk)
+{
+       struct nsproxy *proxy = tsk->nsproxy;
+
+       if (!proxy)
+               return;
+
+       get_nsproxy(proxy);
+}
+
+/*
+ * Allocate a new nsproxy as a byte-for-byte copy of "orig" and reset
+ * its refcount to 1.  No references are taken on the namespaces the
+ * copy points at; dup_namespaces does that where it is needed.
+ * Returns NULL if the allocation fails.
+ */
+static inline struct nsproxy *clone_namespaces(struct nsproxy *orig)
+{
+       struct nsproxy *copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+
+       if (!copy)
+               return NULL;
+
+       memcpy(copy, orig, sizeof(*copy));
+       atomic_set(&copy->count, 1);
+       return copy;
+}
+
+/*
+ * Produce a private copy of "orig" (refcount 1) that holds its own
+ * reference on every namespace it points at.  Returns NULL when the
+ * copy cannot be allocated.  Called from sys_unshare()
+ */
+struct nsproxy *dup_namespaces(struct nsproxy *orig)
+{
+       struct nsproxy *copy = clone_namespaces(orig);
+
+       if (!copy)
+               return NULL;
+
+       if (copy->namespace)
+               get_namespace(copy->namespace);
+       if (copy->uts_ns)
+               get_uts_ns(copy->uts_ns);
+       if (copy->ipc_ns)
+               get_ipc_ns(copy->ipc_ns);
+
+       return copy;
+}
+
+/*
+ * called from clone.  This now handles copy for nsproxy and all
+ * namespaces therein.
+ *
+ * A reference is first taken on the nsproxy tsk already points at.  If
+ * none of CLONE_NEWNS, CLONE_NEWUTS or CLONE_NEWIPC is set, that is all:
+ * the task keeps that nsproxy (and the reference) and 0 is returned.
+ * Otherwise tsk is pointed at a fresh copy of the nsproxy and each
+ * namespace is run through its copy_*() helper; the reference taken on
+ * the old nsproxy is dropped again before returning.  If a helper fails,
+ * the namespaces handled by the earlier helpers are put, the copy is
+ * freed, tsk->nsproxy is pointed back at the old nsproxy and the
+ * helper's error is returned.  Returns -ENOMEM if the copy cannot be
+ * allocated, and 0 for a task without an nsproxy.
+ */
+int copy_namespaces(int flags, struct task_struct *tsk)
+{
+       struct nsproxy *old_ns = tsk->nsproxy;
+       struct nsproxy *new_ns;
+       int err = 0;
+
+       if (!old_ns)
+               return 0;
+
+       get_nsproxy(old_ns);
+
+       if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
+               return 0;       /* shared nsproxy: keep the reference just taken */
+
+       new_ns = clone_namespaces(old_ns);
+       if (!new_ns) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       tsk->nsproxy = new_ns;  /* the copy_*() helpers below operate on tsk->nsproxy */
+
+       err = copy_namespace(flags, tsk);
+       if (err)
+               goto out_ns;
+
+       err = copy_utsname(flags, tsk);
+       if (err)
+               goto out_uts;
+
+       err = copy_ipcs(flags, tsk);
+       if (err)
+               goto out_ipc;
+
+out:
+       put_nsproxy(old_ns);    /* reached on success and, via the labels below, on failure */
+       return err;
+
+out_ipc:
+       if (new_ns->uts_ns)
+               put_uts_ns(new_ns->uts_ns);
+out_uts:
+       if (new_ns->namespace)
+               put_namespace(new_ns->namespace);
+out_ns:
+       tsk->nsproxy = old_ns;
+       kfree(new_ns);
+       goto out;
+}
+
+/*
+ * Drop the references this nsproxy holds on its namespaces (those that
+ * are set) and free the nsproxy itself.
+ */
+void free_nsproxy(struct nsproxy *ns)
+{
+       if (ns->namespace)
+               put_namespace(ns->namespace);
+       if (ns->uts_ns)
+               put_uts_ns(ns->uts_ns);
+       if (ns->ipc_ns)
+               put_ipc_ns(ns->ipc_ns);
+       kfree(ns);
+}
index 8387e8c..b914392 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/bootmem.h>
 #include <linux/hash.h>
+#include <linux/pspace.h>
 
 #define pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift)
 static struct hlist_head *pid_hash;
@@ -33,17 +34,20 @@ static int pidhash_shift;
 static kmem_cache_t *pid_cachep;
 
 int pid_max = PID_MAX_DEFAULT;
-int last_pid;
 
 #define RESERVED_PIDS          300
 
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-#define PIDMAP_ENTRIES         ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
 #define BITS_PER_PAGE          (PAGE_SIZE*8)
 #define BITS_PER_PAGE_MASK     (BITS_PER_PAGE-1)
-#define mk_pid(map, off)       (((map) - pidmap_array)*BITS_PER_PAGE + (off))
+
+static inline int mk_pid(struct pspace *pspace, struct pidmap *map, int off)
+{
+       return (map - pspace->pidmap)*BITS_PER_PAGE + off;      /* index of map in pspace->pidmap, times bits per map page, plus bit offset */
+}
+
 #define find_next_offset(map, off)                                     \
                find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
 
@@ -53,13 +57,12 @@ int pid_max_max = PID_MAX_LIMIT;
  * value does not cause lots of bitmaps to be allocated, but
  * the scheme scales to up to 4 million PIDs, runtime.
  */
-typedef struct pidmap {
-       atomic_t nr_free;
-       void *page;
-} pidmap_t;
-
-static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
-        { [ 0 ... PIDMAP_ENTRIES-1 ] = { ATOMIC_INIT(BITS_PER_PAGE), NULL } };
+struct pspace init_pspace = {
+       .pidmap = {
+               [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }        /* same initialiser for every entry; presumably { nr_free, page } - confirm against struct pidmap */
+       },
+       .last_pid = 0
+};
 
 /*
  * Note: disable interrupts while the pidmap_lock is held as an
@@ -74,40 +77,41 @@ static pidmap_t pidmap_array[PIDMAP_ENTRIES] =
  * irq handlers that take it we can leave the interrupts enabled.
  * For now it is easier to be safe than to prove it can't happen.
  */
+
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static fastcall void free_pidmap(int pid)
+static fastcall void free_pidmap(struct pspace *pspace, int pid)
 {
-       pidmap_t *map = pidmap_array + pid / BITS_PER_PAGE;
+       struct pidmap *map = pspace->pidmap + pid / BITS_PER_PAGE;
        int offset = pid & BITS_PER_PAGE_MASK;
 
        clear_bit(offset, map->page);
        atomic_inc(&map->nr_free);
 }
 
-static int alloc_pidmap(void)
+static int alloc_pidmap(struct pspace *pspace)
 {
-       int i, offset, max_scan, pid, last = last_pid;
-       pidmap_t *map;
+       int i, offset, max_scan, pid, last = pspace->last_pid;
+       struct pidmap *map;
 
        pid = last + 1;
        if (pid >= pid_max)
                pid = RESERVED_PIDS;
        offset = pid & BITS_PER_PAGE_MASK;
-       map = &pidmap_array[pid/BITS_PER_PAGE];
+       map = &pspace->pidmap[pid/BITS_PER_PAGE];
        max_scan = (pid_max + BITS_PER_PAGE - 1)/BITS_PER_PAGE - !offset;
        for (i = 0; i <= max_scan; ++i) {
                if (unlikely(!map->page)) {
-                       unsigned long page = get_zeroed_page(GFP_KERNEL);
+                       void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
                        /*
                         * Free the page if someone raced with us
                         * installing it:
                         */
                        spin_lock_irq(&pidmap_lock);
                        if (map->page)
-                               free_page(page);
+                               kfree(page);
                        else
-                               map->page = (void *)page;
+                               map->page = page;
                        spin_unlock_irq(&pidmap_lock);
                        if (unlikely(!map->page))
                                break;
@@ -116,11 +120,11 @@ static int alloc_pidmap(void)
                        do {
                                if (!test_and_set_bit(offset, map->page)) {
                                        atomic_dec(&map->nr_free);
-                                       last_pid = pid;
+                                       pspace->last_pid = pid;
                                        return pid;
                                }
                                offset = find_next_offset(map, offset);
-                               pid = mk_pid(map, offset);
+                               pid = mk_pid(pspace, map, offset);
                        /*
                         * find_next_offset() found a bit, the pid from it
                         * is in-bounds, and if we fell back to the last
@@ -131,16 +135,34 @@ static int alloc_pidmap(void)
                                        (i != max_scan || pid < last ||
                                            !((last+1) & BITS_PER_PAGE_MASK)));
                }
-               if (map < &pidmap_array[(pid_max-1)/BITS_PER_PAGE]) {
+               if (map < &pspace->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
                        ++map;
                        offset = 0;
                } else {
-                       map = &pidmap_array[0];
+                       map = &pspace->pidmap[0];
                        offset = RESERVED_PIDS;
                        if (unlikely(last == offset))
                                break;
                }
-               pid = mk_pid(map, offset);
+               pid = mk_pid(pspace, map, offset);
+       }
+       return -1;
+}
+
+static int next_pidmap(struct pspace *pspace, int last)
+{
+       int offset;
+       struct pidmap *map, *end;
+
+       offset = (last + 1) & BITS_PER_PAGE_MASK;
+       map = &pspace->pidmap[(last + 1)/BITS_PER_PAGE];
+       end = &pspace->pidmap[PIDMAP_ENTRIES];
+       for (; map < end; map++, offset = 0) {
+               if (unlikely(!map->page))
+                       continue;
+               offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
+               if (offset < BITS_PER_PAGE)
+                       return mk_pid(pspace, map, offset);
        }
        return -1;
 }
@@ -153,6 +175,7 @@ fastcall void put_pid(struct pid *pid)
             atomic_dec_and_test(&pid->count))
                kmem_cache_free(pid_cachep, pid);
 }
+EXPORT_SYMBOL_GPL(put_pid);
 
 static void delayed_put_pid(struct rcu_head *rhp)
 {
@@ -169,7 +192,7 @@ fastcall void free_pid(struct pid *pid)
        hlist_del_rcu(&pid->pid_chain);
        spin_unlock_irqrestore(&pidmap_lock, flags);
 
-       free_pidmap(pid->nr);
+       free_pidmap(&init_pspace, pid->nr);
        call_rcu(&pid->rcu, delayed_put_pid);
 }
 
@@ -183,7 +206,7 @@ struct pid *alloc_pid(void)
        if (!pid)
                goto out;
 
-       nr = alloc_pidmap();
+       nr = alloc_pidmap(&init_pspace);
        if (nr < 0)
                goto out_free;
 
@@ -217,6 +240,7 @@ struct pid * fastcall find_pid(int nr)
        }
        return NULL;
 }
+EXPORT_SYMBOL_GPL(find_pid);
 
 int fastcall attach_pid(struct task_struct *task, enum pid_type type, int nr)
 {
@@ -280,6 +304,15 @@ struct task_struct *find_task_by_pid_type(int type, int nr)
 
 EXPORT_SYMBOL(find_task_by_pid_type);
 
+struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
+{
+       struct pid *pid;
+       rcu_read_lock();
+       pid = get_pid(task->pids[type].pid);
+       rcu_read_unlock();
+       return pid;
+}
+
 struct task_struct *fastcall get_pid_task(struct pid *pid, enum pid_type type)
 {
        struct task_struct *result;
@@ -302,6 +335,26 @@ struct pid *find_get_pid(pid_t nr)
        return pid;
 }
 
+/*
+ * Used by proc to find the first pid that is greater than or equal to nr.
+ *
+ * If there is a pid at nr this function is exactly the same as find_pid.
+ */
+struct pid *find_ge_pid(int nr)
+{
+       struct pid *pid;
+
+       do {
+               pid = find_pid(nr);
+               if (pid)
+                       break;
+               nr = next_pidmap(&init_pspace, nr);
+       } while (nr > 0);
+
+       return pid;
+}
+EXPORT_SYMBOL_GPL(find_get_pid);
+
 /*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
@@ -329,10 +382,10 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-       pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
+       init_pspace.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
        /* Reserve PID 0. We never call free_pidmap(0) */
-       set_bit(0, pidmap_array->page);
-       atomic_dec(&pidmap_array->nr_free);
+       set_bit(0, init_pspace.pidmap[0].page);
+       atomic_dec(&init_pspace.pidmap[0].nr_free);
 
        pid_cachep = kmem_cache_create("pid", sizeof(struct pid),
                                        __alignof__(struct pid),
index 1b84313..99f9b7d 100644 (file)
@@ -906,7 +906,7 @@ static void init_header(struct swsusp_info *info)
        memset(info, 0, sizeof(struct swsusp_info));
        info->version_code = LINUX_VERSION_CODE;
        info->num_physpages = num_physpages;
-       memcpy(&info->uts, &system_utsname, sizeof(system_utsname));
+       memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
        info->cpus = num_online_cpus();
        info->image_pages = nr_copy_pages;
        info->pages = nr_copy_pages + nr_meta_pages + 1;
@@ -1050,13 +1050,13 @@ static inline int check_header(struct swsusp_info *info)
                reason = "kernel version";
        if (info->num_physpages != num_physpages)
                reason = "memory size";
-       if (strcmp(info->uts.sysname,system_utsname.sysname))
+       if (strcmp(info->uts.sysname,init_utsname()->sysname))
                reason = "system type";
-       if (strcmp(info->uts.release,system_utsname.release))
+       if (strcmp(info->uts.release,init_utsname()->release))
                reason = "kernel release";
-       if (strcmp(info->uts.version,system_utsname.version))
+       if (strcmp(info->uts.version,init_utsname()->version))
                reason = "version";
-       if (strcmp(info->uts.machine,system_utsname.machine))
+       if (strcmp(info->uts.machine,init_utsname()->machine))
                reason = "machine";
        if (reason) {
                printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
index 2bbd948..e4e54e8 100644 (file)
@@ -4384,7 +4384,10 @@ EXPORT_SYMBOL(cpu_present_map);
 
 #ifndef CONFIG_SMP
 cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_online_map);
+
 cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
+EXPORT_SYMBOL(cpu_possible_map);
 #endif
 
 long sched_getaffinity(pid_t pid, cpumask_t *mask)
index fb5da6d..7ed8d53 100644 (file)
@@ -1055,28 +1055,44 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 }
 
 /*
- * kill_pg_info() sends a signal to a process group: this is what the tty
+ * kill_pgrp_info() sends a signal to a process group: this is what the tty
  * control characters do (^C, ^Z etc)
  */
 
-int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 {
        struct task_struct *p = NULL;
        int retval, success;
 
-       if (pgrp <= 0)
-               return -EINVAL;
-
        success = 0;
        retval = -ESRCH;
-       do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
+       do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
                int err = group_send_sig_info(sig, info, p);
                success |= !err;
                retval = err;
-       } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
+       } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
        return success ? 0 : retval;
 }
 
+int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
+{
+       int retval;
+
+       read_lock(&tasklist_lock);
+       retval = __kill_pgrp_info(sig, info, pgrp);
+       read_unlock(&tasklist_lock);
+
+       return retval;
+}
+
+int __kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
+{
+       if (pgrp <= 0)
+               return -EINVAL;
+
+       return __kill_pgrp_info(sig, info, find_pid(pgrp));
+}
+
 int
 kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
 {
@@ -1089,8 +1105,7 @@ kill_pg_info(int sig, struct siginfo *info, pid_t pgrp)
        return retval;
 }
 
-int
-kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 {
        int error;
        int acquired_tasklist_lock = 0;
@@ -1101,7 +1116,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
                read_lock(&tasklist_lock);
                acquired_tasklist_lock = 1;
        }
-       p = find_task_by_pid(pid);
+       p = pid_task(pid, PIDTYPE_PID);
        error = -ESRCH;
        if (p)
                error = group_send_sig_info(sig, info, p);
@@ -1111,8 +1126,18 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
        return error;
 }
 
-/* like kill_proc_info(), but doesn't use uid/euid of "current" */
-int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
+int
+kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+{
+       int error;
+       rcu_read_lock();
+       error = kill_pid_info(sig, info, find_pid(pid));
+       rcu_read_unlock();
+       return error;
+}
+
+/* like kill_pid_info(), but doesn't use uid/euid of "current" */
+int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid,
                      uid_t uid, uid_t euid, u32 secid)
 {
        int ret = -EINVAL;
@@ -1122,7 +1147,7 @@ int kill_proc_info_as_uid(int sig, struct siginfo *info, pid_t pid,
                return ret;
 
        read_lock(&tasklist_lock);
-       p = find_task_by_pid(pid);
+       p = pid_task(pid, PIDTYPE_PID);
        if (!p) {
                ret = -ESRCH;
                goto out_unlock;
@@ -1146,7 +1171,7 @@ out_unlock:
        read_unlock(&tasklist_lock);
        return ret;
 }
-EXPORT_SYMBOL_GPL(kill_proc_info_as_uid);
+EXPORT_SYMBOL_GPL(kill_pid_info_as_uid);
 
 /*
  * kill_something_info() interprets pid in interesting ways just like kill(2).
@@ -1264,6 +1289,18 @@ force_sigsegv(int sig, struct task_struct *p)
        return 0;
 }
 
+int kill_pgrp(struct pid *pid, int sig, int priv)
+{
+       return kill_pgrp_info(sig, __si_special(priv), pid);
+}
+EXPORT_SYMBOL(kill_pgrp);
+
+int kill_pid(struct pid *pid, int sig, int priv)
+{
+       return kill_pid_info(sig, __si_special(priv), pid);
+}
+EXPORT_SYMBOL(kill_pid);
+
 int
 kill_pg(pid_t pgrp, int sig, int priv)
 {
index 2460581..2314867 100644 (file)
@@ -92,7 +92,8 @@ EXPORT_SYMBOL(fs_overflowgid);
  */
 
 int C_A_D = 1;
-int cad_pid = 1;
+struct pid *cad_pid;
+EXPORT_SYMBOL(cad_pid);
 
 /*
  *     Notifier list for kernel code which wants to be called
@@ -221,7 +222,7 @@ EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
  *     of the last notifier function called.
  */
  
-int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh,
                unsigned long val, void *v)
 {
        int ret;
@@ -773,10 +774,9 @@ void ctrl_alt_del(void)
        if (C_A_D)
                schedule_work(&cad_work);
        else
-               kill_proc(cad_pid, SIGINT, 1);
+               kill_cad_pid(SIGINT, 1);
 }
        
-
 /*
  * Unprivileged users may change the real gid to the effective gid
  * or vice versa.  (BSD-style)
@@ -1655,7 +1655,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name)
        int errno = 0;
 
        down_read(&uts_sem);
-       if (copy_to_user(name,&system_utsname,sizeof *name))
+       if (copy_to_user(name, utsname(), sizeof *name))
                errno = -EFAULT;
        up_read(&uts_sem);
        return errno;
@@ -1673,8 +1673,8 @@ asmlinkage long sys_sethostname(char __user *name, int len)
        down_write(&uts_sem);
        errno = -EFAULT;
        if (!copy_from_user(tmp, name, len)) {
-               memcpy(system_utsname.nodename, tmp, len);
-               system_utsname.nodename[len] = 0;
+               memcpy(utsname()->nodename, tmp, len);
+               utsname()->nodename[len] = 0;
                errno = 0;
        }
        up_write(&uts_sem);
@@ -1690,11 +1690,11 @@ asmlinkage long sys_gethostname(char __user *name, int len)
        if (len < 0)
                return -EINVAL;
        down_read(&uts_sem);
-       i = 1 + strlen(system_utsname.nodename);
+       i = 1 + strlen(utsname()->nodename);
        if (i > len)
                i = len;
        errno = 0;
-       if (copy_to_user(name, system_utsname.nodename, i))
+       if (copy_to_user(name, utsname()->nodename, i))
                errno = -EFAULT;
        up_read(&uts_sem);
        return errno;
@@ -1719,8 +1719,8 @@ asmlinkage long sys_setdomainname(char __user *name, int len)
        down_write(&uts_sem);
        errno = -EFAULT;
        if (!copy_from_user(tmp, name, len)) {
-               memcpy(system_utsname.domainname, tmp, len);
-               system_utsname.domainname[len] = 0;
+               memcpy(utsname()->domainname, tmp, len);
+               utsname()->domainname[len] = 0;
                errno = 0;
        }
        up_write(&uts_sem);
index ba42694..8020fb2 100644 (file)
@@ -68,7 +68,6 @@ extern int sysrq_enabled;
 extern int core_uses_pid;
 extern int suid_dumpable;
 extern char core_pattern[];
-extern int cad_pid;
 extern int pid_max;
 extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
@@ -92,13 +91,8 @@ extern char modprobe_path[];
 extern int sg_big_buff;
 #endif
 #ifdef CONFIG_SYSVIPC
-extern size_t shm_ctlmax;
-extern size_t shm_ctlall;
-extern int shm_ctlmni;
-extern int msg_ctlmax;
-extern int msg_ctlmnb;
-extern int msg_ctlmni;
-extern int sem_ctls[];
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 #ifdef __sparc__
@@ -139,7 +133,10 @@ static int parse_table(int __user *, int, void __user *, size_t __user *,
                void __user *, size_t, ctl_table *, void **);
 #endif
 
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+                 void __user *buffer, size_t *lenp, loff_t *ppos);
+
+static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos);
 
 static ctl_table root_table[];
@@ -229,51 +226,100 @@ static ctl_table root_table[] = {
 };
 
 static ctl_table kern_table[] = {
+#ifndef CONFIG_UTS_NS
+       {
+               .ctl_name       = KERN_OSTYPE,
+               .procname       = "ostype",
+               .data           = init_uts_ns.name.sysname,
+               .maxlen         = sizeof(init_uts_ns.name.sysname),
+               .mode           = 0444,
+               .proc_handler   = &proc_do_uts_string,
+               .strategy       = &sysctl_string,
+       },
+       {
+               .ctl_name       = KERN_OSRELEASE,
+               .procname       = "osrelease",
+               .data           = init_uts_ns.name.release,
+               .maxlen         = sizeof(init_uts_ns.name.release),
+               .mode           = 0444,
+               .proc_handler   = &proc_do_uts_string,
+               .strategy       = &sysctl_string,
+       },
+       {
+               .ctl_name       = KERN_VERSION,
+               .procname       = "version",
+               .data           = init_uts_ns.name.version,
+               .maxlen         = sizeof(init_uts_ns.name.version),
+               .mode           = 0444,
+               .proc_handler   = &proc_do_uts_string,
+               .strategy       = &sysctl_string,
+       },
+       {
+               .ctl_name       = KERN_NODENAME,
+               .procname       = "hostname",
+               .data           = init_uts_ns.name.nodename,
+               .maxlen         = sizeof(init_uts_ns.name.nodename),
+               .mode           = 0644,
+               .proc_handler   = &proc_do_uts_string,
+               .strategy       = &sysctl_string,
+       },
+       {
+               .ctl_name       = KERN_DOMAINNAME,
+               .procname       = "domainname",
+               .data           = init_uts_ns.name.domainname,
+               .maxlen         = sizeof(init_uts_ns.name.domainname),
+               .mode           = 0644,
+               .proc_handler   = &proc_do_uts_string,
+               .strategy       = &sysctl_string,
+       },
+#else  /* CONFIG_UTS_NS */
        {
                .ctl_name       = KERN_OSTYPE,
                .procname       = "ostype",
-               .data           = system_utsname.sysname,
-               .maxlen         = sizeof(system_utsname.sysname),
+               .data           = NULL,
+               /* could maybe use __NEW_UTS_LEN here? */
+               .maxlen         = FIELD_SIZEOF(struct new_utsname, sysname),
                .mode           = 0444,
-               .proc_handler   = &proc_doutsstring,
+               .proc_handler   = &proc_do_uts_string,
                .strategy       = &sysctl_string,
        },
        {
                .ctl_name       = KERN_OSRELEASE,
                .procname       = "osrelease",
-               .data           = system_utsname.release,
-               .maxlen         = sizeof(system_utsname.release),
+               .data           = NULL,
+               .maxlen         = FIELD_SIZEOF(struct new_utsname, release),
                .mode           = 0444,
-               .proc_handler   = &proc_doutsstring,
+               .proc_handler   = &proc_do_uts_string,
                .strategy       = &sysctl_string,
        },
        {
                .ctl_name       = KERN_VERSION,
                .procname       = "version",
-               .data           = system_utsname.version,
-               .maxlen         = sizeof(system_utsname.version),
+               .data           = NULL,
+               .maxlen         = FIELD_SIZEOF(struct new_utsname, version),
                .mode           = 0444,
-               .proc_handler   = &proc_doutsstring,
+               .proc_handler   = &proc_do_uts_string,
                .strategy       = &sysctl_string,
        },
        {
                .ctl_name       = KERN_NODENAME,
                .procname       = "hostname",
-               .data           = system_utsname.nodename,
-               .maxlen         = sizeof(system_utsname.nodename),
+               .data           = NULL,
+               .maxlen         = FIELD_SIZEOF(struct new_utsname, nodename),
                .mode           = 0644,
-               .proc_handler   = &proc_doutsstring,
+               .proc_handler   = &proc_do_uts_string,
                .strategy       = &sysctl_string,
        },
        {
                .ctl_name       = KERN_DOMAINNAME,
                .procname       = "domainname",
-               .data           = system_utsname.domainname,
-               .maxlen         = sizeof(system_utsname.domainname),
+               .data           = NULL,
+               .maxlen         = FIELD_SIZEOF(struct new_utsname, domainname),
                .mode           = 0644,
-               .proc_handler   = &proc_doutsstring,
+               .proc_handler   = &proc_do_uts_string,
                .strategy       = &sysctl_string,
        },
+#endif /* !CONFIG_UTS_NS */
        {
                .ctl_name       = KERN_PANIC,
                .procname       = "panic",
@@ -432,58 +478,58 @@ static ctl_table kern_table[] = {
        {
                .ctl_name       = KERN_SHMMAX,
                .procname       = "shmmax",
-               .data           = &shm_ctlmax,
+               .data           = NULL,
                .maxlen         = sizeof (size_t),
                .mode           = 0644,
-               .proc_handler   = &proc_doulongvec_minmax,
+               .proc_handler   = &proc_do_ipc_string,
        },
        {
                .ctl_name       = KERN_SHMALL,
                .procname       = "shmall",
-               .data           = &shm_ctlall,
+               .data           = NULL,
                .maxlen         = sizeof (size_t),
                .mode           = 0644,
-               .proc_handler   = &proc_doulongvec_minmax,
+               .proc_handler   = &proc_do_ipc_string,
        },
        {
                .ctl_name       = KERN_SHMMNI,
                .procname       = "shmmni",
-               .data           = &shm_ctlmni,
+               .data           = NULL,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_do_ipc_string,
        },
        {
                .ctl_name       = KERN_MSGMAX,
                .procname       = "msgmax",
-               .data           = &msg_ctlmax,
+               .data           = NULL,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_do_ipc_string,
        },
        {
                .ctl_name       = KERN_MSGMNI,
                .procname       = "msgmni",
-               .data           = &msg_ctlmni,
+               .data           = NULL,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_do_ipc_string,
        },
        {
                .ctl_name       = KERN_MSGMNB,
                .procname       =  "msgmnb",
-               .data           = &msg_ctlmnb,
+               .data           = NULL,
                .maxlen         = sizeof (int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_do_ipc_string,
        },
        {
                .ctl_name       = KERN_SEM,
                .procname       = "sem",
-               .data           = &sem_ctls,
+               .data           = NULL,
                .maxlen         = 4*sizeof (int),
                .mode           = 0644,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_do_ipc_string,
        },
 #endif
 #ifdef CONFIG_MAGIC_SYSRQ
@@ -499,10 +545,10 @@ static ctl_table kern_table[] = {
        {
                .ctl_name       = KERN_CADPID,
                .procname       = "cad_pid",
-               .data           = &cad_pid,
+               .data           = NULL,
                .maxlen         = sizeof (int),
                .mode           = 0600,
-               .proc_handler   = &proc_dointvec,
+               .proc_handler   = &proc_do_cad_pid,
        },
        {
                .ctl_name       = KERN_MAX_THREADS,
@@ -1624,32 +1670,15 @@ static ssize_t proc_writesys(struct file * file, const char __user * buf,
        return do_rw_proc(1, file, (char __user *) buf, count, ppos);
 }
 
-/**
- * proc_dostring - read a string sysctl
- * @table: the sysctl table
- * @write: %TRUE if this is a write to the sysctl file
- * @filp: the file structure
- * @buffer: the user buffer
- * @lenp: the size of the user buffer
- * @ppos: file position
- *
- * Reads/writes a string from/to the user buffer. If the kernel
- * buffer provided is not large enough to hold the string, the
- * string is truncated. The copied string is %NULL-terminated.
- * If the string is being read by the user process, it is copied
- * and a newline '\n' is added. It is truncated if the buffer is
- * not large enough.
- *
- * Returns 0 on success.
- */
-int proc_dostring(ctl_table *table, int write, struct file *filp,
-                 void __user *buffer, size_t *lenp, loff_t *ppos)
+static int _proc_do_string(void* data, int maxlen, int write,
+                          struct file *filp, void __user *buffer,
+                          size_t *lenp, loff_t *ppos)
 {
        size_t len;
        char __user *p;
        char c;
        
-       if (!table->data || !table->maxlen || !*lenp ||
+       if (!data || !maxlen || !*lenp ||
            (*ppos && !write)) {
                *lenp = 0;
                return 0;
@@ -1665,20 +1694,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
                                break;
                        len++;
                }
-               if (len >= table->maxlen)
-                       len = table->maxlen-1;
-               if(copy_from_user(table->data, buffer, len))
+               if (len >= maxlen)
+                       len = maxlen-1;
+               if(copy_from_user(data, buffer, len))
                        return -EFAULT;
-               ((char *) table->data)[len] = 0;
+               ((char *) data)[len] = 0;
                *ppos += *lenp;
        } else {
-               len = strlen(table->data);
-               if (len > table->maxlen)
-                       len = table->maxlen;
+               len = strlen(data);
+               if (len > maxlen)
+                       len = maxlen;
                if (len > *lenp)
                        len = *lenp;
                if (len)
-                       if(copy_to_user(buffer, table->data, len))
+                       if(copy_to_user(buffer, data, len))
                                return -EFAULT;
                if (len < *lenp) {
                        if(put_user('\n', ((char __user *) buffer) + len))
@@ -1691,12 +1720,38 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
        return 0;
 }
 
+/**
+ * proc_dostring - read a string sysctl
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @filp: the file structure
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes a string from/to the user buffer. If the kernel
+ * buffer provided is not large enough to hold the string, the
+ * string is truncated. The copied string is %NULL-terminated.
+ * If the string is being read by the user process, it is copied
+ * and a newline '\n' is added. It is truncated if the buffer is
+ * not large enough.
+ *
+ * Returns 0 on success.
+ */
+int proc_dostring(ctl_table *table, int write, struct file *filp,
+                 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return _proc_do_string(table->data, table->maxlen, write, filp,
+                              buffer, lenp, ppos);
+}
+
 /*
  *     Special case of dostring for the UTS structure. This has locks
  *     to observe. Should this be in kernel/sys.c ????
  */
  
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
+#ifndef CONFIG_UTS_NS
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        int r;
@@ -1712,6 +1767,48 @@ static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
        }
        return r;
 }
+#else /* CONFIG_UTS_NS */
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+                 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int r;
+       struct uts_namespace* uts_ns = current->nsproxy->uts_ns;
+       char* which;
+
+       switch (table->ctl_name) {
+       case KERN_OSTYPE:
+               which = uts_ns->name.sysname;
+               break;
+       case KERN_NODENAME:
+               which = uts_ns->name.nodename;
+               break;
+       case KERN_OSRELEASE:
+               which = uts_ns->name.release;
+               break;
+       case KERN_VERSION:
+               which = uts_ns->name.version;
+               break;
+       case KERN_DOMAINNAME:
+               which = uts_ns->name.domainname;
+               break;
+       default:
+               r = -EINVAL;
+               goto out;
+       }
+
+       if (!write) {
+               down_read(&uts_sem);
+               r=_proc_do_string(which,table->maxlen,0,filp,buffer,lenp, ppos);
+               up_read(&uts_sem);
+       } else {
+               down_write(&uts_sem);
+               r=_proc_do_string(which,table->maxlen,1,filp,buffer,lenp, ppos);
+               up_write(&uts_sem);
+       }
+ out:
+       return r;
+}
+#endif /* !CONFIG_UTS_NS */
 
 static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
                                 int *valp,
@@ -1732,8 +1829,9 @@ static int do_proc_dointvec_conv(int *negp, unsigned long *lvalp,
        return 0;
 }
 
-static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
-                 void __user *buffer, size_t *lenp, loff_t *ppos,
+static int __do_proc_dointvec(void *tbl_data, ctl_table *table,
+                 int write, struct file *filp, void __user *buffer,
+                 size_t *lenp, loff_t *ppos,
                  int (*conv)(int *negp, unsigned long *lvalp, int *valp,
                              int write, void *data),
                  void *data)
@@ -1746,13 +1844,13 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
        char buf[TMPBUFLEN], *p;
        char __user *s = buffer;
        
-       if (!table->data || !table->maxlen || !*lenp ||
+       if (!tbl_data || !table->maxlen || !*lenp ||
            (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }
        
-       i = (int *) table->data;
+       i = (int *) tbl_data;
        vleft = table->maxlen / sizeof(*i);
        left = *lenp;
 
@@ -1841,6 +1939,16 @@ static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
 #undef TMPBUFLEN
 }
 
+static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+                 void __user *buffer, size_t *lenp, loff_t *ppos,
+                 int (*conv)(int *negp, unsigned long *lvalp, int *valp,
+                             int write, void *data),
+                 void *data)
+{
+       return __do_proc_dointvec(table->data, table, write, filp,
+                       buffer, lenp, ppos, conv, data);
+}
+
 /**
  * proc_dointvec - read a vector of integers
  * @table: the sysctl table
@@ -1974,7 +2082,7 @@ int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
                                do_proc_dointvec_minmax_conv, &param);
 }
 
-static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+static int __do_proc_doulongvec_minmax(void *data, ctl_table *table, int write,
                                     struct file *filp,
                                     void __user *buffer,
                                     size_t *lenp, loff_t *ppos,
@@ -1988,13 +2096,13 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
        char buf[TMPBUFLEN], *p;
        char __user *s = buffer;
        
-       if (!table->data || !table->maxlen || !*lenp ||
+       if (!data || !table->maxlen || !*lenp ||
            (*ppos && !write)) {
                *lenp = 0;
                return 0;
        }
        
-       i = (unsigned long *) table->data;
+       i = (unsigned long *) data;
        min = (unsigned long *) table->extra1;
        max = (unsigned long *) table->extra2;
        vleft = table->maxlen / sizeof(unsigned long);
@@ -2079,6 +2187,17 @@ static int do_proc_doulongvec_minmax(ctl_table *table, int write,
 #undef TMPBUFLEN
 }
 
+static int do_proc_doulongvec_minmax(ctl_table *table, int write,
+                                    struct file *filp,
+                                    void __user *buffer,
+                                    size_t *lenp, loff_t *ppos,
+                                    unsigned long convmul,
+                                    unsigned long convdiv)
+{
+       return __do_proc_doulongvec_minmax(table->data, table, write,
+                       filp, buffer, lenp, ppos, convmul, convdiv);
+}
+
 /**
  * proc_doulongvec_minmax - read a vector of long integers with min/max values
  * @table: the sysctl table
@@ -2267,6 +2386,71 @@ int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp,
                                do_proc_dointvec_ms_jiffies_conv, NULL);
 }
 
+#ifdef CONFIG_SYSVIPC
+/*
+ * sysctl handler for the SysV IPC limits.  The value lives in the ipc
+ * namespace of the calling task rather than in table->data;
+ * table->ctl_name selects which field is read or written.
+ * Returns -EINVAL for a ctl_name that is not handled here.
+ */
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct ipc_namespace *ipc = current->nsproxy->ipc_ns;
+       void *field;
+       int use_ulongvec = 0;
+
+       switch (table->ctl_name) {
+       case KERN_SHMMAX:
+               field = &ipc->shm_ctlmax;
+               use_ulongvec = 1;
+               break;
+       case KERN_SHMALL:
+               field = &ipc->shm_ctlall;
+               use_ulongvec = 1;
+               break;
+       case KERN_SHMMNI:
+               field = &ipc->shm_ctlmni;
+               break;
+       case KERN_MSGMAX:
+               field = &ipc->msg_ctlmax;
+               break;
+       case KERN_MSGMNI:
+               field = &ipc->msg_ctlmni;
+               break;
+       case KERN_MSGMNB:
+               field = &ipc->msg_ctlmnb;
+               break;
+       case KERN_SEM:
+               field = &ipc->sem_ctls;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* shmmax and shmall use the ulong-vector helper, the rest the int one */
+       if (use_ulongvec)
+               return __do_proc_doulongvec_minmax(field, table, write, filp,
+                               buffer, lenp, ppos, 1l, 1l);
+
+       return __do_proc_dointvec(field, table, write, filp, buffer,
+                       lenp, ppos, NULL, NULL);
+}
+#endif
+
+/*
+ * sysctl handler for cad_pid.  A read reports pid_nr(cad_pid).  A
+ * successful write looks up the struct pid for the number written and
+ * installs it as the new cad_pid.
+ *
+ * Returns the error from __do_proc_dointvec() if it fails, -ESRCH if
+ * find_get_pid() returns NULL for the written value, 0 otherwise.
+ */
+static int proc_do_cad_pid(ctl_table *table, int write, struct file *filp,
+                          void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct pid *new_pid;
+       pid_t tmp;
+       int r;
+
+       /* the int-vector helper works on this local copy, not on cad_pid */
+       tmp = pid_nr(cad_pid);
+
+       r = __do_proc_dointvec(&tmp, table, write, filp, buffer,
+                              lenp, ppos, NULL, NULL);
+       /* reads, and failed writes, are finished at this point */
+       if (r || !write)
+               return r;
+
+       new_pid = find_get_pid(tmp);
+       if (!new_pid)
+               return -ESRCH;
+
+       /* swap in the new pid and put_pid() the one it replaced */
+       put_pid(xchg(&cad_pid, new_pid));
+       return 0;
+}
+
 #else /* CONFIG_PROC_FS */
 
 int proc_dostring(ctl_table *table, int write, struct file *filp,
@@ -2275,12 +2459,20 @@ int proc_dostring(ctl_table *table, int write, struct file *filp,
        return -ENOSYS;
 }
 
-static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-                           void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_do_uts_string(ctl_table *table, int write, struct file *filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
 {
        return -ENOSYS;
 }
 
+#ifdef CONFIG_SYSVIPC
+/* Stub used when CONFIG_PROC_FS is not set: always fails with -ENOSYS. */
+static int proc_do_ipc_string(ctl_table *table, int write, struct file *filp,
+               void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       return -ENOSYS;
+}
+#endif
+
 int proc_dointvec(ctl_table *table, int write, struct file *filp,
                  void __user *buffer, size_t *lenp, loff_t *ppos)
 {
diff --git a/kernel/utsname.c b/kernel/utsname.c
new file mode 100644 (file)
index 0000000..c859164
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (C) 2004 IBM Corporation
+ *
+ *  Author: Serge Hallyn <serue@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License as
+ *  published by the Free Software Foundation, version 2 of the
+ *  License.
+ */
+
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <linux/version.h>
+
+/*
+ * Allocate a new uts namespace whose name is copied from @old_ns and
+ * whose kref has just been initialised with kref_init().
+ * Returns the new namespace, or NULL if kmalloc() fails.
+ */
+static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
+{
+       struct uts_namespace *copy = kmalloc(sizeof(*copy), GFP_KERNEL);
+
+       if (!copy)
+               return NULL;
+
+       memcpy(&copy->name, &old_ns->name, sizeof(copy->name));
+       kref_init(&copy->kref);
+       return copy;
+}
+
+/*
+ * Clone the calling task's utsname namespace into *new_uts when
+ * CLONE_NEWUTS is set in unshare_flags; otherwise do nothing.
+ * Called only in sys_unshare().
+ */
+int unshare_utsname(unsigned long unshare_flags, struct uts_namespace **new_uts)
+{
+       struct uts_namespace *copy;
+
+       /* nothing to do unless a new uts namespace was asked for */
+       if (!(unshare_flags & CLONE_NEWUTS))
+               return 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* *new_uts is written even when the clone fails (it is then NULL) */
+       copy = clone_uts_ns(current->nsproxy->uts_ns);
+       *new_uts = copy;
+
+       return copy ? 0 : -ENOMEM;
+}
+
+/*
+ * Copy task tsk's utsname namespace, or clone it if flags
+ * specifies CLONE_NEWUTS.  In latter case, changes to the
+ * utsname of this process won't be seen by parent, and vice
+ * versa.
+ *
+ * Returns 0 on success (also when tsk has no uts namespace), -EPERM if
+ * CLONE_NEWUTS is requested without CAP_SYS_ADMIN, or -ENOMEM if the
+ * clone cannot be allocated.
+ */
+int copy_utsname(int flags, struct task_struct *tsk)
+{
+       struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
+       struct uts_namespace *new_ns;
+       int err = 0;
+
+       if (!old_ns)
+               return 0;
+
+       get_uts_ns(old_ns);
+
+       /* sharing the namespace: keep the reference just taken */
+       if (!(flags & CLONE_NEWUTS))
+               return 0;
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               err = -EPERM;
+               goto out;
+       }
+
+       new_ns = clone_uts_ns(old_ns);
+       if (!new_ns) {
+               err = -ENOMEM;
+               goto out;
+       }
+       tsk->nsproxy->uts_ns = new_ns;
+
+out:
+       /* reached on success too: the reference on old_ns is dropped again */
+       put_uts_ns(old_ns);
+       return err;
+}
+
+/*
+ * Free the uts namespace that embeds @kref.
+ */
+void free_uts_ns(struct kref *kref)
+{
+       kfree(container_of(kref, struct uts_namespace, kref));
+}
index f9ae75c..756a908 100644 (file)
@@ -384,3 +384,17 @@ config RCU_TORTURE_TEST
          at boot time (you probably don't).
          Say M if you want the RCU torture tests to build as a module.
          Say N if you are unsure.
+
+config LKDTM
+       tristate "Linux Kernel Dump Test Tool Module"
+       depends on KPROBES
+       default n
+       help
+       This module enables testing of the different dumping mechanisms by
+       inducing system failures at predefined crash points.
+       If you don't need it: say N
+       Choose M here to compile this code as a module. The module will be
+       called lkdtm.
+
+       Documentation on how to use the module can be found in
+       drivers/misc/lkdtm.c
index ddf3e67..b036175 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for some libs needed in the kernel.
 #
 
-lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
+lib-y := ctype.o string.o vsprintf.o cmdline.o \
         bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
         idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
         sha1.o
index 3a67dc5..7a2a73f 100644 (file)
@@ -43,3 +43,19 @@ int __any_online_cpu(const cpumask_t *mask)
        return cpu;
 }
 EXPORT_SYMBOL(__any_online_cpu);
+
+#if MAX_NUMNODES > 1
+/*
+ * Find the highest possible node id.
+ *
+ * Walks node_possible_map and returns the last node id visited, or 0
+ * if the walk visits no node at all.
+ */
+int highest_possible_node_id(void)
+{
+       unsigned int node;
+       unsigned int highest = 0;
+
+       /* each iteration overwrites highest, so the last node visited wins */
+       for_each_node_mask(node, node_possible_map)
+               highest = node;
+       return highest;
+}
+EXPORT_SYMBOL(highest_possible_node_id);
+#endif
diff --git a/lib/errno.c b/lib/errno.c
deleted file mode 100644 (file)
index 41cb9d7..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/*
- *  linux/lib/errno.c
- *
- *  Copyright (C) 1991, 1992  Linus Torvalds
- */
-
-int errno;
index 71338b4..75ae68c 100644 (file)
 #include <linux/genalloc.h>
 
 
-/*
- * Create a new special memory pool.
- *
+/**
+ * gen_pool_create - create a new special memory pool
  * @min_alloc_order: log base 2 of number of bytes each bitmap bit represents
  * @nid: node id of the node the pool structure should be allocated on, or -1
+ *
+ * Create a new special memory pool that can be used to manage special purpose
+ * memory not managed by the regular kmalloc/kfree interface.
  */
 struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
 {
@@ -34,15 +36,15 @@ struct gen_pool *gen_pool_create(int min_alloc_order, int nid)
 }
 EXPORT_SYMBOL(gen_pool_create);
 
-
-/*
- * Add a new chunk of memory to the specified pool.
- *
+/**
+ * gen_pool_add - add a new chunk of special memory to the pool
  * @pool: pool to add new memory chunk to
  * @addr: starting address of memory chunk to add to pool
  * @size: size in bytes of the memory chunk to add to pool
  * @nid: node id of the node the chunk structure and bitmap should be
  *       allocated on, or -1
+ *
+ * Add a new chunk of special memory to the specified pool.
  */
 int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
                 int nid)
@@ -69,13 +71,44 @@ int gen_pool_add(struct gen_pool *pool, unsigned long addr, size_t size,
 }
 EXPORT_SYMBOL(gen_pool_add);
 
-
-/*
- * Allocate the requested number of bytes from the specified pool.
- * Uses a first-fit algorithm.
+/**
+ * gen_pool_destroy - destroy a special memory pool
+ * @pool: pool to destroy
  *
+ * Destroy the specified special memory pool. Verifies that there are no
+ * outstanding allocations.
+ */
+void gen_pool_destroy(struct gen_pool *pool)
+{
+       struct list_head *_chunk, *_next_chunk;
+       struct gen_pool_chunk *chunk;
+       int order = pool->min_alloc_order;
+       int bit, end_bit;
+
+       /*
+        * pool->lock is deliberately not taken: the pool is freed below,
+        * so the caller must already guarantee that nobody else is using
+        * it, and a write_lock() with no matching write_unlock() would
+        * leave the lock (and the preempt count) unbalanced.
+        */
+       list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
+               chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
+               list_del(&chunk->next_chunk);
+
+               /* a bit still set in the bitmap is an outstanding allocation */
+               end_bit = (chunk->end_addr - chunk->start_addr) >> order;
+               bit = find_next_bit(chunk->bits, end_bit, 0);
+               BUG_ON(bit < end_bit);
+
+               kfree(chunk);
+       }
+       kfree(pool);
+}
+EXPORT_SYMBOL(gen_pool_destroy);
+
+/**
+ * gen_pool_alloc - allocate special memory from the pool
  * @pool: pool to allocate from
  * @size: number of bytes to allocate from the pool
+ *
+ * Allocate the requested number of bytes from the specified pool.
+ * Uses a first-fit algorithm.
  */
 unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 {
@@ -127,13 +160,13 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
 }
 EXPORT_SYMBOL(gen_pool_alloc);
 
-
-/*
- * Free the specified memory back to the specified pool.
- *
+/**
+ * gen_pool_free - free allocated special memory back to the pool
  * @pool: pool to free to
  * @addr: starting address of memory to free back to pool
  * @size: size in bytes of memory to free
+ *
+ * Free previously allocated special memory back to the specified pool.
  */
 void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
 {
index 26f3227..1958ad1 100644 (file)
@@ -1011,7 +1011,7 @@ static int rfcomm_tty_tiocmset(struct tty_struct *tty, struct file *filp, unsign
 
 /* ---- TTY structure ---- */
 
-static struct tty_operations rfcomm_ops = {
+static const struct tty_operations rfcomm_ops = {
        .open                   = rfcomm_tty_open,
        .close                  = rfcomm_tty_close,
        .write                  = rfcomm_tty_write,
index 1fbb384..f8ce847 100644 (file)
@@ -366,7 +366,7 @@ static int __init ic_defaults(void)
         */
         
        if (!ic_host_name_set)
-               sprintf(system_utsname.nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
+               sprintf(init_utsname()->nodename, "%u.%u.%u.%u", NIPQUAD(ic_myaddr));
 
        if (root_server_addr == INADDR_NONE)
                root_server_addr = ic_servaddr;
@@ -805,7 +805,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
                        }
                        break;
                case 12:        /* Host name */
-                       ic_bootp_string(system_utsname.nodename, ext+1, *ext, __NEW_UTS_LEN);
+                       ic_bootp_string(utsname()->nodename, ext+1, *ext, __NEW_UTS_LEN);
                        ic_host_name_set = 1;
                        break;
                case 15:        /* Domain name (DNS) */
@@ -816,7 +816,7 @@ static void __init ic_do_bootp_ext(u8 *ext)
                                ic_bootp_string(root_server_path, ext+1, *ext, sizeof(root_server_path));
                        break;
                case 40:        /* NIS Domain name (_not_ DNS) */
-                       ic_bootp_string(system_utsname.domainname, ext+1, *ext, __NEW_UTS_LEN);
+                       ic_bootp_string(utsname()->domainname, ext+1, *ext, __NEW_UTS_LEN);
                        break;
        }
 }
@@ -1368,7 +1368,7 @@ static int __init ip_auto_config(void)
        printk(", mask=%u.%u.%u.%u", NIPQUAD(ic_netmask));
        printk(", gw=%u.%u.%u.%u", NIPQUAD(ic_gateway));
        printk(",\n     host=%s, domain=%s, nis-domain=%s",
-              system_utsname.nodename, ic_domain, system_utsname.domainname);
+              utsname()->nodename, ic_domain, utsname()->domainname);
        printk(",\n     bootserver=%u.%u.%u.%u", NIPQUAD(ic_servaddr));
        printk(", rootserver=%u.%u.%u.%u", NIPQUAD(root_server_addr));
        printk(", rootpath=%s", root_server_path);
@@ -1478,11 +1478,11 @@ static int __init ip_auto_config_setup(char *addrs)
                        case 4:
                                if ((dp = strchr(ip, '.'))) {
                                        *dp++ = '\0';
-                                       strlcpy(system_utsname.domainname, dp,
-                                               sizeof(system_utsname.domainname));
+                                       strlcpy(utsname()->domainname, dp,
+                                               sizeof(utsname()->domainname));
                                }
-                               strlcpy(system_utsname.nodename, ip,
-                                       sizeof(system_utsname.nodename));
+                               strlcpy(utsname()->nodename, ip,
+                                       sizeof(utsname()->nodename));
                                ic_host_name_set = 1;
                                break;
                        case 5:
index dab37d2..4be336f 100644 (file)
@@ -99,8 +99,10 @@ static int jtcp_sendmsg(struct kiocb *iocb, struct sock *sk,
 }
 
 static struct jprobe tcp_send_probe = {
-       .kp = { .addr = (kprobe_opcode_t *) &tcp_sendmsg, },
-       .entry = (kprobe_opcode_t *) &jtcp_sendmsg,
+       .kp = {
+               .symbol_name    = "tcp_sendmsg",
+       },
+       .entry  = JPROBE_ENTRY(jtcp_sendmsg),
 };
 
 
index 3bcdb46..d50a020 100644 (file)
@@ -79,7 +79,7 @@ static struct tty_driver *driver;
 
 hashbin_t *ircomm_tty = NULL;
 
-static struct tty_operations ops = {
+static const struct tty_operations ops = {
        .open            = ircomm_tty_open,
        .close           = ircomm_tty_close,
        .write           = ircomm_tty_write,
index 01918f7..6c9b9b3 100644 (file)
@@ -825,7 +825,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
                        break;
                case FIOGETOWN:
                case SIOCGPGRP:
-                       err = put_user(sock->file->f_owner.pid,
+                       err = put_user(f_getown(sock->file),
                                       (int __user *)argp);
                        break;
                case SIOCGIFBR:
index 124ff0c..78696f2 100644 (file)
@@ -161,10 +161,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
        }
 
        /* save the nodename */
-       clnt->cl_nodelen = strlen(system_utsname.nodename);
+       clnt->cl_nodelen = strlen(utsname()->nodename);
        if (clnt->cl_nodelen > UNX_MAXNODENAME)
                clnt->cl_nodelen = UNX_MAXNODENAME;
-       memcpy(clnt->cl_nodename, system_utsname.nodename, clnt->cl_nodelen);
+       memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
        return clnt;
 
 out_no_auth:
index 26c0531..192dff5 100644 (file)
@@ -70,6 +70,8 @@ EXPORT_SYMBOL(put_rpccred);
 /* RPC server stuff */
 EXPORT_SYMBOL(svc_create);
 EXPORT_SYMBOL(svc_create_thread);
+EXPORT_SYMBOL(svc_create_pooled);
+EXPORT_SYMBOL(svc_set_num_threads);
 EXPORT_SYMBOL(svc_exit_thread);
 EXPORT_SYMBOL(svc_destroy);
 EXPORT_SYMBOL(svc_drop);
index 44b8d9d..a99e67b 100644 (file)
@@ -4,6 +4,10 @@
  * High-level RPC service routines
  *
  * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ *
+ * Multiple threads pools and NUMAisation
+ * Copyright (c) 2006 Silicon Graphics, Inc.
+ * by Greg Banks <gnb@melbourne.sgi.com>
  */
 
 #include <linux/linkage.h>
@@ -12,6 +16,8 @@
 #include <linux/net.h>
 #include <linux/in.h>
 #include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/xdr.h>
 #define RPCDBG_FACILITY        RPCDBG_SVCDSP
 #define RPC_PARANOIA 1
 
+/*
+ * Mode for mapping cpus to pools.
+ */
+enum {
+       SVC_POOL_NONE = -1,     /* uninitialised, choose one of the others */
+       SVC_POOL_GLOBAL,        /* no mapping, just a single global pool
+                                * (legacy & UP mode) */
+       SVC_POOL_PERCPU,        /* one pool per cpu */
+       SVC_POOL_PERNODE        /* one pool per numa node */
+};
+
+/*
+ * Structure for mapping cpus to pools and vice versa.
+ * Starts out in SVC_POOL_NONE mode and is filled in by the first
+ * call to svc_pool_map_init().
+ */
+static struct svc_pool_map {
+       int mode;                       /* Note: int not enum to avoid
+                                        * warnings about "enumeration value
+                                        * not handled in switch" */
+       unsigned int npools;            /* number of pools for this mode */
+       unsigned int *pool_to;          /* maps pool id to cpu or node */
+       unsigned int *to_pool;          /* maps cpu or node to pool id */
+} svc_pool_map = {
+       .mode = SVC_POOL_NONE
+};
+
+
+/*
+ * Pick a pool mapping mode from the machine's topology:
+ * per-node, per-cpu, or a single global pool.
+ */
+static int
+svc_pool_map_choose_mode(void)
+{
+       unsigned int only_node;
+
+       /* Several NUMA nodes online: split pools on node boundaries. */
+       if (num_online_nodes() > 1)
+               return SVC_POOL_PERNODE;
+
+       /*
+        * A single online node.  With more than two cpus on it
+        * (non-trivial SMP, or CONFIG_NUMA on non-NUMA hardware,
+        * e.g. a generic x86_64 kernel on Xeons) the pools are
+        * divided on cpu boundaries instead.
+        */
+       only_node = any_online_node(node_online_map);
+       if (nr_cpus_node(only_node) > 2)
+               return SVC_POOL_PERCPU;
+
+       /* Everything else gets one global pool. */
+       return SVC_POOL_GLOBAL;
+}
+
+/*
+ * Allocate the to_pool[] and pool_to[] arrays, each with maxpools
+ * zero-initialised entries.
+ * Returns 0 on success or -ENOMEM.  On failure neither array is left
+ * allocated and m->to_pool is reset to NULL.
+ */
+static int
+svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
+{
+       m->to_pool = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+       if (!m->to_pool)
+               goto fail;
+       m->pool_to = kcalloc(maxpools, sizeof(unsigned int), GFP_KERNEL);
+       if (!m->pool_to)
+               goto fail_free;
+
+       return 0;
+
+fail_free:
+       kfree(m->to_pool);
+       /* do not leave a dangling pointer behind in the global map */
+       m->to_pool = NULL;
+fail:
+       return -ENOMEM;
+}
+
+/*
+ * Initialise the pool map for SVC_POOL_PERCPU mode: every cpu that is
+ * online now gets its own pool, numbered in iteration order.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_percpu(struct svc_pool_map *m)
+{
+       unsigned int maxpools = highest_possible_processor_id()+1;
+       unsigned int pidx = 0;
+       unsigned int cpu;
+       int err;
+
+       err = svc_pool_map_alloc_arrays(m, maxpools);
+       if (err)
+               return err;
+
+       for_each_online_cpu(cpu) {
+               /* pool_to[] has maxpools entries, so pidx must stay below it */
+               BUG_ON(pidx >= maxpools);
+               m->to_pool[cpu] = pidx;
+               m->pool_to[pidx] = cpu;
+               pidx++;
+       }
+       /* cpus brought online later all get mapped to pool0, sorry */
+
+       return pidx;
+}
+
+
+/*
+ * Initialise the pool map for SVC_POOL_PERNODE mode: every node visited
+ * by for_each_node_with_cpus() gets its own pool, numbered in iteration
+ * order.
+ * Returns number of pools or <0 on error.
+ */
+static int
+svc_pool_map_init_pernode(struct svc_pool_map *m)
+{
+       unsigned int maxpools = highest_possible_node_id()+1;
+       unsigned int pidx = 0;
+       unsigned int node;
+       int err;
+
+       err = svc_pool_map_alloc_arrays(m, maxpools);
+       if (err)
+               return err;
+
+       for_each_node_with_cpus(node) {
+               /* some architectures (e.g. SN2) have cpuless nodes */
+               /* pool_to[] has maxpools entries, so pidx must stay below it */
+               BUG_ON(pidx >= maxpools);
+               m->to_pool[node] = pidx;
+               m->pool_to[pidx] = node;
+               pidx++;
+       }
+       /* nodes brought online later all get mapped to pool0, sorry */
+
+       return pidx;
+}
+
+
+/*
+ * Build the global map of cpus to pools and vice versa.
+ *
+ * Only the first call does any work; later calls return the pool count
+ * computed then.  Returns the number of pools, which is 1 when the
+ * chosen mode is SVC_POOL_GLOBAL or when building the map failed.
+ */
+static unsigned int
+svc_pool_map_init(void)
+{
+       struct svc_pool_map *m = &svc_pool_map;
+       int npools = -1;
+
+       /* already initialised by an earlier call */
+       if (m->mode != SVC_POOL_NONE)
+               return m->npools;
+
+       m->mode = svc_pool_map_choose_mode();
+
+       /* SVC_POOL_GLOBAL has no case here, so npools stays at -1 for it */
+       switch (m->mode) {
+       case SVC_POOL_PERCPU:
+               npools = svc_pool_map_init_percpu(m);
+               break;
+       case SVC_POOL_PERNODE:
+               npools = svc_pool_map_init_pernode(m);
+               break;
+       }
+
+       if (npools < 0) {
+               /* default, or memory allocation failure */
+               npools = 1;
+               m->mode = SVC_POOL_GLOBAL;
+       }
+       m->npools = npools;
+
+       return m->npools;
+}
+
+/*
+ * Set the current thread's cpus_allowed mask so that it
+ * will only run on cpus in the given pool.
+ *
+ * @pidx: pool id, used as an index into the map's pool_to[] array
+ * @oldmask: receives the previous cpus_allowed mask when one is applied
+ *
+ * Returns 1 and fills in oldmask iff a cpumask was applied.
+ */
+static inline int
+svc_pool_map_set_cpumask(unsigned int pidx, cpumask_t *oldmask)
+{
+       struct svc_pool_map *m = &svc_pool_map;
+       unsigned int node; /* or cpu */
+
+       /*
+        * The caller checks for sv_nrpools > 1, which
+        * implies that we've been initialized and the
+        * map mode is not NONE.
+        */
+       BUG_ON(m->mode == SVC_POOL_NONE);
+
+       switch (m->mode)
+       {
+       default:
+               /* any other mode: leave the mask alone */
+               return 0;
+       case SVC_POOL_PERCPU:
+               /* here pool_to[] holds a cpu number */
+               node = m->pool_to[pidx];
+               *oldmask = current->cpus_allowed;
+               set_cpus_allowed(current, cpumask_of_cpu(node));
+               return 1;
+       case SVC_POOL_PERNODE:
+               /* here pool_to[] holds a node id */
+               node = m->pool_to[pidx];
+               *oldmask = current->cpus_allowed;
+               set_cpus_allowed(current, node_to_cpumask(node));
+               return 1;
+       }
+}
+
+/*
+ * Map a CPU to one of serv's pools according to the global mapping
+ * mode.  Used when enqueueing an incoming RPC.  The result is always
+ * a valid pool pointer, never NULL.
+ */
+struct svc_pool *
+svc_pool_for_cpu(struct svc_serv *serv, int cpu)
+{
+       struct svc_pool_map *map = &svc_pool_map;
+       unsigned int idx;
+
+       /*
+        * Any mode other than PERCPU/PERNODE uses pool 0.  That
+        * includes SVC_POOL_NONE, which happens in a pure client
+        * when lockd is brought up and is silently treated the
+        * same as SVC_POOL_GLOBAL.
+        */
+       if (map->mode == SVC_POOL_PERCPU)
+               idx = map->to_pool[cpu];
+       else if (map->mode == SVC_POOL_PERNODE)
+               idx = map->to_pool[cpu_to_node(cpu)];
+       else
+               idx = 0;
+
+       return &serv->sv_pools[idx % serv->sv_nrpools];
+}
+
+
 /*
  * Create an RPC service
  */
-struct svc_serv *
-svc_create(struct svc_program *prog, unsigned int bufsize)
+static struct svc_serv *
+__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
+          void (*shutdown)(struct svc_serv *serv))
 {
        struct svc_serv *serv;
        int vers;
        unsigned int xdrsize;
+       unsigned int i;
 
        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
                return NULL;
@@ -39,6 +283,7 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
        serv->sv_nrthreads = 1;
        serv->sv_stats     = prog->pg_stats;
        serv->sv_bufsz     = bufsize? bufsize : 4096;
+       serv->sv_shutdown  = shutdown;
        xdrsize = 0;
        while (prog) {
                prog->pg_lovers = prog->pg_nvers-1;
@@ -53,20 +298,68 @@ svc_create(struct svc_program *prog, unsigned int bufsize)
                prog = prog->pg_next;
        }
        serv->sv_xdrsize   = xdrsize;
-       INIT_LIST_HEAD(&serv->sv_threads);
-       INIT_LIST_HEAD(&serv->sv_sockets);
        INIT_LIST_HEAD(&serv->sv_tempsocks);
        INIT_LIST_HEAD(&serv->sv_permsocks);
+       init_timer(&serv->sv_temptimer);
        spin_lock_init(&serv->sv_lock);
 
+       serv->sv_nrpools = npools;
+       serv->sv_pools =
+               kcalloc(sizeof(struct svc_pool), serv->sv_nrpools,
+                       GFP_KERNEL);
+       if (!serv->sv_pools) {
+               kfree(serv);
+               return NULL;
+       }
+
+       for (i = 0; i < serv->sv_nrpools; i++) {
+               struct svc_pool *pool = &serv->sv_pools[i];
+
+               dprintk("initialising pool %u for %s\n",
+                               i, serv->sv_name);
+
+               pool->sp_id = i;
+               INIT_LIST_HEAD(&pool->sp_threads);
+               INIT_LIST_HEAD(&pool->sp_sockets);
+               INIT_LIST_HEAD(&pool->sp_all_threads);
+               spin_lock_init(&pool->sp_lock);
+       }
+
+
        /* Remove any stale portmap registrations */
        svc_register(serv, 0, 0);
 
        return serv;
 }
 
+/*
+ * Create an RPC service with a single pool.  Returns whatever
+ * __svc_create() returns.
+ */
+struct svc_serv *
+svc_create(struct svc_program *prog, unsigned int bufsize,
+               void (*shutdown)(struct svc_serv *serv))
+{
+       return __svc_create(prog, bufsize, /*npools*/1, shutdown);
+}
+
+/*
+ * Create an RPC service with as many pools as svc_pool_map_init()
+ * reports, and record the thread function, kill signal and module in
+ * the new serv.  Returns NULL if __svc_create() fails.
+ */
+struct svc_serv *
+svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
+               void (*shutdown)(struct svc_serv *serv),
+                 svc_thread_fn func, int sig, struct module *mod)
+{
+       unsigned int pool_count = svc_pool_map_init();
+       struct svc_serv *new_serv;
+
+       new_serv = __svc_create(prog, bufsize, pool_count, shutdown);
+       if (new_serv == NULL)
+               return NULL;
+
+       new_serv->sv_function = func;
+       new_serv->sv_kill_signal = sig;
+       new_serv->sv_module = mod;
+
+       return new_serv;
+}
+
 /*
- * Destroy an RPC service
+ * Destroy an RPC service.  Should be called with the BKL held
  */
 void
 svc_destroy(struct svc_serv *serv)
@@ -85,12 +378,17 @@ svc_destroy(struct svc_serv *serv)
        } else
                printk("svc_destroy: no threads for serv=%p!\n", serv);
 
+       del_timer_sync(&serv->sv_temptimer);
+
        while (!list_empty(&serv->sv_tempsocks)) {
                svsk = list_entry(serv->sv_tempsocks.next,
                                  struct svc_sock,
                                  sk_list);
                svc_delete_socket(svsk);
        }
+       if (serv->sv_shutdown)
+               serv->sv_shutdown(serv);
+
        while (!list_empty(&serv->sv_permsocks)) {
                svsk = list_entry(serv->sv_permsocks.next,
                                  struct svc_sock,
@@ -102,6 +400,7 @@ svc_destroy(struct svc_serv *serv)
 
        /* Unregister service with the portmapper */
        svc_register(serv, 0, 0);
+       kfree(serv->sv_pools);
        kfree(serv);
 }
 
@@ -150,13 +449,18 @@ svc_release_buffer(struct svc_rqst *rqstp)
 }
 
 /*
- * Create a server thread
+ * Create a thread in the given pool.  Caller must hold BKL.
+ * On a NUMA or SMP machine, with a multi-pool serv, the thread
+ * will be restricted to run on the cpus belonging to the pool.
  */
-int
-svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+static int
+__svc_create_thread(svc_thread_fn func, struct svc_serv *serv,
+                   struct svc_pool *pool)
 {
        struct svc_rqst *rqstp;
        int             error = -ENOMEM;
+       int             have_oldmask = 0;
+       cpumask_t       oldmask;
 
        rqstp = kzalloc(sizeof(*rqstp), GFP_KERNEL);
        if (!rqstp)
@@ -170,8 +474,21 @@ svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
                goto out_thread;
 
        serv->sv_nrthreads++;
+       spin_lock_bh(&pool->sp_lock);
+       pool->sp_nrthreads++;
+       list_add(&rqstp->rq_all, &pool->sp_all_threads);
+       spin_unlock_bh(&pool->sp_lock);
        rqstp->rq_server = serv;
+       rqstp->rq_pool = pool;
+
+       if (serv->sv_nrpools > 1)
+               have_oldmask = svc_pool_map_set_cpumask(pool->sp_id, &oldmask);
+
        error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+
+       if (have_oldmask)
+               set_cpus_allowed(current, oldmask);
+
        if (error < 0)
                goto out_thread;
        svc_sock_update_bufs(serv);
@@ -185,17 +502,136 @@ out_thread:
 }
 
 /*
- * Destroy an RPC server thread
+ * Create a thread in the default pool.  Caller must hold BKL.
+ */
+int
+svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
+{
+       return __svc_create_thread(func, serv, &serv->sv_pools[0]);
+}
+
+/*
+ * Pick the pool a new thread is created in, for svc_set_num_threads:
+ * the caller's pool if one was given, otherwise the pool indexed by
+ * *state modulo the pool count, after which *state is advanced by one.
+ */
+static inline struct svc_pool *
+choose_pool(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+       unsigned int idx;
+
+       if (pool)
+               return pool;
+
+       idx = *state % serv->sv_nrpools;
+       (*state)++;
+       return &serv->sv_pools[idx];
+}
+
+/*
+ * Choose a thread to kill, for svc_set_num_threads
+ *
+ * With a non-NULL pool only that pool is searched; otherwise *state is
+ * decremented to step through the pools until one with a non-empty
+ * sp_all_threads list is found.  Returns the rq_task of the first
+ * request on that list, or NULL if no thread is available.
+ */
+static inline struct task_struct *
+choose_victim(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state)
+{
+       unsigned int i;
+       struct task_struct *task = NULL;
+
+       if (pool != NULL) {
+               spin_lock_bh(&pool->sp_lock);
+       } else {
+               /* choose a pool in round-robin fashion */
+               for (i = 0; i < serv->sv_nrpools; i++) {
+                       pool = &serv->sv_pools[--(*state) % serv->sv_nrpools];
+                       spin_lock_bh(&pool->sp_lock);
+                       /* jump out with sp_lock still held */
+                       if (!list_empty(&pool->sp_all_threads))
+                               goto found_pool;
+                       spin_unlock_bh(&pool->sp_lock);
+               }
+               return NULL;
+       }
+
+found_pool:
+       /* pool->sp_lock is held on both paths that reach this label */
+       if (!list_empty(&pool->sp_all_threads)) {
+               struct svc_rqst *rqstp;
+
+               /*
+                * Remove from the pool->sp_all_threads list
+                * so we don't try to kill it again.
+                */
+               rqstp = list_entry(pool->sp_all_threads.next, struct svc_rqst, rq_all);
+               list_del_init(&rqstp->rq_all);
+               task = rqstp->rq_task;
+       }
+       spin_unlock_bh(&pool->sp_lock);
+
+       return task;
+}
+
+/*
+ * Create or destroy enough new threads to make the number
+ * of threads the given number.  If `pool' is non-NULL, applies
+ * only to threads in that pool, otherwise round-robins between
+ * all pools.  Must be called with a svc_get() reference and
+ * the BKL held.
+ *
+ * Destroying threads relies on the service threads filling in
+ * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
+ * has been created using svc_create_pooled().
+ *
+ * Based on code that used to be in nfsd_svc() but tweaked
+ * to be pool-aware.
+ *
+ * Returns 0, or the result of the last __svc_create_thread() call
+ * (negative if thread creation failed).
+ */
+int
+svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+{
+       struct task_struct *victim;
+       int error = 0;
+       unsigned int state = serv->sv_nrthreads-1;
+
+       /*
+        * From here on nrservs is a delta: positive means threads
+        * to create, negative means threads to kill.
+        */
+       if (pool == NULL) {
+               /* The -1 assumes caller has done a svc_get() */
+               nrservs -= (serv->sv_nrthreads-1);
+       } else {
+               spin_lock_bh(&pool->sp_lock);
+               nrservs -= pool->sp_nrthreads;
+               spin_unlock_bh(&pool->sp_lock);
+       }
+
+       /* create new threads */
+       while (nrservs > 0) {
+               nrservs--;
+               /* module reference is taken per thread, undone if creation fails */
+               __module_get(serv->sv_module);
+               error = __svc_create_thread(serv->sv_function, serv,
+                                           choose_pool(serv, pool, &state));
+               if (error < 0) {
+                       module_put(serv->sv_module);
+                       break;
+               }
+       }
+       /* destroy old threads */
+       while (nrservs < 0 &&
+              (victim = choose_victim(serv, pool, &state)) != NULL) {
+               /* the thread is only signalled here, not waited for */
+               send_sig(serv->sv_kill_signal, victim, 1);
+               nrservs++;
+       }
+
+       return error;
+}
+
+/*
+ * Called from a server thread as it's exiting.  Caller must hold BKL.
  */
 void
 svc_exit_thread(struct svc_rqst *rqstp)
 {
        struct svc_serv *serv = rqstp->rq_server;
+       struct svc_pool *pool = rqstp->rq_pool;
 
        svc_release_buffer(rqstp);
        kfree(rqstp->rq_resp);
        kfree(rqstp->rq_argp);
        kfree(rqstp->rq_auth_data);
+
+       spin_lock_bh(&pool->sp_lock);
+       pool->sp_nrthreads--;
+       list_del(&rqstp->rq_all);
+       spin_unlock_bh(&pool->sp_lock);
+
        kfree(rqstp);
 
        /* Release the server */
@@ -248,13 +684,14 @@ svc_register(struct svc_serv *serv, int proto, unsigned short port)
  * Process the RPC request.
  */
 int
-svc_process(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_process(struct svc_rqst *rqstp)
 {
        struct svc_program      *progp;
        struct svc_version      *versp = NULL;  /* compiler food */
        struct svc_procedure    *procp = NULL;
        struct kvec *           argv = &rqstp->rq_arg.head[0];
        struct kvec *           resv = &rqstp->rq_res.head[0];
+       struct svc_serv         *serv = rqstp->rq_server;
        kxdrproc_t              xdr;
        __be32                  *statp;
        u32                     dir, prog, vers, proc;
index 1020d54..40d41a2 100644 (file)
@@ -348,12 +348,9 @@ int auth_unix_forget_old(struct auth_domain *dom)
 
 struct auth_domain *auth_unix_lookup(struct in_addr addr)
 {
-       struct ip_map key, *ipm;
+       struct ip_map *ipm;
        struct auth_domain *rv;
 
-       strcpy(key.m_class, "nfsd");
-       key.m_addr = addr;
-
        ipm = ip_map_lookup("nfsd", addr);
 
        if (!ipm)
index 5b0fe1b..cba85d1 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/slab.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/file.h>
 #include <net/sock.h>
 #include <net/checksum.h>
 #include <net/ip.h>
 
 /* SMP locking strategy:
  *
- *     svc_serv->sv_lock protects most stuff for that service.
+ *     svc_pool->sp_lock protects most of the fields of that pool.
+ *     svc_serv->sv_lock protects sv_tempsocks, sv_permsocks, sv_tmpcnt.
+ *     when both need to be taken (rare), svc_serv->sv_lock is first.
+ *     BKL protects svc_serv->sv_nrthreads.
+ *     svc_sock->sk_defer_lock protects the svc_sock->sk_deferred list
+ *     svc_sock->sk_flags.SK_BUSY prevents a svc_sock being enqueued multiply.
  *
  *     Some flags can be set to certain values at any time
  *     providing that certain rules are followed:
  *
- *     SK_BUSY  can be set to 0 at any time.  
- *             svc_sock_enqueue must be called afterwards
  *     SK_CONN, SK_DATA, can be set or cleared at any time.
  *             after a set, svc_sock_enqueue must be called.   
  *             after a clear, the socket must be read/accepted
@@ -73,23 +77,30 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 
+/* apparently the "standard" is that clients close
+ * idle connections after 5 minutes, servers after
+ * 6 minutes
+ *   http://www.connectathon.org/talks96/nfstcp.pdf
+ */
+static int svc_conn_age_period = 6*60;
+
 /*
- * Queue up an idle server thread.  Must have serv->sv_lock held.
+ * Queue up an idle server thread.  Must have pool->sp_lock held.
  * Note: this is really a stack rather than a queue, so that we only
- * use as many different threads as we need, and the rest don't polute
+ * use as many different threads as we need, and the rest don't pollute
  * the cache.
  */
 static inline void
-svc_serv_enqueue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_enqueue(struct svc_pool *pool, struct svc_rqst *rqstp)
 {
-       list_add(&rqstp->rq_list, &serv->sv_threads);
+       list_add(&rqstp->rq_list, &pool->sp_threads);   /* head insert: svc_sock_enqueue() picks sp_threads.next, i.e. the most recently idled thread */
 }
 
 /*
- * Dequeue an nfsd thread.  Must have serv->sv_lock held.
+ * Dequeue an nfsd thread.  Must have pool->sp_lock held.
  */
 static inline void
-svc_serv_dequeue(struct svc_serv *serv, struct svc_rqst *rqstp)
+svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)      /* pool is not referenced by the body */
 {
        list_del(&rqstp->rq_list);
 }
@@ -140,7 +151,9 @@ static void
 svc_sock_enqueue(struct svc_sock *svsk)
 {
        struct svc_serv *serv = svsk->sk_server;
+       struct svc_pool *pool;
        struct svc_rqst *rqstp;
+       int cpu;
 
        if (!(svsk->sk_flags &
              ( (1<<SK_CONN)|(1<<SK_DATA)|(1<<SK_CLOSE)|(1<<SK_DEFERRED)) ))
@@ -148,10 +161,14 @@ svc_sock_enqueue(struct svc_sock *svsk)
        if (test_bit(SK_DEAD, &svsk->sk_flags))
                return;
 
-       spin_lock_bh(&serv->sv_lock);
+       cpu = get_cpu();
+       pool = svc_pool_for_cpu(svsk->sk_server, cpu);
+       put_cpu();
 
-       if (!list_empty(&serv->sv_threads) && 
-           !list_empty(&serv->sv_sockets))
+       spin_lock_bh(&pool->sp_lock);
+
+       if (!list_empty(&pool->sp_threads) &&
+           !list_empty(&pool->sp_sockets))
                printk(KERN_ERR
                        "svc_sock_enqueue: threads and sockets both waiting??\n");
 
@@ -161,73 +178,79 @@ svc_sock_enqueue(struct svc_sock *svsk)
                goto out_unlock;
        }
 
-       if (test_bit(SK_BUSY, &svsk->sk_flags)) {
-               /* Don't enqueue socket while daemon is receiving */
+       /* Mark socket as busy. It will remain in this state until the
+        * server has processed all pending data and put the socket back
+        * on the idle list.  We update SK_BUSY atomically because
+        * it also guards against trying to enqueue the svc_sock twice.
+        */
+       if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) {
+               /* Don't enqueue socket while already enqueued */
                dprintk("svc: socket %p busy, not enqueued\n", svsk->sk_sk);
                goto out_unlock;
        }
+       BUG_ON(svsk->sk_pool != NULL);
+       svsk->sk_pool = pool;
 
        set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
-       if (((svsk->sk_reserved + serv->sv_bufsz)*2
+       if (((atomic_read(&svsk->sk_reserved) + serv->sv_bufsz)*2
             > svc_sock_wspace(svsk))
            && !test_bit(SK_CLOSE, &svsk->sk_flags)
            && !test_bit(SK_CONN, &svsk->sk_flags)) {
                /* Don't enqueue while not enough space for reply */
                dprintk("svc: socket %p  no space, %d*2 > %ld, not enqueued\n",
-                       svsk->sk_sk, svsk->sk_reserved+serv->sv_bufsz,
+                       svsk->sk_sk, atomic_read(&svsk->sk_reserved)+serv->sv_bufsz,
                        svc_sock_wspace(svsk));
+               svsk->sk_pool = NULL;
+               clear_bit(SK_BUSY, &svsk->sk_flags);
                goto out_unlock;
        }
        clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 
-       /* Mark socket as busy. It will remain in this state until the
-        * server has processed all pending data and put the socket back
-        * on the idle list.
-        */
-       set_bit(SK_BUSY, &svsk->sk_flags);
 
-       if (!list_empty(&serv->sv_threads)) {
-               rqstp = list_entry(serv->sv_threads.next,
+       if (!list_empty(&pool->sp_threads)) {
+               rqstp = list_entry(pool->sp_threads.next,
                                   struct svc_rqst,
                                   rq_list);
                dprintk("svc: socket %p served by daemon %p\n",
                        svsk->sk_sk, rqstp);
-               svc_serv_dequeue(serv, rqstp);
+               svc_thread_dequeue(pool, rqstp);
                if (rqstp->rq_sock)
                        printk(KERN_ERR 
                                "svc_sock_enqueue: server %p, rq_sock=%p!\n",
                                rqstp, rqstp->rq_sock);
                rqstp->rq_sock = svsk;
-               svsk->sk_inuse++;
+               atomic_inc(&svsk->sk_inuse);
                rqstp->rq_reserved = serv->sv_bufsz;
-               svsk->sk_reserved += rqstp->rq_reserved;
+               atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
+               BUG_ON(svsk->sk_pool != pool);
                wake_up(&rqstp->rq_wait);
        } else {
                dprintk("svc: socket %p put into queue\n", svsk->sk_sk);
-               list_add_tail(&svsk->sk_ready, &serv->sv_sockets);
+               list_add_tail(&svsk->sk_ready, &pool->sp_sockets);
+               BUG_ON(svsk->sk_pool != pool);
        }
 
 out_unlock:
-       spin_unlock_bh(&serv->sv_lock);
+       spin_unlock_bh(&pool->sp_lock);
 }
 
 /*
- * Dequeue the first socket.  Must be called with the serv->sv_lock held.
+ * Dequeue the first socket.  Must be called with the pool->sp_lock held.
  */
 static inline struct svc_sock *
-svc_sock_dequeue(struct svc_serv *serv)
+svc_sock_dequeue(struct svc_pool *pool)
 {
        struct svc_sock *svsk;
 
-       if (list_empty(&serv->sv_sockets))
+       if (list_empty(&pool->sp_sockets))      /* no socket is waiting on this pool */
                return NULL;
 
-       svsk = list_entry(serv->sv_sockets.next,
+       svsk = list_entry(pool->sp_sockets.next,        /* list head; svc_sock_enqueue() appends with list_add_tail(), so this is FIFO */
                          struct svc_sock, sk_ready);
        list_del_init(&svsk->sk_ready);
 
        dprintk("svc: socket %p dequeued, inuse=%d\n",
-               svsk->sk_sk, svsk->sk_inuse);
+               svsk->sk_sk, atomic_read(&svsk->sk_inuse));     /* sk_inuse is an atomic_t after this change */
 
        return svsk;
 }
@@ -241,6 +264,7 @@ svc_sock_dequeue(struct svc_serv *serv)
 static inline void
 svc_sock_received(struct svc_sock *svsk)
 {
+       svsk->sk_pool = NULL;   /* reset before SK_BUSY is dropped: svc_sock_enqueue() BUG()s if sk_pool is still set once it wins SK_BUSY */
        clear_bit(SK_BUSY, &svsk->sk_flags);
        svc_sock_enqueue(svsk);
 }
@@ -262,10 +286,8 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 
        if (space < rqstp->rq_reserved) {
                struct svc_sock *svsk = rqstp->rq_sock;
-               spin_lock_bh(&svsk->sk_server->sv_lock);
-               svsk->sk_reserved -= (rqstp->rq_reserved - space);
+               atomic_sub((rqstp->rq_reserved - space), &svsk->sk_reserved);
                rqstp->rq_reserved = space;
-               spin_unlock_bh(&svsk->sk_server->sv_lock);
 
                svc_sock_enqueue(svsk);
        }
@@ -277,17 +299,11 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 static inline void
 svc_sock_put(struct svc_sock *svsk)
 {
-       struct svc_serv *serv = svsk->sk_server;
-
-       spin_lock_bh(&serv->sv_lock);
-       if (!--(svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {
-               spin_unlock_bh(&serv->sv_lock);
+       if (atomic_dec_and_test(&svsk->sk_inuse) && test_bit(SK_DEAD, &svsk->sk_flags)) {       /* NOTE(review): svc_delete_socket() releases via sockfd_put() when sk_sock->file is set, but this path always calls sock_release() - confirm for sockets added through svc_addsock() */
                dprintk("svc: releasing dead socket\n");
                sock_release(svsk->sk_sock);
                kfree(svsk);
        }
-       else
-               spin_unlock_bh(&serv->sv_lock);
 }
 
 static void
@@ -321,25 +337,33 @@ svc_sock_release(struct svc_rqst *rqstp)
 
 /*
  * External function to wake up a server waiting for data
+ * This really only makes sense for services like lockd
+ * which have exactly one thread anyway.
  */
 void
 svc_wake_up(struct svc_serv *serv)
 {
        struct svc_rqst *rqstp;
-
-       spin_lock_bh(&serv->sv_lock);
-       if (!list_empty(&serv->sv_threads)) {
-               rqstp = list_entry(serv->sv_threads.next,
-                                  struct svc_rqst,
-                                  rq_list);
-               dprintk("svc: daemon %p woken up.\n", rqstp);
-               /*
-               svc_serv_dequeue(serv, rqstp);
-               rqstp->rq_sock = NULL;
-                */
-               wake_up(&rqstp->rq_wait);
+       unsigned int i;
+       struct svc_pool *pool;
+
+       for (i = 0; i < serv->sv_nrpools; i++) {        /* every pool is visited; at most one idle thread per pool is woken */
+               pool = &serv->sv_pools[i];
+
+               spin_lock_bh(&pool->sp_lock);
+               if (!list_empty(&pool->sp_threads)) {
+                       rqstp = list_entry(pool->sp_threads.next,
+                                          struct svc_rqst,
+                                          rq_list);
+                       dprintk("svc: daemon %p woken up.\n", rqstp);
+                       /*
+                       svc_thread_dequeue(pool, rqstp);
+                       rqstp->rq_sock = NULL;
+                        */
+                       wake_up(&rqstp->rq_wait);       /* thread stays on sp_threads and is not handed a socket */
+               }
+               spin_unlock_bh(&pool->sp_lock);
        }
-       spin_unlock_bh(&serv->sv_lock);
 }
 
 /*
@@ -428,6 +452,51 @@ out:
        return len;
 }
 
+/*
+ * Report socket names for nfsdfs: format one line describing svsk into buf and return the number of characters written.
+ */
+static int one_sock_name(char *buf, struct svc_sock *svsk)
+{
+       int len;
+
+       switch(svsk->sk_sk->sk_family) {
+       case AF_INET:
+               len = sprintf(buf, "ipv4 %s %u.%u.%u.%u %d\n",  /* unbounded sprintf: the caller must supply enough room in buf */
+                             svsk->sk_sk->sk_protocol==IPPROTO_UDP?
+                             "udp" : "tcp",    /* any protocol other than UDP is reported as tcp */
+                             NIPQUAD(inet_sk(svsk->sk_sk)->rcv_saddr),
+                             inet_sk(svsk->sk_sk)->num);
+               break;
+       default:        /* any family other than AF_INET */
+               len = sprintf(buf, "*unknown-%d*\n",
+                              svsk->sk_sk->sk_family);
+       }
+       return len;
+}
+
+int
+svc_sock_names(char *buf, struct svc_serv *serv, char *toclose)
+{
+       struct svc_sock *svsk, *closesk = NULL; /* closesk: permanent socket whose name equals toclose, if any */
+       int len = 0;    /* number of characters of buf filled so far; also the return value */
+
+       if (!serv)
+               return 0;
+       spin_lock_bh(&serv->sv_lock);   /* _bh like the other sv_lock users; svc_age_temp_sockets() takes this lock from a timer */
+       list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
+               int onelen = one_sock_name(buf+len, svsk);
+               if (toclose && strcmp(toclose, buf+len) == 0)
+                       closesk = svsk; /* len is not advanced, so the next name overwrites this one */
+               else
+                       len += onelen;
+       }
+       spin_unlock_bh(&serv->sv_lock);
+       if (closesk)
+               svc_delete_socket(closesk);     /* called after unlocking: svc_delete_socket() takes sv_lock itself */
+       return len;
+}
+EXPORT_SYMBOL(svc_sock_names);
+
 /*
  * Check input queue length
  */
@@ -557,7 +626,10 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
            /* udp sockets need large rcvbuf as all pending
             * requests are still in that buffer.  sndbuf must
             * also be large enough that there is enough space
-            * for one reply per thread.
+            * for one reply per thread.  We count all threads
+            * rather than threads in a particular pool, which
+            * provides an upper bound on the number of threads
+            * which will access the socket.
             */
            svc_sock_setbufsize(svsk->sk_sock,
                                (serv->sv_nrthreads+3) * serv->sv_bufsz,
@@ -844,7 +916,7 @@ svc_tcp_accept(struct svc_sock *svsk)
                                          struct svc_sock,
                                          sk_list);
                        set_bit(SK_CLOSE, &svsk->sk_flags);
-                       svsk->sk_inuse ++;
+                       atomic_inc(&svsk->sk_inuse);
                }
                spin_unlock_bh(&serv->sv_lock);
 
@@ -902,6 +974,11 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp)
                /* sndbuf needs to have room for one request
                 * per thread, otherwise we can stall even when the
                 * network isn't a bottleneck.
+                *
+                * We count all threads rather than threads in a
+                * particular pool, which provides an upper bound
+                * on the number of threads which will access the socket.
+                *
                 * rcvbuf just needs to be able to hold a few requests.
                 * Normally they will be removed from the queue 
                 * as soon a a complete request arrives.
@@ -1117,12 +1194,16 @@ svc_sock_update_bufs(struct svc_serv *serv)
 }
 
 /*
- * Receive the next request on any socket.
+ * Receive the next request on any socket.  This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
  */
 int
-svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
+svc_recv(struct svc_rqst *rqstp, long timeout)
 {
        struct svc_sock         *svsk =NULL;
+       struct svc_serv         *serv = rqstp->rq_server;
+       struct svc_pool         *pool = rqstp->rq_pool;
        int                     len;
        int                     pages;
        struct xdr_buf          *arg;
@@ -1172,32 +1253,15 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
        if (signalled())
                return -EINTR;
 
-       spin_lock_bh(&serv->sv_lock);
-       if (!list_empty(&serv->sv_tempsocks)) {
-               svsk = list_entry(serv->sv_tempsocks.next,
-                                 struct svc_sock, sk_list);
-               /* apparently the "standard" is that clients close
-                * idle connections after 5 minutes, servers after
-                * 6 minutes
-                *   http://www.connectathon.org/talks96/nfstcp.pdf 
-                */
-               if (get_seconds() - svsk->sk_lastrecv < 6*60
-                   || test_bit(SK_BUSY, &svsk->sk_flags))
-                       svsk = NULL;
-       }
-       if (svsk) {
-               set_bit(SK_BUSY, &svsk->sk_flags);
-               set_bit(SK_CLOSE, &svsk->sk_flags);
-               rqstp->rq_sock = svsk;
-               svsk->sk_inuse++;
-       } else if ((svsk = svc_sock_dequeue(serv)) != NULL) {
+       spin_lock_bh(&pool->sp_lock);
+       if ((svsk = svc_sock_dequeue(pool)) != NULL) {
                rqstp->rq_sock = svsk;
-               svsk->sk_inuse++;
+               atomic_inc(&svsk->sk_inuse);
                rqstp->rq_reserved = serv->sv_bufsz;    
-               svsk->sk_reserved += rqstp->rq_reserved;
+               atomic_add(rqstp->rq_reserved, &svsk->sk_reserved);
        } else {
                /* No data pending. Go to sleep */
-               svc_serv_enqueue(serv, rqstp);
+               svc_thread_enqueue(pool, rqstp);
 
                /*
                 * We have to be able to interrupt this wait
@@ -1205,26 +1269,26 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
                 */
                set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&rqstp->rq_wait, &wait);
-               spin_unlock_bh(&serv->sv_lock);
+               spin_unlock_bh(&pool->sp_lock);
 
                schedule_timeout(timeout);
 
                try_to_freeze();
 
-               spin_lock_bh(&serv->sv_lock);
+               spin_lock_bh(&pool->sp_lock);
                remove_wait_queue(&rqstp->rq_wait, &wait);
 
                if (!(svsk = rqstp->rq_sock)) {
-                       svc_serv_dequeue(serv, rqstp);
-                       spin_unlock_bh(&serv->sv_lock);
+                       svc_thread_dequeue(pool, rqstp);
+                       spin_unlock_bh(&pool->sp_lock);
                        dprintk("svc: server %p, no data yet\n", rqstp);
                        return signalled()? -EINTR : -EAGAIN;
                }
        }
-       spin_unlock_bh(&serv->sv_lock);
+       spin_unlock_bh(&pool->sp_lock);
 
-       dprintk("svc: server %p, socket %p, inuse=%d\n",
-                rqstp, svsk, svsk->sk_inuse);
+       dprintk("svc: server %p, pool %u, socket %p, inuse=%d\n",
+                rqstp, pool->sp_id, svsk, atomic_read(&svsk->sk_inuse));
        len = svsk->sk_recvfrom(rqstp);
        dprintk("svc: got len=%d\n", len);
 
@@ -1235,13 +1299,7 @@ svc_recv(struct svc_serv *serv, struct svc_rqst *rqstp, long timeout)
                return -EAGAIN;
        }
        svsk->sk_lastrecv = get_seconds();
-       if (test_bit(SK_TEMP, &svsk->sk_flags)) {
-               /* push active sockets to end of list */
-               spin_lock_bh(&serv->sv_lock);
-               if (!list_empty(&svsk->sk_list))
-                       list_move_tail(&svsk->sk_list, &serv->sv_tempsocks);
-               spin_unlock_bh(&serv->sv_lock);
-       }
+       clear_bit(SK_OLD, &svsk->sk_flags);
 
        rqstp->rq_secure  = ntohs(rqstp->rq_addr.sin_port) < 1024;
        rqstp->rq_chandle.defer = svc_defer;
@@ -1300,6 +1358,58 @@ svc_send(struct svc_rqst *rqstp)
        return len;
 }
 
+/*
+ * Timer function to close old temporary sockets, using
+ * a mark-and-sweep algorithm: a socket is queued for closing only if SK_OLD, set on an earlier run, is still set (svc_recv() clears it after a receive).
+ */
+static void
+svc_age_temp_sockets(unsigned long closure)
+{
+       struct svc_serv *serv = (struct svc_serv *)closure;     /* closure is the serv pointer given to setup_timer() */
+       struct svc_sock *svsk;
+       struct list_head *le, *next;
+       LIST_HEAD(to_be_aged);  /* private list of sockets selected during this run */
+
+       dprintk("svc_age_temp_sockets\n");
+
+       if (!spin_trylock_bh(&serv->sv_lock)) {
+               /* busy, try again 1 sec later */
+               dprintk("svc_age_temp_sockets: busy\n");
+               mod_timer(&serv->sv_temptimer, jiffies + HZ);
+               return;
+       }
+
+       list_for_each_safe(le, next, &serv->sv_tempsocks) {
+               svsk = list_entry(le, struct svc_sock, sk_list);
+
+               if (!test_and_set_bit(SK_OLD, &svsk->sk_flags)) /* mark: flag was clear, so set it and give the socket another period */
+                       continue;
+               if (atomic_read(&svsk->sk_inuse) || test_bit(SK_BUSY, &svsk->sk_flags)) /* still referenced or busy: leave it alone */
+                       continue;
+               atomic_inc(&svsk->sk_inuse);    /* reference held until the svc_sock_put() below */
+               list_move(le, &to_be_aged);
+               set_bit(SK_CLOSE, &svsk->sk_flags);
+               set_bit(SK_DETACHED, &svsk->sk_flags);  /* svc_delete_socket() skips unlinking sk_list when this is already set */
+       }
+       spin_unlock_bh(&serv->sv_lock);
+
+       while (!list_empty(&to_be_aged)) {
+               le = to_be_aged.next;
+               /* fiddling the sk_list node is safe 'cos we're SK_DETACHED */
+               list_del_init(le);
+               svsk = list_entry(le, struct svc_sock, sk_list);
+
+               dprintk("queuing svsk %p for closing, %lu seconds old\n",
+                       svsk, get_seconds() - svsk->sk_lastrecv);
+
+               /* a thread will dequeue and close it soon */
+               svc_sock_enqueue(svsk);
+               svc_sock_put(svsk);
+       }
+
+       mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);     /* re-arm for the next aging period */
+}
+
 /*
  * Initialize socket for RPC use and create svc_sock struct
  * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
@@ -1337,7 +1447,9 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
        svsk->sk_odata = inet->sk_data_ready;
        svsk->sk_owspace = inet->sk_write_space;
        svsk->sk_server = serv;
+       atomic_set(&svsk->sk_inuse, 0);
        svsk->sk_lastrecv = get_seconds();
+       spin_lock_init(&svsk->sk_defer_lock);
        INIT_LIST_HEAD(&svsk->sk_deferred);
        INIT_LIST_HEAD(&svsk->sk_ready);
        mutex_init(&svsk->sk_mutex);
@@ -1353,6 +1465,13 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
                set_bit(SK_TEMP, &svsk->sk_flags);
                list_add(&svsk->sk_list, &serv->sv_tempsocks);
                serv->sv_tmpcnt++;
+               if (serv->sv_temptimer.function == NULL) {
+                       /* setup timer to age temp sockets */
+                       setup_timer(&serv->sv_temptimer, svc_age_temp_sockets,
+                                       (unsigned long)serv);
+                       mod_timer(&serv->sv_temptimer,
+                                       jiffies + svc_conn_age_period * HZ);
+               }
        } else {
                clear_bit(SK_TEMP, &svsk->sk_flags);
                list_add(&svsk->sk_list, &serv->sv_permsocks);
@@ -1367,6 +1486,38 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock,
        return svsk;
 }
 
+int svc_addsock(struct svc_serv *serv,
+               int fd,
+               char *name_return,
+               int *proto)
+{
+       int err = 0;
+       struct socket *so = sockfd_lookup(fd, &err);    /* err is returned as-is below when no socket comes back */
+       struct svc_sock *svsk = NULL;
+
+       if (!so)
+               return err;
+       if (so->sk->sk_family != AF_INET)
+               err =  -EAFNOSUPPORT;
+       else if (so->sk->sk_protocol != IPPROTO_TCP &&
+           so->sk->sk_protocol != IPPROTO_UDP)
+               err =  -EPROTONOSUPPORT;
+       else if (so->state > SS_UNCONNECTED)    /* sockets in any state past SS_UNCONNECTED are refused */
+               err = -EISCONN;
+       else {
+               svsk = svc_setup_socket(serv, so, &err, 1);     /* NOTE(review): assumes err is non-zero whenever NULL is returned; otherwise svsk is NULL at one_sock_name() below - confirm */
+               if (svsk)
+                       err = 0;
+       }
+       if (err) {
+               sockfd_put(so); /* failure: release the socket looked up above */
+               return err;
+       }
+       if (proto) *proto = so->sk->sk_protocol;        /* proto is optional; there is no sockfd_put() on the success path */
+       return one_sock_name(name_return, svsk);        /* success: length of the name written into name_return */
+}
+EXPORT_SYMBOL_GPL(svc_addsock);
+
 /*
  * Create socket for RPC service.
  */
@@ -1434,15 +1585,25 @@ svc_delete_socket(struct svc_sock *svsk)
 
        spin_lock_bh(&serv->sv_lock);
 
-       list_del_init(&svsk->sk_list);
-       list_del_init(&svsk->sk_ready);
+       if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
+               list_del_init(&svsk->sk_list);
+       /*
+        * We used to delete the svc_sock from whichever list
+        * its sk_ready node was on, but we don't actually
+        * need to.  This is because the only time we're called
+        * while still attached to a queue, the queue itself
+        * is about to be destroyed (in svc_destroy).
+        */
        if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
                if (test_bit(SK_TEMP, &svsk->sk_flags))
                        serv->sv_tmpcnt--;
 
-       if (!svsk->sk_inuse) {
+       if (!atomic_read(&svsk->sk_inuse)) {
                spin_unlock_bh(&serv->sv_lock);
-               sock_release(svsk->sk_sock);
+               if (svsk->sk_sock->file)
+                       sockfd_put(svsk->sk_sock);
+               else
+                       sock_release(svsk->sk_sock);
                kfree(svsk);
        } else {
                spin_unlock_bh(&serv->sv_lock);
@@ -1473,7 +1634,6 @@ svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
 static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
 {
        struct svc_deferred_req *dr = container_of(dreq, struct svc_deferred_req, handle);
-       struct svc_serv *serv = dreq->owner;
        struct svc_sock *svsk;
 
        if (too_many) {
@@ -1484,9 +1644,9 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
        dprintk("revisit queued\n");
        svsk = dr->svsk;
        dr->svsk = NULL;
-       spin_lock_bh(&serv->sv_lock);
+       spin_lock_bh(&svsk->sk_defer_lock);
        list_add(&dr->handle.recent, &svsk->sk_deferred);
-       spin_unlock_bh(&serv->sv_lock);
+       spin_unlock_bh(&svsk->sk_defer_lock);
        set_bit(SK_DEFERRED, &svsk->sk_flags);
        svc_sock_enqueue(svsk);
        svc_sock_put(svsk);
@@ -1518,10 +1678,8 @@ svc_defer(struct cache_req *req)
                dr->argslen = rqstp->rq_arg.len >> 2;
                memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
        }
-       spin_lock_bh(&rqstp->rq_server->sv_lock);
-       rqstp->rq_sock->sk_inuse++;
+       atomic_inc(&rqstp->rq_sock->sk_inuse);
        dr->svsk = rqstp->rq_sock;
-       spin_unlock_bh(&rqstp->rq_server->sv_lock);
 
        dr->handle.revisit = svc_revisit;
        return &dr->handle;
@@ -1548,11 +1706,10 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
 static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
 {
        struct svc_deferred_req *dr = NULL;
-       struct svc_serv *serv = svsk->sk_server;
        
        if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
                return NULL;
-       spin_lock_bh(&serv->sv_lock);
+       spin_lock_bh(&svsk->sk_defer_lock);
        clear_bit(SK_DEFERRED, &svsk->sk_flags);
        if (!list_empty(&svsk->sk_deferred)) {
                dr = list_entry(svsk->sk_deferred.next,
@@ -1561,6 +1718,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
                list_del_init(&dr->handle.recent);
                set_bit(SK_DEFERRED, &svsk->sk_flags);
        }
-       spin_unlock_bh(&serv->sv_lock);
+       spin_unlock_bh(&svsk->sk_defer_lock);
        return dr;
 }
index 3ebc349..a444bfe 100644 (file)
@@ -96,11 +96,11 @@ static void snd_sndstat_proc_read(struct snd_info_entry *entry,
 {
        snd_iprintf(buffer, "Sound Driver:3.8.1a-980706 (ALSA v" CONFIG_SND_VERSION " emulation code)\n");
        snd_iprintf(buffer, "Kernel: %s %s %s %s %s\n",
-                   system_utsname.sysname,
-                   system_utsname.nodename,
-                   system_utsname.release,
-                   system_utsname.version,
-                   system_utsname.machine);
+                   init_utsname()->sysname,
+                   init_utsname()->nodename,
+                   init_utsname()->release,
+                   init_utsname()->version,
+                   init_utsname()->machine);
        snd_iprintf(buffer, "Config options: 0\n");
        snd_iprintf(buffer, "\nInstalled drivers: \n");
        snd_iprintf(buffer, "Type 10: ALSA emulation\n");