Merge branch 'i2c-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvar...
[pandora-kernel.git] / arch / s390 / mm / fault.c
index ab98813..a0f9e73 100644 (file)
@@ -225,33 +225,6 @@ static noinline void do_sigbus(struct pt_regs *regs, long int_code,
        force_sig_info(SIGBUS, &si, tsk);
 }
 
-#ifdef CONFIG_S390_EXEC_PROTECT
-static noinline int signal_return(struct pt_regs *regs, long int_code,
-                                 unsigned long trans_exc_code)
-{
-       u16 instruction;
-       int rc;
-
-       rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
-
-       if (!rc && instruction == 0x0a77) {
-               clear_tsk_thread_flag(current, TIF_PER_TRAP);
-               if (is_compat_task())
-                       sys32_sigreturn();
-               else
-                       sys_sigreturn();
-       } else if (!rc && instruction == 0x0aad) {
-               clear_tsk_thread_flag(current, TIF_PER_TRAP);
-               if (is_compat_task())
-                       sys32_rt_sigreturn();
-               else
-                       sys_rt_sigreturn();
-       } else
-               do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code);
-       return 0;
-}
-#endif /* CONFIG_S390_EXEC_PROTECT */
-
 static noinline void do_fault_error(struct pt_regs *regs, long int_code,
                                    unsigned long trans_exc_code, int fault)
 {
@@ -259,13 +232,6 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code,
 
        switch (fault) {
        case VM_FAULT_BADACCESS:
-#ifdef CONFIG_S390_EXEC_PROTECT
-               if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
-                   (trans_exc_code & 3) == 0) {
-                       signal_return(regs, int_code, trans_exc_code);
-                       break;
-               }
-#endif /* CONFIG_S390_EXEC_PROTECT */
        case VM_FAULT_BADMAP:
                /* Bad memory access. Check if it is kernel or user space. */
                if (regs->psw.mask & PSW_MASK_PSTATE) {
@@ -414,11 +380,6 @@ void __kprobes do_dat_exception(struct pt_regs *regs, long pgm_int_code,
        int access, fault;
 
        access = VM_READ | VM_EXEC | VM_WRITE;
-#ifdef CONFIG_S390_EXEC_PROTECT
-       if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
-           (trans_exc_code & 3) == 0)
-               access = VM_EXEC;
-#endif
        fault = do_exception(regs, access, trans_exc_code);
        if (unlikely(fault))
                do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault);
@@ -491,22 +452,28 @@ static int __init nopfault(char *str)
 
 __setup("nopfault", nopfault);
 
-typedef struct {
-       __u16 refdiagc;
-       __u16 reffcode;
-       __u16 refdwlen;
-       __u16 refversn;
-       __u64 refgaddr;
-       __u64 refselmk;
-       __u64 refcmpmk;
-       __u64 reserved;
-} __attribute__ ((packed, aligned(8))) pfault_refbk_t;
+struct pfault_refbk {
+       u16 refdiagc;
+       u16 reffcode;
+       u16 refdwlen;
+       u16 refversn;
+       u64 refgaddr;
+       u64 refselmk;
+       u64 refcmpmk;
+       u64 reserved;
+} __attribute__ ((packed, aligned(8)));
 
 int pfault_init(void)
 {
-       pfault_refbk_t refbk =
-               { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
-                 __PF_RES_FIELD };
+       struct pfault_refbk refbk = {
+               .refdiagc = 0x258,
+               .reffcode = 0,
+               .refdwlen = 5,
+               .refversn = 2,
+               .refgaddr = __LC_CURRENT_PID,
+               .refselmk = 1ULL << 48,
+               .refcmpmk = 1ULL << 48,
+               .reserved = __PF_RES_FIELD };
         int rc;
 
        if (!MACHINE_IS_VM || pfault_disable)
@@ -524,8 +491,12 @@ int pfault_init(void)
 
 void pfault_fini(void)
 {
-       pfault_refbk_t refbk =
-       { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL };
+       struct pfault_refbk refbk = {
+               .refdiagc = 0x258,
+               .reffcode = 1,
+               .refdwlen = 5,
+               .refversn = 2,
+       };
 
        if (!MACHINE_IS_VM || pfault_disable)
                return;
@@ -537,11 +508,15 @@ void pfault_fini(void)
                : : "a" (&refbk), "m" (refbk) : "cc");
 }
 
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
 static void pfault_interrupt(unsigned int ext_int_code,
                             unsigned int param32, unsigned long param64)
 {
        struct task_struct *tsk;
        __u16 subcode;
+       pid_t pid;
 
        /*
         * Get the external interruption subcode & pfault
@@ -553,44 +528,79 @@ static void pfault_interrupt(unsigned int ext_int_code,
        if ((subcode & 0xff00) != __SUBCODE_MASK)
                return;
        kstat_cpu(smp_processor_id()).irqs[EXTINT_PFL]++;
-
-       /*
-        * Get the token (= address of the task structure of the affected task).
-        */
-#ifdef CONFIG_64BIT
-       tsk = (struct task_struct *) param64;
-#else
-       tsk = (struct task_struct *) param32;
-#endif
-
+       if (subcode & 0x0080) {
+               /* Get the token (= pid of the affected task). */
+               pid = sizeof(void *) == 4 ? param32 : param64;
+               rcu_read_lock();
+               tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+               if (tsk)
+                       get_task_struct(tsk);
+               rcu_read_unlock();
+               if (!tsk)
+                       return;
+       } else {
+               tsk = current;
+       }
+       spin_lock(&pfault_lock);
        if (subcode & 0x0080) {
                /* signal bit is set -> a page has been swapped in by VM */
-               if (xchg(&tsk->thread.pfault_wait, -1) != 0) {
+               if (tsk->thread.pfault_wait == 1) {
                        /* Initial interrupt was faster than the completion
                         * interrupt. pfault_wait is valid. Set pfault_wait
                         * back to zero and wake up the process. This can
                         * safely be done because the task is still sleeping
                         * and can't produce new pfaults. */
                        tsk->thread.pfault_wait = 0;
+                       list_del(&tsk->thread.list);
                        wake_up_process(tsk);
-                       put_task_struct(tsk);
+               } else {
+                       /* Completion interrupt was faster than initial
+                        * interrupt. Set pfault_wait to -1 so the initial
+                        * interrupt doesn't put the task to sleep. */
+                       tsk->thread.pfault_wait = -1;
                }
+               put_task_struct(tsk);
        } else {
                /* signal bit not set -> a real page is missing. */
-               get_task_struct(tsk);
-               set_task_state(tsk, TASK_UNINTERRUPTIBLE);
-               if (xchg(&tsk->thread.pfault_wait, 1) != 0) {
+               if (tsk->thread.pfault_wait == -1) {
                        /* Completion interrupt was faster than the initial
-                        * interrupt (swapped in a -1 for pfault_wait). Set
-                        * pfault_wait back to zero and exit. This can be
-                        * done safely because tsk is running in kernel 
-                        * mode and can't produce new pfaults. */
+                        * interrupt (pfault_wait == -1). Set pfault_wait
+                        * back to zero and exit. */
                        tsk->thread.pfault_wait = 0;
-                       set_task_state(tsk, TASK_RUNNING);
-                       put_task_struct(tsk);
-               } else
+               } else {
+                       /* Initial interrupt arrived before completion
+                        * interrupt. Let the task sleep. */
+                       tsk->thread.pfault_wait = 1;
+                       list_add(&tsk->thread.list, &pfault_list);
+                       set_task_state(tsk, TASK_UNINTERRUPTIBLE);
                        set_tsk_need_resched(tsk);
+               }
+       }
+       spin_unlock(&pfault_lock);
+}
+
+static int __cpuinit pfault_cpu_notify(struct notifier_block *self,
+                                      unsigned long action, void *hcpu)
+{
+       struct thread_struct *thread, *next;
+       struct task_struct *tsk;
+
+       switch (action) {
+       case CPU_DEAD:
+       case CPU_DEAD_FROZEN:
+               spin_lock_irq(&pfault_lock);
+               list_for_each_entry_safe(thread, next, &pfault_list, list) {
+                       thread->pfault_wait = 0;
+                       list_del(&thread->list);
+                       tsk = container_of(thread, struct task_struct, thread);
+                       wake_up_process(tsk);
+               }
+               spin_unlock_irq(&pfault_lock);
+               break;
+       default:
+               break;
        }
+       return NOTIFY_OK;
 }
 
 static int __init pfault_irq_init(void)
@@ -599,22 +609,21 @@ static int __init pfault_irq_init(void)
 
        if (!MACHINE_IS_VM)
                return 0;
-       /*
-        * Try to get pfault pseudo page faults going.
-        */
        rc = register_external_interrupt(0x2603, pfault_interrupt);
-       if (rc) {
-               pfault_disable = 1;
-               return rc;
-       }
-       if (pfault_init() == 0)
-               return 0;
+       if (rc)
+               goto out_extint;
+       rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+       if (rc)
+               goto out_pfault;
+       hotcpu_notifier(pfault_cpu_notify, 0);
+       return 0;
 
-       /* Tough luck, no pfault. */
-       pfault_disable = 1;
+out_pfault:
        unregister_external_interrupt(0x2603, pfault_interrupt);
-       return 0;
+out_extint:
+       pfault_disable = 1;
+       return rc;
 }
 early_initcall(pfault_irq_init);
 
-#endif
+#endif /* CONFIG_PFAULT */