ptrace: mv send-SIGSTOP from do_fork() to ptrace_init_task()
[pandora-kernel.git] / kernel / fork.c
index 8e7e135..4d4117e 100644 (file)
@@ -37,7 +37,6 @@
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
-#include <linux/tracehook.h>
 #include <linux/futex.h>
 #include <linux/compat.h>
 #include <linux/kthread.h>
@@ -59,7 +58,6 @@
 #include <linux/taskstats_kern.h>
 #include <linux/random.h>
 #include <linux/tty.h>
-#include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
 #include <linux/magic.h>
@@ -485,20 +483,6 @@ static void mm_init_aio(struct mm_struct *mm)
 #endif
 }
 
-int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm)
-{
-#ifdef CONFIG_CPUMASK_OFFSTACK
-       if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL))
-               return -ENOMEM;
-
-       if (oldmm)
-               cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm));
-       else
-               memset(mm_cpumask(mm), 0, cpumask_size());
-#endif
-       return 0;
-}
-
 static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
 {
        atomic_set(&mm->mm_users, 1);
@@ -539,17 +523,8 @@ struct mm_struct * mm_alloc(void)
                return NULL;
 
        memset(mm, 0, sizeof(*mm));
-       mm = mm_init(mm, current);
-       if (!mm)
-               return NULL;
-
-       if (mm_init_cpumask(mm, NULL)) {
-               mm_free_pgd(mm);
-               free_mm(mm);
-               return NULL;
-       }
-
-       return mm;
+       mm_init_cpumask(mm);
+       return mm_init(mm, current);
 }
 
 /*
@@ -560,7 +535,6 @@ struct mm_struct * mm_alloc(void)
 void __mmdrop(struct mm_struct *mm)
 {
        BUG_ON(mm == &init_mm);
-       free_cpumask_var(mm->cpu_vm_mask_var);
        mm_free_pgd(mm);
        destroy_context(mm);
        mmu_notifier_mm_destroy(mm);
@@ -597,6 +571,57 @@ void mmput(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmput);
 
+/*
+ * We added or removed a vma mapping the executable. The vmas are only mapped
+ * during exec and are not mapped with the mmap system call.
+ * Callers must hold down_write() on the mm's mmap_sem for these
+ */
+void added_exe_file_vma(struct mm_struct *mm)
+{
+       mm->num_exe_file_vmas++;
+}
+
+void removed_exe_file_vma(struct mm_struct *mm)
+{
+       mm->num_exe_file_vmas--;
+       if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
+               fput(mm->exe_file);
+               mm->exe_file = NULL;
+       }
+
+}
+
+void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+{
+       if (new_exe_file)
+               get_file(new_exe_file);
+       if (mm->exe_file)
+               fput(mm->exe_file);
+       mm->exe_file = new_exe_file;
+       mm->num_exe_file_vmas = 0;
+}
+
+struct file *get_mm_exe_file(struct mm_struct *mm)
+{
+       struct file *exe_file;
+
+       /* We need mmap_sem to protect against races with removal of
+        * VM_EXECUTABLE vmas */
+       down_read(&mm->mmap_sem);
+       exe_file = mm->exe_file;
+       if (exe_file)
+               get_file(exe_file);
+       up_read(&mm->mmap_sem);
+       return exe_file;
+}
+
+static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
+{
+       /* It's safe to write the exe_file pointer without exe_file_lock because
+        * this is called during fork when the task is not yet in /proc */
+       newmm->exe_file = get_mm_exe_file(oldmm);
+}
+
 /**
  * get_task_mm - acquire a reference to the task's mm
  *
@@ -703,6 +728,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
                goto fail_nomem;
 
        memcpy(mm, oldmm, sizeof(*mm));
+       mm_init_cpumask(mm);
 
        /* Initializing for Swap token stuff */
        mm->token_priority = 0;
@@ -715,9 +741,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
        if (!mm_init(mm, tsk))
                goto fail_nomem;
 
-       if (mm_init_cpumask(mm, oldmm))
-               goto fail_nocpumask;
-
        if (init_new_context(tsk, mm))
                goto fail_nocontext;
 
@@ -744,9 +767,6 @@ fail_nomem:
        return NULL;
 
 fail_nocontext:
-       free_cpumask_var(mm->cpu_vm_mask_var);
-
-fail_nocpumask:
        /*
         * If init_new_context() failed, we cannot use mmput() to free the mm
         * because it calls destroy_context()
@@ -957,6 +977,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        tty_audit_fork(sig);
        sched_autogroup_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+       init_rwsem(&sig->threadgroup_fork_lock);
+#endif
+
        sig->oom_adj = current->signal->oom_adj;
        sig->oom_score_adj = current->signal->oom_score_adj;
        sig->oom_score_adj_min = current->signal->oom_score_adj_min;
@@ -1138,6 +1162,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        monotonic_to_bootbased(&p->real_start_time);
        p->io_context = NULL;
        p->audit_context = NULL;
+       if (clone_flags & CLONE_THREAD)
+               threadgroup_fork_read_lock(current);
        cgroup_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
@@ -1223,12 +1249,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (clone_flags & CLONE_THREAD)
                p->tgid = current->tgid;
 
-       if (current->nsproxy != p->nsproxy) {
-               retval = ns_cgroup_clone(p, pid);
-               if (retval)
-                       goto bad_fork_free_pid;
-       }
-
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
        /*
         * Clear TID on mm_release()?
@@ -1319,7 +1339,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        }
 
        if (likely(p->pid)) {
-               tracehook_finish_clone(p, clone_flags, trace);
+               ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 
                if (thread_group_leader(p)) {
                        if (is_child_reaper(pid))
@@ -1342,6 +1362,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        write_unlock_irq(&tasklist_lock);
        proc_fork_connector(p);
        cgroup_post_fork(p);
+       if (clone_flags & CLONE_THREAD)
+               threadgroup_fork_read_unlock(current);
        perf_event_fork(p);
        return p;
 
@@ -1380,6 +1402,8 @@ bad_fork_cleanup_policy:
        mpol_put(p->mempolicy);
 bad_fork_cleanup_cgroup:
 #endif
+       if (clone_flags & CLONE_THREAD)
+               threadgroup_fork_read_unlock(current);
        cgroup_exit(p, cgroup_callbacks_done);
        delayacct_tsk_free(p);
        module_put(task_thread_info(p)->exec_domain->module);
@@ -1456,10 +1480,22 @@ long do_fork(unsigned long clone_flags,
        }
 
        /*
-        * When called from kernel_thread, don't do user tracing stuff.
+        * Determine whether and which event to report to ptracer.  When
+        * called from kernel_thread or CLONE_UNTRACED is explicitly
+        * requested, no event is reported; otherwise, report if the event
+        * for the type of forking is enabled.
         */
-       if (likely(user_mode(regs)))
-               trace = tracehook_prepare_clone(clone_flags);
+       if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
+               if (clone_flags & CLONE_VFORK)
+                       trace = PTRACE_EVENT_VFORK;
+               else if ((clone_flags & CSIGNAL) != SIGCHLD)
+                       trace = PTRACE_EVENT_CLONE;
+               else
+                       trace = PTRACE_EVENT_FORK;
+
+               if (likely(!ptrace_event_enabled(current, trace)))
+                       trace = 0;
+       }
 
        p = copy_process(clone_flags, stack_start, regs, stack_size,
                         child_tidptr, NULL, trace);
@@ -1483,26 +1519,26 @@ long do_fork(unsigned long clone_flags,
                }
 
                audit_finish_fork(p);
-               tracehook_report_clone(regs, clone_flags, nr, p);
 
                /*
                 * We set PF_STARTING at creation in case tracing wants to
                 * use this to distinguish a fully live task from one that
-                * hasn't gotten to tracehook_report_clone() yet.  Now we
-                * clear it and set the child going.
+                * hasn't finished SIGSTOP raising yet.  Now we clear it
+                * and set the child going.
                 */
                p->flags &= ~PF_STARTING;
 
                wake_up_new_task(p);
 
-               tracehook_report_clone_complete(trace, regs,
-                                               clone_flags, nr, p);
+               /* forking complete and child started to run, tell ptracer */
+               if (unlikely(trace))
+                       ptrace_event(trace, nr);
 
                if (clone_flags & CLONE_VFORK) {
                        freezer_do_not_count();
                        wait_for_completion(&vfork);
                        freezer_count();
-                       tracehook_report_vfork_done(p, nr);
+                       ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
                }
        } else {
                nr = PTR_ERR(p);
@@ -1537,6 +1573,13 @@ void __init proc_caches_init(void)
        fs_cachep = kmem_cache_create("fs_cache",
                        sizeof(struct fs_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
+       /*
+        * FIXME! The "sizeof(struct mm_struct)" currently includes the
+        * whole struct cpumask for the OFFSTACK case. We could change
+        * this to *only* allocate as much of it as required by the
+        * maximum number of CPU's we can ever have.  The cpumask_allocation
+        * is at the end of the structure, exactly for that reason.
+        */
        mm_cachep = kmem_cache_create("mm_struct",
                        sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);