coredump: fix unfreezable coredumping task
[pandora-kernel.git] / fs / exec.c
index a2d0e51..3f8d8f3 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
+#include <linux/freezer.h>
 #include <linux/mm.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
@@ -55,6 +56,9 @@
 #include <linux/pipe_fs_i.h>
 #include <linux/oom.h>
 #include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -1282,6 +1286,45 @@ int check_unsafe_exec(struct linux_binprm *bprm)
        return res;
 }
 
+static void bprm_fill_uid(struct linux_binprm *bprm)
+{
+       struct inode *inode;
+       unsigned int mode;
+       uid_t uid;
+       gid_t gid;
+
+       /* clear any previous set[ug]id data from a previous binary */
+       bprm->cred->euid = current_euid();
+       bprm->cred->egid = current_egid();
+
+       if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+               return;
+
+       inode = bprm->file->f_path.dentry->d_inode;
+       mode = ACCESS_ONCE(inode->i_mode);
+       if (!(mode & (S_ISUID|S_ISGID)))
+               return;
+
+       /* Be careful if suid/sgid is set */
+       mutex_lock(&inode->i_mutex);
+
+       /* reload atomically mode/uid/gid now that lock held */
+       mode = inode->i_mode;
+       uid = inode->i_uid;
+       gid = inode->i_gid;
+       mutex_unlock(&inode->i_mutex);
+
+       if (mode & S_ISUID) {
+               bprm->per_clear |= PER_CLEAR_ON_SETID;
+               bprm->cred->euid = uid;
+       }
+
+       if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+               bprm->per_clear |= PER_CLEAR_ON_SETID;
+               bprm->cred->egid = gid;
+       }
+}
+
 /* 
  * Fill the binprm structure from the inode. 
  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1290,36 +1333,12 @@ int check_unsafe_exec(struct linux_binprm *bprm)
  */
 int prepare_binprm(struct linux_binprm *bprm)
 {
-       umode_t mode;
-       struct inode * inode = bprm->file->f_path.dentry->d_inode;
        int retval;
 
-       mode = inode->i_mode;
        if (bprm->file->f_op == NULL)
                return -EACCES;
 
-       /* clear any previous set[ug]id data from a previous binary */
-       bprm->cred->euid = current_euid();
-       bprm->cred->egid = current_egid();
-
-       if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) {
-               /* Set-uid? */
-               if (mode & S_ISUID) {
-                       bprm->per_clear |= PER_CLEAR_ON_SETID;
-                       bprm->cred->euid = inode->i_uid;
-               }
-
-               /* Set-gid? */
-               /*
-                * If setgid is set but no group execute bit then this
-                * is a candidate for mandatory locking, not a setgid
-                * executable.
-                */
-               if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-                       bprm->per_clear |= PER_CLEAR_ON_SETID;
-                       bprm->cred->egid = inode->i_gid;
-               }
-       }
+       bprm_fill_uid(bprm);
 
        /* fill in binprm security blob */
        retval = security_bprm_set_creds(bprm);
@@ -1956,8 +1975,11 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
                complete(vfork_done);
        }
 
-       if (core_waiters)
+       if (core_waiters > 0) {
+               freezer_do_not_count();
                wait_for_completion(&core_state->startup);
+               freezer_count();
+       }
 fail:
        return core_waiters;
 }
@@ -2032,6 +2054,12 @@ static int __get_dumpable(unsigned long mm_flags)
        return (ret >= 2) ? 2 : ret;
 }
 
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
 int get_dumpable(struct mm_struct *mm)
 {
        return __get_dumpable(mm->flags);
@@ -2113,8 +2141,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        const struct cred *old_cred;
        struct cred *cred;
        int retval = 0;
-       int flag = 0;
        int ispipe;
+       /* require nonrelative corefile path and be extra careful */
+       bool need_suid_safe = false;
        static atomic_t core_dump_count = ATOMIC_INIT(0);
        struct coredump_params cprm = {
                .signr = signr,
@@ -2140,14 +2169,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
        if (!cred)
                goto fail;
        /*
-        *      We cannot trust fsuid as being the "true" uid of the
-        *      process nor do we know its entire history. We only know it
-        *      was tainted so we dump it as root in mode 2.
+        * We cannot trust fsuid as being the "true" uid of the process
+        * nor do we know its entire history. We only know it was tainted
+        * so we dump it as root in mode 2, and only into a controlled
+        * environment (pipe handler or fully qualified path).
         */
        if (__get_dumpable(cprm.mm_flags) == 2) {
                /* Setuid core dump mode */
-               flag = O_EXCL;          /* Stop rewrite attacks */
                cred->fsuid = 0;        /* Dump root private */
+               need_suid_safe = true;
        }
 
        retval = coredump_wait(exit_code, &core_state);
@@ -2223,13 +2253,67 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
                }
        } else {
                struct inode *inode;
+               int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+                                O_LARGEFILE | O_EXCL;
 
                if (cprm.limit < binfmt->min_coredump)
                        goto fail_unlock;
 
-               cprm.file = filp_open(cn.corename,
-                                O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
-                                0600);
+               if (need_suid_safe && cn.corename[0] != '/') {
+                       printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+                               "to fully qualified path!\n",
+                               task_tgid_vnr(current), current->comm);
+                       printk(KERN_WARNING "Skipping core dump\n");
+                       goto fail_unlock;
+               }
+
+               /*
+                * Unlink the file if it exists unless this is a SUID
+                * binary - in that case, we're running around with root
+                * privs and don't want to unlink another user's coredump.
+                */
+               if (!need_suid_safe) {
+                       mm_segment_t old_fs;
+
+                       old_fs = get_fs();
+                       set_fs(KERNEL_DS);
+                       /*
+                        * If it doesn't exist, that's fine. If there's some
+                        * other problem, we'll catch it at the filp_open().
+                        */
+                       (void) sys_unlink((const char __user *)cn.corename);
+                       set_fs(old_fs);
+               }
+
+               /*
+                * There is a race between unlinking and creating the
+                * file, but if that causes an EEXIST here, that's
+                * fine - another process raced with us while creating
+                * the corefile, and the other process won. To userspace,
+                * what matters is that at least one of the two processes
+                * writes its coredump successfully, not which one.
+                */
+               if (need_suid_safe) {
+                       /*
+                        * Using user namespaces, normal user tasks can change
+                        * their current->fs->root to point to arbitrary
+                        * directories. Since the intention of the "only dump
+                        * with a fully qualified path" rule is to control where
+                        * coredumps may be placed using root privileges,
+                        * current->fs->root must not be used. Instead, use the
+                        * root directory of init_task.
+                        */
+                       struct path root;
+
+                       task_lock(&init_task);
+                       get_fs_root(init_task.fs, &root);
+                       task_unlock(&init_task);
+                       cprm.file = file_open_root(root.dentry, root.mnt,
+                               cn.corename, open_flags, 0600);
+                       path_put(&root);
+               } else {
+                       cprm.file = filp_open(cn.corename, open_flags, 0600);
+               }
                if (IS_ERR(cprm.file))
                        goto fail_unlock;