pandora: defconfig: update

[pandora-kernel.git] / fs / exec.c
diff --git a/fs/exec.c b/fs/exec.c

index 65785c9..62d48ca 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,6 +25,7 @@
  #include <linux/slab.h>
  #include <linux/file.h>
  #include <linux/fdtable.h>
+#include <linux/freezer.h>
  #include <linux/mm.h>
  #include <linux/stat.h>
  #include <linux/fcntl.h>
@@ -55,6 +56,9 @@
  #include <linux/pipe_fs_i.h>
  #include <linux/oom.h>
  #include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
  
  #include <asm/uaccess.h>
  #include <asm/mmu_context.h>
@@ -204,8 +208,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
  
         if (write) {
                 unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+               unsigned long ptr_size;
                 struct rlimit *rlim;
  
+               /*
+                * Since the stack will hold pointers to the strings, we
+                * must account for them as well.
+                *
+                * The size computed above covers the entire vma built so far,
+                * so each call here measures the running total (rather than
+                * just the size newly added by this arg page).  As a result,
+                * we must add the full size of the pointers on every call
+                * instead of accumulating it, so that on the last call to
+                * get_arg_page() the total reflects the complete, correct
+                * size.
+                */
+               ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+               if (ptr_size > ULONG_MAX - size)
+                       goto fail;
+               size += ptr_size;
+
                 acct_arg_size(bprm, size / PAGE_SIZE);
  
                 /*
@@ -223,13 +245,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
                  *    to work from.
                  */
                 rlim = current->signal->rlim;
-               if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
-                       put_page(page);
-                       return NULL;
-               }
+               if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+                       goto fail;
         }
  
         return page;
+
+fail:
+       put_page(page);
+       return NULL;
  }
  
  static void put_arg_page(struct page *page)
@@ -1971,8 +1995,11 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
                 complete(vfork_done);
         }
  
-       if (core_waiters)
+       if (core_waiters > 0) {
+               freezer_do_not_count();
                 wait_for_completion(&core_state->startup);
+               freezer_count();
+       }
  fail:
         return core_waiters;
  }
@@ -2115,8 +2142,8 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
         fd_install(0, rp);
         spin_lock(&cf->file_lock);
         fdt = files_fdtable(cf);
-       FD_SET(0, fdt->open_fds);
-       FD_CLR(0, fdt->close_on_exec);
+       __set_open_fd(0, fdt);
+       __clear_close_on_exec(0, fdt);
         spin_unlock(&cf->file_lock);
  
         /* and disallow core files too */
@@ -2134,9 +2161,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
         const struct cred *old_cred;
         struct cred *cred;
         int retval = 0;
-       int flag = 0;
         int ispipe;
-       bool need_nonrelative = false;
+       /* require nonrelative corefile path and be extra careful */
+       bool need_suid_safe = false;
         static atomic_t core_dump_count = ATOMIC_INIT(0);
         struct coredump_params cprm = {
                 .signr = signr,
@@ -2169,9 +2196,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
          */
         if (__get_dumpable(cprm.mm_flags) == 2) {
                 /* Setuid core dump mode */
-               flag = O_EXCL;          /* Stop rewrite attacks */
                 cred->fsuid = 0;        /* Dump root private */
-               need_nonrelative = true;
+               need_suid_safe = true;
         }
  
         retval = coredump_wait(exit_code, &core_state);
@@ -2247,11 +2273,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
                 }
         } else {
                 struct inode *inode;
+               int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+                                O_LARGEFILE | O_EXCL;
  
                 if (cprm.limit < binfmt->min_coredump)
                         goto fail_unlock;
  
-               if (need_nonrelative && cn.corename[0] != '/') {
+               if (need_suid_safe && cn.corename[0] != '/') {
                         printk(KERN_WARNING "Pid %d(%s) can only dump core "\
                                 "to fully qualified path!\n",
                                 task_tgid_vnr(current), current->comm);
@@ -2259,9 +2287,53 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
                         goto fail_unlock;
                 }
  
-               cprm.file = filp_open(cn.corename,
-                                O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
-                                0600);
+               /*
+                * Unlink the file if it exists unless this is a SUID
+                * binary - in that case, we're running around with root
+                * privs and don't want to unlink another user's coredump.
+                */
+               if (!need_suid_safe) {
+                       mm_segment_t old_fs;
+
+                       old_fs = get_fs();
+                       set_fs(KERNEL_DS);
+                       /*
+                        * If it doesn't exist, that's fine. If there's some
+                        * other problem, we'll catch it at the filp_open().
+                        */
+                       (void) sys_unlink((const char __user *)cn.corename);
+                       set_fs(old_fs);
+               }
+
+               /*
+                * There is a race between unlinking and creating the
+                * file, but if that causes an EEXIST here, that's
+                * fine - another process raced with us while creating
+                * the corefile, and the other process won. To userspace,
+                * what matters is that at least one of the two processes
+                * writes its coredump successfully, not which one.
+                */
+               if (need_suid_safe) {
+                       /*
+                        * Using user namespaces, normal user tasks can change
+                        * their current->fs->root to point to arbitrary
+                        * directories. Since the intention of the "only dump
+                        * with a fully qualified path" rule is to control where
+                        * coredumps may be placed using root privileges,
+                        * current->fs->root must not be used. Instead, use the
+                        * root directory of init_task.
+                        */
+                       struct path root;
+
+                       task_lock(&init_task);
+                       get_fs_root(init_task.fs, &root);
+                       task_unlock(&init_task);
+                       cprm.file = file_open_root(root.dentry, root.mnt,
+                               cn.corename, open_flags, 0600);
+                       path_put(&root);
+               } else {
+                       cprm.file = filp_open(cn.corename, open_flags, 0600);
+               }
                 if (IS_ERR(cprm.file))
                         goto fail_unlock;