#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/freezer.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
+ unsigned long ptr_size;
struct rlimit *rlim;
+ /*
+ * Since the stack will hold pointers to the strings, we
+ * must account for them as well.
+ *
+ * The size calculation is the entire vma while each arg page is
+ * built, so each time we get here it's calculating how far it
+ * is currently (rather than each call being just the newly
+ * added size from the arg page). As a result, we need to
+ * always add the entire size of the pointers, so that on the
+ * last call to get_arg_page() we'll actually have the entire
+ * correct size.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (ptr_size > ULONG_MAX - size)
+ goto fail;
+ size += ptr_size;
+
acct_arg_size(bprm, size / PAGE_SIZE);
/*
* to work from.
*/
rlim = current->signal->rlim;
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
- put_page(page);
- return NULL;
- }
+ if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4)
+ goto fail;
}
return page;
+
+fail:
+ put_page(page);
+ return NULL;
}
static void put_arg_page(struct page *page)
complete(vfork_done);
}
- if (core_waiters)
+ if (core_waiters > 0) {
+ freezer_do_not_count();
wait_for_completion(&core_state->startup);
+ freezer_count();
+ }
fail:
return core_waiters;
}
fd_install(0, rp);
spin_lock(&cf->file_lock);
fdt = files_fdtable(cf);
- FD_SET(0, fdt->open_fds);
- FD_CLR(0, fdt->close_on_exec);
+ __set_open_fd(0, fdt);
+ __clear_close_on_exec(0, fdt);
spin_unlock(&cf->file_lock);
/* and disallow core files too */
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
- int flag = 0;
int ispipe;
- bool need_nonrelative = false;
+ /* require nonrelative corefile path and be extra careful */
+ bool need_suid_safe = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.signr = signr,
*/
if (__get_dumpable(cprm.mm_flags) == 2) {
/* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
cred->fsuid = 0; /* Dump root private */
- need_nonrelative = true;
+ need_suid_safe = true;
}
retval = coredump_wait(exit_code, &core_state);
}
} else {
struct inode *inode;
+ int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+ O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
- if (need_nonrelative && cn.corename[0] != '/') {
+ if (need_suid_safe && cn.corename[0] != '/') {
printk(KERN_WARNING "Pid %d(%s) can only dump core "\
"to fully qualified path!\n",
task_tgid_vnr(current), current->comm);
goto fail_unlock;
}
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
+ /*
+ * Unlink the file if it exists unless this is a SUID
+ * binary - in that case, we're running around with root
+ * privs and don't want to unlink another user's coredump.
+ */
+ if (!need_suid_safe) {
+ mm_segment_t old_fs;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ /*
+ * If it doesn't exist, that's fine. If there's some
+ * other problem, we'll catch it at the filp_open().
+ */
+ (void) sys_unlink((const char __user *)cn.corename);
+ set_fs(old_fs);
+ }
+
+ /*
+ * There is a race between unlinking and creating the
+ * file, but if that causes an EEXIST here, that's
+ * fine - another process raced with us while creating
+ * the corefile, and the other process won. To userspace,
+ * what matters is that at least one of the two processes
+ * writes its coredump successfully, not which one.
+ */
+ if (need_suid_safe) {
+ /*
+ * Using user namespaces, normal user tasks can change
+ * their current->fs->root to point to arbitrary
+ * directories. Since the intention of the "only dump
+ * with a fully qualified path" rule is to control where
+ * coredumps may be placed using root privileges,
+ * current->fs->root must not be used. Instead, use the
+ * root directory of init_task.
+ */
+ struct path root;
+
+ task_lock(&init_task);
+ get_fs_root(init_task.fs, &root);
+ task_unlock(&init_task);
+ cprm.file = file_open_root(root.dentry, root.mnt,
+ cn.corename, open_flags, 0600);
+ path_put(&root);
+ } else {
+ cprm.file = filp_open(cn.corename, open_flags, 0600);
+ }
if (IS_ERR(cprm.file))
goto fail_unlock;