From b1bf6857ac304ee1c05cb3d804f70312e947887c Mon Sep 17 00:00:00 2001
From: Jann Horn
Date: Tue, 22 Mar 2016 14:25:36 -0700
Subject: [PATCH] fs/coredump: prevent fsuid=0 dumps into user-controlled directories

commit 378c6520e7d29280f400ef2ceaf155c86f05a71a upstream.

This commit fixes the following security hole affecting systems where
all of the following conditions are fulfilled:

 - The fs.suid_dumpable sysctl is set to 2.
 - The kernel.core_pattern sysctl's value starts with "/". (Systems
   where kernel.core_pattern starts with "|/" are not affected.)
 - Unprivileged user namespace creation is permitted. (This is
   true on Linux >=3.8, but some distributions disallow it by
   default using a distro patch.)

Under these conditions, if a program executes under secure exec rules,
causing it to run with the SUID_DUMP_ROOT flag, then unshares its user
namespace, changes its root directory and crashes, the coredump will be
written using fsuid=0 and a path derived from kernel.core_pattern - but
this path is interpreted relative to the root directory of the process,
allowing the attacker to control where a coredump will be written with
root privileges.

To fix the security issue, always interpret core_pattern for dumps that
are written under SUID_DUMP_ROOT relative to the root directory of init.

Signed-off-by: Jann Horn
Acked-by: Kees Cook
Cc: Al Viro
Cc: "Eric W. Biederman"
Cc: Andy Lutomirski
Cc: Oleg Nesterov
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
[bwh: Backported to 3.2: adjust filename, context]
Signed-off-by: Ben Hutchings
---
 arch/um/drivers/mconsole_kern.c |  2 +-
 fs/exec.c                       | 30 ++++++++++++++++++++++++++----
 fs/fhandle.c                    |  2 +-
 fs/open.c                       |  6 ++----
 include/linux/fs.h              |  2 +-
 kernel/sysctl_binary.c          |  2 +-
 6 files changed, 32 insertions(+), 12 deletions(-)

diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index c70e047eed72..f4a6af25dc72 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -133,7 +133,7 @@ void mconsole_proc(struct mc_request *req)
 	ptr += strlen("proc");
 	ptr = skip_spaces(ptr);
 
-	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY);
+	file = file_open_root(mnt->mnt_root, mnt, ptr, O_RDONLY, 0);
 	if (IS_ERR(file)) {
 		mconsole_reply(req, "Failed to open file", 1, 0);
 		goto out;
diff --git a/fs/exec.c b/fs/exec.c
index aba5e13a6a68..a0006d85785c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,9 @@
 #include
 #include
 #include
+#include
+#include
+#include
 
 #include
 #include
@@ -2246,6 +2249,8 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		}
 	} else {
 		struct inode *inode;
+		int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+				 O_LARGEFILE | O_EXCL;
 
 		if (cprm.limit < binfmt->min_coredump)
 			goto fail_unlock;
@@ -2284,10 +2289,27 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		 * what matters is that at least one of the two processes
 		 * writes its coredump successfully, not which one.
 		 */
-		cprm.file = filp_open(cn.corename,
-				 O_CREAT | 2 | O_NOFOLLOW |
-				 O_LARGEFILE | O_EXCL,
-				 0600);
+		if (need_suid_safe) {
+			/*
+			 * Using user namespaces, normal user tasks can change
+			 * their current->fs->root to point to arbitrary
+			 * directories. Since the intention of the "only dump
+			 * with a fully qualified path" rule is to control where
+			 * coredumps may be placed using root privileges,
+			 * current->fs->root must not be used. Instead, use the
+			 * root directory of init_task.
+			 */
+			struct path root;
+
+			task_lock(&init_task);
+			get_fs_root(init_task.fs, &root);
+			task_unlock(&init_task);
+			cprm.file = file_open_root(root.dentry, root.mnt,
+				cn.corename, open_flags, 0600);
+			path_put(&root);
+		} else {
+			cprm.file = filp_open(cn.corename, open_flags, 0600);
+		}
 		if (IS_ERR(cprm.file))
 			goto fail_unlock;
 
diff --git a/fs/fhandle.c b/fs/fhandle.c
index c9e18f3ecc41..710438a1e021 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -229,7 +229,7 @@ long do_handle_open(int mountdirfd,
 		path_put(&path);
 		return fd;
 	}
-	file = file_open_root(path.dentry, path.mnt, "", open_flag);
+	file = file_open_root(path.dentry, path.mnt, "", open_flag, 0);
 	if (IS_ERR(file)) {
 		put_unused_fd(fd);
 		retval = PTR_ERR(file);
diff --git a/fs/open.c b/fs/open.c
index b8485d3cef97..ca155d4f23d3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -958,12 +958,10 @@ struct file *filp_open(const char *filename, int flags, int mode)
 EXPORT_SYMBOL(filp_open);
 
 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
-			    const char *filename, int flags)
+			    const char *filename, int flags, umode_t mode)
 {
 	struct open_flags op;
-	int lookup = build_open_flags(flags, 0, &op);
-	if (flags & O_CREAT)
-		return ERR_PTR(-EINVAL);
+	int lookup = build_open_flags(flags, mode, &op);
 	if (!filename && (flags & O_DIRECTORY))
 		if (!dentry->d_inode->i_op->lookup)
 			return ERR_PTR(-ENOTDIR);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index dd743859f04e..44e856ba8a79 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2073,7 +2073,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags,
 			int mode);
 extern struct file *filp_open(const char *, int, int);
 extern struct file *file_open_root(struct dentry *, struct vfsmount *,
-				   const char *, int);
+				   const char *, int, umode_t);
 extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
 				 const struct cred *);
 extern int filp_close(struct file *, fl_owner_t id);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 9f9aa3205973..cd2ea02c5a9d 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1346,7 +1346,7 @@ static ssize_t binary_sysctl(const int *name, int nlen,
 	}
 
 	mnt = current->nsproxy->pid_ns->proc_mnt;
-	file = file_open_root(mnt->mnt_root, mnt, pathname, flags);
+	file = file_open_root(mnt->mnt_root, mnt, pathname, flags, 0);
 	result = PTR_ERR(file);
 	if (IS_ERR(file))
 		goto out_putname;
-- 
2.39.2