X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=fs%2Fexec.c;h=1554e549c1de4102e2c1e3044d18939075671e20;hp=36254645b7cc4d81f7c73b2940767fe06de1642c;hb=cea299eb189fca09c413432b807abd607385b3bc;hpb=30307c69d59b14723fbf8a524847b302388c702d diff --git a/fs/exec.c b/fs/exec.c index 36254645b7cc..1554e549c1de 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,9 @@ #include #include #include +#include +#include +#include #include #include @@ -204,8 +208,26 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (write) { unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; + unsigned long ptr_size; struct rlimit *rlim; + /* + * Since the stack will hold pointers to the strings, we + * must account for them as well. + * + * The size calculation is the entire vma while each arg page is + * built, so each time we get here it's calculating how far it + * is currently (rather than each call being just the newly + * added size from the arg page). As a result, we need to + * always add the entire size of the pointers, so that on the + * last call to get_arg_page() we'll actually have the entire + * correct size. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (ptr_size > ULONG_MAX - size) + goto fail; + size += ptr_size; + acct_arg_size(bprm, size / PAGE_SIZE); /* @@ -223,13 +245,15 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, * to work from. */ rlim = current->signal->rlim; - if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) { - put_page(page); - return NULL; - } + if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) + goto fail; } return page; + +fail: + put_page(page); + return NULL; } static void put_arg_page(struct page *page) @@ -973,6 +997,9 @@ static int de_thread(struct task_struct *tsk) sig->notify_count = 0; no_thread_group: + /* we have changed execution domain */ + tsk->exit_signal = SIGCHLD; + if (current->mm) setmax_mm_hiwater_rss(&sig->maxrss, current->mm); @@ -1092,7 +1119,8 @@ int flush_old_exec(struct linux_binprm * bprm) bprm->mm = NULL; /* We're using it now */ set_fs(USER_DS); - current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); + current->flags &= + ~(PF_RANDOMIZE | PF_KTHREAD | PF_NOFREEZE | PF_FREEZER_NOSIG); flush_thread(); current->personality &= ~bprm->per_clear; @@ -1155,13 +1183,6 @@ void setup_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, suid_dumpable); } - /* - * Flush performance counters when crossing a - * security domain: - */ - if (!get_dumpable(current->mm)) - perf_event_exit_task(current); - /* An exec changes our domain. We are no longer part of the thread group */ @@ -1198,9 +1219,24 @@ void free_bprm(struct linux_binprm *bprm) mutex_unlock(¤t->signal->cred_guard_mutex); abort_creds(bprm->cred); } + /* If a binfmt changed the interp, free it. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); kfree(bprm); } +int bprm_change_interp(char *interp, struct linux_binprm *bprm) +{ + /* If a binfmt changed the interp, free it first. */ + if (bprm->interp != bprm->filename) + kfree(bprm->interp); + bprm->interp = kstrdup(interp, GFP_KERNEL); + if (!bprm->interp) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL(bprm_change_interp); + /* * install the new credentials for this executable */ @@ -1210,6 +1246,15 @@ void install_exec_creds(struct linux_binprm *bprm) commit_creds(bprm->cred); bprm->cred = NULL; + + /* + * Disable monitoring for regular users + * when executing setuid binaries. Must + * wait until new credentials are committed + * by commit_creds() above + */ + if (get_dumpable(current->mm) != SUID_DUMP_USER) + perf_event_exit_task(current); /* * cred_guard_mutex must be held at least to this point to prevent * ptrace_attach() from altering our determination of the task's @@ -1261,6 +1306,45 @@ int check_unsafe_exec(struct linux_binprm *bprm) return res; } +static void bprm_fill_uid(struct linux_binprm *bprm) +{ + struct inode *inode; + unsigned int mode; + uid_t uid; + gid_t gid; + + /* clear any previous set[ug]id data from a previous binary */ + bprm->cred->euid = current_euid(); + bprm->cred->egid = current_egid(); + + if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) + return; + + inode = bprm->file->f_path.dentry->d_inode; + mode = ACCESS_ONCE(inode->i_mode); + if (!(mode & (S_ISUID|S_ISGID))) + return; + + /* Be careful if suid/sgid is set */ + mutex_lock(&inode->i_mutex); + + /* reload atomically mode/uid/gid now that lock held */ + mode = inode->i_mode; + uid = inode->i_uid; + gid = inode->i_gid; + mutex_unlock(&inode->i_mutex); + + if (mode & S_ISUID) { + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->euid = uid; + } + + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->egid = gid; + } +} + /* * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes @@ -1269,36 +1353,12 @@ int check_unsafe_exec(struct linux_binprm *bprm) */ int prepare_binprm(struct linux_binprm *bprm) { - umode_t mode; - struct inode * inode = bprm->file->f_path.dentry->d_inode; int retval; - mode = inode->i_mode; if (bprm->file->f_op == NULL) return -EACCES; - /* clear any previous set[ug]id data from a previous binary */ - bprm->cred->euid = current_euid(); - bprm->cred->egid = current_egid(); - - if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)) { - /* Set-uid? */ - if (mode & S_ISUID) { - bprm->per_clear |= PER_CLEAR_ON_SETID; - bprm->cred->euid = inode->i_uid; - } - - /* Set-gid? */ - /* - * If setgid is set but no group execute bit then this - * is a candidate for mandatory locking, not a setgid - * executable. - */ - if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - bprm->per_clear |= PER_CLEAR_ON_SETID; - bprm->cred->egid = inode->i_gid; - } - } + bprm_fill_uid(bprm); /* fill in binprm security blob */ retval = security_bprm_set_creds(bprm); @@ -1366,6 +1426,10 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) struct linux_binfmt *fmt; pid_t old_pid; + /* This allows 4 levels of binfmt rewrites before failing hard. */ + if (depth > 5) + return -ELOOP; + retval = security_bprm_check(bprm); if (retval) return retval; @@ -1389,12 +1453,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); + bprm->recursion_depth = depth + 1; retval = fn(bprm, regs); - /* - * Restore the depth counter to its starting value - * in this call, so we don't have to rely on every - * load_binary function to restore it on return. - */ bprm->recursion_depth = depth; if (retval >= 0) { if (depth == 0) @@ -1935,8 +1995,11 @@ static int coredump_wait(int exit_code, struct core_state *core_state) complete(vfork_done); } - if (core_waiters) + if (core_waiters > 0) { + freezer_do_not_count(); wait_for_completion(&core_state->startup); + freezer_count(); + } fail: return core_waiters; } @@ -2011,6 +2074,12 @@ static int __get_dumpable(unsigned long mm_flags) return (ret >= 2) ? 2 : ret; } +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); @@ -2092,8 +2161,9 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) const struct cred *old_cred; struct cred *cred; int retval = 0; - int flag = 0; int ispipe; + /* require nonrelative corefile path and be extra careful */ + bool need_suid_safe = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .signr = signr, @@ -2119,14 +2189,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) if (!cred) goto fail; /* - * We cannot trust fsuid as being the "true" uid of the - * process nor do we know its entire history. We only know it - * was tainted so we dump it as root in mode 2. + * We cannot trust fsuid as being the "true" uid of the process + * nor do we know its entire history. We only know it was tainted + * so we dump it as root in mode 2, and only into a controlled + * environment (pipe handler or fully qualified path). */ if (__get_dumpable(cprm.mm_flags) == 2) { /* Setuid core dump mode */ - flag = O_EXCL; /* Stop rewrite attacks */ cred->fsuid = 0; /* Dump root private */ + need_suid_safe = true; } retval = coredump_wait(exit_code, &core_state); @@ -2202,13 +2273,67 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) } } else { struct inode *inode; + int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW | + O_LARGEFILE | O_EXCL; if (cprm.limit < binfmt->min_coredump) goto fail_unlock; - cprm.file = filp_open(cn.corename, - O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, - 0600); + if (need_suid_safe && cn.corename[0] != '/') { + printk(KERN_WARNING "Pid %d(%s) can only dump core "\ + "to fully qualified path!\n", + task_tgid_vnr(current), current->comm); + printk(KERN_WARNING "Skipping core dump\n"); + goto fail_unlock; + } + + /* + * Unlink the file if it exists unless this is a SUID + * binary - in that case, we're running around with root + * privs and don't want to unlink another user's coredump. + */ + if (!need_suid_safe) { + mm_segment_t old_fs; + + old_fs = get_fs(); + set_fs(KERNEL_DS); + /* + * If it doesn't exist, that's fine. If there's some + * other problem, we'll catch it at the filp_open(). + */ + (void) sys_unlink((const char __user *)cn.corename); + set_fs(old_fs); + } + + /* + * There is a race between unlinking and creating the + * file, but if that causes an EEXIST here, that's + * fine - another process raced with us while creating + * the corefile, and the other process won. To userspace, + * what matters is that at least one of the two processes + * writes its coredump successfully, not which one. + */ + if (need_suid_safe) { + /* + * Using user namespaces, normal user tasks can change + * their current->fs->root to point to arbitrary + * directories. Since the intention of the "only dump + * with a fully qualified path" rule is to control where + * coredumps may be placed using root privileges, + * current->fs->root must not be used. Instead, use the + * root directory of init_task. + */ + struct path root; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + cprm.file = file_open_root(root.dentry, root.mnt, + cn.corename, open_flags, 0600); + path_put(&root); + } else { + cprm.file = filp_open(cn.corename, open_flags, 0600); + } if (IS_ERR(cprm.file)) goto fail_unlock;