x86, vdso: Remove compat vdso support
author Andy Lutomirski <luto@amacapital.net>
Thu, 13 Mar 2014 23:01:26 +0000 (16:01 -0700)
committer H. Peter Anvin <hpa@linux.intel.com>
Thu, 13 Mar 2014 23:20:09 +0000 (16:20 -0700)
The compat vDSO is a complicated hack that's needed to maintain
compatibility with a small range of glibc versions.

This removes it and replaces it with a much simpler hack: a config
option to disable the 32-bit vDSO by default.

This also changes the default value of CONFIG_COMPAT_VDSO to n --
users configuring kernels from scratch almost certainly want that
choice.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Link: http://lkml.kernel.org/r/4bb4690899106eb11430b1186d5cc66ca9d1660c.1394751608.git.luto@amacapital.net
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Documentation/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/include/asm/elf.h
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/vdso.h
arch/x86/vdso/vdso-layout.lds.S
arch/x86/vdso/vdso32-setup.c
arch/x86/vdso/vdso32/vdso32.lds.S

index 7116fda..8601975 100644 (file)
@@ -3409,14 +3409,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                                        of CONFIG_HIGHPTE.
 
        vdso=           [X86,SH]
-                       vdso=2: enable compat VDSO (default with COMPAT_VDSO)
-                       vdso=1: enable VDSO (default)
+                       On X86_32, this is an alias for vdso32=.  Otherwise:
+
+                       vdso=1: enable VDSO (the default)
                        vdso=0: disable VDSO mapping
 
-       vdso32=         [X86]
-                       vdso32=2: enable compat VDSO (default with COMPAT_VDSO)
-                       vdso32=1: enable 32-bit VDSO (default)
-                       vdso32=0: disable 32-bit VDSO mapping
+       vdso32=         [X86] Control the 32-bit vDSO
+                       vdso32=1: enable 32-bit VDSO
+                       vdso32=0 or vdso32=2: disable 32-bit VDSO
+
+                       See the help text for CONFIG_COMPAT_VDSO for more
+                       details.  If CONFIG_COMPAT_VDSO is set, the default is
+                       vdso32=0; otherwise, the default is vdso32=1.
+
+                       For compatibility with older kernels, vdso32=2 is an
+                       alias for vdso32=0.
+
+                       Try vdso32=0 if you encounter an error that says:
+                       dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
 
        vector=         [IA-64,SMP]
                        vector=percpu: enable percpu vector domain
index 0af5250..9122f6b 100644 (file)
@@ -1836,17 +1836,29 @@ config DEBUG_HOTPLUG_CPU0
          If unsure, say N.
 
 config COMPAT_VDSO
-       def_bool y
-       prompt "Compat VDSO support"
+       def_bool n
+       prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
        depends on X86_32 || IA32_EMULATION
        ---help---
-         Map the 32-bit VDSO to the predictable old-style address too.
+         Certain buggy versions of glibc will crash if they are
+         presented with a 32-bit vDSO that is not mapped at the address
+         indicated in its segment table.
 
-         Say N here if you are running a sufficiently recent glibc
-         version (2.3.3 or later), to remove the high-mapped
-         VDSO mapping and to exclusively use the randomized VDSO.
+         The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
+         and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
+         49ad572a70b8aeb91e57483a11dd1b77e31c4468.  Glibc 2.3.3 is
+         the only released version with the bug, but OpenSUSE 9
+         contains a buggy "glibc 2.3.2".
 
-         If unsure, say Y.
+         The symptom of the bug is that everything crashes on startup, saying:
+         dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
+
+         Saying Y here changes the default value of the vdso32 boot
+         option from 1 to 0, which turns off the 32-bit vDSO entirely.
+         This works around the glibc bug but hurts performance.
+
+         If unsure, say N: if you are compiling your own kernel, you
+         are unlikely to be using a buggy version of glibc.
 
 config CMDLINE_BOOL
        bool "Built-in kernel command line"
index 9c999c1..2c71182 100644 (file)
@@ -281,16 +281,12 @@ do {                                                                      \
 
 #define STACK_RND_MASK (0x7ff)
 
-#define VDSO_HIGH_BASE         (__fix_to_virt(FIX_VDSO))
-
 #define ARCH_DLINFO            ARCH_DLINFO_IA32(vdso_enabled)
 
 /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */
 
 #else /* CONFIG_X86_32 */
 
-#define VDSO_HIGH_BASE         0xffffe000U /* CONFIG_COMPAT_VDSO address */
-
 /* 1GB for 64bit, 8MB for 32bit */
 #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
 
index 7252cd3..2377f56 100644 (file)
  */
 extern unsigned long __FIXADDR_TOP;
 #define FIXADDR_TOP    ((unsigned long)__FIXADDR_TOP)
-
-#define FIXADDR_USER_START     __fix_to_virt(FIX_VDSO)
-#define FIXADDR_USER_END       __fix_to_virt(FIX_VDSO - 1)
 #else
 #define FIXADDR_TOP    (VSYSCALL_END-PAGE_SIZE)
-
-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
-#define FIXADDR_USER_START     ((unsigned long)VSYSCALL32_VSYSCALL)
-#define FIXADDR_USER_END       (FIXADDR_USER_START + PAGE_SIZE)
 #endif
 
 
@@ -74,7 +67,6 @@ extern unsigned long __FIXADDR_TOP;
 enum fixed_addresses {
 #ifdef CONFIG_X86_32
        FIX_HOLE,
-       FIX_VDSO,
 #else
        VSYSCALL_LAST_PAGE,
        VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
index fddb53d..5594e84 100644 (file)
@@ -2,8 +2,6 @@
 #define _ASM_X86_VDSO_H
 
 #if defined CONFIG_X86_32 || defined CONFIG_COMPAT
-extern const char VDSO32_PRELINK[];
-
 /*
  * Given a pointer to the vDSO image, find the pointer to VDSO32_name
  * as that symbol is defined in the vDSO sources or linker script.
@@ -11,8 +9,7 @@ extern const char VDSO32_PRELINK[];
 #define VDSO32_SYMBOL(base, name)                                      \
 ({                                                                     \
        extern const char VDSO32_##name[];                              \
-       (void __user *)(VDSO32_##name - VDSO32_PRELINK +                \
-                       (unsigned long)(base));                         \
+       (void __user *)(VDSO32_##name + (unsigned long)(base));         \
 })
 #endif
 
index 634a2cf..8c550c1 100644 (file)
@@ -6,7 +6,7 @@
 
 SECTIONS
 {
-       . = VDSO_PRELINK + SIZEOF_HEADERS;
+       . = SIZEOF_HEADERS;
 
        .hash           : { *(.hash) }                  :text
        .gnu.hash       : { *(.gnu.hash) }
index d6bfb87..ab20c04 100644 (file)
 #include <asm/vdso.h>
 #include <asm/proto.h>
 
-enum {
-       VDSO_DISABLED = 0,
-       VDSO_ENABLED = 1,
-       VDSO_COMPAT = 2,
-};
-
 #ifdef CONFIG_COMPAT_VDSO
-#define VDSO_DEFAULT   VDSO_COMPAT
+#define VDSO_DEFAULT   0
 #else
-#define VDSO_DEFAULT   VDSO_ENABLED
+#define VDSO_DEFAULT   1
 #endif
 
 #ifdef CONFIG_X86_64
@@ -43,13 +37,6 @@ enum {
 #define arch_setup_additional_pages    syscall32_setup_pages
 #endif
 
-/*
- * This is the difference between the prelinked addresses in the vDSO images
- * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO
- * in the user address space.
- */
-#define VDSO_ADDR_ADJUST       (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK)
-
 /*
  * Should the kernel map a VDSO page into processes and pass its
  * address down to glibc upon exec()?
@@ -60,6 +47,9 @@ static int __init vdso_setup(char *s)
 {
        vdso_enabled = simple_strtoul(s, NULL, 0);
 
+       if (vdso_enabled > 1)
+               pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n");
+
        return 1;
 }
 
@@ -76,123 +66,6 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0);
 EXPORT_SYMBOL_GPL(vdso_enabled);
 #endif
 
-static __init void reloc_symtab(Elf32_Ehdr *ehdr,
-                               unsigned offset, unsigned size)
-{
-       Elf32_Sym *sym = (void *)ehdr + offset;
-       unsigned nsym = size / sizeof(*sym);
-       unsigned i;
-
-       for(i = 0; i < nsym; i++, sym++) {
-               if (sym->st_shndx == SHN_UNDEF ||
-                   sym->st_shndx == SHN_ABS)
-                       continue;  /* skip */
-
-               if (sym->st_shndx > SHN_LORESERVE) {
-                       printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
-                              sym->st_shndx);
-                       continue;
-               }
-
-               switch(ELF_ST_TYPE(sym->st_info)) {
-               case STT_OBJECT:
-               case STT_FUNC:
-               case STT_SECTION:
-               case STT_FILE:
-                       sym->st_value += VDSO_ADDR_ADJUST;
-               }
-       }
-}
-
-static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
-{
-       Elf32_Dyn *dyn = (void *)ehdr + offset;
-
-       for(; dyn->d_tag != DT_NULL; dyn++)
-               switch(dyn->d_tag) {
-               case DT_PLTGOT:
-               case DT_HASH:
-               case DT_STRTAB:
-               case DT_SYMTAB:
-               case DT_RELA:
-               case DT_INIT:
-               case DT_FINI:
-               case DT_REL:
-               case DT_DEBUG:
-               case DT_JMPREL:
-               case DT_VERSYM:
-               case DT_VERDEF:
-               case DT_VERNEED:
-               case DT_ADDRRNGLO ... DT_ADDRRNGHI:
-                       /* definitely pointers needing relocation */
-                       dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
-                       break;
-
-               case DT_ENCODING ... OLD_DT_LOOS-1:
-               case DT_LOOS ... DT_HIOS-1:
-                       /* Tags above DT_ENCODING are pointers if
-                          they're even */
-                       if (dyn->d_tag >= DT_ENCODING &&
-                           (dyn->d_tag & 1) == 0)
-                               dyn->d_un.d_ptr += VDSO_ADDR_ADJUST;
-                       break;
-
-               case DT_VERDEFNUM:
-               case DT_VERNEEDNUM:
-               case DT_FLAGS_1:
-               case DT_RELACOUNT:
-               case DT_RELCOUNT:
-               case DT_VALRNGLO ... DT_VALRNGHI:
-                       /* definitely not pointers */
-                       break;
-
-               case OLD_DT_LOOS ... DT_LOOS-1:
-               case DT_HIOS ... DT_VALRNGLO-1:
-               default:
-                       if (dyn->d_tag > DT_ENCODING)
-                               printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
-                                      dyn->d_tag);
-                       break;
-               }
-}
-
-static __init void relocate_vdso(Elf32_Ehdr *ehdr)
-{
-       Elf32_Phdr *phdr;
-       Elf32_Shdr *shdr;
-       int i;
-
-       BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
-              !elf_check_arch_ia32(ehdr) ||
-              ehdr->e_type != ET_DYN);
-
-       ehdr->e_entry += VDSO_ADDR_ADJUST;
-
-       /* rebase phdrs */
-       phdr = (void *)ehdr + ehdr->e_phoff;
-       for (i = 0; i < ehdr->e_phnum; i++) {
-               phdr[i].p_vaddr += VDSO_ADDR_ADJUST;
-
-               /* relocate dynamic stuff */
-               if (phdr[i].p_type == PT_DYNAMIC)
-                       reloc_dyn(ehdr, phdr[i].p_offset);
-       }
-
-       /* rebase sections */
-       shdr = (void *)ehdr + ehdr->e_shoff;
-       for(i = 0; i < ehdr->e_shnum; i++) {
-               if (!(shdr[i].sh_flags & SHF_ALLOC))
-                       continue;
-
-               shdr[i].sh_addr += VDSO_ADDR_ADJUST;
-
-               if (shdr[i].sh_type == SHT_SYMTAB ||
-                   shdr[i].sh_type == SHT_DYNSYM)
-                       reloc_symtab(ehdr, shdr[i].sh_offset,
-                                    shdr[i].sh_size);
-       }
-}
-
 static struct page *vdso32_pages[1];
 
 #ifdef CONFIG_X86_64
@@ -212,12 +85,6 @@ void syscall32_cpu_init(void)
        wrmsrl(MSR_CSTAR, ia32_cstar_target);
 }
 
-#define compat_uses_vma                1
-
-static inline void map_compat_vdso(int map)
-{
-}
-
 #else  /* CONFIG_X86_32 */
 
 #define vdso32_sysenter()      (boot_cpu_has(X86_FEATURE_SEP))
@@ -241,37 +108,6 @@ void enable_sep_cpu(void)
        put_cpu();      
 }
 
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
-       gate_vma.vm_mm = NULL;
-       gate_vma.vm_start = FIXADDR_USER_START;
-       gate_vma.vm_end = FIXADDR_USER_END;
-       gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
-       gate_vma.vm_page_prot = __P101;
-
-       return 0;
-}
-
-#define compat_uses_vma                0
-
-static void map_compat_vdso(int map)
-{
-       static int vdso_mapped;
-
-       if (map == vdso_mapped)
-               return;
-
-       vdso_mapped = map;
-
-       __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT,
-                    map ? PAGE_READONLY_EXEC : PAGE_NONE);
-
-       /* flush stray tlbs */
-       flush_tlb_all();
-}
-
 #endif /* CONFIG_X86_64 */
 
 int __init sysenter_setup(void)
@@ -282,10 +118,6 @@ int __init sysenter_setup(void)
 
        vdso32_pages[0] = virt_to_page(syscall_page);
 
-#ifdef CONFIG_X86_32
-       gate_vma_init();
-#endif
-
        if (vdso32_syscall()) {
                vsyscall = &vdso32_syscall_start;
                vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
@@ -298,7 +130,6 @@ int __init sysenter_setup(void)
        }
 
        memcpy(syscall_page, vsyscall, vsyscall_len);
-       relocate_vdso(syscall_page);
 
        return 0;
 }
@@ -309,48 +140,35 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
        struct mm_struct *mm = current->mm;
        unsigned long addr;
        int ret = 0;
-       bool compat;
 
 #ifdef CONFIG_X86_X32_ABI
        if (test_thread_flag(TIF_X32))
                return x32_setup_additional_pages(bprm, uses_interp);
 #endif
 
-       if (vdso_enabled == VDSO_DISABLED)
+       if (vdso_enabled != 1)  /* Other values all mean "disabled" */
                return 0;
 
        down_write(&mm->mmap_sem);
 
-       /* Test compat mode once here, in case someone
-          changes it via sysctl */
-       compat = (vdso_enabled == VDSO_COMPAT);
-
-       map_compat_vdso(compat);
-
-       if (compat)
-               addr = VDSO_HIGH_BASE;
-       else {
-               addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
-               if (IS_ERR_VALUE(addr)) {
-                       ret = addr;
-                       goto up_fail;
-               }
+       addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+       if (IS_ERR_VALUE(addr)) {
+               ret = addr;
+               goto up_fail;
        }
 
        current->mm->context.vdso = (void *)addr;
 
-       if (compat_uses_vma || !compat) {
-               /*
-                * MAYWRITE to allow gdb to COW and set breakpoints
-                */
-               ret = install_special_mapping(mm, addr, PAGE_SIZE,
-                                             VM_READ|VM_EXEC|
-                                             VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-                                             vdso32_pages);
-
-               if (ret)
-                       goto up_fail;
-       }
+       /*
+        * MAYWRITE to allow gdb to COW and set breakpoints
+        */
+       ret = install_special_mapping(mm, addr, PAGE_SIZE,
+                                     VM_READ|VM_EXEC|
+                                     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+                                     vdso32_pages);
+
+       if (ret)
+               goto up_fail;
 
        current_thread_info()->sysenter_return =
                VDSO32_SYMBOL(addr, SYSENTER_RETURN);
@@ -411,20 +229,12 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 
 struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
 {
-       /*
-        * Check to see if the corresponding task was created in compat vdso
-        * mode.
-        */
-       if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
-               return &gate_vma;
        return NULL;
 }
 
 int in_gate_area(struct mm_struct *mm, unsigned long addr)
 {
-       const struct vm_area_struct *vma = get_gate_vma(mm);
-
-       return vma && addr >= vma->vm_start && addr < vma->vm_end;
+       return 0;
 }
 
 int in_gate_area_no_mm(unsigned long addr)
index 976124b..90e7aa9 100644 (file)
@@ -8,7 +8,6 @@
  * values visible using the asm-x86/vdso.h macros from the kernel proper.
  */
 
-#define VDSO_PRELINK 0
 #include "../vdso-layout.lds.S"
 
 /* The ELF entry point can be used to set the AT_SYSINFO value.  */
@@ -31,7 +30,6 @@ VERSION
 /*
  * Symbols we define here called VDSO* get their values into vdso32-syms.h.
  */
-VDSO32_PRELINK         = VDSO_PRELINK;
 VDSO32_vsyscall                = __kernel_vsyscall;
 VDSO32_sigreturn       = __kernel_sigreturn;
 VDSO32_rt_sigreturn    = __kernel_rt_sigreturn;