/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/shm.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/smp.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
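
/*
 * Worked example (assuming ELF_MIN_ALIGN == 0x1000):
 *   ELF_PAGESTART(0x12345)  == 0x12000  (round down to page start)
 *   ELF_PAGEOFFSET(0x12345) == 0x345    (offset within the page)
 *   ELF_PAGEALIGN(0x12345)  == 0x13000  (round up to next page boundary)
 */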

static struct linux_binfmt elf_format = {
                .module         = THIS_MODULE,
                .load_binary    = load_elf_binary,
                .load_shlib     = load_elf_library,
                .core_dump      = elf_core_dump,
                .min_coredump   = ELF_EXEC_PAGESIZE,
                .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
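
/*
 * Example (assuming 4 KiB pages): if a segment's file data ends at
 * 0x0804a123, padzero() clears the 0xedd bytes from 0x0804a123 up to the
 * 0x0804b000 page boundary, so stale file contents never leak into bss.
 */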

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
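
/*
 * On the common grow-down case, STACK_ALLOC moves sp down by len bytes and
 * yields the new (lower) address, while STACK_ROUND makes room for 'items'
 * more pointer-sized slots and keeps the result 16-byte aligned.  For
 * example, with 4-byte elf_addr_t, sp == 0xbffff7f8 and items == 3:
 * sp - 3 == 0xbffff7ec, masked to 0xbffff7e0.
 */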

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        const char *k_platform = ELF_PLATFORM;
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        struct task_struct *tsk = current;
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, tsk->uid);
        NEW_AUX_ENT(AT_EUID, tsk->euid);
        NEW_AUX_ENT(AT_GID, tsk->gid);
        NEW_AUX_ENT(AT_EGID, tsk->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
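
/*
 * The resulting initial stack (grow-down case), from low to high
 * addresses, is roughly:
 *
 *     sp -> argc
 *           argv[0] ... argv[argc-1], NULL
 *           envp[0] ... envp[n-1],    NULL
 *           auxv (AT_*, value) pairs ... terminated by AT_NULL
 *           padding, platform string, argument and environment strings
 *
 * which is the layout the SVr4 ABI (and hence ld.so and libc startup
 * code) expects to find on entry.
 */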

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return(map_addr);
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
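
/*
 * Example: with two PT_LOAD entries at p_vaddr 0x400000 (p_memsz 0x800)
 * and 0x600e10 (p_memsz 0x3f8), total_mapping_size() returns
 * 0x600e10 + 0x3f8 - ELF_PAGESTART(0x400000) = 0x201208: the full span
 * the image needs, so the first mmap can reserve it in one go.
 */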


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        /*
         * Now fill out the bss section.  First pad the last page up
         * to the page boundary, and then perform a mmap to make sure
         * that there are zero-mapped pages up to and including the
         * last bss page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_close;
        }

        /* What we have mapped so far */
        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

        /* Map the last of the bss segment */
        if (last_bss > elf_bss) {
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}
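
/*
 * On success the value returned above is the interpreter's load_addr,
 * i.e. the relocation applied to every p_vaddr in the interpreter image;
 * the caller adds the interpreter's e_entry to it to find where execution
 * should start.
 */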

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
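
/*
 * With the default STACK_RND_MASK of 0x7ff and 4 KiB pages, the stack top
 * is shifted by up to 0x7ff << 12 = 0x7ff000 bytes, i.e. just under 8 MiB
 * of randomization, matching the "8MB of VA" note above.
 */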

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int elf_exec_fileno;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        retval = get_unused_fd();
        if (retval < 0)
                goto out_free_ph;
        get_file(bprm->file);
        fd_install(elf_exec_fileno = retval, bprm->file);

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_file;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_file;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex, 0);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex, 0);
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex, 0);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk (elf_bss + load_bias,
                                          elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        sys_close(elf_exec_fileno);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        compute_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        if (unlikely(current->ptrace & PT_PTRACED)) {
                if (current->ptrace & PT_TRACE_EXEC)
                        ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
                else
                        send_sig(SIGTRAP, current, 0);
        }
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_file:
        sys_close(elf_exec_fileno);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n)) {
                                /* don't leak the zeroed page on a short write */
                                free_page((unsigned long)buf);
                                return 0;
                        }
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return 1;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) && vma->vm_file != NULL && vma->vm_pgoff == 0) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                if (get_user(word, header) == 0 && word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}
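
/*
 * The MMF_DUMP_* bits tested above are the per-mm dump filter flags that
 * userspace controls through /proc/<pid>/coredump_filter, so a process
 * can opt particular kinds of mappings in or out of its core dumps.
 */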

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
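
/*
 * Example: a note named "CORE" with a 148-byte payload occupies
 * sizeof(struct elf_note) + roundup(5, 4) + roundup(148, 4)
 * = 12 + 8 + 148 = 168 bytes in the file (the note header is three
 * 32-bit words in both the ELF32 and ELF64 note formats).
 */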

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
#define DUMP_SEEK(off)  \
        if (!dump_seek(file, (off))) \
                goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_ppid = task_pid_vnr(p->real_parent);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                /*
                 * This is the record for the group leader.  Add in the
                 * cumulative times of previous dead threads.  This total
                 * won't include the time of each live thread whose state
                 * is included in the core dump.  The final total reported
                 * to our parent process when it calls wait4 will include
                 * those sums as well as the little bit more time it takes
                 * this and each other thread to finish dying after the
                 * core dump synchronization phase.
                 */
                cputime_to_timeval(cputime_add(p->utime, p->signal->utime),
                                   &prstatus->pr_utime);
                cputime_to_timeval(cputime_add(p->stime, p->signal->stime),
                                   &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ - 1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_ppid = task_pid_vnr(p->real_parent);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        SET_UID(psinfo->pr_uid, p->uid);
        SET_GID(psinfo->pr_gid, p->gid);
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
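
/*
 * saved_auxv holds the (id, value) pairs laid out by create_elf_tables(),
 * e.g. { AT_HWCAP, caps, AT_PAGESZ, 4096, ..., AT_NULL, 0 }; the loop
 * above counts entries up to and including the terminating AT_NULL pair
 * so the whole vector lands in the NT_AUXV note.
 */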

#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>

struct elf_thread_core_info {
        struct elf_thread_core_info *next;
        struct task_struct *task;
        struct elf_prstatus prstatus;
        struct memelfnote notes[0];
};

struct elf_note_info {
        struct elf_thread_core_info *thread;
        struct memelfnote psinfo;
        struct memelfnote auxv;
        size_t size;
        int thread_notes;
};

/*
 * When a regset has a writeback hook, we call it on each thread before
 * dumping user memory.  On register window machines, this makes sure the
 * user memory backing the register data is up to date before we read it.
 */
static void do_thread_regset_writeback(struct task_struct *task,
                                       const struct user_regset *regset)
{
        if (regset->writeback)
                regset->writeback(task, regset, 1);
}

static int fill_thread_core_info(struct elf_thread_core_info *t,
                                 const struct user_regset_view *view,
                                 long signr, size_t *total)
{
        unsigned int i;

1419         /*
1420          * NT_PRSTATUS is the one special case, because the regset data
1421          * goes into the pr_reg field inside the note contents, rather
1422          * than being the whole note contents.  We fill the rest in here.
1423          * We assume that regset 0 is NT_PRSTATUS.
1424          */
1425         fill_prstatus(&t->prstatus, t->task, signr);
1426         (void) view->regsets[0].get(t->task, &view->regsets[0],
1427                                     0, sizeof(t->prstatus.pr_reg),
1428                                     &t->prstatus.pr_reg, NULL);
1429
1430         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1431                   sizeof(t->prstatus), &t->prstatus);
1432         *total += notesize(&t->notes[0]);
1433
1434         do_thread_regset_writeback(t->task, &view->regsets[0]);
1435
1436         /*
1437          * Each other regset might generate a note too.  For each regset
1438          * that has no core_note_type or is inactive, we leave t->notes[i]
1439          * all zero and we'll know to skip writing it later.
1440          */
1441         for (i = 1; i < view->n; ++i) {
1442                 const struct user_regset *regset = &view->regsets[i];
1443                 do_thread_regset_writeback(t->task, regset);
1444                 if (regset->core_note_type &&
1445                     (!regset->active || regset->active(t->task, regset))) {
1446                         int ret;
1447                         size_t size = regset->n * regset->size;
1448                         void *data = kmalloc(size, GFP_KERNEL);
1449                         if (unlikely(!data))
1450                                 return 0;
1451                         ret = regset->get(t->task, regset,
1452                                           0, size, data, NULL);
1453                         if (unlikely(ret))
1454                                 kfree(data);
1455                         else {
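                                /*
                                 * Keep the historical layout: the FPU
                                 * note goes out under the "CORE" name
                                 * with type NT_PRFPREG and is flagged
                                 * via pr_fpvalid, while every other
                                 * regset becomes a "LINUX" note.
                                 */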
1456                                 if (regset->core_note_type != NT_PRFPREG)
1457                                         fill_note(&t->notes[i], "LINUX",
1458                                                   regset->core_note_type,
1459                                                   size, data);
1460                                 else {
1461                                         t->prstatus.pr_fpvalid = 1;
1462                                         fill_note(&t->notes[i], "CORE",
1463                                                   NT_PRFPREG, size, data);
1464                                 }
1465                                 *total += notesize(&t->notes[i]);
1466                         }
1467                 }
1468         }
1469
1470         return 1;
1471 }
1472
1473 static int fill_note_info(struct elfhdr *elf, int phdrs,
1474                           struct elf_note_info *info,
1475                           long signr, struct pt_regs *regs)
1476 {
1477         struct task_struct *dump_task = current;
1478         const struct user_regset_view *view = task_user_regset_view(dump_task);
1479         struct elf_thread_core_info *t;
1480         struct elf_prpsinfo *psinfo;
1481         struct core_thread *ct;
1482         unsigned int i;
1483
1484         info->size = 0;
1485         info->thread = NULL;
1486
1487         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1488         if (psinfo == NULL)
1489                 return 0;
1490         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1491
1492
1493         /*
1494          * Figure out how many notes we're going to need for each thread.
1495          */
1496         info->thread_notes = 0;
1497         for (i = 0; i < view->n; ++i)
1498                 if (view->regsets[i].core_note_type != 0)
1499                         ++info->thread_notes;
1500
1501         /*
1502          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1503          * since it is our one special case.
1504          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1505         if (unlikely(info->thread_notes == 0) ||
1506             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1507                 WARN_ON(1);
1508                 return 0;
1509         }
1510
1511         /*
1512          * Initialize the ELF file header.
1513          */
1514         fill_elf_header(elf, phdrs,
1515                         view->e_machine, view->e_flags, view->ei_osabi);
1516
1517         /*
1518          * Allocate a structure for each thread.
1519          */
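        /*
         * core_state->dumper links every thread taking part in this
         * dump; the list is built below so that the dumping thread's
         * own entry always ends up at the head.
         */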
1520         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1521                 t = kzalloc(offsetof(struct elf_thread_core_info,
1522                                      notes[info->thread_notes]),
1523                             GFP_KERNEL);
1524                 if (unlikely(!t))
1525                         return 0;
1526
1527                 t->task = ct->task;
1528                 if (ct->task == dump_task || !info->thread) {
1529                         t->next = info->thread;
1530                         info->thread = t;
1531                 } else {
1532                         /*
1533                          * Make sure to keep the original task at
1534                          * the head of the list.
1535                          */
1536                         t->next = info->thread->next;
1537                         info->thread->next = t;
1538                 }
1539         }
1540
1541         /*
1542          * Now fill in each thread's information.
1543          */
1544         for (t = info->thread; t != NULL; t = t->next)
1545                 if (!fill_thread_core_info(t, view, signr, &info->size))
1546                         return 0;
1547
1548         /*
1549          * Fill in the two process-wide notes.
1550          */
1551         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1552         info->size += notesize(&info->psinfo);
1553
1554         fill_auxv_note(&info->auxv, current->mm);
1555         info->size += notesize(&info->auxv);
1556
1557         return 1;
1558 }
1559
1560 static size_t get_note_info_size(struct elf_note_info *info)
1561 {
1562         return info->size;
1563 }
1564
1565 /*
1566  * Write all the notes for each thread.  When writing the first thread, the
1567  * process-wide notes are interleaved after the first thread-specific note.
1568  */
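/*
 * For a two-thread process, for example, the resulting note order is:
 * NT_PRSTATUS (thread 1), NT_PRPSINFO, NT_AUXV, the remaining thread-1
 * notes, then NT_PRSTATUS (thread 2) and the remaining thread-2 notes.
 */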
1569 static int write_note_info(struct elf_note_info *info,
1570                            struct file *file, loff_t *foffset)
1571 {
1572         bool first = true;
1573         struct elf_thread_core_info *t = info->thread;
1574
1575         do {
1576                 int i;
1577
1578                 if (!writenote(&t->notes[0], file, foffset))
1579                         return 0;
1580
1581                 if (first && !writenote(&info->psinfo, file, foffset))
1582                         return 0;
1583                 if (first && !writenote(&info->auxv, file, foffset))
1584                         return 0;
1585
1586                 for (i = 1; i < info->thread_notes; ++i)
1587                         if (t->notes[i].data &&
1588                             !writenote(&t->notes[i], file, foffset))
1589                                 return 0;
1590
1591                 first = 0;
1592                 t = t->next;
1593         } while (t);
1594
1595         return 1;
1596 }
1597
1598 static void free_note_info(struct elf_note_info *info)
1599 {
1600         struct elf_thread_core_info *threads = info->thread;
1601         while (threads) {
1602                 unsigned int i;
1603                 struct elf_thread_core_info *t = threads;
1604                 threads = t->next;
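                /*
                 * notes[0] points at t->prstatus inside this very
                 * structure, so only the separately allocated regset
                 * buffers behind notes[1..] need to be freed here.
                 */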
1605                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1606                 for (i = 1; i < info->thread_notes; ++i)
1607                         kfree(t->notes[i].data);
1608                 kfree(t);
1609         }
1610         kfree(info->psinfo.data);
1611 }
1612
1613 #else
1614
1615 /* Here is the structure in which the status of each thread is captured. */
1616 struct elf_thread_status
1617 {
1618         struct list_head list;
1619         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1620         elf_fpregset_t fpu;             /* NT_PRFPREG */
1621         struct task_struct *thread;
1622 #ifdef ELF_CORE_COPY_XFPREGS
1623         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1624 #endif
1625         struct memelfnote notes[3];
1626         int num_notes;
1627 };
1628
1629 /*
1630  * In order to add the specific thread information for the elf file format,
1631  * we need to keep a linked list of every thread's pr_status and then create
1632  * a single section for them in the final core file.
1633  */
1634 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1635 {
1636         int sz = 0;
1637         struct task_struct *p = t->thread;
1638         t->num_notes = 0;
1639
1640         fill_prstatus(&t->prstatus, p, signr);
1641         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1642         
1643         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1644                   &(t->prstatus));
1645         t->num_notes++;
1646         sz += notesize(&t->notes[0]);
1647
1648         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1649                                                                 &t->fpu))) {
1650                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1651                           &(t->fpu));
1652                 t->num_notes++;
1653                 sz += notesize(&t->notes[1]);
1654         }
1655
1656 #ifdef ELF_CORE_COPY_XFPREGS
1657         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1658                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1659                           sizeof(t->xfpu), &t->xfpu);
1660                 t->num_notes++;
1661                 sz += notesize(&t->notes[2]);
1662         }
1663 #endif  
1664         return sz;
1665 }
1666
1667 struct elf_note_info {
1668         struct memelfnote *notes;
1669         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1670         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1671         struct list_head thread_list;
1672         elf_fpregset_t *fpu;
1673 #ifdef ELF_CORE_COPY_XFPREGS
1674         elf_fpxregset_t *xfpu;
1675 #endif
1676         int thread_status_size;
1677         int numnote;
1678 };
1679
1680 static int fill_note_info(struct elfhdr *elf, int phdrs,
1681                           struct elf_note_info *info,
1682                           long signr, struct pt_regs *regs)
1683 {
1684 #define NUM_NOTES       6
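        /*
         * Upper bound on the process-wide notes assembled below:
         * NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, NT_PRFPREG and, where
         * supported, the extended FPU note.
         */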
1685         struct list_head *t;
1686
1687         info->notes = NULL;
1688         info->prstatus = NULL;
1689         info->psinfo = NULL;
1690         info->fpu = NULL;
1691 #ifdef ELF_CORE_COPY_XFPREGS
1692         info->xfpu = NULL;
1693 #endif
1694         INIT_LIST_HEAD(&info->thread_list);
1695
1696         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1697                               GFP_KERNEL);
1698         if (!info->notes)
1699                 return 0;
1700         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1701         if (!info->psinfo)
1702                 return 0;
1703         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1704         if (!info->prstatus)
1705                 return 0;
1706         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1707         if (!info->fpu)
1708                 return 0;
1709 #ifdef ELF_CORE_COPY_XFPREGS
1710         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1711         if (!info->xfpu)
1712                 return 0;
1713 #endif
1714
1715         info->thread_status_size = 0;
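        /*
         * Notes for the other threads are collected only for a real
         * signal-induced dump.  The walk starts at dumper.next, which
         * skips the dumping thread itself; its state is filled in
         * separately below.
         */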
1716         if (signr) {
1717                 struct core_thread *ct;
1718                 struct elf_thread_status *ets;
1719
1720                 for (ct = current->mm->core_state->dumper.next;
1721                                                 ct; ct = ct->next) {
1722                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1723                         if (!ets)
1724                                 return 0;
1725
1726                         ets->thread = ct->task;
1727                         list_add(&ets->list, &info->thread_list);
1728                 }
1729
1730                 list_for_each(t, &info->thread_list) {
1731                         int sz;
1732
1733                         ets = list_entry(t, struct elf_thread_status, list);
1734                         sz = elf_dump_thread_status(signr, ets);
1735                         info->thread_status_size += sz;
1736                 }
1737         }
1738         /* Now collect the dump for the current task. */
1739         memset(info->prstatus, 0, sizeof(*info->prstatus));
1740         fill_prstatus(info->prstatus, current, signr);
1741         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1742
1743         /* Set up header */
1744         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1745
1746         /*
1747          * Set up the notes in similar form to SVR4 core dumps made
1748          * with info from their /proc.
1749          */
1750
1751         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1752                   sizeof(*info->prstatus), info->prstatus);
1753         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1754         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1755                   sizeof(*info->psinfo), info->psinfo);
1756
1757         info->numnote = 2;
1758
1759         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1760
1761         /* Try to dump the FPU. */
1762         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1763                                                                info->fpu);
1764         if (info->prstatus->pr_fpvalid)
1765                 fill_note(info->notes + info->numnote++,
1766                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1767 #ifdef ELF_CORE_COPY_XFPREGS
1768         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1769                 fill_note(info->notes + info->numnote++,
1770                           "LINUX", ELF_CORE_XFPREG_TYPE,
1771                           sizeof(*info->xfpu), info->xfpu);
1772 #endif
1773
1774         return 1;
1775
1776 #undef NUM_NOTES
1777 }
1778
1779 static size_t get_note_info_size(struct elf_note_info *info)
1780 {
1781         size_t sz = 0;
1782         int i;
1783
1784         for (i = 0; i < info->numnote; i++)
1785                 sz += notesize(info->notes + i);
1786
1787         sz += info->thread_status_size;
1788
1789         return sz;
1790 }
1791
1792 static int write_note_info(struct elf_note_info *info,
1793                            struct file *file, loff_t *foffset)
1794 {
1795         int i;
1796         struct list_head *t;
1797
1798         for (i = 0; i < info->numnote; i++)
1799                 if (!writenote(info->notes + i, file, foffset))
1800                         return 0;
1801
1802         /* write out the thread status notes section */
1803         list_for_each(t, &info->thread_list) {
1804                 struct elf_thread_status *tmp =
1805                                 list_entry(t, struct elf_thread_status, list);
1806
1807                 for (i = 0; i < tmp->num_notes; i++)
1808                         if (!writenote(&tmp->notes[i], file, foffset))
1809                                 return 0;
1810         }
1811
1812         return 1;
1813 }
1814
1815 static void free_note_info(struct elf_note_info *info)
1816 {
1817         while (!list_empty(&info->thread_list)) {
1818                 struct list_head *tmp = info->thread_list.next;
1819                 list_del(tmp);
1820                 kfree(list_entry(tmp, struct elf_thread_status, list));
1821         }
1822
1823         kfree(info->prstatus);
1824         kfree(info->psinfo);
1825         kfree(info->notes);
1826         kfree(info->fpu);
1827 #ifdef ELF_CORE_COPY_XFPREGS
1828         kfree(info->xfpu);
1829 #endif
1830 }
1831
1832 #endif
1833
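/*
 * Starting point of the vma walk for the dump: the first mapping in
 * the mm, or the gate vma when the process has no mappings at all.
 */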
1834 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1835                                         struct vm_area_struct *gate_vma)
1836 {
1837         struct vm_area_struct *ret = tsk->mm->mmap;
1838
1839         if (ret)
1840                 return ret;
1841         return gate_vma;
1842 }
1843 /*
1844  * Helper function for iterating across a vma list.  It ensures that the caller
1845  * will visit `gate_vma' prior to terminating the search.
1846  */
1847 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1848                                         struct vm_area_struct *gate_vma)
1849 {
1850         struct vm_area_struct *ret;
1851
1852         ret = this_vma->vm_next;
1853         if (ret)
1854                 return ret;
1855         if (this_vma == gate_vma)
1856                 return NULL;
1857         return gate_vma;
1858 }
1859
1860 /*
1861  * Actual dumper
1862  *
1863  * This is a two-pass process; first we find the offsets of the bits,
1864  * and then they are actually written out.  If we run out of core limit
1865  * we just truncate.
1866  */
1867 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1868 {
1869         int has_dumped = 0;
1870         mm_segment_t fs;
1871         int segs;
1872         size_t size = 0;
1873         struct vm_area_struct *vma, *gate_vma;
1874         struct elfhdr *elf = NULL;
1875         loff_t offset = 0, dataoff, foffset;
1876         unsigned long mm_flags;
1877         struct elf_note_info info;
1878
1879         /*
1880          * We no longer stop all VM operations.
1881          * 
1882          * This is because those processes that could possibly change map_count
1883          * or the mmap / vma pages are now blocked in do_exit on current
1884          * finishing this core dump.
1885          *
1886          * Only ptrace can touch these memory addresses, but it doesn't change
1887          * the map_count or the pages allocated, so there is no possibility of
1888          * crashing while dumping the mm->vm_next areas to the core file.
1889          */
1890   
1891         /* alloc memory for large data structures: too large to be on stack */
1892         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1893         if (!elf)
1894                 goto out;
1895         
1896         segs = current->mm->map_count;
1897 #ifdef ELF_CORE_EXTRA_PHDRS
1898         segs += ELF_CORE_EXTRA_PHDRS;
1899 #endif
1900
1901         gate_vma = get_gate_vma(current);
1902         if (gate_vma != NULL)
1903                 segs++;
1904
1905         /*
1906          * Collect all the non-memory information about the process for the
1907          * notes.  This also sets up the file header.
1908          */
1909         if (!fill_note_info(elf, segs + 1, /* including notes section */
1910                             &info, signr, regs))
1911                 goto cleanup;
1912
1913         has_dumped = 1;
1914         current->flags |= PF_DUMPCORE;
1915   
1916         fs = get_fs();
1917         set_fs(KERNEL_DS);
1918
1919         DUMP_WRITE(elf, sizeof(*elf));
1920         offset += sizeof(*elf);                         /* Elf header */
1921         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1922         foffset = offset;
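        /*
         * Layout so far: the ELF header followed by segs + 1 program
         * headers (the extra one describes the notes).  From here on,
         * offset tracks where data is planned to land in the file
         * while foffset tracks how far we have actually written.
         */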
1923
1924         /* Write notes phdr entry */
1925         {
1926                 struct elf_phdr phdr;
1927                 size_t sz = get_note_info_size(&info);
1928
1929                 sz += elf_coredump_extra_notes_size();
1930
1931                 fill_elf_note_phdr(&phdr, sz, offset);
1932                 offset += sz;
1933                 DUMP_WRITE(&phdr, sizeof(phdr));
1934         }
1935
1936         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1937
1938         /*
1939          * We must use the same mm->flags while dumping core to avoid
1940          * inconsistency between the program headers and bodies, otherwise an
1941          * unusable core file can be generated.
1942          */
1943         mm_flags = current->mm->flags;
1944
1945         /* Write program headers for segments dump */
1946         for (vma = first_vma(current, gate_vma); vma != NULL;
1947                         vma = next_vma(vma, gate_vma)) {
1948                 struct elf_phdr phdr;
1949
1950                 phdr.p_type = PT_LOAD;
1951                 phdr.p_offset = offset;
1952                 phdr.p_vaddr = vma->vm_start;
1953                 phdr.p_paddr = 0;
1954                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1955                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1956                 offset += phdr.p_filesz;
1957                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1958                 if (vma->vm_flags & VM_WRITE)
1959                         phdr.p_flags |= PF_W;
1960                 if (vma->vm_flags & VM_EXEC)
1961                         phdr.p_flags |= PF_X;
1962                 phdr.p_align = ELF_EXEC_PAGESIZE;
1963
1964                 DUMP_WRITE(&phdr, sizeof(phdr));
1965         }
1966
1967 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
1968         ELF_CORE_WRITE_EXTRA_PHDRS;
1969 #endif
1970
1971         /* write out the notes section */
1972         if (!write_note_info(&info, file, &foffset))
1973                 goto end_coredump;
1974
1975         if (elf_coredump_extra_notes_write(file, &foffset))
1976                 goto end_coredump;
1977
1978         /* Align to page */
1979         DUMP_SEEK(dataoff - foffset);
1980
1981         for (vma = first_vma(current, gate_vma); vma != NULL;
1982                         vma = next_vma(vma, gate_vma)) {
1983                 unsigned long addr;
1984                 unsigned long end;
1985
1986                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
1987
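                /*
                 * Walk the vma one page at a time: pages that cannot
                 * be fetched, and the shared zero page, are skipped
                 * over with a seek, which keeps the core file sparse.
                 */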
1988                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
1989                         struct page *page;
1990                         struct vm_area_struct *tmp_vma;
1991
1992                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
1993                                                 &page, &tmp_vma) <= 0) {
1994                                 DUMP_SEEK(PAGE_SIZE);
1995                         } else {
1996                                 if (page == ZERO_PAGE(0)) {
1997                                         if (!dump_seek(file, PAGE_SIZE)) {
1998                                                 page_cache_release(page);
1999                                                 goto end_coredump;
2000                                         }
2001                                 } else {
2002                                         void *kaddr;
2003                                         flush_cache_page(tmp_vma, addr,
2004                                                          page_to_pfn(page));
2005                                         kaddr = kmap(page);
2006                                         if ((size += PAGE_SIZE) > limit ||
2007                                             !dump_write(file, kaddr,
2008                                             PAGE_SIZE)) {
2009                                                 kunmap(page);
2010                                                 page_cache_release(page);
2011                                                 goto end_coredump;
2012                                         }
2013                                         kunmap(page);
2014                                 }
2015                                 page_cache_release(page);
2016                         }
2017                 }
2018         }
2019
2020 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2021         ELF_CORE_WRITE_EXTRA_DATA;
2022 #endif
2023
2024 end_coredump:
2025         set_fs(fs);
2026
2027 cleanup:
2028         free_note_info(&info);
2029         kfree(elf);
2030 out:
2031         return has_dumped;
2032 }
2033
2034 #endif          /* USE_ELF_CORE_DUMP */
2035
2036 static int __init init_elf_binfmt(void)
2037 {
2038         return register_binfmt(&elf_format);
2039 }
2040
2041 static void __exit exit_elf_binfmt(void)
2042 {
2043         /* Remove the ELF loader. */
2044         unregister_binfmt(&elf_format);
2045 }
2046
2047 core_initcall(init_elf_binfmt);
2048 module_exit(exit_elf_binfmt);
2049 MODULE_LICENSE("GPL");