Merge branch 'pcmcia' of git://git.linaro.org/people/rmk/linux-arm
[pandora-kernel.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
/*
 * Forward declarations.  load_elf_binary() and load_elf_library() are
 * referenced by the elf_format initializer below; elf_map() is the
 * segment-mapping helper defined further down in this file.
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);
43
/*
 * If we don't support core dumping (CONFIG_ELF_CORE is not set), then
 * supply a NULL so we don't even try: elf_core_dump is what gets stored
 * in elf_format.core_dump below.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif
53
/*
 * ELF_MIN_ALIGN is the alignment granularity used for ELF mappings in this
 * file: the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE.
 */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

/* Architectures may override this; default to no core-dump e_flags. */
#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

/*
 * Address helpers working at ELF_MIN_ALIGN granularity:
 *   ELF_PAGESTART  - round _v down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET - offset of _v within its ELF_MIN_ALIGN unit
 *   ELF_PAGEALIGN  - round _v up to an ELF_MIN_ALIGN boundary
 *
 * The rounding masks are widened to unsigned long before being inverted so
 * that an unsigned int alignment constant cannot clear the upper half of a
 * 64-bit address.
 */
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) \
        (((_v) + ELF_MIN_ALIGN - 1) & ~(unsigned long)(ELF_MIN_ALIGN - 1))
67
/*
 * Binary-format descriptor for ELF.  It ties the executable loader, the
 * shared-library loader and the core-dump handler declared above into one
 * struct linux_binfmt; min_coredump is set to ELF_EXEC_PAGESIZE.
 * NOTE(review): presumably registered with the binfmt core elsewhere in
 * this file - the registration is not visible in this excerpt.
 */
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};
75
/*
 * True when x, taken as an unsigned long, lies at or above TASK_SIZE.
 * The code in this file applies it to the results of do_brk(), do_mmap()
 * and elf_map() to detect a failed mapping.
 */
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Let's use some macros to make this stack manipulation a little clearer.
 *
 *   STACK_ADD(sp, items)   - pointer 'items' elf_addr_t slots further along
 *                            the stack from sp, in the growth direction
 *   STACK_ROUND(sp, items) - address 'items' elements past sp, rounded to a
 *                            16-byte boundary (up when the stack grows up,
 *                            down otherwise)
 *   STACK_ALLOC(sp, len)   - reserve len units by moving sp in place and
 *                            yield the start of the reserved area (the old
 *                            sp when growing up, the new sp otherwise)
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as "a + b" expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 * With the NULL default below, create_elf_tables() neither copies a
 * string nor emits an AT_BASE_PLATFORM auxv entry.
 */
#define ELF_BASE_PLATFORM NULL
#endif
135
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 static unsigned long elf_map(struct file *filep, unsigned long addr,
319                 struct elf_phdr *eppnt, int prot, int type,
320                 unsigned long total_size)
321 {
322         unsigned long map_addr;
323         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
324         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
325         addr = ELF_PAGESTART(addr);
326         size = ELF_PAGEALIGN(size);
327
328         /* mmap() will return -EINVAL if given a zero size, but a
329          * segment with zero filesize is perfectly valid */
330         if (!size)
331                 return addr;
332
333         down_write(&current->mm->mmap_sem);
334         /*
335         * total_size is the size of the ELF (interpreter) image.
336         * The _first_ mmap needs to know the full size, otherwise
337         * randomization might put this image into an overlapping
338         * position with the ELF binary image. (since size < total_size)
339         * So we first map the 'big' image - and unmap the remainder at
340         * the end. (which unmap is needed for ELF images with holes.)
341         */
342         if (total_size) {
343                 total_size = ELF_PAGEALIGN(total_size);
344                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
345                 if (!BAD_ADDR(map_addr))
346                         do_munmap(current->mm, map_addr+size, total_size-size);
347         } else
348                 map_addr = do_mmap(filep, addr, size, prot, type, off);
349
350         up_write(&current->mm->mmap_sem);
351         return(map_addr);
352 }
353
354 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
355 {
356         int i, first_idx = -1, last_idx = -1;
357
358         for (i = 0; i < nr; i++) {
359                 if (cmds[i].p_type == PT_LOAD) {
360                         last_idx = i;
361                         if (first_idx == -1)
362                                 first_idx = i;
363                 }
364         }
365         if (first_idx == -1)
366                 return 0;
367
368         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
369                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
370 }
371
372
373 /* This is much more generalized than the library routine read function,
374    so we keep this separate.  Technically the library read function
375    is only provided so that we can read a.out libraries that have
376    an ELF header */
377
378 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
379                 struct file *interpreter, unsigned long *interp_map_addr,
380                 unsigned long no_base)
381 {
382         struct elf_phdr *elf_phdata;
383         struct elf_phdr *eppnt;
384         unsigned long load_addr = 0;
385         int load_addr_set = 0;
386         unsigned long last_bss = 0, elf_bss = 0;
387         unsigned long error = ~0UL;
388         unsigned long total_size;
389         int retval, i, size;
390
391         /* First of all, some simple consistency checks */
392         if (interp_elf_ex->e_type != ET_EXEC &&
393             interp_elf_ex->e_type != ET_DYN)
394                 goto out;
395         if (!elf_check_arch(interp_elf_ex))
396                 goto out;
397         if (!interpreter->f_op || !interpreter->f_op->mmap)
398                 goto out;
399
400         /*
401          * If the size of this structure has changed, then punt, since
402          * we will be doing the wrong thing.
403          */
404         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
405                 goto out;
406         if (interp_elf_ex->e_phnum < 1 ||
407                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
408                 goto out;
409
410         /* Now read in all of the header information */
411         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
412         if (size > ELF_MIN_ALIGN)
413                 goto out;
414         elf_phdata = kmalloc(size, GFP_KERNEL);
415         if (!elf_phdata)
416                 goto out;
417
418         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
419                              (char *)elf_phdata, size);
420         error = -EIO;
421         if (retval != size) {
422                 if (retval < 0)
423                         error = retval; 
424                 goto out_close;
425         }
426
427         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
428         if (!total_size) {
429                 error = -EINVAL;
430                 goto out_close;
431         }
432
433         eppnt = elf_phdata;
434         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
435                 if (eppnt->p_type == PT_LOAD) {
436                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
437                         int elf_prot = 0;
438                         unsigned long vaddr = 0;
439                         unsigned long k, map_addr;
440
441                         if (eppnt->p_flags & PF_R)
442                                 elf_prot = PROT_READ;
443                         if (eppnt->p_flags & PF_W)
444                                 elf_prot |= PROT_WRITE;
445                         if (eppnt->p_flags & PF_X)
446                                 elf_prot |= PROT_EXEC;
447                         vaddr = eppnt->p_vaddr;
448                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
449                                 elf_type |= MAP_FIXED;
450                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
451                                 load_addr = -vaddr;
452
453                         map_addr = elf_map(interpreter, load_addr + vaddr,
454                                         eppnt, elf_prot, elf_type, total_size);
455                         total_size = 0;
456                         if (!*interp_map_addr)
457                                 *interp_map_addr = map_addr;
458                         error = map_addr;
459                         if (BAD_ADDR(map_addr))
460                                 goto out_close;
461
462                         if (!load_addr_set &&
463                             interp_elf_ex->e_type == ET_DYN) {
464                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
465                                 load_addr_set = 1;
466                         }
467
468                         /*
469                          * Check to see if the section's size will overflow the
470                          * allowed task size. Note that p_filesz must always be
471                          * <= p_memsize so it's only necessary to check p_memsz.
472                          */
473                         k = load_addr + eppnt->p_vaddr;
474                         if (BAD_ADDR(k) ||
475                             eppnt->p_filesz > eppnt->p_memsz ||
476                             eppnt->p_memsz > TASK_SIZE ||
477                             TASK_SIZE - eppnt->p_memsz < k) {
478                                 error = -ENOMEM;
479                                 goto out_close;
480                         }
481
482                         /*
483                          * Find the end of the file mapping for this phdr, and
484                          * keep track of the largest address we see for this.
485                          */
486                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487                         if (k > elf_bss)
488                                 elf_bss = k;
489
490                         /*
491                          * Do the same thing for the memory mapping - between
492                          * elf_bss and last_bss is the bss section.
493                          */
494                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
495                         if (k > last_bss)
496                                 last_bss = k;
497                 }
498         }
499
500         if (last_bss > elf_bss) {
501                 /*
502                  * Now fill out the bss section.  First pad the last page up
503                  * to the page boundary, and then perform a mmap to make sure
504                  * that there are zero-mapped pages up to and including the
505                  * last bss page.
506                  */
507                 if (padzero(elf_bss)) {
508                         error = -EFAULT;
509                         goto out_close;
510                 }
511
512                 /* What we have mapped so far */
513                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
514
515                 /* Map the last of the bss segment */
516                 down_write(&current->mm->mmap_sem);
517                 error = do_brk(elf_bss, last_bss - elf_bss);
518                 up_write(&current->mm->mmap_sem);
519                 if (BAD_ADDR(error))
520                         goto out_close;
521         }
522
523         error = load_addr;
524
525 out_close:
526         kfree(elf_phdata);
527 out:
528         return error;
529 }
530
531 /*
532  * These are the functions used to load ELF style executables and shared
533  * libraries.  There is no binary dependent code anywhere else.
534  */
535
/*
 * Interpreter kinds.
 * NOTE(review): neither constant is referenced in the part of the file
 * visible here - confirm whether they are still used.
 */
#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

/*
 * Default mask for the random number of pages used by
 * randomize_stack_top(); an architecture may define its own value.
 */
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif
542
543 static unsigned long randomize_stack_top(unsigned long stack_top)
544 {
545         unsigned int random_variable = 0;
546
547         if ((current->flags & PF_RANDOMIZE) &&
548                 !(current->personality & ADDR_NO_RANDOMIZE)) {
549                 random_variable = get_random_int() & STACK_RND_MASK;
550                 random_variable <<= PAGE_SHIFT;
551         }
552 #ifdef CONFIG_STACK_GROWSUP
553         return PAGE_ALIGN(stack_top) + random_variable;
554 #else
555         return PAGE_ALIGN(stack_top) - random_variable;
556 #endif
557 }
558
559 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
560 {
561         struct file *interpreter = NULL; /* to shut gcc up */
562         unsigned long load_addr = 0, load_bias = 0;
563         int load_addr_set = 0;
564         char * elf_interpreter = NULL;
565         unsigned long error;
566         struct elf_phdr *elf_ppnt, *elf_phdata;
567         unsigned long elf_bss, elf_brk;
568         int retval, i;
569         unsigned int size;
570         unsigned long elf_entry;
571         unsigned long interp_load_addr = 0;
572         unsigned long start_code, end_code, start_data, end_data;
573         unsigned long reloc_func_desc __maybe_unused = 0;
574         int executable_stack = EXSTACK_DEFAULT;
575         unsigned long def_flags = 0;
576         struct {
577                 struct elfhdr elf_ex;
578                 struct elfhdr interp_elf_ex;
579         } *loc;
580
581         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582         if (!loc) {
583                 retval = -ENOMEM;
584                 goto out_ret;
585         }
586         
587         /* Get the exec-header */
588         loc->elf_ex = *((struct elfhdr *)bprm->buf);
589
590         retval = -ENOEXEC;
591         /* First of all, some simple consistency checks */
592         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593                 goto out;
594
595         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596                 goto out;
597         if (!elf_check_arch(&loc->elf_ex))
598                 goto out;
599         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
600                 goto out;
601
602         /* Now read in all of the header information */
603         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604                 goto out;
605         if (loc->elf_ex.e_phnum < 1 ||
606                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607                 goto out;
608         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609         retval = -ENOMEM;
610         elf_phdata = kmalloc(size, GFP_KERNEL);
611         if (!elf_phdata)
612                 goto out;
613
614         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615                              (char *)elf_phdata, size);
616         if (retval != size) {
617                 if (retval >= 0)
618                         retval = -EIO;
619                 goto out_free_ph;
620         }
621
622         elf_ppnt = elf_phdata;
623         elf_bss = 0;
624         elf_brk = 0;
625
626         start_code = ~0UL;
627         end_code = 0;
628         start_data = 0;
629         end_data = 0;
630
631         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
632                 if (elf_ppnt->p_type == PT_INTERP) {
633                         /* This is the program interpreter used for
634                          * shared libraries - for now assume that this
635                          * is an a.out format binary
636                          */
637                         retval = -ENOEXEC;
638                         if (elf_ppnt->p_filesz > PATH_MAX || 
639                             elf_ppnt->p_filesz < 2)
640                                 goto out_free_ph;
641
642                         retval = -ENOMEM;
643                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
644                                                   GFP_KERNEL);
645                         if (!elf_interpreter)
646                                 goto out_free_ph;
647
648                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
649                                              elf_interpreter,
650                                              elf_ppnt->p_filesz);
651                         if (retval != elf_ppnt->p_filesz) {
652                                 if (retval >= 0)
653                                         retval = -EIO;
654                                 goto out_free_interp;
655                         }
656                         /* make sure path is NULL terminated */
657                         retval = -ENOEXEC;
658                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
659                                 goto out_free_interp;
660
661                         interpreter = open_exec(elf_interpreter);
662                         retval = PTR_ERR(interpreter);
663                         if (IS_ERR(interpreter))
664                                 goto out_free_interp;
665
666                         /*
667                          * If the binary is not readable then enforce
668                          * mm->dumpable = 0 regardless of the interpreter's
669                          * permissions.
670                          */
671                         would_dump(bprm, interpreter);
672
673                         retval = kernel_read(interpreter, 0, bprm->buf,
674                                              BINPRM_BUF_SIZE);
675                         if (retval != BINPRM_BUF_SIZE) {
676                                 if (retval >= 0)
677                                         retval = -EIO;
678                                 goto out_free_dentry;
679                         }
680
681                         /* Get the exec headers */
682                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
683                         break;
684                 }
685                 elf_ppnt++;
686         }
687
688         elf_ppnt = elf_phdata;
689         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
690                 if (elf_ppnt->p_type == PT_GNU_STACK) {
691                         if (elf_ppnt->p_flags & PF_X)
692                                 executable_stack = EXSTACK_ENABLE_X;
693                         else
694                                 executable_stack = EXSTACK_DISABLE_X;
695                         break;
696                 }
697
698         /* Some simple consistency checks for the interpreter */
699         if (elf_interpreter) {
700                 retval = -ELIBBAD;
701                 /* Not an ELF interpreter */
702                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
703                         goto out_free_dentry;
704                 /* Verify the interpreter has a valid arch */
705                 if (!elf_check_arch(&loc->interp_elf_ex))
706                         goto out_free_dentry;
707         }
708
709         /* Flush all traces of the currently running executable */
710         retval = flush_old_exec(bprm);
711         if (retval)
712                 goto out_free_dentry;
713
714         /* OK, This is the point of no return */
715         current->mm->def_flags = def_flags;
716
717         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
718            may depend on the personality.  */
719         SET_PERSONALITY(loc->elf_ex);
720         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
721                 current->personality |= READ_IMPLIES_EXEC;
722
723         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
724                 current->flags |= PF_RANDOMIZE;
725
726         setup_new_exec(bprm);
727
728         /* Do this so that we can load the interpreter, if need be.  We will
729            change some of these later */
730         current->mm->free_area_cache = current->mm->mmap_base;
731         current->mm->cached_hole_size = 0;
732         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
733                                  executable_stack);
734         if (retval < 0) {
735                 send_sig(SIGKILL, current, 0);
736                 goto out_free_dentry;
737         }
738         
739         current->mm->start_stack = bprm->p;
740
741         /* Now we do a little grungy work by mmapping the ELF image into
742            the correct location in memory. */
743         for(i = 0, elf_ppnt = elf_phdata;
744             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
745                 int elf_prot = 0, elf_flags;
746                 unsigned long k, vaddr;
747
748                 if (elf_ppnt->p_type != PT_LOAD)
749                         continue;
750
751                 if (unlikely (elf_brk > elf_bss)) {
752                         unsigned long nbyte;
753                     
754                         /* There was a PT_LOAD segment with p_memsz > p_filesz
755                            before this one. Map anonymous pages, if needed,
756                            and clear the area.  */
757                         retval = set_brk(elf_bss + load_bias,
758                                          elf_brk + load_bias);
759                         if (retval) {
760                                 send_sig(SIGKILL, current, 0);
761                                 goto out_free_dentry;
762                         }
763                         nbyte = ELF_PAGEOFFSET(elf_bss);
764                         if (nbyte) {
765                                 nbyte = ELF_MIN_ALIGN - nbyte;
766                                 if (nbyte > elf_brk - elf_bss)
767                                         nbyte = elf_brk - elf_bss;
768                                 if (clear_user((void __user *)elf_bss +
769                                                         load_bias, nbyte)) {
770                                         /*
771                                          * This bss-zeroing can fail if the ELF
772                                          * file specifies odd protections. So
773                                          * we don't check the return value
774                                          */
775                                 }
776                         }
777                 }
778
779                 if (elf_ppnt->p_flags & PF_R)
780                         elf_prot |= PROT_READ;
781                 if (elf_ppnt->p_flags & PF_W)
782                         elf_prot |= PROT_WRITE;
783                 if (elf_ppnt->p_flags & PF_X)
784                         elf_prot |= PROT_EXEC;
785
786                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
787
788                 vaddr = elf_ppnt->p_vaddr;
789                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
790                         elf_flags |= MAP_FIXED;
791                 } else if (loc->elf_ex.e_type == ET_DYN) {
792                         /* Try and get dynamic programs out of the way of the
793                          * default mmap base, as well as whatever program they
794                          * might try to exec.  This is because the brk will
795                          * follow the loader, and is not movable.  */
796 #ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
797                         /* Memory randomization might have been switched off
798                          * in runtime via sysctl.
799                          * If that is the case, retain the original non-zero
800                          * load_bias value in order to establish proper
801                          * non-randomized mappings.
802                          */
803                         if (current->flags & PF_RANDOMIZE)
804                                 load_bias = 0;
805                         else
806                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
807 #else
808                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
809 #endif
810                 }
811
812                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
813                                 elf_prot, elf_flags, 0);
814                 if (BAD_ADDR(error)) {
815                         send_sig(SIGKILL, current, 0);
816                         retval = IS_ERR((void *)error) ?
817                                 PTR_ERR((void*)error) : -EINVAL;
818                         goto out_free_dentry;
819                 }
820
821                 if (!load_addr_set) {
822                         load_addr_set = 1;
823                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
824                         if (loc->elf_ex.e_type == ET_DYN) {
825                                 load_bias += error -
826                                              ELF_PAGESTART(load_bias + vaddr);
827                                 load_addr += load_bias;
828                                 reloc_func_desc = load_bias;
829                         }
830                 }
831                 k = elf_ppnt->p_vaddr;
832                 if (k < start_code)
833                         start_code = k;
834                 if (start_data < k)
835                         start_data = k;
836
837                 /*
838                  * Check to see if the section's size will overflow the
839                  * allowed task size. Note that p_filesz must always be
840                  * <= p_memsz so it is only necessary to check p_memsz.
841                  */
842                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
843                     elf_ppnt->p_memsz > TASK_SIZE ||
844                     TASK_SIZE - elf_ppnt->p_memsz < k) {
845                         /* set_brk can never work. Avoid overflows. */
846                         send_sig(SIGKILL, current, 0);
847                         retval = -EINVAL;
848                         goto out_free_dentry;
849                 }
850
851                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
852
853                 if (k > elf_bss)
854                         elf_bss = k;
855                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
856                         end_code = k;
857                 if (end_data < k)
858                         end_data = k;
859                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
860                 if (k > elf_brk)
861                         elf_brk = k;
862         }
863
864         loc->elf_ex.e_entry += load_bias;
865         elf_bss += load_bias;
866         elf_brk += load_bias;
867         start_code += load_bias;
868         end_code += load_bias;
869         start_data += load_bias;
870         end_data += load_bias;
871
872         /* Calling set_brk effectively mmaps the pages that we need
873          * for the bss and break sections.  We must do this before
874          * mapping in the interpreter, to make sure it doesn't wind
875          * up getting placed where the bss needs to go.
876          */
877         retval = set_brk(elf_bss, elf_brk);
878         if (retval) {
879                 send_sig(SIGKILL, current, 0);
880                 goto out_free_dentry;
881         }
882         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
883                 send_sig(SIGSEGV, current, 0);
884                 retval = -EFAULT; /* Nobody gets to see this, but.. */
885                 goto out_free_dentry;
886         }
887
888         if (elf_interpreter) {
889                 unsigned long uninitialized_var(interp_map_addr);
890
891                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
892                                             interpreter,
893                                             &interp_map_addr,
894                                             load_bias);
895                 if (!IS_ERR((void *)elf_entry)) {
896                         /*
897                          * load_elf_interp() returns relocation
898                          * adjustment
899                          */
900                         interp_load_addr = elf_entry;
901                         elf_entry += loc->interp_elf_ex.e_entry;
902                 }
903                 if (BAD_ADDR(elf_entry)) {
904                         force_sig(SIGSEGV, current);
905                         retval = IS_ERR((void *)elf_entry) ?
906                                         (int)elf_entry : -EINVAL;
907                         goto out_free_dentry;
908                 }
909                 reloc_func_desc = interp_load_addr;
910
911                 allow_write_access(interpreter);
912                 fput(interpreter);
913                 kfree(elf_interpreter);
914         } else {
915                 elf_entry = loc->elf_ex.e_entry;
916                 if (BAD_ADDR(elf_entry)) {
917                         force_sig(SIGSEGV, current);
918                         retval = -EINVAL;
919                         goto out_free_dentry;
920                 }
921         }
922
923         kfree(elf_phdata);
924
925         set_binfmt(&elf_format);
926
927 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
928         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
929         if (retval < 0) {
930                 send_sig(SIGKILL, current, 0);
931                 goto out;
932         }
933 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
934
935         install_exec_creds(bprm);
936         retval = create_elf_tables(bprm, &loc->elf_ex,
937                           load_addr, interp_load_addr);
938         if (retval < 0) {
939                 send_sig(SIGKILL, current, 0);
940                 goto out;
941         }
942         /* N.B. passed_fileno might not be initialized? */
943         current->mm->end_code = end_code;
944         current->mm->start_code = start_code;
945         current->mm->start_data = start_data;
946         current->mm->end_data = end_data;
947         current->mm->start_stack = bprm->p;
948
949 #ifdef arch_randomize_brk
950         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
951                 current->mm->brk = current->mm->start_brk =
952                         arch_randomize_brk(current->mm);
953 #ifdef CONFIG_COMPAT_BRK
954                 current->brk_randomized = 1;
955 #endif
956         }
957 #endif
958
959         if (current->personality & MMAP_PAGE_ZERO) {
960                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
961                    and some applications "depend" upon this behavior.
962                    Since we do not have the power to recompile these, we
963                    emulate the SVr4 behavior. Sigh. */
964                 down_write(&current->mm->mmap_sem);
965                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
966                                 MAP_FIXED | MAP_PRIVATE, 0);
967                 up_write(&current->mm->mmap_sem);
968         }
969
970 #ifdef ELF_PLAT_INIT
971         /*
972          * The ABI may specify that certain registers be set up in special
973          * ways (on i386 %edx is the address of a DT_FINI function, for
974          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
975          * that the e_entry field is the address of the function descriptor
976          * for the startup routine, rather than the address of the startup
977          * routine itself.  This macro performs whatever initialization to
978          * the regs structure is required as well as any relocations to the
979          * function descriptor entries when executing dynamically links apps.
980          */
981         ELF_PLAT_INIT(regs, reloc_func_desc);
982 #endif
983
984         start_thread(regs, elf_entry, bprm->p);
985         retval = 0;
986 out:
987         kfree(loc);
988 out_ret:
989         return retval;
990
991         /* error cleanup */
992 out_free_dentry:
993         allow_write_access(interpreter);
994         if (interpreter)
995                 fput(interpreter);
996 out_free_interp:
997         kfree(elf_interpreter);
998 out_free_ph:
999         kfree(elf_phdata);
1000         goto out;
1001 }
1002
1003 /* This is really simpleminded and specialized - we are loading an
1004    a.out library that is given an ELF header. */
1005 static int load_elf_library(struct file *file)
1006 {
1007         struct elf_phdr *elf_phdata;
1008         struct elf_phdr *eppnt;
1009         unsigned long elf_bss, bss, len;
1010         int retval, error, i, j;
1011         struct elfhdr elf_ex;
1012
1013         error = -ENOEXEC;
1014         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1015         if (retval != sizeof(elf_ex))
1016                 goto out;
1017
1018         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1019                 goto out;
1020
1021         /* First of all, some simple consistency checks */
1022         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1023             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1024                 goto out;
1025
1026         /* Now read in all of the header information */
1027
1028         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1029         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1030
1031         error = -ENOMEM;
1032         elf_phdata = kmalloc(j, GFP_KERNEL);
1033         if (!elf_phdata)
1034                 goto out;
1035
1036         eppnt = elf_phdata;
1037         error = -ENOEXEC;
1038         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1039         if (retval != j)
1040                 goto out_free_ph;
1041
1042         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1043                 if ((eppnt + i)->p_type == PT_LOAD)
1044                         j++;
1045         if (j != 1)
1046                 goto out_free_ph;
1047
1048         while (eppnt->p_type != PT_LOAD)
1049                 eppnt++;
1050
1051         /* Now use mmap to map the library into memory. */
1052         down_write(&current->mm->mmap_sem);
1053         error = do_mmap(file,
1054                         ELF_PAGESTART(eppnt->p_vaddr),
1055                         (eppnt->p_filesz +
1056                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1057                         PROT_READ | PROT_WRITE | PROT_EXEC,
1058                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1059                         (eppnt->p_offset -
1060                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1061         up_write(&current->mm->mmap_sem);
1062         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1063                 goto out_free_ph;
1064
1065         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1066         if (padzero(elf_bss)) {
1067                 error = -EFAULT;
1068                 goto out_free_ph;
1069         }
1070
1071         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1072                             ELF_MIN_ALIGN - 1);
1073         bss = eppnt->p_memsz + eppnt->p_vaddr;
1074         if (bss > len) {
1075                 down_write(&current->mm->mmap_sem);
1076                 do_brk(len, bss - len);
1077                 up_write(&current->mm->mmap_sem);
1078         }
1079         error = 0;
1080
1081 out_free_ph:
1082         kfree(elf_phdata);
1083 out:
1084         return error;
1085 }
1086
1087 #ifdef CONFIG_ELF_CORE
1088 /*
1089  * ELF core dumper
1090  *
1091  * Modelled on fs/exec.c:aout_core_dump()
1092  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1093  */
1094
1095 /*
1096  * The purpose of always_dump_vma() is to make sure that special kernel mappings
1097  * that are useful for post-mortem analysis are included in every core dump.
1098  * In that way we ensure that the core dump is fully interpretable later
1099  * without matching up the same kernel and hardware config to see what PC values
1100  * meant. These special mappings include - vDSO, vsyscall, and other
1101  * architecture specific mappings
1102  */
1103 static bool always_dump_vma(struct vm_area_struct *vma)
1104 {
1105         /* Any vsyscall mappings? */
1106         if (vma == get_gate_vma(vma->vm_mm))
1107                 return true;
1108         /*
1109          * arch_vma_name() returns non-NULL for special architecture mappings,
1110          * such as vDSO sections.
1111          */
1112         if (arch_vma_name(vma))
1113                 return true;
1114
1115         return false;
1116 }
1117
1118 /*
1119  * Decide what to dump of a segment, part, all or none.
1120  */
1121 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1122                                    unsigned long mm_flags)
1123 {
1124 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1125
1126         /* always dump the vdso and vsyscall sections */
1127         if (always_dump_vma(vma))
1128                 goto whole;
1129
1130         if (vma->vm_flags & VM_NODUMP)
1131                 return 0;
1132
1133         /* Hugetlb memory check */
1134         if (vma->vm_flags & VM_HUGETLB) {
1135                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1136                         goto whole;
1137                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1138                         goto whole;
1139         }
1140
1141         /* Do not dump I/O mapped devices or special mappings */
1142         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1143                 return 0;
1144
1145         /* By default, dump shared memory if mapped from an anonymous file. */
1146         if (vma->vm_flags & VM_SHARED) {
1147                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1148                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1149                         goto whole;
1150                 return 0;
1151         }
1152
1153         /* Dump segments that have been written to.  */
1154         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1155                 goto whole;
1156         if (vma->vm_file == NULL)
1157                 return 0;
1158
1159         if (FILTER(MAPPED_PRIVATE))
1160                 goto whole;
1161
1162         /*
1163          * If this looks like the beginning of a DSO or executable mapping,
1164          * check for an ELF header.  If we find one, dump the first page to
1165          * aid in determining what was mapped here.
1166          */
1167         if (FILTER(ELF_HEADERS) &&
1168             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1169                 u32 __user *header = (u32 __user *) vma->vm_start;
1170                 u32 word;
1171                 mm_segment_t fs = get_fs();
1172                 /*
1173                  * Doing it this way gets the constant folded by GCC.
1174                  */
1175                 union {
1176                         u32 cmp;
1177                         char elfmag[SELFMAG];
1178                 } magic;
1179                 BUILD_BUG_ON(SELFMAG != sizeof word);
1180                 magic.elfmag[EI_MAG0] = ELFMAG0;
1181                 magic.elfmag[EI_MAG1] = ELFMAG1;
1182                 magic.elfmag[EI_MAG2] = ELFMAG2;
1183                 magic.elfmag[EI_MAG3] = ELFMAG3;
1184                 /*
1185                  * Switch to the user "segment" for get_user(),
1186                  * then put back what elf_core_dump() had in place.
1187                  */
1188                 set_fs(USER_DS);
1189                 if (unlikely(get_user(word, header)))
1190                         word = 0;
1191                 set_fs(fs);
1192                 if (word == magic.cmp)
1193                         return PAGE_SIZE;
1194         }
1195
1196 #undef  FILTER
1197
1198         return 0;
1199
1200 whole:
1201         return vma->vm_end - vma->vm_start;
1202 }
1203
/*
 * An ELF note held in memory before it is written to the core file.
 * The note does not own a copy of @name; it only stores the pointer.
 */
struct memelfnote {
        const char *name;       /* note name, NUL-terminated string */
        int type;               /* note type (NT_* value) */
        unsigned int datasz;    /* size of the payload at @data, in bytes */
        void *data;             /* note payload */
};
1212
1213 static int notesize(struct memelfnote *en)
1214 {
1215         int sz;
1216
1217         sz = sizeof(struct elf_note);
1218         sz += roundup(strlen(en->name) + 1, 4);
1219         sz += roundup(en->datasz, 4);
1220
1221         return sz;
1222 }
1223
1224 #define DUMP_WRITE(addr, nr, foffset)   \
1225         do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)
1226
1227 static int alignfile(struct file *file, loff_t *foffset)
1228 {
1229         static const char buf[4] = { 0, };
1230         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1231         return 1;
1232 }
1233
1234 static int writenote(struct memelfnote *men, struct file *file,
1235                         loff_t *foffset)
1236 {
1237         struct elf_note en;
1238         en.n_namesz = strlen(men->name) + 1;
1239         en.n_descsz = men->datasz;
1240         en.n_type = men->type;
1241
1242         DUMP_WRITE(&en, sizeof(en), foffset);
1243         DUMP_WRITE(men->name, en.n_namesz, foffset);
1244         if (!alignfile(file, foffset))
1245                 return 0;
1246         DUMP_WRITE(men->data, men->datasz, foffset);
1247         if (!alignfile(file, foffset))
1248                 return 0;
1249
1250         return 1;
1251 }
1252 #undef DUMP_WRITE
1253
1254 static void fill_elf_header(struct elfhdr *elf, int segs,
1255                             u16 machine, u32 flags, u8 osabi)
1256 {
1257         memset(elf, 0, sizeof(*elf));
1258
1259         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1260         elf->e_ident[EI_CLASS] = ELF_CLASS;
1261         elf->e_ident[EI_DATA] = ELF_DATA;
1262         elf->e_ident[EI_VERSION] = EV_CURRENT;
1263         elf->e_ident[EI_OSABI] = ELF_OSABI;
1264
1265         elf->e_type = ET_CORE;
1266         elf->e_machine = machine;
1267         elf->e_version = EV_CURRENT;
1268         elf->e_phoff = sizeof(struct elfhdr);
1269         elf->e_flags = flags;
1270         elf->e_ehsize = sizeof(struct elfhdr);
1271         elf->e_phentsize = sizeof(struct elf_phdr);
1272         elf->e_phnum = segs;
1273
1274         return;
1275 }
1276
1277 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1278 {
1279         phdr->p_type = PT_NOTE;
1280         phdr->p_offset = offset;
1281         phdr->p_vaddr = 0;
1282         phdr->p_paddr = 0;
1283         phdr->p_filesz = sz;
1284         phdr->p_memsz = 0;
1285         phdr->p_flags = 0;
1286         phdr->p_align = 0;
1287         return;
1288 }
1289
1290 static void fill_note(struct memelfnote *note, const char *name, int type, 
1291                 unsigned int sz, void *data)
1292 {
1293         note->name = name;
1294         note->type = type;
1295         note->datasz = sz;
1296         note->data = data;
1297         return;
1298 }
1299
1300 /*
1301  * fill up all the fields in prstatus from the given task struct, except
1302  * registers which need to be filled up separately.
1303  */
1304 static void fill_prstatus(struct elf_prstatus *prstatus,
1305                 struct task_struct *p, long signr)
1306 {
1307         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1308         prstatus->pr_sigpend = p->pending.signal.sig[0];
1309         prstatus->pr_sighold = p->blocked.sig[0];
1310         rcu_read_lock();
1311         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1312         rcu_read_unlock();
1313         prstatus->pr_pid = task_pid_vnr(p);
1314         prstatus->pr_pgrp = task_pgrp_vnr(p);
1315         prstatus->pr_sid = task_session_vnr(p);
1316         if (thread_group_leader(p)) {
1317                 struct task_cputime cputime;
1318
1319                 /*
1320                  * This is the record for the group leader.  It shows the
1321                  * group-wide total, not its individual thread total.
1322                  */
1323                 thread_group_cputime(p, &cputime);
1324                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1325                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1326         } else {
1327                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1328                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1329         }
1330         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1331         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1332 }
1333
1334 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1335                        struct mm_struct *mm)
1336 {
1337         const struct cred *cred;
1338         unsigned int i, len;
1339         
1340         /* first copy the parameters from user space */
1341         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1342
1343         len = mm->arg_end - mm->arg_start;
1344         if (len >= ELF_PRARGSZ)
1345                 len = ELF_PRARGSZ-1;
1346         if (copy_from_user(&psinfo->pr_psargs,
1347                            (const char __user *)mm->arg_start, len))
1348                 return -EFAULT;
1349         for(i = 0; i < len; i++)
1350                 if (psinfo->pr_psargs[i] == 0)
1351                         psinfo->pr_psargs[i] = ' ';
1352         psinfo->pr_psargs[len] = 0;
1353
1354         rcu_read_lock();
1355         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1356         rcu_read_unlock();
1357         psinfo->pr_pid = task_pid_vnr(p);
1358         psinfo->pr_pgrp = task_pgrp_vnr(p);
1359         psinfo->pr_sid = task_session_vnr(p);
1360
1361         i = p->state ? ffz(~p->state) + 1 : 0;
1362         psinfo->pr_state = i;
1363         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1364         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1365         psinfo->pr_nice = task_nice(p);
1366         psinfo->pr_flag = p->flags;
1367         rcu_read_lock();
1368         cred = __task_cred(p);
1369         SET_UID(psinfo->pr_uid, cred->uid);
1370         SET_GID(psinfo->pr_gid, cred->gid);
1371         rcu_read_unlock();
1372         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1373         
1374         return 0;
1375 }
1376
1377 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1378 {
1379         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1380         int i = 0;
1381         do
1382                 i += 2;
1383         while (auxv[i - 2] != AT_NULL);
1384         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1385 }
1386
1387 #ifdef CORE_DUMP_USE_REGSET
1388 #include <linux/regset.h>
1389
1390 struct elf_thread_core_info {
1391         struct elf_thread_core_info *next;
1392         struct task_struct *task;
1393         struct elf_prstatus prstatus;
1394         struct memelfnote notes[0];
1395 };
1396
1397 struct elf_note_info {
1398         struct elf_thread_core_info *thread;
1399         struct memelfnote psinfo;
1400         struct memelfnote auxv;
1401         size_t size;
1402         int thread_notes;
1403 };
1404
1405 /*
1406  * When a regset has a writeback hook, we call it on each thread before
1407  * dumping user memory.  On register window machines, this makes sure the
1408  * user memory backing the register data is up to date before we read it.
1409  */
1410 static void do_thread_regset_writeback(struct task_struct *task,
1411                                        const struct user_regset *regset)
1412 {
1413         if (regset->writeback)
1414                 regset->writeback(task, regset, 1);
1415 }
1416
1417 static int fill_thread_core_info(struct elf_thread_core_info *t,
1418                                  const struct user_regset_view *view,
1419                                  long signr, size_t *total)
1420 {
1421         unsigned int i;
1422
1423         /*
1424          * NT_PRSTATUS is the one special case, because the regset data
1425          * goes into the pr_reg field inside the note contents, rather
1426          * than being the whole note contents.  We fill the reset in here.
1427          * We assume that regset 0 is NT_PRSTATUS.
1428          */
1429         fill_prstatus(&t->prstatus, t->task, signr);
1430         (void) view->regsets[0].get(t->task, &view->regsets[0],
1431                                     0, sizeof(t->prstatus.pr_reg),
1432                                     &t->prstatus.pr_reg, NULL);
1433
1434         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1435                   sizeof(t->prstatus), &t->prstatus);
1436         *total += notesize(&t->notes[0]);
1437
1438         do_thread_regset_writeback(t->task, &view->regsets[0]);
1439
1440         /*
1441          * Each other regset might generate a note too.  For each regset
1442          * that has no core_note_type or is inactive, we leave t->notes[i]
1443          * all zero and we'll know to skip writing it later.
1444          */
1445         for (i = 1; i < view->n; ++i) {
1446                 const struct user_regset *regset = &view->regsets[i];
1447                 do_thread_regset_writeback(t->task, regset);
1448                 if (regset->core_note_type && regset->get &&
1449                     (!regset->active || regset->active(t->task, regset))) {
1450                         int ret;
1451                         size_t size = regset->n * regset->size;
1452                         void *data = kmalloc(size, GFP_KERNEL);
1453                         if (unlikely(!data))
1454                                 return 0;
1455                         ret = regset->get(t->task, regset,
1456                                           0, size, data, NULL);
1457                         if (unlikely(ret))
1458                                 kfree(data);
1459                         else {
1460                                 if (regset->core_note_type != NT_PRFPREG)
1461                                         fill_note(&t->notes[i], "LINUX",
1462                                                   regset->core_note_type,
1463                                                   size, data);
1464                                 else {
1465                                         t->prstatus.pr_fpvalid = 1;
1466                                         fill_note(&t->notes[i], "CORE",
1467                                                   NT_PRFPREG, size, data);
1468                                 }
1469                                 *total += notesize(&t->notes[i]);
1470                         }
1471                 }
1472         }
1473
1474         return 1;
1475 }
1476
1477 static int fill_note_info(struct elfhdr *elf, int phdrs,
1478                           struct elf_note_info *info,
1479                           long signr, struct pt_regs *regs)
1480 {
1481         struct task_struct *dump_task = current;
1482         const struct user_regset_view *view = task_user_regset_view(dump_task);
1483         struct elf_thread_core_info *t;
1484         struct elf_prpsinfo *psinfo;
1485         struct core_thread *ct;
1486         unsigned int i;
1487
1488         info->size = 0;
1489         info->thread = NULL;
1490
1491         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1492         if (psinfo == NULL)
1493                 return 0;
1494
1495         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1496
1497         /*
1498          * Figure out how many notes we're going to need for each thread.
1499          */
1500         info->thread_notes = 0;
1501         for (i = 0; i < view->n; ++i)
1502                 if (view->regsets[i].core_note_type != 0)
1503                         ++info->thread_notes;
1504
1505         /*
1506          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1507          * since it is our one special case.
1508          */
1509         if (unlikely(info->thread_notes == 0) ||
1510             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1511                 WARN_ON(1);
1512                 return 0;
1513         }
1514
1515         /*
1516          * Initialize the ELF file header.
1517          */
1518         fill_elf_header(elf, phdrs,
1519                         view->e_machine, view->e_flags, view->ei_osabi);
1520
1521         /*
1522          * Allocate a structure for each thread.
1523          */
1524         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1525                 t = kzalloc(offsetof(struct elf_thread_core_info,
1526                                      notes[info->thread_notes]),
1527                             GFP_KERNEL);
1528                 if (unlikely(!t))
1529                         return 0;
1530
1531                 t->task = ct->task;
1532                 if (ct->task == dump_task || !info->thread) {
1533                         t->next = info->thread;
1534                         info->thread = t;
1535                 } else {
1536                         /*
1537                          * Make sure to keep the original task at
1538                          * the head of the list.
1539                          */
1540                         t->next = info->thread->next;
1541                         info->thread->next = t;
1542                 }
1543         }
1544
1545         /*
1546          * Now fill in each thread's information.
1547          */
1548         for (t = info->thread; t != NULL; t = t->next)
1549                 if (!fill_thread_core_info(t, view, signr, &info->size))
1550                         return 0;
1551
1552         /*
1553          * Fill in the two process-wide notes.
1554          */
1555         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1556         info->size += notesize(&info->psinfo);
1557
1558         fill_auxv_note(&info->auxv, current->mm);
1559         info->size += notesize(&info->auxv);
1560
1561         return 1;
1562 }
1563
1564 static size_t get_note_info_size(struct elf_note_info *info)
1565 {
1566         return info->size;
1567 }
1568
1569 /*
1570  * Write all the notes for each thread.  When writing the first thread, the
1571  * process-wide notes are interleaved after the first thread-specific note.
1572  */
1573 static int write_note_info(struct elf_note_info *info,
1574                            struct file *file, loff_t *foffset)
1575 {
1576         bool first = 1;
1577         struct elf_thread_core_info *t = info->thread;
1578
1579         do {
1580                 int i;
1581
1582                 if (!writenote(&t->notes[0], file, foffset))
1583                         return 0;
1584
1585                 if (first && !writenote(&info->psinfo, file, foffset))
1586                         return 0;
1587                 if (first && !writenote(&info->auxv, file, foffset))
1588                         return 0;
1589
1590                 for (i = 1; i < info->thread_notes; ++i)
1591                         if (t->notes[i].data &&
1592                             !writenote(&t->notes[i], file, foffset))
1593                                 return 0;
1594
1595                 first = 0;
1596                 t = t->next;
1597         } while (t);
1598
1599         return 1;
1600 }
1601
1602 static void free_note_info(struct elf_note_info *info)
1603 {
1604         struct elf_thread_core_info *threads = info->thread;
1605         while (threads) {
1606                 unsigned int i;
1607                 struct elf_thread_core_info *t = threads;
1608                 threads = t->next;
1609                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1610                 for (i = 1; i < info->thread_notes; ++i)
1611                         kfree(t->notes[i].data);
1612                 kfree(t);
1613         }
1614         kfree(info->psinfo.data);
1615 }
1616
1617 #else
1618
1619 /* Here is the structure in which status of each thread is captured. */
1620 struct elf_thread_status
1621 {
1622         struct list_head list;
1623         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1624         elf_fpregset_t fpu;             /* NT_PRFPREG */
1625         struct task_struct *thread;
1626 #ifdef ELF_CORE_COPY_XFPREGS
1627         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1628 #endif
1629         struct memelfnote notes[3];
1630         int num_notes;
1631 };
1632
1633 /*
1634  * In order to add the specific thread information for the elf file format,
1635  * we need to keep a linked list of every threads pr_status and then create
1636  * a single section for them in the final core file.
1637  */
1638 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1639 {
1640         int sz = 0;
1641         struct task_struct *p = t->thread;
1642         t->num_notes = 0;
1643
1644         fill_prstatus(&t->prstatus, p, signr);
1645         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1646         
1647         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1648                   &(t->prstatus));
1649         t->num_notes++;
1650         sz += notesize(&t->notes[0]);
1651
1652         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1653                                                                 &t->fpu))) {
1654                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1655                           &(t->fpu));
1656                 t->num_notes++;
1657                 sz += notesize(&t->notes[1]);
1658         }
1659
1660 #ifdef ELF_CORE_COPY_XFPREGS
1661         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1662                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1663                           sizeof(t->xfpu), &t->xfpu);
1664                 t->num_notes++;
1665                 sz += notesize(&t->notes[2]);
1666         }
1667 #endif  
1668         return sz;
1669 }
1670
1671 struct elf_note_info {
1672         struct memelfnote *notes;
1673         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1674         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1675         struct list_head thread_list;
1676         elf_fpregset_t *fpu;
1677 #ifdef ELF_CORE_COPY_XFPREGS
1678         elf_fpxregset_t *xfpu;
1679 #endif
1680         int thread_status_size;
1681         int numnote;
1682 };
1683
1684 static int elf_note_info_init(struct elf_note_info *info)
1685 {
1686         memset(info, 0, sizeof(*info));
1687         INIT_LIST_HEAD(&info->thread_list);
1688
1689         /* Allocate space for six ELF notes */
1690         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1691         if (!info->notes)
1692                 return 0;
1693         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1694         if (!info->psinfo)
1695                 goto notes_free;
1696         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1697         if (!info->prstatus)
1698                 goto psinfo_free;
1699         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1700         if (!info->fpu)
1701                 goto prstatus_free;
1702 #ifdef ELF_CORE_COPY_XFPREGS
1703         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1704         if (!info->xfpu)
1705                 goto fpu_free;
1706 #endif
1707         return 1;
1708 #ifdef ELF_CORE_COPY_XFPREGS
1709  fpu_free:
1710         kfree(info->fpu);
1711 #endif
1712  prstatus_free:
1713         kfree(info->prstatus);
1714  psinfo_free:
1715         kfree(info->psinfo);
1716  notes_free:
1717         kfree(info->notes);
1718         return 0;
1719 }
1720
1721 static int fill_note_info(struct elfhdr *elf, int phdrs,
1722                           struct elf_note_info *info,
1723                           long signr, struct pt_regs *regs)
1724 {
1725         struct list_head *t;
1726
1727         if (!elf_note_info_init(info))
1728                 return 0;
1729
1730         if (signr) {
1731                 struct core_thread *ct;
1732                 struct elf_thread_status *ets;
1733
1734                 for (ct = current->mm->core_state->dumper.next;
1735                                                 ct; ct = ct->next) {
1736                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1737                         if (!ets)
1738                                 return 0;
1739
1740                         ets->thread = ct->task;
1741                         list_add(&ets->list, &info->thread_list);
1742                 }
1743
1744                 list_for_each(t, &info->thread_list) {
1745                         int sz;
1746
1747                         ets = list_entry(t, struct elf_thread_status, list);
1748                         sz = elf_dump_thread_status(signr, ets);
1749                         info->thread_status_size += sz;
1750                 }
1751         }
1752         /* now collect the dump for the current */
1753         memset(info->prstatus, 0, sizeof(*info->prstatus));
1754         fill_prstatus(info->prstatus, current, signr);
1755         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1756
1757         /* Set up header */
1758         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1759
1760         /*
1761          * Set up the notes in similar form to SVR4 core dumps made
1762          * with info from their /proc.
1763          */
1764
1765         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1766                   sizeof(*info->prstatus), info->prstatus);
1767         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1768         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1769                   sizeof(*info->psinfo), info->psinfo);
1770
1771         info->numnote = 2;
1772
1773         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1774
1775         /* Try to dump the FPU. */
1776         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1777                                                                info->fpu);
1778         if (info->prstatus->pr_fpvalid)
1779                 fill_note(info->notes + info->numnote++,
1780                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1781 #ifdef ELF_CORE_COPY_XFPREGS
1782         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1783                 fill_note(info->notes + info->numnote++,
1784                           "LINUX", ELF_CORE_XFPREG_TYPE,
1785                           sizeof(*info->xfpu), info->xfpu);
1786 #endif
1787
1788         return 1;
1789 }
1790
1791 static size_t get_note_info_size(struct elf_note_info *info)
1792 {
1793         int sz = 0;
1794         int i;
1795
1796         for (i = 0; i < info->numnote; i++)
1797                 sz += notesize(info->notes + i);
1798
1799         sz += info->thread_status_size;
1800
1801         return sz;
1802 }
1803
1804 static int write_note_info(struct elf_note_info *info,
1805                            struct file *file, loff_t *foffset)
1806 {
1807         int i;
1808         struct list_head *t;
1809
1810         for (i = 0; i < info->numnote; i++)
1811                 if (!writenote(info->notes + i, file, foffset))
1812                         return 0;
1813
1814         /* write out the thread status notes section */
1815         list_for_each(t, &info->thread_list) {
1816                 struct elf_thread_status *tmp =
1817                                 list_entry(t, struct elf_thread_status, list);
1818
1819                 for (i = 0; i < tmp->num_notes; i++)
1820                         if (!writenote(&tmp->notes[i], file, foffset))
1821                                 return 0;
1822         }
1823
1824         return 1;
1825 }
1826
1827 static void free_note_info(struct elf_note_info *info)
1828 {
1829         while (!list_empty(&info->thread_list)) {
1830                 struct list_head *tmp = info->thread_list.next;
1831                 list_del(tmp);
1832                 kfree(list_entry(tmp, struct elf_thread_status, list));
1833         }
1834
1835         kfree(info->prstatus);
1836         kfree(info->psinfo);
1837         kfree(info->notes);
1838         kfree(info->fpu);
1839 #ifdef ELF_CORE_COPY_XFPREGS
1840         kfree(info->xfpu);
1841 #endif
1842 }
1843
1844 #endif
1845
1846 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1847                                         struct vm_area_struct *gate_vma)
1848 {
1849         struct vm_area_struct *ret = tsk->mm->mmap;
1850
1851         if (ret)
1852                 return ret;
1853         return gate_vma;
1854 }
1855 /*
1856  * Helper function for iterating across a vma list.  It ensures that the caller
1857  * will visit `gate_vma' prior to terminating the search.
1858  */
1859 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1860                                         struct vm_area_struct *gate_vma)
1861 {
1862         struct vm_area_struct *ret;
1863
1864         ret = this_vma->vm_next;
1865         if (ret)
1866                 return ret;
1867         if (this_vma == gate_vma)
1868                 return NULL;
1869         return gate_vma;
1870 }
1871
1872 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1873                              elf_addr_t e_shoff, int segs)
1874 {
1875         elf->e_shoff = e_shoff;
1876         elf->e_shentsize = sizeof(*shdr4extnum);
1877         elf->e_shnum = 1;
1878         elf->e_shstrndx = SHN_UNDEF;
1879
1880         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1881
1882         shdr4extnum->sh_type = SHT_NULL;
1883         shdr4extnum->sh_size = elf->e_shnum;
1884         shdr4extnum->sh_link = elf->e_shstrndx;
1885         shdr4extnum->sh_info = segs;
1886 }
1887
1888 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1889                                      unsigned long mm_flags)
1890 {
1891         struct vm_area_struct *vma;
1892         size_t size = 0;
1893
1894         for (vma = first_vma(current, gate_vma); vma != NULL;
1895              vma = next_vma(vma, gate_vma))
1896                 size += vma_dump_size(vma, mm_flags);
1897         return size;
1898 }
1899
1900 /*
1901  * Actual dumper
1902  *
1903  * This is a two-pass process; first we find the offsets of the bits,
1904  * and then they are actually written out.  If we run out of core limit
1905  * we just truncate.
1906  */
1907 static int elf_core_dump(struct coredump_params *cprm)
1908 {
1909         int has_dumped = 0;
1910         mm_segment_t fs;
1911         int segs;
1912         size_t size = 0;
1913         struct vm_area_struct *vma, *gate_vma;
1914         struct elfhdr *elf = NULL;
1915         loff_t offset = 0, dataoff, foffset;
1916         struct elf_note_info info;
1917         struct elf_phdr *phdr4note = NULL;
1918         struct elf_shdr *shdr4extnum = NULL;
1919         Elf_Half e_phnum;
1920         elf_addr_t e_shoff;
1921
1922         /*
1923          * We no longer stop all VM operations.
1924          * 
1925          * This is because those proceses that could possibly change map_count
1926          * or the mmap / vma pages are now blocked in do_exit on current
1927          * finishing this core dump.
1928          *
1929          * Only ptrace can touch these memory addresses, but it doesn't change
1930          * the map_count or the pages allocated. So no possibility of crashing
1931          * exists while dumping the mm->vm_next areas to the core file.
1932          */
1933   
1934         /* alloc memory for large data structures: too large to be on stack */
1935         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1936         if (!elf)
1937                 goto out;
1938         /*
1939          * The number of segs are recored into ELF header as 16bit value.
1940          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1941          */
1942         segs = current->mm->map_count;
1943         segs += elf_core_extra_phdrs();
1944
1945         gate_vma = get_gate_vma(current->mm);
1946         if (gate_vma != NULL)
1947                 segs++;
1948
1949         /* for notes section */
1950         segs++;
1951
1952         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1953          * this, kernel supports extended numbering. Have a look at
1954          * include/linux/elf.h for further information. */
1955         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1956
1957         /*
1958          * Collect all the non-memory information about the process for the
1959          * notes.  This also sets up the file header.
1960          */
1961         if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1962                 goto cleanup;
1963
1964         has_dumped = 1;
1965         current->flags |= PF_DUMPCORE;
1966   
1967         fs = get_fs();
1968         set_fs(KERNEL_DS);
1969
1970         offset += sizeof(*elf);                         /* Elf header */
1971         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1972         foffset = offset;
1973
1974         /* Write notes phdr entry */
1975         {
1976                 size_t sz = get_note_info_size(&info);
1977
1978                 sz += elf_coredump_extra_notes_size();
1979
1980                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1981                 if (!phdr4note)
1982                         goto end_coredump;
1983
1984                 fill_elf_note_phdr(phdr4note, sz, offset);
1985                 offset += sz;
1986         }
1987
1988         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1989
1990         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1991         offset += elf_core_extra_data_size();
1992         e_shoff = offset;
1993
1994         if (e_phnum == PN_XNUM) {
1995                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1996                 if (!shdr4extnum)
1997                         goto end_coredump;
1998                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1999         }
2000
2001         offset = dataoff;
2002
2003         size += sizeof(*elf);
2004         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2005                 goto end_coredump;
2006
2007         size += sizeof(*phdr4note);
2008         if (size > cprm->limit
2009             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2010                 goto end_coredump;
2011
2012         /* Write program headers for segments dump */
2013         for (vma = first_vma(current, gate_vma); vma != NULL;
2014                         vma = next_vma(vma, gate_vma)) {
2015                 struct elf_phdr phdr;
2016
2017                 phdr.p_type = PT_LOAD;
2018                 phdr.p_offset = offset;
2019                 phdr.p_vaddr = vma->vm_start;
2020                 phdr.p_paddr = 0;
2021                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2022                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2023                 offset += phdr.p_filesz;
2024                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2025                 if (vma->vm_flags & VM_WRITE)
2026                         phdr.p_flags |= PF_W;
2027                 if (vma->vm_flags & VM_EXEC)
2028                         phdr.p_flags |= PF_X;
2029                 phdr.p_align = ELF_EXEC_PAGESIZE;
2030
2031                 size += sizeof(phdr);
2032                 if (size > cprm->limit
2033                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2034                         goto end_coredump;
2035         }
2036
2037         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2038                 goto end_coredump;
2039
2040         /* write out the notes section */
2041         if (!write_note_info(&info, cprm->file, &foffset))
2042                 goto end_coredump;
2043
2044         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2045                 goto end_coredump;
2046
2047         /* Align to page */
2048         if (!dump_seek(cprm->file, dataoff - foffset))
2049                 goto end_coredump;
2050
2051         for (vma = first_vma(current, gate_vma); vma != NULL;
2052                         vma = next_vma(vma, gate_vma)) {
2053                 unsigned long addr;
2054                 unsigned long end;
2055
2056                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2057
2058                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2059                         struct page *page;
2060                         int stop;
2061
2062                         page = get_dump_page(addr);
2063                         if (page) {
2064                                 void *kaddr = kmap(page);
2065                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2066                                         !dump_write(cprm->file, kaddr,
2067                                                     PAGE_SIZE);
2068                                 kunmap(page);
2069                                 page_cache_release(page);
2070                         } else
2071                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2072                         if (stop)
2073                                 goto end_coredump;
2074                 }
2075         }
2076
2077         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2078                 goto end_coredump;
2079
2080         if (e_phnum == PN_XNUM) {
2081                 size += sizeof(*shdr4extnum);
2082                 if (size > cprm->limit
2083                     || !dump_write(cprm->file, shdr4extnum,
2084                                    sizeof(*shdr4extnum)))
2085                         goto end_coredump;
2086         }
2087
2088 end_coredump:
2089         set_fs(fs);
2090
2091 cleanup:
2092         free_note_info(&info);
2093         kfree(shdr4extnum);
2094         kfree(phdr4note);
2095         kfree(elf);
2096 out:
2097         return has_dumped;
2098 }
2099
2100 #endif          /* CONFIG_ELF_CORE */
2101
2102 static int __init init_elf_binfmt(void)
2103 {
2104         register_binfmt(&elf_format);
2105         return 0;
2106 }
2107
2108 static void __exit exit_elf_binfmt(void)
2109 {
2110         /* Remove the COFF and ELF loaders. */
2111         unregister_binfmt(&elf_format);
2112 }
2113
2114 core_initcall(init_elf_binfmt);
2115 module_exit(exit_elf_binfmt);
2116 MODULE_LICENSE("GPL");