brk: COMPAT_BRK: fix detection of randomized brk
[pandora-kernel.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
/*
 * Forward declarations: elf_format (below) refers to the two loader entry
 * points before they are defined, and elf_map() is used ahead of its
 * definition as well.
 */
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *file);
static unsigned long elf_map(struct file *filep, unsigned long addr,
			     struct elf_phdr *eppnt, int prot, int type,
			     unsigned long total_size);
43
/*
 * Without core dump support the binfmt's core_dump hook is NULL, so no
 * dump is ever attempted.
 */
#if defined(CONFIG_ELF_CORE)
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump	NULL
#endif

/* Mapping granularity: the larger of the ELF page size and PAGE_SIZE. */
#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN	ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN	PAGE_SIZE
#endif

/* Architectures may supply their own value; the default is 0. */
#if !defined(ELF_CORE_EFLAGS)
#define ELF_CORE_EFLAGS	0
#endif

/*
 * ELF_MIN_ALIGN arithmetic:
 *   ELF_PAGESTART  - round down to an ELF_MIN_ALIGN boundary
 *   ELF_PAGEOFFSET - offset within the ELF_MIN_ALIGN-sized page
 *   ELF_PAGEALIGN  - round up to an ELF_MIN_ALIGN boundary
 */
#define ELF_PAGESTART(_addr) ((_addr) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_addr) ((_addr) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_addr) (((_addr) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
67
68 static struct linux_binfmt elf_format = {
69         .module         = THIS_MODULE,
70         .load_binary    = load_elf_binary,
71         .load_shlib     = load_elf_library,
72         .core_dump      = elf_core_dump,
73         .min_coredump   = ELF_EXEC_PAGESIZE,
74 };
75
/*
 * True when @addr, taken as an unsigned long, is at or above TASK_SIZE.
 * Used in this file to detect failed mappings, whose return values are
 * negative errno codes and therefore very large unsigned numbers.
 */
#define BAD_ADDR(addr) ((unsigned long)(addr) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Stack manipulation helpers, so the code below reads the same whichever
 * way the stack grows:
 *
 *   STACK_ADD(sp, items)   - pointer @items elf_addr_t slots further into
 *                            the stack than @sp
 *   STACK_ROUND(sp, items) - address @items slots further into the stack
 *                            than @sp, rounded to a 16-byte boundary
 *   STACK_ALLOC(sp, len)   - reserve @len units on @sp (which is modified
 *                            in place) and evaluate to the start of the
 *                            reserved area
 *
 * Every macro argument is parenthesised so that expression arguments
 * (e.g. "a + b") expand with the intended precedence.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
	((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
	elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); (sp) += (len); \
	old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
	(((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * An architecture that defines ELF_BASE_PLATFORM (in asm/elf.h) gets that
 * string copied to the user stack in the same manner as AT_PLATFORM;
 * otherwise it defaults to NULL here and no such entry is emitted.
 */
#if !defined(ELF_BASE_PLATFORM)
#define ELF_BASE_PLATFORM NULL
#endif
135
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
256         sp = STACK_ADD(p, ei_index);
257
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 static unsigned long elf_map(struct file *filep, unsigned long addr,
319                 struct elf_phdr *eppnt, int prot, int type,
320                 unsigned long total_size)
321 {
322         unsigned long map_addr;
323         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
324         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
325         addr = ELF_PAGESTART(addr);
326         size = ELF_PAGEALIGN(size);
327
328         /* mmap() will return -EINVAL if given a zero size, but a
329          * segment with zero filesize is perfectly valid */
330         if (!size)
331                 return addr;
332
333         down_write(&current->mm->mmap_sem);
334         /*
335         * total_size is the size of the ELF (interpreter) image.
336         * The _first_ mmap needs to know the full size, otherwise
337         * randomization might put this image into an overlapping
338         * position with the ELF binary image. (since size < total_size)
339         * So we first map the 'big' image - and unmap the remainder at
340         * the end. (which unmap is needed for ELF images with holes.)
341         */
342         if (total_size) {
343                 total_size = ELF_PAGEALIGN(total_size);
344                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
345                 if (!BAD_ADDR(map_addr))
346                         do_munmap(current->mm, map_addr+size, total_size-size);
347         } else
348                 map_addr = do_mmap(filep, addr, size, prot, type, off);
349
350         up_write(&current->mm->mmap_sem);
351         return(map_addr);
352 }
353
354 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
355 {
356         int i, first_idx = -1, last_idx = -1;
357
358         for (i = 0; i < nr; i++) {
359                 if (cmds[i].p_type == PT_LOAD) {
360                         last_idx = i;
361                         if (first_idx == -1)
362                                 first_idx = i;
363                 }
364         }
365         if (first_idx == -1)
366                 return 0;
367
368         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
369                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
370 }
371
372
373 /* This is much more generalized than the library routine read function,
374    so we keep this separate.  Technically the library read function
375    is only provided so that we can read a.out libraries that have
376    an ELF header */
377
378 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
379                 struct file *interpreter, unsigned long *interp_map_addr,
380                 unsigned long no_base)
381 {
382         struct elf_phdr *elf_phdata;
383         struct elf_phdr *eppnt;
384         unsigned long load_addr = 0;
385         int load_addr_set = 0;
386         unsigned long last_bss = 0, elf_bss = 0;
387         unsigned long error = ~0UL;
388         unsigned long total_size;
389         int retval, i, size;
390
391         /* First of all, some simple consistency checks */
392         if (interp_elf_ex->e_type != ET_EXEC &&
393             interp_elf_ex->e_type != ET_DYN)
394                 goto out;
395         if (!elf_check_arch(interp_elf_ex))
396                 goto out;
397         if (!interpreter->f_op || !interpreter->f_op->mmap)
398                 goto out;
399
400         /*
401          * If the size of this structure has changed, then punt, since
402          * we will be doing the wrong thing.
403          */
404         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
405                 goto out;
406         if (interp_elf_ex->e_phnum < 1 ||
407                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
408                 goto out;
409
410         /* Now read in all of the header information */
411         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
412         if (size > ELF_MIN_ALIGN)
413                 goto out;
414         elf_phdata = kmalloc(size, GFP_KERNEL);
415         if (!elf_phdata)
416                 goto out;
417
418         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
419                              (char *)elf_phdata, size);
420         error = -EIO;
421         if (retval != size) {
422                 if (retval < 0)
423                         error = retval; 
424                 goto out_close;
425         }
426
427         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
428         if (!total_size) {
429                 error = -EINVAL;
430                 goto out_close;
431         }
432
433         eppnt = elf_phdata;
434         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
435                 if (eppnt->p_type == PT_LOAD) {
436                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
437                         int elf_prot = 0;
438                         unsigned long vaddr = 0;
439                         unsigned long k, map_addr;
440
441                         if (eppnt->p_flags & PF_R)
442                                 elf_prot = PROT_READ;
443                         if (eppnt->p_flags & PF_W)
444                                 elf_prot |= PROT_WRITE;
445                         if (eppnt->p_flags & PF_X)
446                                 elf_prot |= PROT_EXEC;
447                         vaddr = eppnt->p_vaddr;
448                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
449                                 elf_type |= MAP_FIXED;
450                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
451                                 load_addr = -vaddr;
452
453                         map_addr = elf_map(interpreter, load_addr + vaddr,
454                                         eppnt, elf_prot, elf_type, total_size);
455                         total_size = 0;
456                         if (!*interp_map_addr)
457                                 *interp_map_addr = map_addr;
458                         error = map_addr;
459                         if (BAD_ADDR(map_addr))
460                                 goto out_close;
461
462                         if (!load_addr_set &&
463                             interp_elf_ex->e_type == ET_DYN) {
464                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
465                                 load_addr_set = 1;
466                         }
467
468                         /*
469                          * Check to see if the section's size will overflow the
470                          * allowed task size. Note that p_filesz must always be
471                          * <= p_memsize so it's only necessary to check p_memsz.
472                          */
473                         k = load_addr + eppnt->p_vaddr;
474                         if (BAD_ADDR(k) ||
475                             eppnt->p_filesz > eppnt->p_memsz ||
476                             eppnt->p_memsz > TASK_SIZE ||
477                             TASK_SIZE - eppnt->p_memsz < k) {
478                                 error = -ENOMEM;
479                                 goto out_close;
480                         }
481
482                         /*
483                          * Find the end of the file mapping for this phdr, and
484                          * keep track of the largest address we see for this.
485                          */
486                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487                         if (k > elf_bss)
488                                 elf_bss = k;
489
490                         /*
491                          * Do the same thing for the memory mapping - between
492                          * elf_bss and last_bss is the bss section.
493                          */
494                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
495                         if (k > last_bss)
496                                 last_bss = k;
497                 }
498         }
499
500         if (last_bss > elf_bss) {
501                 /*
502                  * Now fill out the bss section.  First pad the last page up
503                  * to the page boundary, and then perform a mmap to make sure
504                  * that there are zero-mapped pages up to and including the
505                  * last bss page.
506                  */
507                 if (padzero(elf_bss)) {
508                         error = -EFAULT;
509                         goto out_close;
510                 }
511
512                 /* What we have mapped so far */
513                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
514
515                 /* Map the last of the bss segment */
516                 down_write(&current->mm->mmap_sem);
517                 error = do_brk(elf_bss, last_bss - elf_bss);
518                 up_write(&current->mm->mmap_sem);
519                 if (BAD_ADDR(error))
520                         goto out_close;
521         }
522
523         error = load_addr;
524
525 out_close:
526         kfree(elf_phdata);
527 out:
528         return error;
529 }
530
531 /*
532  * These are the functions used to load ELF style executables and shared
533  * libraries.  There is no binary dependent code anywhere else.
534  */
535
/* Interpreter type codes (their users are outside this part of the file). */
#define INTERPRETER_NONE	0
#define INTERPRETER_ELF		2

/*
 * Mask applied to the random page count used for stack randomization.
 * Architectures may provide their own; this default yields 8MB of VA.
 */
#if !defined(STACK_RND_MASK)
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))	/* 8MB of VA */
#endif
542
543 static unsigned long randomize_stack_top(unsigned long stack_top)
544 {
545         unsigned int random_variable = 0;
546
547         if ((current->flags & PF_RANDOMIZE) &&
548                 !(current->personality & ADDR_NO_RANDOMIZE)) {
549                 random_variable = get_random_int() & STACK_RND_MASK;
550                 random_variable <<= PAGE_SHIFT;
551         }
552 #ifdef CONFIG_STACK_GROWSUP
553         return PAGE_ALIGN(stack_top) + random_variable;
554 #else
555         return PAGE_ALIGN(stack_top) - random_variable;
556 #endif
557 }
558
559 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
560 {
561         struct file *interpreter = NULL; /* to shut gcc up */
562         unsigned long load_addr = 0, load_bias = 0;
563         int load_addr_set = 0;
564         char * elf_interpreter = NULL;
565         unsigned long error;
566         struct elf_phdr *elf_ppnt, *elf_phdata;
567         unsigned long elf_bss, elf_brk;
568         int retval, i;
569         unsigned int size;
570         unsigned long elf_entry;
571         unsigned long interp_load_addr = 0;
572         unsigned long start_code, end_code, start_data, end_data;
573         unsigned long reloc_func_desc __maybe_unused = 0;
574         int executable_stack = EXSTACK_DEFAULT;
575         unsigned long def_flags = 0;
576         struct {
577                 struct elfhdr elf_ex;
578                 struct elfhdr interp_elf_ex;
579         } *loc;
580
581         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
582         if (!loc) {
583                 retval = -ENOMEM;
584                 goto out_ret;
585         }
586         
587         /* Get the exec-header */
588         loc->elf_ex = *((struct elfhdr *)bprm->buf);
589
590         retval = -ENOEXEC;
591         /* First of all, some simple consistency checks */
592         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
593                 goto out;
594
595         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
596                 goto out;
597         if (!elf_check_arch(&loc->elf_ex))
598                 goto out;
599         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
600                 goto out;
601
602         /* Now read in all of the header information */
603         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
604                 goto out;
605         if (loc->elf_ex.e_phnum < 1 ||
606                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
607                 goto out;
608         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
609         retval = -ENOMEM;
610         elf_phdata = kmalloc(size, GFP_KERNEL);
611         if (!elf_phdata)
612                 goto out;
613
614         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
615                              (char *)elf_phdata, size);
616         if (retval != size) {
617                 if (retval >= 0)
618                         retval = -EIO;
619                 goto out_free_ph;
620         }
621
622         elf_ppnt = elf_phdata;
623         elf_bss = 0;
624         elf_brk = 0;
625
626         start_code = ~0UL;
627         end_code = 0;
628         start_data = 0;
629         end_data = 0;
630
631         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
632                 if (elf_ppnt->p_type == PT_INTERP) {
633                         /* This is the program interpreter used for
634                          * shared libraries - for now assume that this
635                          * is an a.out format binary
636                          */
637                         retval = -ENOEXEC;
638                         if (elf_ppnt->p_filesz > PATH_MAX || 
639                             elf_ppnt->p_filesz < 2)
640                                 goto out_free_ph;
641
642                         retval = -ENOMEM;
643                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
644                                                   GFP_KERNEL);
645                         if (!elf_interpreter)
646                                 goto out_free_ph;
647
648                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
649                                              elf_interpreter,
650                                              elf_ppnt->p_filesz);
651                         if (retval != elf_ppnt->p_filesz) {
652                                 if (retval >= 0)
653                                         retval = -EIO;
654                                 goto out_free_interp;
655                         }
656                         /* make sure path is NULL terminated */
657                         retval = -ENOEXEC;
658                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
659                                 goto out_free_interp;
660
661                         interpreter = open_exec(elf_interpreter);
662                         retval = PTR_ERR(interpreter);
663                         if (IS_ERR(interpreter))
664                                 goto out_free_interp;
665
666                         /*
667                          * If the binary is not readable then enforce
668                          * mm->dumpable = 0 regardless of the interpreter's
669                          * permissions.
670                          */
671                         if (file_permission(interpreter, MAY_READ) < 0)
672                                 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
673
674                         retval = kernel_read(interpreter, 0, bprm->buf,
675                                              BINPRM_BUF_SIZE);
676                         if (retval != BINPRM_BUF_SIZE) {
677                                 if (retval >= 0)
678                                         retval = -EIO;
679                                 goto out_free_dentry;
680                         }
681
682                         /* Get the exec headers */
683                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
684                         break;
685                 }
686                 elf_ppnt++;
687         }
688
689         elf_ppnt = elf_phdata;
690         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
691                 if (elf_ppnt->p_type == PT_GNU_STACK) {
692                         if (elf_ppnt->p_flags & PF_X)
693                                 executable_stack = EXSTACK_ENABLE_X;
694                         else
695                                 executable_stack = EXSTACK_DISABLE_X;
696                         break;
697                 }
698
699         /* Some simple consistency checks for the interpreter */
700         if (elf_interpreter) {
701                 retval = -ELIBBAD;
702                 /* Not an ELF interpreter */
703                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
704                         goto out_free_dentry;
705                 /* Verify the interpreter has a valid arch */
706                 if (!elf_check_arch(&loc->interp_elf_ex))
707                         goto out_free_dentry;
708         }
709
710         /* Flush all traces of the currently running executable */
711         retval = flush_old_exec(bprm);
712         if (retval)
713                 goto out_free_dentry;
714
715         /* OK, This is the point of no return */
716         current->flags &= ~PF_FORKNOEXEC;
717         current->mm->def_flags = def_flags;
718
719         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
720            may depend on the personality.  */
721         SET_PERSONALITY(loc->elf_ex);
722         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
723                 current->personality |= READ_IMPLIES_EXEC;
724
725         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
726                 current->flags |= PF_RANDOMIZE;
727
728         setup_new_exec(bprm);
729
730         /* Do this so that we can load the interpreter, if need be.  We will
731            change some of these later */
732         current->mm->free_area_cache = current->mm->mmap_base;
733         current->mm->cached_hole_size = 0;
734         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
735                                  executable_stack);
736         if (retval < 0) {
737                 send_sig(SIGKILL, current, 0);
738                 goto out_free_dentry;
739         }
740         
741         current->mm->start_stack = bprm->p;
742
743         /* Now we do a little grungy work by mmapping the ELF image into
744            the correct location in memory. */
745         for(i = 0, elf_ppnt = elf_phdata;
746             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
747                 int elf_prot = 0, elf_flags;
748                 unsigned long k, vaddr;
749
750                 if (elf_ppnt->p_type != PT_LOAD)
751                         continue;
752
753                 if (unlikely (elf_brk > elf_bss)) {
754                         unsigned long nbyte;
755                     
756                         /* There was a PT_LOAD segment with p_memsz > p_filesz
757                            before this one. Map anonymous pages, if needed,
758                            and clear the area.  */
759                         retval = set_brk(elf_bss + load_bias,
760                                          elf_brk + load_bias);
761                         if (retval) {
762                                 send_sig(SIGKILL, current, 0);
763                                 goto out_free_dentry;
764                         }
765                         nbyte = ELF_PAGEOFFSET(elf_bss);
766                         if (nbyte) {
767                                 nbyte = ELF_MIN_ALIGN - nbyte;
768                                 if (nbyte > elf_brk - elf_bss)
769                                         nbyte = elf_brk - elf_bss;
770                                 if (clear_user((void __user *)elf_bss +
771                                                         load_bias, nbyte)) {
772                                         /*
773                                          * This bss-zeroing can fail if the ELF
774                                          * file specifies odd protections. So
775                                          * we don't check the return value
776                                          */
777                                 }
778                         }
779                 }
780
781                 if (elf_ppnt->p_flags & PF_R)
782                         elf_prot |= PROT_READ;
783                 if (elf_ppnt->p_flags & PF_W)
784                         elf_prot |= PROT_WRITE;
785                 if (elf_ppnt->p_flags & PF_X)
786                         elf_prot |= PROT_EXEC;
787
788                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
789
790                 vaddr = elf_ppnt->p_vaddr;
791                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
792                         elf_flags |= MAP_FIXED;
793                 } else if (loc->elf_ex.e_type == ET_DYN) {
794                         /* Try and get dynamic programs out of the way of the
795                          * default mmap base, as well as whatever program they
796                          * might try to exec.  This is because the brk will
797                          * follow the loader, and is not movable.  */
798 #if defined(CONFIG_X86) || defined(CONFIG_ARM)
799                         load_bias = 0;
800 #else
801                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
802 #endif
803                 }
804
805                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
806                                 elf_prot, elf_flags, 0);
807                 if (BAD_ADDR(error)) {
808                         send_sig(SIGKILL, current, 0);
809                         retval = IS_ERR((void *)error) ?
810                                 PTR_ERR((void*)error) : -EINVAL;
811                         goto out_free_dentry;
812                 }
813
814                 if (!load_addr_set) {
815                         load_addr_set = 1;
816                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
817                         if (loc->elf_ex.e_type == ET_DYN) {
818                                 load_bias += error -
819                                              ELF_PAGESTART(load_bias + vaddr);
820                                 load_addr += load_bias;
821                                 reloc_func_desc = load_bias;
822                         }
823                 }
824                 k = elf_ppnt->p_vaddr;
825                 if (k < start_code)
826                         start_code = k;
827                 if (start_data < k)
828                         start_data = k;
829
830                 /*
831                  * Check to see if the section's size will overflow the
832                  * allowed task size. Note that p_filesz must always be
833                  * <= p_memsz so it is only necessary to check p_memsz.
834                  */
835                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
836                     elf_ppnt->p_memsz > TASK_SIZE ||
837                     TASK_SIZE - elf_ppnt->p_memsz < k) {
838                         /* set_brk can never work. Avoid overflows. */
839                         send_sig(SIGKILL, current, 0);
840                         retval = -EINVAL;
841                         goto out_free_dentry;
842                 }
843
844                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
845
846                 if (k > elf_bss)
847                         elf_bss = k;
848                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
849                         end_code = k;
850                 if (end_data < k)
851                         end_data = k;
852                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
853                 if (k > elf_brk)
854                         elf_brk = k;
855         }
856
857         loc->elf_ex.e_entry += load_bias;
858         elf_bss += load_bias;
859         elf_brk += load_bias;
860         start_code += load_bias;
861         end_code += load_bias;
862         start_data += load_bias;
863         end_data += load_bias;
864
865         /* Calling set_brk effectively mmaps the pages that we need
866          * for the bss and break sections.  We must do this before
867          * mapping in the interpreter, to make sure it doesn't wind
868          * up getting placed where the bss needs to go.
869          */
870         retval = set_brk(elf_bss, elf_brk);
871         if (retval) {
872                 send_sig(SIGKILL, current, 0);
873                 goto out_free_dentry;
874         }
875         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
876                 send_sig(SIGSEGV, current, 0);
877                 retval = -EFAULT; /* Nobody gets to see this, but.. */
878                 goto out_free_dentry;
879         }
880
881         if (elf_interpreter) {
882                 unsigned long uninitialized_var(interp_map_addr);
883
884                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
885                                             interpreter,
886                                             &interp_map_addr,
887                                             load_bias);
888                 if (!IS_ERR((void *)elf_entry)) {
889                         /*
890                          * load_elf_interp() returns relocation
891                          * adjustment
892                          */
893                         interp_load_addr = elf_entry;
894                         elf_entry += loc->interp_elf_ex.e_entry;
895                 }
896                 if (BAD_ADDR(elf_entry)) {
897                         force_sig(SIGSEGV, current);
898                         retval = IS_ERR((void *)elf_entry) ?
899                                         (int)elf_entry : -EINVAL;
900                         goto out_free_dentry;
901                 }
902                 reloc_func_desc = interp_load_addr;
903
904                 allow_write_access(interpreter);
905                 fput(interpreter);
906                 kfree(elf_interpreter);
907         } else {
908                 elf_entry = loc->elf_ex.e_entry;
909                 if (BAD_ADDR(elf_entry)) {
910                         force_sig(SIGSEGV, current);
911                         retval = -EINVAL;
912                         goto out_free_dentry;
913                 }
914         }
915
916         kfree(elf_phdata);
917
918         set_binfmt(&elf_format);
919
920 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
921         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
922         if (retval < 0) {
923                 send_sig(SIGKILL, current, 0);
924                 goto out;
925         }
926 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
927
928         install_exec_creds(bprm);
929         current->flags &= ~PF_FORKNOEXEC;
930         retval = create_elf_tables(bprm, &loc->elf_ex,
931                           load_addr, interp_load_addr);
932         if (retval < 0) {
933                 send_sig(SIGKILL, current, 0);
934                 goto out;
935         }
936         /* N.B. passed_fileno might not be initialized? */
937         current->mm->end_code = end_code;
938         current->mm->start_code = start_code;
939         current->mm->start_data = start_data;
940         current->mm->end_data = end_data;
941         current->mm->start_stack = bprm->p;
942
943 #ifdef arch_randomize_brk
944         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
945                 current->mm->brk = current->mm->start_brk =
946                         arch_randomize_brk(current->mm);
947 #ifdef CONFIG_COMPAT_BRK
948                 current->brk_randomized = 1;
949 #endif
950         }
951 #endif
952
953         if (current->personality & MMAP_PAGE_ZERO) {
954                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
955                    and some applications "depend" upon this behavior.
956                    Since we do not have the power to recompile these, we
957                    emulate the SVr4 behavior. Sigh. */
958                 down_write(&current->mm->mmap_sem);
959                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
960                                 MAP_FIXED | MAP_PRIVATE, 0);
961                 up_write(&current->mm->mmap_sem);
962         }
963
964 #ifdef ELF_PLAT_INIT
965         /*
966          * The ABI may specify that certain registers be set up in special
967          * ways (on i386 %edx is the address of a DT_FINI function, for
968          * example).  In addition, it may also specify (eg, PowerPC64 ELF)
969          * that the e_entry field is the address of the function descriptor
970          * for the startup routine, rather than the address of the startup
971          * routine itself.  This macro performs whatever initialization to
972          * the regs structure is required as well as any relocations to the
973          * function descriptor entries when executing dynamically linked apps.
974          */
975         ELF_PLAT_INIT(regs, reloc_func_desc);
976 #endif
977
978         start_thread(regs, elf_entry, bprm->p);
979         retval = 0;
980 out:
981         kfree(loc);
982 out_ret:
983         return retval;
984
985         /* error cleanup */
986 out_free_dentry:
987         allow_write_access(interpreter);
988         if (interpreter)
989                 fput(interpreter);
990 out_free_interp:
991         kfree(elf_interpreter);
992 out_free_ph:
993         kfree(elf_phdata);
994         goto out;
995 }
996
997 /* This is really simpleminded and specialized - we are loading an
998    a.out library that is given an ELF header. */
999 static int load_elf_library(struct file *file)
1000 {
1001         struct elf_phdr *elf_phdata;
1002         struct elf_phdr *eppnt;
1003         unsigned long elf_bss, bss, len;
1004         int retval, error, i, j;
1005         struct elfhdr elf_ex;
1006
1007         error = -ENOEXEC;
1008         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1009         if (retval != sizeof(elf_ex))
1010                 goto out;
1011
1012         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1013                 goto out;
1014
1015         /* First of all, some simple consistency checks */
1016         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1017             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1018                 goto out;
1019
1020         /* Now read in all of the header information */
1021
1022         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1023         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1024
1025         error = -ENOMEM;
1026         elf_phdata = kmalloc(j, GFP_KERNEL);
1027         if (!elf_phdata)
1028                 goto out;
1029
1030         eppnt = elf_phdata;
1031         error = -ENOEXEC;
1032         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1033         if (retval != j)
1034                 goto out_free_ph;
1035
1036         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1037                 if ((eppnt + i)->p_type == PT_LOAD)
1038                         j++;
1039         if (j != 1)
1040                 goto out_free_ph;
1041
1042         while (eppnt->p_type != PT_LOAD)
1043                 eppnt++;
1044
1045         /* Now use mmap to map the library into memory. */
1046         down_write(&current->mm->mmap_sem);
1047         error = do_mmap(file,
1048                         ELF_PAGESTART(eppnt->p_vaddr),
1049                         (eppnt->p_filesz +
1050                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1051                         PROT_READ | PROT_WRITE | PROT_EXEC,
1052                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1053                         (eppnt->p_offset -
1054                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1055         up_write(&current->mm->mmap_sem);
1056         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1057                 goto out_free_ph;
1058
1059         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1060         if (padzero(elf_bss)) {
1061                 error = -EFAULT;
1062                 goto out_free_ph;
1063         }
1064
1065         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1066                             ELF_MIN_ALIGN - 1);
1067         bss = eppnt->p_memsz + eppnt->p_vaddr;
1068         if (bss > len) {
1069                 down_write(&current->mm->mmap_sem);
1070                 do_brk(len, bss - len);
1071                 up_write(&current->mm->mmap_sem);
1072         }
1073         error = 0;
1074
1075 out_free_ph:
1076         kfree(elf_phdata);
1077 out:
1078         return error;
1079 }
1080
1081 #ifdef CONFIG_ELF_CORE
1082 /*
1083  * ELF core dumper
1084  *
1085  * Modelled on fs/exec.c:aout_core_dump()
1086  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1087  */
1088
1089 /*
1090  * Decide what to dump of a segment, part, all or none.
1091  */
1092 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1093                                    unsigned long mm_flags)
1094 {
1095 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1096
1097         /* The vma can be set up to tell us the answer directly.  */
1098         if (vma->vm_flags & VM_ALWAYSDUMP)
1099                 goto whole;
1100
1101         /* Hugetlb memory check */
1102         if (vma->vm_flags & VM_HUGETLB) {
1103                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1104                         goto whole;
1105                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1106                         goto whole;
1107         }
1108
1109         /* Do not dump I/O mapped devices or special mappings */
1110         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1111                 return 0;
1112
1113         /* By default, dump shared memory if mapped from an anonymous file. */
1114         if (vma->vm_flags & VM_SHARED) {
1115                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1116                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1117                         goto whole;
1118                 return 0;
1119         }
1120
1121         /* Dump segments that have been written to.  */
1122         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1123                 goto whole;
1124         if (vma->vm_file == NULL)
1125                 return 0;
1126
1127         if (FILTER(MAPPED_PRIVATE))
1128                 goto whole;
1129
1130         /*
1131          * If this looks like the beginning of a DSO or executable mapping,
1132          * check for an ELF header.  If we find one, dump the first page to
1133          * aid in determining what was mapped here.
1134          */
1135         if (FILTER(ELF_HEADERS) &&
1136             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1137                 u32 __user *header = (u32 __user *) vma->vm_start;
1138                 u32 word;
1139                 mm_segment_t fs = get_fs();
1140                 /*
1141                  * Doing it this way gets the constant folded by GCC.
1142                  */
1143                 union {
1144                         u32 cmp;
1145                         char elfmag[SELFMAG];
1146                 } magic;
1147                 BUILD_BUG_ON(SELFMAG != sizeof word);
1148                 magic.elfmag[EI_MAG0] = ELFMAG0;
1149                 magic.elfmag[EI_MAG1] = ELFMAG1;
1150                 magic.elfmag[EI_MAG2] = ELFMAG2;
1151                 magic.elfmag[EI_MAG3] = ELFMAG3;
1152                 /*
1153                  * Switch to the user "segment" for get_user(),
1154                  * then put back what elf_core_dump() had in place.
1155                  */
1156                 set_fs(USER_DS);
1157                 if (unlikely(get_user(word, header)))
1158                         word = 0;
1159                 set_fs(fs);
1160                 if (word == magic.cmp)
1161                         return PAGE_SIZE;
1162         }
1163
1164 #undef  FILTER
1165
1166         return 0;
1167
1168 whole:
1169         return vma->vm_end - vma->vm_start;
1170 }
1171
/*
 * An ELF note in memory: the pieces needed to emit one note record
 * (see notesize() and writenote()).
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, written out as n_type */
	unsigned int datasz;	/* size of the descriptor in bytes */
	void *data;		/* descriptor payload, datasz bytes */
};
1180
1181 static int notesize(struct memelfnote *en)
1182 {
1183         int sz;
1184
1185         sz = sizeof(struct elf_note);
1186         sz += roundup(strlen(en->name) + 1, 4);
1187         sz += roundup(en->datasz, 4);
1188
1189         return sz;
1190 }
1191
/*
 * Write nr bytes at addr to the dump file and advance *foffset by nr.
 * If dump_write() fails, the *enclosing function* returns 0, so this may
 * only be used in functions that return 0 on failure.  A variable named
 * "file" must be in scope at the point of use.
 */
#define DUMP_WRITE(addr, nr, foffset)				\
	do {							\
		if (!dump_write(file, (addr), (nr)))		\
			return 0;				\
		*foffset += (nr);				\
	} while(0)
1194
1195 static int alignfile(struct file *file, loff_t *foffset)
1196 {
1197         static const char buf[4] = { 0, };
1198         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1199         return 1;
1200 }
1201
1202 static int writenote(struct memelfnote *men, struct file *file,
1203                         loff_t *foffset)
1204 {
1205         struct elf_note en;
1206         en.n_namesz = strlen(men->name) + 1;
1207         en.n_descsz = men->datasz;
1208         en.n_type = men->type;
1209
1210         DUMP_WRITE(&en, sizeof(en), foffset);
1211         DUMP_WRITE(men->name, en.n_namesz, foffset);
1212         if (!alignfile(file, foffset))
1213                 return 0;
1214         DUMP_WRITE(men->data, men->datasz, foffset);
1215         if (!alignfile(file, foffset))
1216                 return 0;
1217
1218         return 1;
1219 }
1220 #undef DUMP_WRITE
1221
1222 static void fill_elf_header(struct elfhdr *elf, int segs,
1223                             u16 machine, u32 flags, u8 osabi)
1224 {
1225         memset(elf, 0, sizeof(*elf));
1226
1227         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1228         elf->e_ident[EI_CLASS] = ELF_CLASS;
1229         elf->e_ident[EI_DATA] = ELF_DATA;
1230         elf->e_ident[EI_VERSION] = EV_CURRENT;
1231         elf->e_ident[EI_OSABI] = ELF_OSABI;
1232
1233         elf->e_type = ET_CORE;
1234         elf->e_machine = machine;
1235         elf->e_version = EV_CURRENT;
1236         elf->e_phoff = sizeof(struct elfhdr);
1237         elf->e_flags = flags;
1238         elf->e_ehsize = sizeof(struct elfhdr);
1239         elf->e_phentsize = sizeof(struct elf_phdr);
1240         elf->e_phnum = segs;
1241
1242         return;
1243 }
1244
1245 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1246 {
1247         phdr->p_type = PT_NOTE;
1248         phdr->p_offset = offset;
1249         phdr->p_vaddr = 0;
1250         phdr->p_paddr = 0;
1251         phdr->p_filesz = sz;
1252         phdr->p_memsz = 0;
1253         phdr->p_flags = 0;
1254         phdr->p_align = 0;
1255         return;
1256 }
1257
1258 static void fill_note(struct memelfnote *note, const char *name, int type, 
1259                 unsigned int sz, void *data)
1260 {
1261         note->name = name;
1262         note->type = type;
1263         note->datasz = sz;
1264         note->data = data;
1265         return;
1266 }
1267
1268 /*
1269  * fill up all the fields in prstatus from the given task struct, except
1270  * registers which need to be filled up separately.
1271  */
1272 static void fill_prstatus(struct elf_prstatus *prstatus,
1273                 struct task_struct *p, long signr)
1274 {
1275         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1276         prstatus->pr_sigpend = p->pending.signal.sig[0];
1277         prstatus->pr_sighold = p->blocked.sig[0];
1278         rcu_read_lock();
1279         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1280         rcu_read_unlock();
1281         prstatus->pr_pid = task_pid_vnr(p);
1282         prstatus->pr_pgrp = task_pgrp_vnr(p);
1283         prstatus->pr_sid = task_session_vnr(p);
1284         if (thread_group_leader(p)) {
1285                 struct task_cputime cputime;
1286
1287                 /*
1288                  * This is the record for the group leader.  It shows the
1289                  * group-wide total, not its individual thread total.
1290                  */
1291                 thread_group_cputime(p, &cputime);
1292                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1293                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1294         } else {
1295                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1296                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1297         }
1298         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1299         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1300 }
1301
1302 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1303                        struct mm_struct *mm)
1304 {
1305         const struct cred *cred;
1306         unsigned int i, len;
1307         
1308         /* first copy the parameters from user space */
1309         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1310
1311         len = mm->arg_end - mm->arg_start;
1312         if (len >= ELF_PRARGSZ)
1313                 len = ELF_PRARGSZ-1;
1314         if (copy_from_user(&psinfo->pr_psargs,
1315                            (const char __user *)mm->arg_start, len))
1316                 return -EFAULT;
1317         for(i = 0; i < len; i++)
1318                 if (psinfo->pr_psargs[i] == 0)
1319                         psinfo->pr_psargs[i] = ' ';
1320         psinfo->pr_psargs[len] = 0;
1321
1322         rcu_read_lock();
1323         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1324         rcu_read_unlock();
1325         psinfo->pr_pid = task_pid_vnr(p);
1326         psinfo->pr_pgrp = task_pgrp_vnr(p);
1327         psinfo->pr_sid = task_session_vnr(p);
1328
1329         i = p->state ? ffz(~p->state) + 1 : 0;
1330         psinfo->pr_state = i;
1331         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1332         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1333         psinfo->pr_nice = task_nice(p);
1334         psinfo->pr_flag = p->flags;
1335         rcu_read_lock();
1336         cred = __task_cred(p);
1337         SET_UID(psinfo->pr_uid, cred->uid);
1338         SET_GID(psinfo->pr_gid, cred->gid);
1339         rcu_read_unlock();
1340         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1341         
1342         return 0;
1343 }
1344
1345 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1346 {
1347         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1348         int i = 0;
1349         do
1350                 i += 2;
1351         while (auxv[i - 2] != AT_NULL);
1352         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1353 }
1354
1355 #ifdef CORE_DUMP_USE_REGSET
1356 #include <linux/regset.h>
1357
1358 struct elf_thread_core_info {
1359         struct elf_thread_core_info *next;
1360         struct task_struct *task;
1361         struct elf_prstatus prstatus;
1362         struct memelfnote notes[0];
1363 };
1364
1365 struct elf_note_info {
1366         struct elf_thread_core_info *thread;
1367         struct memelfnote psinfo;
1368         struct memelfnote auxv;
1369         size_t size;
1370         int thread_notes;
1371 };
1372
1373 /*
1374  * When a regset has a writeback hook, we call it on each thread before
1375  * dumping user memory.  On register window machines, this makes sure the
1376  * user memory backing the register data is up to date before we read it.
1377  */
1378 static void do_thread_regset_writeback(struct task_struct *task,
1379                                        const struct user_regset *regset)
1380 {
1381         if (regset->writeback)
1382                 regset->writeback(task, regset, 1);
1383 }
1384
1385 static int fill_thread_core_info(struct elf_thread_core_info *t,
1386                                  const struct user_regset_view *view,
1387                                  long signr, size_t *total)
1388 {
1389         unsigned int i;
1390
1391         /*
1392          * NT_PRSTATUS is the one special case, because the regset data
1393          * goes into the pr_reg field inside the note contents, rather
1394          * than being the whole note contents.  We fill the reset in here.
1395          * We assume that regset 0 is NT_PRSTATUS.
1396          */
1397         fill_prstatus(&t->prstatus, t->task, signr);
1398         (void) view->regsets[0].get(t->task, &view->regsets[0],
1399                                     0, sizeof(t->prstatus.pr_reg),
1400                                     &t->prstatus.pr_reg, NULL);
1401
1402         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1403                   sizeof(t->prstatus), &t->prstatus);
1404         *total += notesize(&t->notes[0]);
1405
1406         do_thread_regset_writeback(t->task, &view->regsets[0]);
1407
1408         /*
1409          * Each other regset might generate a note too.  For each regset
1410          * that has no core_note_type or is inactive, we leave t->notes[i]
1411          * all zero and we'll know to skip writing it later.
1412          */
1413         for (i = 1; i < view->n; ++i) {
1414                 const struct user_regset *regset = &view->regsets[i];
1415                 do_thread_regset_writeback(t->task, regset);
1416                 if (regset->core_note_type &&
1417                     (!regset->active || regset->active(t->task, regset))) {
1418                         int ret;
1419                         size_t size = regset->n * regset->size;
1420                         void *data = kmalloc(size, GFP_KERNEL);
1421                         if (unlikely(!data))
1422                                 return 0;
1423                         ret = regset->get(t->task, regset,
1424                                           0, size, data, NULL);
1425                         if (unlikely(ret))
1426                                 kfree(data);
1427                         else {
1428                                 if (regset->core_note_type != NT_PRFPREG)
1429                                         fill_note(&t->notes[i], "LINUX",
1430                                                   regset->core_note_type,
1431                                                   size, data);
1432                                 else {
1433                                         t->prstatus.pr_fpvalid = 1;
1434                                         fill_note(&t->notes[i], "CORE",
1435                                                   NT_PRFPREG, size, data);
1436                                 }
1437                                 *total += notesize(&t->notes[i]);
1438                         }
1439                 }
1440         }
1441
1442         return 1;
1443 }
1444
1445 static int fill_note_info(struct elfhdr *elf, int phdrs,
1446                           struct elf_note_info *info,
1447                           long signr, struct pt_regs *regs)
1448 {
1449         struct task_struct *dump_task = current;
1450         const struct user_regset_view *view = task_user_regset_view(dump_task);
1451         struct elf_thread_core_info *t;
1452         struct elf_prpsinfo *psinfo;
1453         struct core_thread *ct;
1454         unsigned int i;
1455
1456         info->size = 0;
1457         info->thread = NULL;
1458
1459         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1460         if (psinfo == NULL)
1461                 return 0;
1462
1463         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1464
1465         /*
1466          * Figure out how many notes we're going to need for each thread.
1467          */
1468         info->thread_notes = 0;
1469         for (i = 0; i < view->n; ++i)
1470                 if (view->regsets[i].core_note_type != 0)
1471                         ++info->thread_notes;
1472
1473         /*
1474          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1475          * since it is our one special case.
1476          */
1477         if (unlikely(info->thread_notes == 0) ||
1478             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1479                 WARN_ON(1);
1480                 return 0;
1481         }
1482
1483         /*
1484          * Initialize the ELF file header.
1485          */
1486         fill_elf_header(elf, phdrs,
1487                         view->e_machine, view->e_flags, view->ei_osabi);
1488
1489         /*
1490          * Allocate a structure for each thread.
1491          */
1492         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1493                 t = kzalloc(offsetof(struct elf_thread_core_info,
1494                                      notes[info->thread_notes]),
1495                             GFP_KERNEL);
1496                 if (unlikely(!t))
1497                         return 0;
1498
1499                 t->task = ct->task;
1500                 if (ct->task == dump_task || !info->thread) {
1501                         t->next = info->thread;
1502                         info->thread = t;
1503                 } else {
1504                         /*
1505                          * Make sure to keep the original task at
1506                          * the head of the list.
1507                          */
1508                         t->next = info->thread->next;
1509                         info->thread->next = t;
1510                 }
1511         }
1512
1513         /*
1514          * Now fill in each thread's information.
1515          */
1516         for (t = info->thread; t != NULL; t = t->next)
1517                 if (!fill_thread_core_info(t, view, signr, &info->size))
1518                         return 0;
1519
1520         /*
1521          * Fill in the two process-wide notes.
1522          */
1523         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1524         info->size += notesize(&info->psinfo);
1525
1526         fill_auxv_note(&info->auxv, current->mm);
1527         info->size += notesize(&info->auxv);
1528
1529         return 1;
1530 }
1531
1532 static size_t get_note_info_size(struct elf_note_info *info)
1533 {
1534         return info->size;
1535 }
1536
1537 /*
1538  * Write all the notes for each thread.  When writing the first thread, the
1539  * process-wide notes are interleaved after the first thread-specific note.
1540  */
1541 static int write_note_info(struct elf_note_info *info,
1542                            struct file *file, loff_t *foffset)
1543 {
1544         bool first = 1;
1545         struct elf_thread_core_info *t = info->thread;
1546
1547         do {
1548                 int i;
1549
1550                 if (!writenote(&t->notes[0], file, foffset))
1551                         return 0;
1552
1553                 if (first && !writenote(&info->psinfo, file, foffset))
1554                         return 0;
1555                 if (first && !writenote(&info->auxv, file, foffset))
1556                         return 0;
1557
1558                 for (i = 1; i < info->thread_notes; ++i)
1559                         if (t->notes[i].data &&
1560                             !writenote(&t->notes[i], file, foffset))
1561                                 return 0;
1562
1563                 first = 0;
1564                 t = t->next;
1565         } while (t);
1566
1567         return 1;
1568 }
1569
1570 static void free_note_info(struct elf_note_info *info)
1571 {
1572         struct elf_thread_core_info *threads = info->thread;
1573         while (threads) {
1574                 unsigned int i;
1575                 struct elf_thread_core_info *t = threads;
1576                 threads = t->next;
1577                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1578                 for (i = 1; i < info->thread_notes; ++i)
1579                         kfree(t->notes[i].data);
1580                 kfree(t);
1581         }
1582         kfree(info->psinfo.data);
1583 }
1584
1585 #else
1586
/*
 * Here is the structure in which status of each thread is captured.
 * One instance is allocated per thread by fill_note_info() and linked on
 * elf_note_info.thread_list; elf_dump_thread_status() fills it in.
 */
struct elf_thread_status
{
        struct list_head list;          /* link in elf_note_info.thread_list */
        struct elf_prstatus prstatus;   /* NT_PRSTATUS */
        elf_fpregset_t fpu;             /* NT_PRFPREG */
        struct task_struct *thread;     /* task this record describes */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
#endif
        struct memelfnote notes[3];     /* notes pointing at the buffers above */
        int num_notes;                  /* write_note_info() emits notes[0..num_notes-1] */
};
1600
1601 /*
1602  * In order to add the specific thread information for the elf file format,
1603  * we need to keep a linked list of every threads pr_status and then create
1604  * a single section for them in the final core file.
1605  */
1606 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1607 {
1608         int sz = 0;
1609         struct task_struct *p = t->thread;
1610         t->num_notes = 0;
1611
1612         fill_prstatus(&t->prstatus, p, signr);
1613         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1614         
1615         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1616                   &(t->prstatus));
1617         t->num_notes++;
1618         sz += notesize(&t->notes[0]);
1619
1620         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1621                                                                 &t->fpu))) {
1622                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1623                           &(t->fpu));
1624                 t->num_notes++;
1625                 sz += notesize(&t->notes[1]);
1626         }
1627
1628 #ifdef ELF_CORE_COPY_XFPREGS
1629         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1630                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1631                           sizeof(t->xfpu), &t->xfpu);
1632                 t->num_notes++;
1633                 sz += notesize(&t->notes[2]);
1634         }
1635 #endif  
1636         return sz;
1637 }
1638
/*
 * Book-keeping for the note segment of a core dump: the notes describing
 * the dumping task, the buffers backing them, and the list of per-thread
 * status records.  Set up by elf_note_info_init()/fill_note_info() and
 * released by free_note_info().
 */
struct elf_note_info {
        struct memelfnote *notes;       /* note array; numnote entries filled in */
        struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
        struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
        struct list_head thread_list;   /* list of struct elf_thread_status */
        elf_fpregset_t *fpu;            /* buffer for NT_PRFPREG */
#ifdef ELF_CORE_COPY_XFPREGS
        elf_fpxregset_t *xfpu;          /* buffer for ELF_CORE_XFPREG_TYPE */
#endif
        int thread_status_size;         /* sum of elf_dump_thread_status() results */
        int numnote;                    /* number of entries used in notes[] */
};
1651
1652 static int elf_note_info_init(struct elf_note_info *info)
1653 {
1654         memset(info, 0, sizeof(*info));
1655         INIT_LIST_HEAD(&info->thread_list);
1656
1657         /* Allocate space for six ELF notes */
1658         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1659         if (!info->notes)
1660                 return 0;
1661         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1662         if (!info->psinfo)
1663                 goto notes_free;
1664         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1665         if (!info->prstatus)
1666                 goto psinfo_free;
1667         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1668         if (!info->fpu)
1669                 goto prstatus_free;
1670 #ifdef ELF_CORE_COPY_XFPREGS
1671         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1672         if (!info->xfpu)
1673                 goto fpu_free;
1674 #endif
1675         return 1;
1676 #ifdef ELF_CORE_COPY_XFPREGS
1677  fpu_free:
1678         kfree(info->fpu);
1679 #endif
1680  prstatus_free:
1681         kfree(info->prstatus);
1682  psinfo_free:
1683         kfree(info->psinfo);
1684  notes_free:
1685         kfree(info->notes);
1686         return 0;
1687 }
1688
1689 static int fill_note_info(struct elfhdr *elf, int phdrs,
1690                           struct elf_note_info *info,
1691                           long signr, struct pt_regs *regs)
1692 {
1693         struct list_head *t;
1694
1695         if (!elf_note_info_init(info))
1696                 return 0;
1697
1698         if (signr) {
1699                 struct core_thread *ct;
1700                 struct elf_thread_status *ets;
1701
1702                 for (ct = current->mm->core_state->dumper.next;
1703                                                 ct; ct = ct->next) {
1704                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1705                         if (!ets)
1706                                 return 0;
1707
1708                         ets->thread = ct->task;
1709                         list_add(&ets->list, &info->thread_list);
1710                 }
1711
1712                 list_for_each(t, &info->thread_list) {
1713                         int sz;
1714
1715                         ets = list_entry(t, struct elf_thread_status, list);
1716                         sz = elf_dump_thread_status(signr, ets);
1717                         info->thread_status_size += sz;
1718                 }
1719         }
1720         /* now collect the dump for the current */
1721         memset(info->prstatus, 0, sizeof(*info->prstatus));
1722         fill_prstatus(info->prstatus, current, signr);
1723         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1724
1725         /* Set up header */
1726         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1727
1728         /*
1729          * Set up the notes in similar form to SVR4 core dumps made
1730          * with info from their /proc.
1731          */
1732
1733         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1734                   sizeof(*info->prstatus), info->prstatus);
1735         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1736         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1737                   sizeof(*info->psinfo), info->psinfo);
1738
1739         info->numnote = 2;
1740
1741         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1742
1743         /* Try to dump the FPU. */
1744         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1745                                                                info->fpu);
1746         if (info->prstatus->pr_fpvalid)
1747                 fill_note(info->notes + info->numnote++,
1748                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1749 #ifdef ELF_CORE_COPY_XFPREGS
1750         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1751                 fill_note(info->notes + info->numnote++,
1752                           "LINUX", ELF_CORE_XFPREG_TYPE,
1753                           sizeof(*info->xfpu), info->xfpu);
1754 #endif
1755
1756         return 1;
1757 }
1758
1759 static size_t get_note_info_size(struct elf_note_info *info)
1760 {
1761         int sz = 0;
1762         int i;
1763
1764         for (i = 0; i < info->numnote; i++)
1765                 sz += notesize(info->notes + i);
1766
1767         sz += info->thread_status_size;
1768
1769         return sz;
1770 }
1771
1772 static int write_note_info(struct elf_note_info *info,
1773                            struct file *file, loff_t *foffset)
1774 {
1775         int i;
1776         struct list_head *t;
1777
1778         for (i = 0; i < info->numnote; i++)
1779                 if (!writenote(info->notes + i, file, foffset))
1780                         return 0;
1781
1782         /* write out the thread status notes section */
1783         list_for_each(t, &info->thread_list) {
1784                 struct elf_thread_status *tmp =
1785                                 list_entry(t, struct elf_thread_status, list);
1786
1787                 for (i = 0; i < tmp->num_notes; i++)
1788                         if (!writenote(&tmp->notes[i], file, foffset))
1789                                 return 0;
1790         }
1791
1792         return 1;
1793 }
1794
1795 static void free_note_info(struct elf_note_info *info)
1796 {
1797         while (!list_empty(&info->thread_list)) {
1798                 struct list_head *tmp = info->thread_list.next;
1799                 list_del(tmp);
1800                 kfree(list_entry(tmp, struct elf_thread_status, list));
1801         }
1802
1803         kfree(info->prstatus);
1804         kfree(info->psinfo);
1805         kfree(info->notes);
1806         kfree(info->fpu);
1807 #ifdef ELF_CORE_COPY_XFPREGS
1808         kfree(info->xfpu);
1809 #endif
1810 }
1811
1812 #endif
1813
1814 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1815                                         struct vm_area_struct *gate_vma)
1816 {
1817         struct vm_area_struct *ret = tsk->mm->mmap;
1818
1819         if (ret)
1820                 return ret;
1821         return gate_vma;
1822 }
1823 /*
1824  * Helper function for iterating across a vma list.  It ensures that the caller
1825  * will visit `gate_vma' prior to terminating the search.
1826  */
1827 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1828                                         struct vm_area_struct *gate_vma)
1829 {
1830         struct vm_area_struct *ret;
1831
1832         ret = this_vma->vm_next;
1833         if (ret)
1834                 return ret;
1835         if (this_vma == gate_vma)
1836                 return NULL;
1837         return gate_vma;
1838 }
1839
1840 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1841                              elf_addr_t e_shoff, int segs)
1842 {
1843         elf->e_shoff = e_shoff;
1844         elf->e_shentsize = sizeof(*shdr4extnum);
1845         elf->e_shnum = 1;
1846         elf->e_shstrndx = SHN_UNDEF;
1847
1848         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1849
1850         shdr4extnum->sh_type = SHT_NULL;
1851         shdr4extnum->sh_size = elf->e_shnum;
1852         shdr4extnum->sh_link = elf->e_shstrndx;
1853         shdr4extnum->sh_info = segs;
1854 }
1855
1856 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1857                                      unsigned long mm_flags)
1858 {
1859         struct vm_area_struct *vma;
1860         size_t size = 0;
1861
1862         for (vma = first_vma(current, gate_vma); vma != NULL;
1863              vma = next_vma(vma, gate_vma))
1864                 size += vma_dump_size(vma, mm_flags);
1865         return size;
1866 }
1867
/*
 * Actual dumper
 *
 * This is a two-pass process; first we find the offsets of the bits,
 * and then they are actually written out.  If we run out of core limit
 * we just truncate.
 *
 * The file is written in this order: ELF header, the notes program header,
 * one PT_LOAD program header per vma, any extra program headers, the notes,
 * padding up to dataoff, the vma contents, any extra data and, when extended
 * numbering is in use, one section header.
 *
 * Returns has_dumped: 0 if the ELF header could not be allocated or
 * fill_note_info() failed, 1 otherwise (even if a later write fails or the
 * limit is reached).
 */
static int elf_core_dump(struct coredump_params *cprm)
{
        int has_dumped = 0;
        mm_segment_t fs;
        int segs;
        size_t size = 0;        /* bytes written so far, checked against cprm->limit */
        struct vm_area_struct *vma, *gate_vma;
        struct elfhdr *elf = NULL;
        /*
         * offset:  running file offset used while laying out the file
         * dataoff: page-aligned offset at which the vma data starts
         * foffset: file position handed to the note writers
         */
        loff_t offset = 0, dataoff, foffset;
        struct elf_note_info info;
        struct elf_phdr *phdr4note = NULL;
        struct elf_shdr *shdr4extnum = NULL;
        Elf_Half e_phnum;
        elf_addr_t e_shoff;

        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

        /* alloc memory for large data structures: too large to be on stack */
        elf = kmalloc(sizeof(*elf), GFP_KERNEL);
        if (!elf)
                goto out;
        /*
         * The number of segs is recorded into the ELF header as a 16bit value.
         * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
        segs = current->mm->map_count;
        segs += elf_core_extra_phdrs();

        gate_vma = get_gate_vma(current->mm);
        if (gate_vma != NULL)
                segs++;

        /* for notes section */
        segs++;

        /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
         * this, kernel supports extended numbering. Have a look at
         * include/linux/elf.h for further information. */
        e_phnum = segs > PN_XNUM ? PN_XNUM : segs;

        /*
         * Collect all the non-memory information about the process for the
         * notes.  This also sets up the file header.
         */
        if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
                goto cleanup;

        has_dumped = 1;
        current->flags |= PF_DUMPCORE;

        /* the previous value is restored at end_coredump */
        fs = get_fs();
        set_fs(KERNEL_DS);

        offset += sizeof(*elf);                         /* Elf header */
        offset += segs * sizeof(struct elf_phdr);       /* Program headers */
        foffset = offset;

        /* Write notes phdr entry */
        {
                size_t sz = get_note_info_size(&info);

                sz += elf_coredump_extra_notes_size();

                phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
                if (!phdr4note)
                        goto end_coredump;

                fill_elf_note_phdr(phdr4note, sz, offset);
                offset += sz;
        }

        /* segment data starts at the next ELF_EXEC_PAGESIZE boundary */
        dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

        /* advance past all segment data to find where the section header goes */
        offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
        offset += elf_core_extra_data_size();
        e_shoff = offset;

        if (e_phnum == PN_XNUM) {
                shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
                if (!shdr4extnum)
                        goto end_coredump;
                fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
        }

        /* rewind: from here on offset tracks each segment's p_offset */
        offset = dataoff;

        size += sizeof(*elf);
        if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
                goto end_coredump;

        size += sizeof(*phdr4note);
        if (size > cprm->limit
            || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
                goto end_coredump;

        /* Write program headers for segments dump */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                struct elf_phdr phdr;

                phdr.p_type = PT_LOAD;
                phdr.p_offset = offset;
                phdr.p_vaddr = vma->vm_start;
                phdr.p_paddr = 0;
                /* p_filesz is what gets dumped; p_memsz is the full vma size */
                phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
                phdr.p_memsz = vma->vm_end - vma->vm_start;
                offset += phdr.p_filesz;
                phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
                if (vma->vm_flags & VM_WRITE)
                        phdr.p_flags |= PF_W;
                if (vma->vm_flags & VM_EXEC)
                        phdr.p_flags |= PF_X;
                phdr.p_align = ELF_EXEC_PAGESIZE;

                size += sizeof(phdr);
                if (size > cprm->limit
                    || !dump_write(cprm->file, &phdr, sizeof(phdr)))
                        goto end_coredump;
        }

        if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
                goto end_coredump;

        /* write out the notes section */
        if (!write_note_info(&info, cprm->file, &foffset))
                goto end_coredump;

        /* a non-zero return is treated as failure here */
        if (elf_coredump_extra_notes_write(cprm->file, &foffset))
                goto end_coredump;

        /* Align to page */
        if (!dump_seek(cprm->file, dataoff - foffset))
                goto end_coredump;

        /* second pass: write the contents of each vma, page by page */
        for (vma = first_vma(current, gate_vma); vma != NULL;
                        vma = next_vma(vma, gate_vma)) {
                unsigned long addr;
                unsigned long end;

                end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);

                for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
                        struct page *page;
                        int stop;

                        page = get_dump_page(addr);
                        if (page) {
                                void *kaddr = kmap(page);
                                stop = ((size += PAGE_SIZE) > cprm->limit) ||
                                        !dump_write(cprm->file, kaddr,
                                                    PAGE_SIZE);
                                kunmap(page);
                                page_cache_release(page);
                        } else
                                /* no page returned: seek over it instead of writing */
                                stop = !dump_seek(cprm->file, PAGE_SIZE);
                        if (stop)
                                goto end_coredump;
                }
        }

        if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
                goto end_coredump;

        /* the section header carrying the real segment count goes last */
        if (e_phnum == PN_XNUM) {
                size += sizeof(*shdr4extnum);
                if (size > cprm->limit
                    || !dump_write(cprm->file, shdr4extnum,
                                   sizeof(*shdr4extnum)))
                        goto end_coredump;
        }

end_coredump:
        set_fs(fs);

cleanup:
        free_note_info(&info);
        kfree(shdr4extnum);
        kfree(phdr4note);
        kfree(elf);
out:
        return has_dumped;
}
2067
2068 #endif          /* CONFIG_ELF_CORE */
2069
2070 static int __init init_elf_binfmt(void)
2071 {
2072         return register_binfmt(&elf_format);
2073 }
2074
/* Exit hook: undo the registration done in init_elf_binfmt(). */
static void __exit exit_elf_binfmt(void)
{
        /* Remove the ELF loader. */
        unregister_binfmt(&elf_format);
}
2080
/*
 * Hook the init and exit functions up: init_elf_binfmt() is registered via
 * core_initcall(), exit_elf_binfmt() via module_exit().
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");