fs/binfmt_elf.c: fix bug in loading of PIE binaries
[pandora-kernel.git] / fs / binfmt_elf.c
1 /*
2  * linux/fs/binfmt_elf.c
3  *
4  * These are the functions used to load ELF format executables as used
5  * on SVr4 machines.  Information on the format may be found in the book
6  * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
7  * Tools".
8  *
9  * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
10  */
11
12 #include <linux/module.h>
13 #include <linux/kernel.h>
14 #include <linux/fs.h>
15 #include <linux/mm.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/security.h>
31 #include <linux/random.h>
32 #include <linux/elf.h>
33 #include <linux/utsname.h>
34 #include <linux/coredump.h>
35 #include <asm/uaccess.h>
36 #include <asm/param.h>
37 #include <asm/page.h>
38
39 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
40 static int load_elf_library(struct file *);
41 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
42                                 int, int, unsigned long);
43
44 /*
45  * If we don't support core dumping, then supply a NULL so we
46  * don't even try.
47  */
48 #ifdef CONFIG_ELF_CORE
49 static int elf_core_dump(struct coredump_params *cprm);
50 #else
51 #define elf_core_dump   NULL
52 #endif
53
/*
 * ELF_MIN_ALIGN is the larger of ELF_EXEC_PAGESIZE and PAGE_SIZE; the
 * ELF_PAGE*() helpers below all work in units of this alignment.
 */
54 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
55 #define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
56 #else
57 #define ELF_MIN_ALIGN   PAGE_SIZE
58 #endif
59
/* Default ELF_CORE_EFLAGS to 0 when nothing else has defined it. */
60 #ifndef ELF_CORE_EFLAGS
61 #define ELF_CORE_EFLAGS 0
62 #endif
63
/*
 * Mask-based alignment helpers (they assume ELF_MIN_ALIGN is a power of
 * two -- NOTE(review): not checked here):
 *   ELF_PAGESTART(v)  - v with the low (ELF_MIN_ALIGN - 1) bits cleared
 *   ELF_PAGEOFFSET(v) - only the low (ELF_MIN_ALIGN - 1) bits of v
 *   ELF_PAGEALIGN(v)  - v rounded up to a multiple of ELF_MIN_ALIGN
 * Only ELF_PAGESTART widens its mask to unsigned long explicitly.
 */
64 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
65 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
66 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
67
/*
 * Binary-format descriptor for ELF.  It names this file's entry points:
 * load_elf_binary() for executables, load_elf_library() for shared
 * libraries and elf_core_dump() for core dumps (elf_core_dump is defined
 * as NULL above when CONFIG_ELF_CORE is not set).
 * NOTE(review): where this descriptor gets registered is not visible in
 * this part of the file.
 */
68 static struct linux_binfmt elf_format = {
69         .module         = THIS_MODULE,
70         .load_binary    = load_elf_binary,
71         .load_shlib     = load_elf_library,
72         .core_dump      = elf_core_dump,
73         .min_coredump   = ELF_EXEC_PAGESIZE,
74 };
75
/*
 * True when x, viewed as an unsigned long, is at or above TASK_SIZE.
 * In this file it is applied to the results of do_brk(), do_mmap() and
 * elf_map() to detect failure (presumably because negative errno values
 * cast to unsigned long land above TASK_SIZE -- confirm per arch).
 */
76 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
77
78 static int set_brk(unsigned long start, unsigned long end)
79 {
80         start = ELF_PAGEALIGN(start);
81         end = ELF_PAGEALIGN(end);
82         if (end > start) {
83                 unsigned long addr;
84                 down_write(&current->mm->mmap_sem);
85                 addr = do_brk(start, end - start);
86                 up_write(&current->mm->mmap_sem);
87                 if (BAD_ADDR(addr))
88                         return addr;
89         }
90         current->mm->start_brk = current->mm->brk = end;
91         return 0;
92 }
93
94 /* We need to explicitly zero any fractional pages
95    after the data section (i.e. bss).  This would
96    contain the junk from the file that should not
97    be in memory
98  */
99 static int padzero(unsigned long elf_bss)
100 {
101         unsigned long nbyte;
102
103         nbyte = ELF_PAGEOFFSET(elf_bss);
104         if (nbyte) {
105                 nbyte = ELF_MIN_ALIGN - nbyte;
106                 if (clear_user((void __user *) elf_bss, nbyte))
107                         return -EFAULT;
108         }
109         return 0;
110 }
111
/*
 * Helpers that hide the stack growth direction while the initial user
 * stack is being laid out:
 *
 *   STACK_ADD(sp, items)   - sp moved by `items` elf_addr_t slots in the
 *                            direction of stack growth.
 *   STACK_ROUND(sp, items) - address `items` slots beyond sp in the growth
 *                            direction, rounded to a 16-byte boundary
 *                            (down when the stack grows down, up otherwise).
 *   STACK_ALLOC(sp, len)   - reserve `len` units on the stack by updating
 *                            sp in place; evaluates to the start of the
 *                            reserved area.
 *
 * Every macro argument is parenthesized so that compound expressions such
 * as `a + b` expand with the intended precedence.  STACK_ALLOC modifies
 * `sp` and evaluates it more than once, so pass a plain lvalue.
 */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)(sp); \
        (sp) += (len); \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) ((sp) - (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ (sp) -= (len); (sp); })
#endif
126
127 #ifndef ELF_BASE_PLATFORM
128 /*
129  * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
130  * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
131  * will be copied to the user stack in the same manner as AT_PLATFORM.
132  */
133 #define ELF_BASE_PLATFORM NULL
134 #endif
135
/*
 * create_elf_tables() - lay out the initial user stack for a new ELF image.
 *
 * Starting from bprm->p, this copies the platform string(s) (when
 * ELF_PLATFORM / ELF_BASE_PLATFORM are non-NULL) and 16 random bytes to
 * the user stack, builds the auxiliary vector in current->mm->saved_auxv,
 * stores the STACK_ROUND() result back into bprm->p, and then writes
 * argc, the argv[] and envp[] pointer arrays (each terminated by 0) and a
 * copy of the auxiliary vector to the user stack.
 *
 * @bprm:             binprm of the exec; p, argc, envc, exec, interp_flags
 *                    and interp_data are read, p is updated.
 * @exec:             ELF header of the executable; e_phoff, e_phnum and
 *                    e_entry feed AT_PHDR, AT_PHNUM and AT_ENTRY.
 * @load_addr:        added to e_phoff to form AT_PHDR.
 * @interp_load_addr: reported as AT_BASE.
 *
 * While walking the strings that start at mm->arg_start it also sets
 * mm->arg_end, mm->env_start and mm->env_end.
 *
 * Returns 0 on success, -EFAULT when a write to user space fails or
 * find_extend_vma() returns NULL, and -EINVAL when strnlen_user() yields
 * 0 or a value above MAX_ARG_STRLEN for an argument/environment string.
 */
136 static int
137 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
138                 unsigned long load_addr, unsigned long interp_load_addr)
139 {
140         unsigned long p = bprm->p;
141         int argc = bprm->argc;
142         int envc = bprm->envc;
143         elf_addr_t __user *argv;
144         elf_addr_t __user *envp;
145         elf_addr_t __user *sp;
146         elf_addr_t __user *u_platform;
147         elf_addr_t __user *u_base_platform;
148         elf_addr_t __user *u_rand_bytes;
149         const char *k_platform = ELF_PLATFORM;
150         const char *k_base_platform = ELF_BASE_PLATFORM;
151         unsigned char k_rand_bytes[16];
152         int items;
153         elf_addr_t *elf_info;
154         int ei_index = 0;
155         const struct cred *cred = current_cred();
156         struct vm_area_struct *vma;
157
158         /*
159          * In some cases (e.g. Hyper-Threading), we want to avoid L1
160          * evictions by the processes running on the same package. One
161          * thing we can do is to shuffle the initial stack for them.
162          */
163
164         p = arch_align_stack(p);
165
166         /*
167          * If this architecture has a platform capability string, copy it
168          * to userspace.  In some cases (Sparc), this info is impossible
169          * for userspace to get any other way, in others (i386) it is
170          * merely difficult.
171          */
172         u_platform = NULL;
173         if (k_platform) {
174                 size_t len = strlen(k_platform) + 1;
175
176                 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
177                 if (__copy_to_user(u_platform, k_platform, len))
178                         return -EFAULT;
179         }
180
181         /*
182          * If this architecture has a "base" platform capability
183          * string, copy it to userspace.
184          */
185         u_base_platform = NULL;
186         if (k_base_platform) {
187                 size_t len = strlen(k_base_platform) + 1;
188
189                 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190                 if (__copy_to_user(u_base_platform, k_base_platform, len))
191                         return -EFAULT;
192         }
193
194         /*
195          * Generate 16 random bytes for userspace PRNG seeding.
196          */
197         get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
198         u_rand_bytes = (elf_addr_t __user *)
199                        STACK_ALLOC(p, sizeof(k_rand_bytes));
200         if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
201                 return -EFAULT;
202
203         /* Create the ELF interpreter info */
        /* The vector is assembled in mm->saved_auxv and copied to the user
           stack only at the very end of this function. */
204         elf_info = (elf_addr_t *)current->mm->saved_auxv;
205         /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
        /* Each entry occupies two consecutive elf_info slots: id, then value. */
206 #define NEW_AUX_ENT(id, val) \
207         do { \
208                 elf_info[ei_index++] = id; \
209                 elf_info[ei_index++] = val; \
210         } while (0)
211
212 #ifdef ARCH_DLINFO
213         /* 
214          * ARCH_DLINFO must come first so PPC can do its special alignment of
215          * AUXV.
216          * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
217          * ARCH_DLINFO changes
218          */
219         ARCH_DLINFO;
220 #endif
221         NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
222         NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
223         NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
224         NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
225         NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
226         NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
227         NEW_AUX_ENT(AT_BASE, interp_load_addr);
228         NEW_AUX_ENT(AT_FLAGS, 0);
229         NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
230         NEW_AUX_ENT(AT_UID, cred->uid);
231         NEW_AUX_ENT(AT_EUID, cred->euid);
232         NEW_AUX_ENT(AT_GID, cred->gid);
233         NEW_AUX_ENT(AT_EGID, cred->egid);
234         NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
235         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
236         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
237         if (k_platform) {
238                 NEW_AUX_ENT(AT_PLATFORM,
239                             (elf_addr_t)(unsigned long)u_platform);
240         }
241         if (k_base_platform) {
242                 NEW_AUX_ENT(AT_BASE_PLATFORM,
243                             (elf_addr_t)(unsigned long)u_base_platform);
244         }
245         if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
246                 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
247         }
248 #undef NEW_AUX_ENT
249         /* AT_NULL is zero; clear the rest too */
250         memset(&elf_info[ei_index], 0,
251                sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
252
253         /* And advance past the AT_NULL entry.  */
254         ei_index += 2;
255
        /* Step over the room the ei_index auxv slots will occupy. */
256         sp = STACK_ADD(p, ei_index);
257
        /* argv[] plus its 0 terminator, envp[] plus its 0 terminator, and
           one slot for argc. */
258         items = (argc + 1) + (envc + 1) + 1;
259         bprm->p = STACK_ROUND(sp, items);
260
261         /* Point sp at the lowest address on the stack */
262 #ifdef CONFIG_STACK_GROWSUP
263         sp = (elf_addr_t __user *)bprm->p - items - ei_index;
264         bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
265 #else
266         sp = (elf_addr_t __user *)bprm->p;
267 #endif
268
269
270         /*
271          * Grow the stack manually; some architectures have a limit on how
272          * far ahead a user-space access may be in order to grow the stack.
273          */
274         vma = find_extend_vma(current->mm, bprm->p);
275         if (!vma)
276                 return -EFAULT;
277
278         /* Now, let's put argc (and argv, envp if appropriate) on the stack */
279         if (__put_user(argc, sp++))
280                 return -EFAULT;
281         argv = sp;
282         envp = argv + argc + 1;
283
284         /* Populate argv and envp */
        /* p walks the strings beginning at mm->arg_start; the address of
           each string is stored in the next argv[] (then envp[]) slot. */
285         p = current->mm->arg_end = current->mm->arg_start;
286         while (argc-- > 0) {
287                 size_t len;
288                 if (__put_user((elf_addr_t)p, argv++))
289                         return -EFAULT;
                /* NOTE(review): presumably len counts the terminating NUL, so
                   p += len lands on the next string -- confirm against
                   strnlen_user(). */
290                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
291                 if (!len || len > MAX_ARG_STRLEN)
292                         return -EINVAL;
293                 p += len;
294         }
295         if (__put_user(0, argv))
296                 return -EFAULT;
        /* The environment strings begin where the argument strings ended. */
297         current->mm->arg_end = current->mm->env_start = p;
298         while (envc-- > 0) {
299                 size_t len;
300                 if (__put_user((elf_addr_t)p, envp++))
301                         return -EFAULT;
302                 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
303                 if (!len || len > MAX_ARG_STRLEN)
304                         return -EINVAL;
305                 p += len;
306         }
307         if (__put_user(0, envp))
308                 return -EFAULT;
309         current->mm->env_end = p;
310
311         /* Put the elf_info on the stack in the right place.  */
        /* envp now points at the 0 terminator just written, so the auxv
           copy starts in the slot right after it. */
312         sp = (elf_addr_t __user *)envp + 1;
313         if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
314                 return -EFAULT;
315         return 0;
316 }
317
318 static unsigned long elf_map(struct file *filep, unsigned long addr,
319                 struct elf_phdr *eppnt, int prot, int type,
320                 unsigned long total_size)
321 {
322         unsigned long map_addr;
323         unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
324         unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
325         addr = ELF_PAGESTART(addr);
326         size = ELF_PAGEALIGN(size);
327
328         /* mmap() will return -EINVAL if given a zero size, but a
329          * segment with zero filesize is perfectly valid */
330         if (!size)
331                 return addr;
332
333         down_write(&current->mm->mmap_sem);
334         /*
335         * total_size is the size of the ELF (interpreter) image.
336         * The _first_ mmap needs to know the full size, otherwise
337         * randomization might put this image into an overlapping
338         * position with the ELF binary image. (since size < total_size)
339         * So we first map the 'big' image - and unmap the remainder at
340         * the end. (which unmap is needed for ELF images with holes.)
341         */
342         if (total_size) {
343                 total_size = ELF_PAGEALIGN(total_size);
344                 map_addr = do_mmap(filep, addr, total_size, prot, type, off);
345                 if (!BAD_ADDR(map_addr))
346                         do_munmap(current->mm, map_addr+size, total_size-size);
347         } else
348                 map_addr = do_mmap(filep, addr, size, prot, type, off);
349
350         up_write(&current->mm->mmap_sem);
351         return(map_addr);
352 }
353
354 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
355 {
356         int i, first_idx = -1, last_idx = -1;
357
358         for (i = 0; i < nr; i++) {
359                 if (cmds[i].p_type == PT_LOAD) {
360                         last_idx = i;
361                         if (first_idx == -1)
362                                 first_idx = i;
363                 }
364         }
365         if (first_idx == -1)
366                 return 0;
367
368         return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
369                                 ELF_PAGESTART(cmds[first_idx].p_vaddr);
370 }
371
372
373 /* This is much more generalized than the library routine read function,
374    so we keep this separate.  Technically the library read function
375    is only provided so that we can read a.out libraries that have
376    an ELF header */
377
378 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
379                 struct file *interpreter, unsigned long *interp_map_addr,
380                 unsigned long no_base)
381 {
382         struct elf_phdr *elf_phdata;
383         struct elf_phdr *eppnt;
384         unsigned long load_addr = 0;
385         int load_addr_set = 0;
386         unsigned long last_bss = 0, elf_bss = 0;
387         unsigned long error = ~0UL;
388         unsigned long total_size;
389         int retval, i, size;
390
391         /* First of all, some simple consistency checks */
392         if (interp_elf_ex->e_type != ET_EXEC &&
393             interp_elf_ex->e_type != ET_DYN)
394                 goto out;
395         if (!elf_check_arch(interp_elf_ex))
396                 goto out;
397         if (!interpreter->f_op || !interpreter->f_op->mmap)
398                 goto out;
399
400         /*
401          * If the size of this structure has changed, then punt, since
402          * we will be doing the wrong thing.
403          */
404         if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
405                 goto out;
406         if (interp_elf_ex->e_phnum < 1 ||
407                 interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
408                 goto out;
409
410         /* Now read in all of the header information */
411         size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
412         if (size > ELF_MIN_ALIGN)
413                 goto out;
414         elf_phdata = kmalloc(size, GFP_KERNEL);
415         if (!elf_phdata)
416                 goto out;
417
418         retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
419                              (char *)elf_phdata, size);
420         error = -EIO;
421         if (retval != size) {
422                 if (retval < 0)
423                         error = retval; 
424                 goto out_close;
425         }
426
427         total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
428         if (!total_size) {
429                 error = -EINVAL;
430                 goto out_close;
431         }
432
433         eppnt = elf_phdata;
434         for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
435                 if (eppnt->p_type == PT_LOAD) {
436                         int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
437                         int elf_prot = 0;
438                         unsigned long vaddr = 0;
439                         unsigned long k, map_addr;
440
441                         if (eppnt->p_flags & PF_R)
442                                 elf_prot = PROT_READ;
443                         if (eppnt->p_flags & PF_W)
444                                 elf_prot |= PROT_WRITE;
445                         if (eppnt->p_flags & PF_X)
446                                 elf_prot |= PROT_EXEC;
447                         vaddr = eppnt->p_vaddr;
448                         if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
449                                 elf_type |= MAP_FIXED;
450                         else if (no_base && interp_elf_ex->e_type == ET_DYN)
451                                 load_addr = -vaddr;
452
453                         map_addr = elf_map(interpreter, load_addr + vaddr,
454                                         eppnt, elf_prot, elf_type, total_size);
455                         total_size = 0;
456                         if (!*interp_map_addr)
457                                 *interp_map_addr = map_addr;
458                         error = map_addr;
459                         if (BAD_ADDR(map_addr))
460                                 goto out_close;
461
462                         if (!load_addr_set &&
463                             interp_elf_ex->e_type == ET_DYN) {
464                                 load_addr = map_addr - ELF_PAGESTART(vaddr);
465                                 load_addr_set = 1;
466                         }
467
468                         /*
469                          * Check to see if the section's size will overflow the
470                          * allowed task size. Note that p_filesz must always be
471                          * <= p_memsize so it's only necessary to check p_memsz.
472                          */
473                         k = load_addr + eppnt->p_vaddr;
474                         if (BAD_ADDR(k) ||
475                             eppnt->p_filesz > eppnt->p_memsz ||
476                             eppnt->p_memsz > TASK_SIZE ||
477                             TASK_SIZE - eppnt->p_memsz < k) {
478                                 error = -ENOMEM;
479                                 goto out_close;
480                         }
481
482                         /*
483                          * Find the end of the file mapping for this phdr, and
484                          * keep track of the largest address we see for this.
485                          */
486                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
487                         if (k > elf_bss)
488                                 elf_bss = k;
489
490                         /*
491                          * Do the same thing for the memory mapping - between
492                          * elf_bss and last_bss is the bss section.
493                          */
494                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
495                         if (k > last_bss)
496                                 last_bss = k;
497                 }
498         }
499
500         if (last_bss > elf_bss) {
501                 /*
502                  * Now fill out the bss section.  First pad the last page up
503                  * to the page boundary, and then perform a mmap to make sure
504                  * that there are zero-mapped pages up to and including the
505                  * last bss page.
506                  */
507                 if (padzero(elf_bss)) {
508                         error = -EFAULT;
509                         goto out_close;
510                 }
511
512                 /* What we have mapped so far */
513                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
514
515                 /* Map the last of the bss segment */
516                 down_write(&current->mm->mmap_sem);
517                 error = do_brk(elf_bss, last_bss - elf_bss);
518                 up_write(&current->mm->mmap_sem);
519                 if (BAD_ADDR(error))
520                         goto out_close;
521         }
522
523         error = load_addr;
524
525 out_close:
526         kfree(elf_phdata);
527 out:
528         return error;
529 }
530
531 /*
532  * These are the functions used to load ELF style executables and shared
533  * libraries.  There is no binary dependent code anywhere else.
534  */
535
536 #define INTERPRETER_NONE 0
537 #define INTERPRETER_ELF 2
538
539 #ifndef STACK_RND_MASK
540 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
541 #endif
542
543 static unsigned long randomize_stack_top(unsigned long stack_top)
544 {
545         unsigned long random_variable = 0;
546
547         if ((current->flags & PF_RANDOMIZE) &&
548                 !(current->personality & ADDR_NO_RANDOMIZE)) {
549                 random_variable = (unsigned long) get_random_int();
550                 random_variable &= STACK_RND_MASK;
551                 random_variable <<= PAGE_SHIFT;
552         }
553 #ifdef CONFIG_STACK_GROWSUP
554         return PAGE_ALIGN(stack_top) + random_variable;
555 #else
556         return PAGE_ALIGN(stack_top) - random_variable;
557 #endif
558 }
559
560 static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
561 {
562         struct file *interpreter = NULL; /* to shut gcc up */
563         unsigned long load_addr = 0, load_bias = 0;
564         int load_addr_set = 0;
565         char * elf_interpreter = NULL;
566         unsigned long error;
567         struct elf_phdr *elf_ppnt, *elf_phdata;
568         unsigned long elf_bss, elf_brk;
569         int retval, i;
570         unsigned int size;
571         unsigned long elf_entry;
572         unsigned long interp_load_addr = 0;
573         unsigned long start_code, end_code, start_data, end_data;
574         unsigned long reloc_func_desc __maybe_unused = 0;
575         int executable_stack = EXSTACK_DEFAULT;
576         unsigned long def_flags = 0;
577         struct {
578                 struct elfhdr elf_ex;
579                 struct elfhdr interp_elf_ex;
580         } *loc;
581
582         loc = kmalloc(sizeof(*loc), GFP_KERNEL);
583         if (!loc) {
584                 retval = -ENOMEM;
585                 goto out_ret;
586         }
587         
588         /* Get the exec-header */
589         loc->elf_ex = *((struct elfhdr *)bprm->buf);
590
591         retval = -ENOEXEC;
592         /* First of all, some simple consistency checks */
593         if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
594                 goto out;
595
596         if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
597                 goto out;
598         if (!elf_check_arch(&loc->elf_ex))
599                 goto out;
600         if (!bprm->file->f_op || !bprm->file->f_op->mmap)
601                 goto out;
602
603         /* Now read in all of the header information */
604         if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
605                 goto out;
606         if (loc->elf_ex.e_phnum < 1 ||
607                 loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
608                 goto out;
609         size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
610         retval = -ENOMEM;
611         elf_phdata = kmalloc(size, GFP_KERNEL);
612         if (!elf_phdata)
613                 goto out;
614
615         retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
616                              (char *)elf_phdata, size);
617         if (retval != size) {
618                 if (retval >= 0)
619                         retval = -EIO;
620                 goto out_free_ph;
621         }
622
623         elf_ppnt = elf_phdata;
624         elf_bss = 0;
625         elf_brk = 0;
626
627         start_code = ~0UL;
628         end_code = 0;
629         start_data = 0;
630         end_data = 0;
631
632         for (i = 0; i < loc->elf_ex.e_phnum; i++) {
633                 if (elf_ppnt->p_type == PT_INTERP) {
634                         /* This is the program interpreter used for
635                          * shared libraries - for now assume that this
636                          * is an a.out format binary
637                          */
638                         retval = -ENOEXEC;
639                         if (elf_ppnt->p_filesz > PATH_MAX || 
640                             elf_ppnt->p_filesz < 2)
641                                 goto out_free_ph;
642
643                         retval = -ENOMEM;
644                         elf_interpreter = kmalloc(elf_ppnt->p_filesz,
645                                                   GFP_KERNEL);
646                         if (!elf_interpreter)
647                                 goto out_free_ph;
648
649                         retval = kernel_read(bprm->file, elf_ppnt->p_offset,
650                                              elf_interpreter,
651                                              elf_ppnt->p_filesz);
652                         if (retval != elf_ppnt->p_filesz) {
653                                 if (retval >= 0)
654                                         retval = -EIO;
655                                 goto out_free_interp;
656                         }
657                         /* make sure path is NULL terminated */
658                         retval = -ENOEXEC;
659                         if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
660                                 goto out_free_interp;
661
662                         interpreter = open_exec(elf_interpreter);
663                         retval = PTR_ERR(interpreter);
664                         if (IS_ERR(interpreter))
665                                 goto out_free_interp;
666
667                         /*
668                          * If the binary is not readable then enforce
669                          * mm->dumpable = 0 regardless of the interpreter's
670                          * permissions.
671                          */
672                         would_dump(bprm, interpreter);
673
674                         retval = kernel_read(interpreter, 0, bprm->buf,
675                                              BINPRM_BUF_SIZE);
676                         if (retval != BINPRM_BUF_SIZE) {
677                                 if (retval >= 0)
678                                         retval = -EIO;
679                                 goto out_free_dentry;
680                         }
681
682                         /* Get the exec headers */
683                         loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
684                         break;
685                 }
686                 elf_ppnt++;
687         }
688
689         elf_ppnt = elf_phdata;
690         for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
691                 if (elf_ppnt->p_type == PT_GNU_STACK) {
692                         if (elf_ppnt->p_flags & PF_X)
693                                 executable_stack = EXSTACK_ENABLE_X;
694                         else
695                                 executable_stack = EXSTACK_DISABLE_X;
696                         break;
697                 }
698
699         /* Some simple consistency checks for the interpreter */
700         if (elf_interpreter) {
701                 retval = -ELIBBAD;
702                 /* Not an ELF interpreter */
703                 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
704                         goto out_free_dentry;
705                 /* Verify the interpreter has a valid arch */
706                 if (!elf_check_arch(&loc->interp_elf_ex))
707                         goto out_free_dentry;
708         }
709
710         /* Flush all traces of the currently running executable */
711         retval = flush_old_exec(bprm);
712         if (retval)
713                 goto out_free_dentry;
714
715         /* OK, This is the point of no return */
716         current->flags &= ~PF_FORKNOEXEC;
717         current->mm->def_flags = def_flags;
718
719         /* Do this immediately, since STACK_TOP as used in setup_arg_pages
720            may depend on the personality.  */
721         SET_PERSONALITY(loc->elf_ex);
722         if (elf_read_implies_exec(loc->elf_ex, executable_stack))
723                 current->personality |= READ_IMPLIES_EXEC;
724
725         if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
726                 current->flags |= PF_RANDOMIZE;
727
728         setup_new_exec(bprm);
729
730         /* Do this so that we can load the interpreter, if need be.  We will
731            change some of these later */
732         current->mm->free_area_cache = current->mm->mmap_base;
733         current->mm->cached_hole_size = 0;
734         retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
735                                  executable_stack);
736         if (retval < 0) {
737                 send_sig(SIGKILL, current, 0);
738                 goto out_free_dentry;
739         }
740         
741         current->mm->start_stack = bprm->p;
742
743         /* Now we do a little grungy work by mmapping the ELF image into
744            the correct location in memory. */
745         for(i = 0, elf_ppnt = elf_phdata;
746             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
747                 int elf_prot = 0, elf_flags;
748                 unsigned long k, vaddr;
749                 unsigned long total_size = 0;
750
751                 if (elf_ppnt->p_type != PT_LOAD)
752                         continue;
753
754                 if (unlikely (elf_brk > elf_bss)) {
755                         unsigned long nbyte;
756                     
757                         /* There was a PT_LOAD segment with p_memsz > p_filesz
758                            before this one. Map anonymous pages, if needed,
759                            and clear the area.  */
760                         retval = set_brk(elf_bss + load_bias,
761                                          elf_brk + load_bias);
762                         if (retval) {
763                                 send_sig(SIGKILL, current, 0);
764                                 goto out_free_dentry;
765                         }
766                         nbyte = ELF_PAGEOFFSET(elf_bss);
767                         if (nbyte) {
768                                 nbyte = ELF_MIN_ALIGN - nbyte;
769                                 if (nbyte > elf_brk - elf_bss)
770                                         nbyte = elf_brk - elf_bss;
771                                 if (clear_user((void __user *)elf_bss +
772                                                         load_bias, nbyte)) {
773                                         /*
774                                          * This bss-zeroing can fail if the ELF
775                                          * file specifies odd protections. So
776                                          * we don't check the return value
777                                          */
778                                 }
779                         }
780                 }
781
782                 if (elf_ppnt->p_flags & PF_R)
783                         elf_prot |= PROT_READ;
784                 if (elf_ppnt->p_flags & PF_W)
785                         elf_prot |= PROT_WRITE;
786                 if (elf_ppnt->p_flags & PF_X)
787                         elf_prot |= PROT_EXEC;
788
789                 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
790
791                 vaddr = elf_ppnt->p_vaddr;
792                 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
793                         elf_flags |= MAP_FIXED;
794                 } else if (loc->elf_ex.e_type == ET_DYN) {
795                         /* Try and get dynamic programs out of the way of the
796                          * default mmap base, as well as whatever program they
797                          * might try to exec.  This is because the brk will
798                          * follow the loader, and is not movable.  */
799 #if defined(CONFIG_X86) || defined(CONFIG_ARM)
800                         /* Memory randomization might have been switched off
801                          * in runtime via sysctl.
802                          * If that is the case, retain the original non-zero
803                          * load_bias value in order to establish proper
804                          * non-randomized mappings.
805                          */
806                         if (current->flags & PF_RANDOMIZE)
807                                 load_bias = 0;
808                         else
809                                 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
810 #else
811                         load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
812 #endif
813                         total_size = total_mapping_size(elf_phdata,
814                                                         loc->elf_ex.e_phnum);
815                         if (!total_size) {
816                                 error = -EINVAL;
817                                 goto out_free_dentry;
818                         }
819                 }
820
821                 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
822                                 elf_prot, elf_flags, total_size);
823                 if (BAD_ADDR(error)) {
824                         send_sig(SIGKILL, current, 0);
825                         retval = IS_ERR((void *)error) ?
826                                 PTR_ERR((void*)error) : -EINVAL;
827                         goto out_free_dentry;
828                 }
829
830                 if (!load_addr_set) {
831                         load_addr_set = 1;
832                         load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
833                         if (loc->elf_ex.e_type == ET_DYN) {
834                                 load_bias += error -
835                                              ELF_PAGESTART(load_bias + vaddr);
836                                 load_addr += load_bias;
837                                 reloc_func_desc = load_bias;
838                         }
839                 }
840                 k = elf_ppnt->p_vaddr;
841                 if (k < start_code)
842                         start_code = k;
843                 if (start_data < k)
844                         start_data = k;
845
846                 /*
847                  * Check to see if the section's size will overflow the
848                  * allowed task size. Note that p_filesz must always be
849                  * <= p_memsz so it is only necessary to check p_memsz.
850                  */
851                 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
852                     elf_ppnt->p_memsz > TASK_SIZE ||
853                     TASK_SIZE - elf_ppnt->p_memsz < k) {
854                         /* set_brk can never work. Avoid overflows. */
855                         send_sig(SIGKILL, current, 0);
856                         retval = -EINVAL;
857                         goto out_free_dentry;
858                 }
859
860                 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
861
862                 if (k > elf_bss)
863                         elf_bss = k;
864                 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
865                         end_code = k;
866                 if (end_data < k)
867                         end_data = k;
868                 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
869                 if (k > elf_brk)
870                         elf_brk = k;
871         }
872
873         loc->elf_ex.e_entry += load_bias;
874         elf_bss += load_bias;
875         elf_brk += load_bias;
876         start_code += load_bias;
877         end_code += load_bias;
878         start_data += load_bias;
879         end_data += load_bias;
880
881         /* Calling set_brk effectively mmaps the pages that we need
882          * for the bss and break sections.  We must do this before
883          * mapping in the interpreter, to make sure it doesn't wind
884          * up getting placed where the bss needs to go.
885          */
886         retval = set_brk(elf_bss, elf_brk);
887         if (retval) {
888                 send_sig(SIGKILL, current, 0);
889                 goto out_free_dentry;
890         }
891         if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
892                 send_sig(SIGSEGV, current, 0);
893                 retval = -EFAULT; /* Nobody gets to see this, but.. */
894                 goto out_free_dentry;
895         }
896
897         if (elf_interpreter) {
898                 unsigned long uninitialized_var(interp_map_addr);
899
900                 elf_entry = load_elf_interp(&loc->interp_elf_ex,
901                                             interpreter,
902                                             &interp_map_addr,
903                                             load_bias);
904                 if (!IS_ERR((void *)elf_entry)) {
905                         /*
906                          * load_elf_interp() returns relocation
907                          * adjustment
908                          */
909                         interp_load_addr = elf_entry;
910                         elf_entry += loc->interp_elf_ex.e_entry;
911                 }
912                 if (BAD_ADDR(elf_entry)) {
913                         force_sig(SIGSEGV, current);
914                         retval = IS_ERR((void *)elf_entry) ?
915                                         (int)elf_entry : -EINVAL;
916                         goto out_free_dentry;
917                 }
918                 reloc_func_desc = interp_load_addr;
919
920                 allow_write_access(interpreter);
921                 fput(interpreter);
922                 kfree(elf_interpreter);
923         } else {
924                 elf_entry = loc->elf_ex.e_entry;
925                 if (BAD_ADDR(elf_entry)) {
926                         force_sig(SIGSEGV, current);
927                         retval = -EINVAL;
928                         goto out_free_dentry;
929                 }
930         }
931
932         kfree(elf_phdata);
933
934         set_binfmt(&elf_format);
935
936 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
937         retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
938         if (retval < 0) {
939                 send_sig(SIGKILL, current, 0);
940                 goto out;
941         }
942 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
943
944         install_exec_creds(bprm);
945         current->flags &= ~PF_FORKNOEXEC;
946         retval = create_elf_tables(bprm, &loc->elf_ex,
947                           load_addr, interp_load_addr);
948         if (retval < 0) {
949                 send_sig(SIGKILL, current, 0);
950                 goto out;
951         }
952         /* N.B. passed_fileno might not be initialized? */
953         current->mm->end_code = end_code;
954         current->mm->start_code = start_code;
955         current->mm->start_data = start_data;
956         current->mm->end_data = end_data;
957         current->mm->start_stack = bprm->p;
958
959 #ifdef arch_randomize_brk
960         if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
961                 current->mm->brk = current->mm->start_brk =
962                         arch_randomize_brk(current->mm);
963 #ifdef CONFIG_COMPAT_BRK
964                 current->brk_randomized = 1;
965 #endif
966         }
967 #endif
968
969         if (current->personality & MMAP_PAGE_ZERO) {
970                 /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
971                    and some applications "depend" upon this behavior.
972                    Since we do not have the power to recompile these, we
973                    emulate the SVr4 behavior. Sigh. */
974                 down_write(&current->mm->mmap_sem);
975                 error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
976                                 MAP_FIXED | MAP_PRIVATE, 0);
977                 up_write(&current->mm->mmap_sem);
978         }
979
980 #ifdef ELF_PLAT_INIT
981         /*
982          * The ABI may specify that certain registers be set up in special
983          * ways (on i386 %edx is the address of a DT_FINI function, for
984          * example.  In addition, it may also specify (eg, PowerPC64 ELF)
985          * that the e_entry field is the address of the function descriptor
986          * for the startup routine, rather than the address of the startup
987          * routine itself.  This macro performs whatever initialization to
988          * the regs structure is required as well as any relocations to the
989          * function descriptor entries when executing dynamically links apps.
990          */
991         ELF_PLAT_INIT(regs, reloc_func_desc);
992 #endif
993
994         start_thread(regs, elf_entry, bprm->p);
995         retval = 0;
996 out:
997         kfree(loc);
998 out_ret:
999         return retval;
1000
1001         /* error cleanup */
1002 out_free_dentry:
1003         allow_write_access(interpreter);
1004         if (interpreter)
1005                 fput(interpreter);
1006 out_free_interp:
1007         kfree(elf_interpreter);
1008 out_free_ph:
1009         kfree(elf_phdata);
1010         goto out;
1011 }
1012
1013 /* This is really simpleminded and specialized - we are loading an
1014    a.out library that is given an ELF header. */
1015 static int load_elf_library(struct file *file)
1016 {
1017         struct elf_phdr *elf_phdata;
1018         struct elf_phdr *eppnt;
1019         unsigned long elf_bss, bss, len;
1020         int retval, error, i, j;
1021         struct elfhdr elf_ex;
1022
1023         error = -ENOEXEC;
1024         retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1025         if (retval != sizeof(elf_ex))
1026                 goto out;
1027
1028         if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1029                 goto out;
1030
1031         /* First of all, some simple consistency checks */
1032         if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1033             !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
1034                 goto out;
1035
1036         /* Now read in all of the header information */
1037
1038         j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1039         /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1040
1041         error = -ENOMEM;
1042         elf_phdata = kmalloc(j, GFP_KERNEL);
1043         if (!elf_phdata)
1044                 goto out;
1045
1046         eppnt = elf_phdata;
1047         error = -ENOEXEC;
1048         retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1049         if (retval != j)
1050                 goto out_free_ph;
1051
1052         for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1053                 if ((eppnt + i)->p_type == PT_LOAD)
1054                         j++;
1055         if (j != 1)
1056                 goto out_free_ph;
1057
1058         while (eppnt->p_type != PT_LOAD)
1059                 eppnt++;
1060
1061         /* Now use mmap to map the library into memory. */
1062         down_write(&current->mm->mmap_sem);
1063         error = do_mmap(file,
1064                         ELF_PAGESTART(eppnt->p_vaddr),
1065                         (eppnt->p_filesz +
1066                          ELF_PAGEOFFSET(eppnt->p_vaddr)),
1067                         PROT_READ | PROT_WRITE | PROT_EXEC,
1068                         MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1069                         (eppnt->p_offset -
1070                          ELF_PAGEOFFSET(eppnt->p_vaddr)));
1071         up_write(&current->mm->mmap_sem);
1072         if (error != ELF_PAGESTART(eppnt->p_vaddr))
1073                 goto out_free_ph;
1074
1075         elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1076         if (padzero(elf_bss)) {
1077                 error = -EFAULT;
1078                 goto out_free_ph;
1079         }
1080
1081         len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1082                             ELF_MIN_ALIGN - 1);
1083         bss = eppnt->p_memsz + eppnt->p_vaddr;
1084         if (bss > len) {
1085                 down_write(&current->mm->mmap_sem);
1086                 do_brk(len, bss - len);
1087                 up_write(&current->mm->mmap_sem);
1088         }
1089         error = 0;
1090
1091 out_free_ph:
1092         kfree(elf_phdata);
1093 out:
1094         return error;
1095 }
1096
1097 #ifdef CONFIG_ELF_CORE
1098 /*
1099  * ELF core dumper
1100  *
1101  * Modelled on fs/exec.c:aout_core_dump()
1102  * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1103  */
1104
1105 /*
1106  * Decide what to dump of a segment, part, all or none.
1107  */
1108 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1109                                    unsigned long mm_flags)
1110 {
1111 #define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))
1112
1113         /* The vma can be set up to tell us the answer directly.  */
1114         if (vma->vm_flags & VM_ALWAYSDUMP)
1115                 goto whole;
1116
1117         /* Hugetlb memory check */
1118         if (vma->vm_flags & VM_HUGETLB) {
1119                 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1120                         goto whole;
1121                 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1122                         goto whole;
1123         }
1124
1125         /* Do not dump I/O mapped devices or special mappings */
1126         if (vma->vm_flags & (VM_IO | VM_RESERVED))
1127                 return 0;
1128
1129         /* By default, dump shared memory if mapped from an anonymous file. */
1130         if (vma->vm_flags & VM_SHARED) {
1131                 if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
1132                     FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1133                         goto whole;
1134                 return 0;
1135         }
1136
1137         /* Dump segments that have been written to.  */
1138         if (vma->anon_vma && FILTER(ANON_PRIVATE))
1139                 goto whole;
1140         if (vma->vm_file == NULL)
1141                 return 0;
1142
1143         if (FILTER(MAPPED_PRIVATE))
1144                 goto whole;
1145
1146         /*
1147          * If this looks like the beginning of a DSO or executable mapping,
1148          * check for an ELF header.  If we find one, dump the first page to
1149          * aid in determining what was mapped here.
1150          */
1151         if (FILTER(ELF_HEADERS) &&
1152             vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1153                 u32 __user *header = (u32 __user *) vma->vm_start;
1154                 u32 word;
1155                 mm_segment_t fs = get_fs();
1156                 /*
1157                  * Doing it this way gets the constant folded by GCC.
1158                  */
1159                 union {
1160                         u32 cmp;
1161                         char elfmag[SELFMAG];
1162                 } magic;
1163                 BUILD_BUG_ON(SELFMAG != sizeof word);
1164                 magic.elfmag[EI_MAG0] = ELFMAG0;
1165                 magic.elfmag[EI_MAG1] = ELFMAG1;
1166                 magic.elfmag[EI_MAG2] = ELFMAG2;
1167                 magic.elfmag[EI_MAG3] = ELFMAG3;
1168                 /*
1169                  * Switch to the user "segment" for get_user(),
1170                  * then put back what elf_core_dump() had in place.
1171                  */
1172                 set_fs(USER_DS);
1173                 if (unlikely(get_user(word, header)))
1174                         word = 0;
1175                 set_fs(fs);
1176                 if (word == magic.cmp)
1177                         return PAGE_SIZE;
1178         }
1179
1180 #undef  FILTER
1181
1182         return 0;
1183
1184 whole:
1185         return vma->vm_end - vma->vm_start;
1186 }
1187
/*
 * An ELF note held in memory until it is written out.  The fields are
 * turned into an on-disk struct elf_note plus payload by writenote().
 */
struct memelfnote {
	const char *name;	/* NUL-terminated note name */
	int type;		/* note type, stored as n_type */
	unsigned int datasz;	/* size in bytes of the buffer at @data */
	void *data;		/* note payload */
};
1196
1197 static int notesize(struct memelfnote *en)
1198 {
1199         int sz;
1200
1201         sz = sizeof(struct elf_note);
1202         sz += roundup(strlen(en->name) + 1, 4);
1203         sz += roundup(en->datasz, 4);
1204
1205         return sz;
1206 }
1207
/*
 * Write @nr bytes at @addr with dump_write() and advance *@foffset by the
 * same amount.  Expects a variable named `file` in the calling scope and
 * makes the enclosing function return 0 if the write fails.  Note that
 * @nr is evaluated twice.
 */
#define DUMP_WRITE(addr, nr, foffset)			\
	do {						\
		if (!dump_write(file, (addr), (nr)))	\
			return 0;			\
		*foffset += (nr);			\
	} while(0)
1210
1211 static int alignfile(struct file *file, loff_t *foffset)
1212 {
1213         static const char buf[4] = { 0, };
1214         DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
1215         return 1;
1216 }
1217
1218 static int writenote(struct memelfnote *men, struct file *file,
1219                         loff_t *foffset)
1220 {
1221         struct elf_note en;
1222         en.n_namesz = strlen(men->name) + 1;
1223         en.n_descsz = men->datasz;
1224         en.n_type = men->type;
1225
1226         DUMP_WRITE(&en, sizeof(en), foffset);
1227         DUMP_WRITE(men->name, en.n_namesz, foffset);
1228         if (!alignfile(file, foffset))
1229                 return 0;
1230         DUMP_WRITE(men->data, men->datasz, foffset);
1231         if (!alignfile(file, foffset))
1232                 return 0;
1233
1234         return 1;
1235 }
1236 #undef DUMP_WRITE
1237
1238 static void fill_elf_header(struct elfhdr *elf, int segs,
1239                             u16 machine, u32 flags, u8 osabi)
1240 {
1241         memset(elf, 0, sizeof(*elf));
1242
1243         memcpy(elf->e_ident, ELFMAG, SELFMAG);
1244         elf->e_ident[EI_CLASS] = ELF_CLASS;
1245         elf->e_ident[EI_DATA] = ELF_DATA;
1246         elf->e_ident[EI_VERSION] = EV_CURRENT;
1247         elf->e_ident[EI_OSABI] = ELF_OSABI;
1248
1249         elf->e_type = ET_CORE;
1250         elf->e_machine = machine;
1251         elf->e_version = EV_CURRENT;
1252         elf->e_phoff = sizeof(struct elfhdr);
1253         elf->e_flags = flags;
1254         elf->e_ehsize = sizeof(struct elfhdr);
1255         elf->e_phentsize = sizeof(struct elf_phdr);
1256         elf->e_phnum = segs;
1257
1258         return;
1259 }
1260
1261 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1262 {
1263         phdr->p_type = PT_NOTE;
1264         phdr->p_offset = offset;
1265         phdr->p_vaddr = 0;
1266         phdr->p_paddr = 0;
1267         phdr->p_filesz = sz;
1268         phdr->p_memsz = 0;
1269         phdr->p_flags = 0;
1270         phdr->p_align = 0;
1271         return;
1272 }
1273
1274 static void fill_note(struct memelfnote *note, const char *name, int type, 
1275                 unsigned int sz, void *data)
1276 {
1277         note->name = name;
1278         note->type = type;
1279         note->datasz = sz;
1280         note->data = data;
1281         return;
1282 }
1283
1284 /*
1285  * fill up all the fields in prstatus from the given task struct, except
1286  * registers which need to be filled up separately.
1287  */
1288 static void fill_prstatus(struct elf_prstatus *prstatus,
1289                 struct task_struct *p, long signr)
1290 {
1291         prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1292         prstatus->pr_sigpend = p->pending.signal.sig[0];
1293         prstatus->pr_sighold = p->blocked.sig[0];
1294         rcu_read_lock();
1295         prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1296         rcu_read_unlock();
1297         prstatus->pr_pid = task_pid_vnr(p);
1298         prstatus->pr_pgrp = task_pgrp_vnr(p);
1299         prstatus->pr_sid = task_session_vnr(p);
1300         if (thread_group_leader(p)) {
1301                 struct task_cputime cputime;
1302
1303                 /*
1304                  * This is the record for the group leader.  It shows the
1305                  * group-wide total, not its individual thread total.
1306                  */
1307                 thread_group_cputime(p, &cputime);
1308                 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1309                 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
1310         } else {
1311                 cputime_to_timeval(p->utime, &prstatus->pr_utime);
1312                 cputime_to_timeval(p->stime, &prstatus->pr_stime);
1313         }
1314         cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1315         cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
1316 }
1317
1318 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1319                        struct mm_struct *mm)
1320 {
1321         const struct cred *cred;
1322         unsigned int i, len;
1323         
1324         /* first copy the parameters from user space */
1325         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1326
1327         len = mm->arg_end - mm->arg_start;
1328         if (len >= ELF_PRARGSZ)
1329                 len = ELF_PRARGSZ-1;
1330         if (copy_from_user(&psinfo->pr_psargs,
1331                            (const char __user *)mm->arg_start, len))
1332                 return -EFAULT;
1333         for(i = 0; i < len; i++)
1334                 if (psinfo->pr_psargs[i] == 0)
1335                         psinfo->pr_psargs[i] = ' ';
1336         psinfo->pr_psargs[len] = 0;
1337
1338         rcu_read_lock();
1339         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1340         rcu_read_unlock();
1341         psinfo->pr_pid = task_pid_vnr(p);
1342         psinfo->pr_pgrp = task_pgrp_vnr(p);
1343         psinfo->pr_sid = task_session_vnr(p);
1344
1345         i = p->state ? ffz(~p->state) + 1 : 0;
1346         psinfo->pr_state = i;
1347         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1348         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1349         psinfo->pr_nice = task_nice(p);
1350         psinfo->pr_flag = p->flags;
1351         rcu_read_lock();
1352         cred = __task_cred(p);
1353         SET_UID(psinfo->pr_uid, cred->uid);
1354         SET_GID(psinfo->pr_gid, cred->gid);
1355         rcu_read_unlock();
1356         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1357         
1358         return 0;
1359 }
1360
1361 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1362 {
1363         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1364         int i = 0;
1365         do
1366                 i += 2;
1367         while (auxv[i - 2] != AT_NULL);
1368         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1369 }
1370
1371 #ifdef CORE_DUMP_USE_REGSET
1372 #include <linux/regset.h>
1373
1374 struct elf_thread_core_info {
1375         struct elf_thread_core_info *next;
1376         struct task_struct *task;
1377         struct elf_prstatus prstatus;
1378         struct memelfnote notes[0];
1379 };
1380
1381 struct elf_note_info {
1382         struct elf_thread_core_info *thread;
1383         struct memelfnote psinfo;
1384         struct memelfnote auxv;
1385         size_t size;
1386         int thread_notes;
1387 };
1388
1389 /*
1390  * When a regset has a writeback hook, we call it on each thread before
1391  * dumping user memory.  On register window machines, this makes sure the
1392  * user memory backing the register data is up to date before we read it.
1393  */
1394 static void do_thread_regset_writeback(struct task_struct *task,
1395                                        const struct user_regset *regset)
1396 {
1397         if (regset->writeback)
1398                 regset->writeback(task, regset, 1);
1399 }
1400
1401 static int fill_thread_core_info(struct elf_thread_core_info *t,
1402                                  const struct user_regset_view *view,
1403                                  long signr, size_t *total)
1404 {
1405         unsigned int i;
1406
1407         /*
1408          * NT_PRSTATUS is the one special case, because the regset data
1409          * goes into the pr_reg field inside the note contents, rather
1410          * than being the whole note contents.  We fill the reset in here.
1411          * We assume that regset 0 is NT_PRSTATUS.
1412          */
1413         fill_prstatus(&t->prstatus, t->task, signr);
1414         (void) view->regsets[0].get(t->task, &view->regsets[0],
1415                                     0, sizeof(t->prstatus.pr_reg),
1416                                     &t->prstatus.pr_reg, NULL);
1417
1418         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1419                   sizeof(t->prstatus), &t->prstatus);
1420         *total += notesize(&t->notes[0]);
1421
1422         do_thread_regset_writeback(t->task, &view->regsets[0]);
1423
1424         /*
1425          * Each other regset might generate a note too.  For each regset
1426          * that has no core_note_type or is inactive, we leave t->notes[i]
1427          * all zero and we'll know to skip writing it later.
1428          */
1429         for (i = 1; i < view->n; ++i) {
1430                 const struct user_regset *regset = &view->regsets[i];
1431                 do_thread_regset_writeback(t->task, regset);
1432                 if (regset->core_note_type && regset->get &&
1433                     (!regset->active || regset->active(t->task, regset))) {
1434                         int ret;
1435                         size_t size = regset->n * regset->size;
1436                         void *data = kmalloc(size, GFP_KERNEL);
1437                         if (unlikely(!data))
1438                                 return 0;
1439                         ret = regset->get(t->task, regset,
1440                                           0, size, data, NULL);
1441                         if (unlikely(ret))
1442                                 kfree(data);
1443                         else {
1444                                 if (regset->core_note_type != NT_PRFPREG)
1445                                         fill_note(&t->notes[i], "LINUX",
1446                                                   regset->core_note_type,
1447                                                   size, data);
1448                                 else {
1449                                         t->prstatus.pr_fpvalid = 1;
1450                                         fill_note(&t->notes[i], "CORE",
1451                                                   NT_PRFPREG, size, data);
1452                                 }
1453                                 *total += notesize(&t->notes[i]);
1454                         }
1455                 }
1456         }
1457
1458         return 1;
1459 }
1460
1461 static int fill_note_info(struct elfhdr *elf, int phdrs,
1462                           struct elf_note_info *info,
1463                           long signr, struct pt_regs *regs)
1464 {
1465         struct task_struct *dump_task = current;
1466         const struct user_regset_view *view = task_user_regset_view(dump_task);
1467         struct elf_thread_core_info *t;
1468         struct elf_prpsinfo *psinfo;
1469         struct core_thread *ct;
1470         unsigned int i;
1471
1472         info->size = 0;
1473         info->thread = NULL;
1474
1475         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1476         if (psinfo == NULL)
1477                 return 0;
1478
1479         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1480
1481         /*
1482          * Figure out how many notes we're going to need for each thread.
1483          */
1484         info->thread_notes = 0;
1485         for (i = 0; i < view->n; ++i)
1486                 if (view->regsets[i].core_note_type != 0)
1487                         ++info->thread_notes;
1488
1489         /*
1490          * Sanity check.  We rely on regset 0 being in NT_PRSTATUS,
1491          * since it is our one special case.
1492          */
1493         if (unlikely(info->thread_notes == 0) ||
1494             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1495                 WARN_ON(1);
1496                 return 0;
1497         }
1498
1499         /*
1500          * Initialize the ELF file header.
1501          */
1502         fill_elf_header(elf, phdrs,
1503                         view->e_machine, view->e_flags, view->ei_osabi);
1504
1505         /*
1506          * Allocate a structure for each thread.
1507          */
1508         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1509                 t = kzalloc(offsetof(struct elf_thread_core_info,
1510                                      notes[info->thread_notes]),
1511                             GFP_KERNEL);
1512                 if (unlikely(!t))
1513                         return 0;
1514
1515                 t->task = ct->task;
1516                 if (ct->task == dump_task || !info->thread) {
1517                         t->next = info->thread;
1518                         info->thread = t;
1519                 } else {
1520                         /*
1521                          * Make sure to keep the original task at
1522                          * the head of the list.
1523                          */
1524                         t->next = info->thread->next;
1525                         info->thread->next = t;
1526                 }
1527         }
1528
1529         /*
1530          * Now fill in each thread's information.
1531          */
1532         for (t = info->thread; t != NULL; t = t->next)
1533                 if (!fill_thread_core_info(t, view, signr, &info->size))
1534                         return 0;
1535
1536         /*
1537          * Fill in the two process-wide notes.
1538          */
1539         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1540         info->size += notesize(&info->psinfo);
1541
1542         fill_auxv_note(&info->auxv, current->mm);
1543         info->size += notesize(&info->auxv);
1544
1545         return 1;
1546 }
1547
1548 static size_t get_note_info_size(struct elf_note_info *info)
1549 {
1550         return info->size;
1551 }
1552
1553 /*
1554  * Write all the notes for each thread.  When writing the first thread, the
1555  * process-wide notes are interleaved after the first thread-specific note.
1556  */
1557 static int write_note_info(struct elf_note_info *info,
1558                            struct file *file, loff_t *foffset)
1559 {
1560         bool first = 1;
1561         struct elf_thread_core_info *t = info->thread;
1562
1563         do {
1564                 int i;
1565
1566                 if (!writenote(&t->notes[0], file, foffset))
1567                         return 0;
1568
1569                 if (first && !writenote(&info->psinfo, file, foffset))
1570                         return 0;
1571                 if (first && !writenote(&info->auxv, file, foffset))
1572                         return 0;
1573
1574                 for (i = 1; i < info->thread_notes; ++i)
1575                         if (t->notes[i].data &&
1576                             !writenote(&t->notes[i], file, foffset))
1577                                 return 0;
1578
1579                 first = 0;
1580                 t = t->next;
1581         } while (t);
1582
1583         return 1;
1584 }
1585
1586 static void free_note_info(struct elf_note_info *info)
1587 {
1588         struct elf_thread_core_info *threads = info->thread;
1589         while (threads) {
1590                 unsigned int i;
1591                 struct elf_thread_core_info *t = threads;
1592                 threads = t->next;
1593                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1594                 for (i = 1; i < info->thread_notes; ++i)
1595                         kfree(t->notes[i].data);
1596                 kfree(t);
1597         }
1598         kfree(info->psinfo.data);
1599 }
1600
1601 #else
1602
1603 /* Here is the structure in which status of each thread is captured. */
1604 struct elf_thread_status
1605 {
1606         struct list_head list;
1607         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1608         elf_fpregset_t fpu;             /* NT_PRFPREG */
1609         struct task_struct *thread;
1610 #ifdef ELF_CORE_COPY_XFPREGS
1611         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1612 #endif
1613         struct memelfnote notes[3];
1614         int num_notes;
1615 };
1616
1617 /*
1618  * In order to add the specific thread information for the elf file format,
1619  * we need to keep a linked list of every threads pr_status and then create
1620  * a single section for them in the final core file.
1621  */
1622 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1623 {
1624         int sz = 0;
1625         struct task_struct *p = t->thread;
1626         t->num_notes = 0;
1627
1628         fill_prstatus(&t->prstatus, p, signr);
1629         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1630         
1631         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1632                   &(t->prstatus));
1633         t->num_notes++;
1634         sz += notesize(&t->notes[0]);
1635
1636         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1637                                                                 &t->fpu))) {
1638                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1639                           &(t->fpu));
1640                 t->num_notes++;
1641                 sz += notesize(&t->notes[1]);
1642         }
1643
1644 #ifdef ELF_CORE_COPY_XFPREGS
1645         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1646                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1647                           sizeof(t->xfpu), &t->xfpu);
1648                 t->num_notes++;
1649                 sz += notesize(&t->notes[2]);
1650         }
1651 #endif  
1652         return sz;
1653 }
1654
1655 struct elf_note_info {
1656         struct memelfnote *notes;
1657         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1658         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1659         struct list_head thread_list;
1660         elf_fpregset_t *fpu;
1661 #ifdef ELF_CORE_COPY_XFPREGS
1662         elf_fpxregset_t *xfpu;
1663 #endif
1664         int thread_status_size;
1665         int numnote;
1666 };
1667
1668 static int elf_note_info_init(struct elf_note_info *info)
1669 {
1670         memset(info, 0, sizeof(*info));
1671         INIT_LIST_HEAD(&info->thread_list);
1672
1673         /* Allocate space for six ELF notes */
1674         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1675         if (!info->notes)
1676                 return 0;
1677         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1678         if (!info->psinfo)
1679                 return 0;
1680         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1681         if (!info->prstatus)
1682                 return 0;
1683         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1684         if (!info->fpu)
1685                 return 0;
1686 #ifdef ELF_CORE_COPY_XFPREGS
1687         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1688         if (!info->xfpu)
1689                 return 0;
1690 #endif
1691         return 1;
1692 }
1693
1694 static int fill_note_info(struct elfhdr *elf, int phdrs,
1695                           struct elf_note_info *info,
1696                           long signr, struct pt_regs *regs)
1697 {
1698         struct list_head *t;
1699
1700         if (!elf_note_info_init(info))
1701                 return 0;
1702
1703         if (signr) {
1704                 struct core_thread *ct;
1705                 struct elf_thread_status *ets;
1706
1707                 for (ct = current->mm->core_state->dumper.next;
1708                                                 ct; ct = ct->next) {
1709                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1710                         if (!ets)
1711                                 return 0;
1712
1713                         ets->thread = ct->task;
1714                         list_add(&ets->list, &info->thread_list);
1715                 }
1716
1717                 list_for_each(t, &info->thread_list) {
1718                         int sz;
1719
1720                         ets = list_entry(t, struct elf_thread_status, list);
1721                         sz = elf_dump_thread_status(signr, ets);
1722                         info->thread_status_size += sz;
1723                 }
1724         }
1725         /* now collect the dump for the current */
1726         memset(info->prstatus, 0, sizeof(*info->prstatus));
1727         fill_prstatus(info->prstatus, current, signr);
1728         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1729
1730         /* Set up header */
1731         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1732
1733         /*
1734          * Set up the notes in similar form to SVR4 core dumps made
1735          * with info from their /proc.
1736          */
1737
1738         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1739                   sizeof(*info->prstatus), info->prstatus);
1740         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1741         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1742                   sizeof(*info->psinfo), info->psinfo);
1743
1744         info->numnote = 2;
1745
1746         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1747
1748         /* Try to dump the FPU. */
1749         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1750                                                                info->fpu);
1751         if (info->prstatus->pr_fpvalid)
1752                 fill_note(info->notes + info->numnote++,
1753                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1754 #ifdef ELF_CORE_COPY_XFPREGS
1755         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1756                 fill_note(info->notes + info->numnote++,
1757                           "LINUX", ELF_CORE_XFPREG_TYPE,
1758                           sizeof(*info->xfpu), info->xfpu);
1759 #endif
1760
1761         return 1;
1762 }
1763
1764 static size_t get_note_info_size(struct elf_note_info *info)
1765 {
1766         int sz = 0;
1767         int i;
1768
1769         for (i = 0; i < info->numnote; i++)
1770                 sz += notesize(info->notes + i);
1771
1772         sz += info->thread_status_size;
1773
1774         return sz;
1775 }
1776
1777 static int write_note_info(struct elf_note_info *info,
1778                            struct file *file, loff_t *foffset)
1779 {
1780         int i;
1781         struct list_head *t;
1782
1783         for (i = 0; i < info->numnote; i++)
1784                 if (!writenote(info->notes + i, file, foffset))
1785                         return 0;
1786
1787         /* write out the thread status notes section */
1788         list_for_each(t, &info->thread_list) {
1789                 struct elf_thread_status *tmp =
1790                                 list_entry(t, struct elf_thread_status, list);
1791
1792                 for (i = 0; i < tmp->num_notes; i++)
1793                         if (!writenote(&tmp->notes[i], file, foffset))
1794                                 return 0;
1795         }
1796
1797         return 1;
1798 }
1799
1800 static void free_note_info(struct elf_note_info *info)
1801 {
1802         while (!list_empty(&info->thread_list)) {
1803                 struct list_head *tmp = info->thread_list.next;
1804                 list_del(tmp);
1805                 kfree(list_entry(tmp, struct elf_thread_status, list));
1806         }
1807
1808         kfree(info->prstatus);
1809         kfree(info->psinfo);
1810         kfree(info->notes);
1811         kfree(info->fpu);
1812 #ifdef ELF_CORE_COPY_XFPREGS
1813         kfree(info->xfpu);
1814 #endif
1815 }
1816
1817 #endif
1818
1819 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1820                                         struct vm_area_struct *gate_vma)
1821 {
1822         struct vm_area_struct *ret = tsk->mm->mmap;
1823
1824         if (ret)
1825                 return ret;
1826         return gate_vma;
1827 }
1828 /*
1829  * Helper function for iterating across a vma list.  It ensures that the caller
1830  * will visit `gate_vma' prior to terminating the search.
1831  */
1832 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1833                                         struct vm_area_struct *gate_vma)
1834 {
1835         struct vm_area_struct *ret;
1836
1837         ret = this_vma->vm_next;
1838         if (ret)
1839                 return ret;
1840         if (this_vma == gate_vma)
1841                 return NULL;
1842         return gate_vma;
1843 }
1844
1845 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1846                              elf_addr_t e_shoff, int segs)
1847 {
1848         elf->e_shoff = e_shoff;
1849         elf->e_shentsize = sizeof(*shdr4extnum);
1850         elf->e_shnum = 1;
1851         elf->e_shstrndx = SHN_UNDEF;
1852
1853         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1854
1855         shdr4extnum->sh_type = SHT_NULL;
1856         shdr4extnum->sh_size = elf->e_shnum;
1857         shdr4extnum->sh_link = elf->e_shstrndx;
1858         shdr4extnum->sh_info = segs;
1859 }
1860
1861 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1862                                      unsigned long mm_flags)
1863 {
1864         struct vm_area_struct *vma;
1865         size_t size = 0;
1866
1867         for (vma = first_vma(current, gate_vma); vma != NULL;
1868              vma = next_vma(vma, gate_vma))
1869                 size += vma_dump_size(vma, mm_flags);
1870         return size;
1871 }
1872
1873 /*
1874  * Actual dumper
1875  *
1876  * This is a two-pass process; first we find the offsets of the bits,
1877  * and then they are actually written out.  If we run out of core limit
1878  * we just truncate.
1879  */
1880 static int elf_core_dump(struct coredump_params *cprm)
1881 {
1882         int has_dumped = 0;
1883         mm_segment_t fs;
1884         int segs;
1885         size_t size = 0;
1886         struct vm_area_struct *vma, *gate_vma;
1887         struct elfhdr *elf = NULL;
1888         loff_t offset = 0, dataoff, foffset;
1889         struct elf_note_info info;
1890         struct elf_phdr *phdr4note = NULL;
1891         struct elf_shdr *shdr4extnum = NULL;
1892         Elf_Half e_phnum;
1893         elf_addr_t e_shoff;
1894
1895         /*
1896          * We no longer stop all VM operations.
1897          * 
1898          * This is because those proceses that could possibly change map_count
1899          * or the mmap / vma pages are now blocked in do_exit on current
1900          * finishing this core dump.
1901          *
1902          * Only ptrace can touch these memory addresses, but it doesn't change
1903          * the map_count or the pages allocated. So no possibility of crashing
1904          * exists while dumping the mm->vm_next areas to the core file.
1905          */
1906   
1907         /* alloc memory for large data structures: too large to be on stack */
1908         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1909         if (!elf)
1910                 goto out;
1911         /*
1912          * The number of segs are recored into ELF header as 16bit value.
1913          * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
1914          */
1915         segs = current->mm->map_count;
1916         segs += elf_core_extra_phdrs();
1917
1918         gate_vma = get_gate_vma(current->mm);
1919         if (gate_vma != NULL)
1920                 segs++;
1921
1922         /* for notes section */
1923         segs++;
1924
1925         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1926          * this, kernel supports extended numbering. Have a look at
1927          * include/linux/elf.h for further information. */
1928         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1929
1930         /*
1931          * Collect all the non-memory information about the process for the
1932          * notes.  This also sets up the file header.
1933          */
1934         if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1935                 goto cleanup;
1936
1937         has_dumped = 1;
1938         current->flags |= PF_DUMPCORE;
1939   
1940         fs = get_fs();
1941         set_fs(KERNEL_DS);
1942
1943         offset += sizeof(*elf);                         /* Elf header */
1944         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1945         foffset = offset;
1946
1947         /* Write notes phdr entry */
1948         {
1949                 size_t sz = get_note_info_size(&info);
1950
1951                 sz += elf_coredump_extra_notes_size();
1952
1953                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1954                 if (!phdr4note)
1955                         goto end_coredump;
1956
1957                 fill_elf_note_phdr(phdr4note, sz, offset);
1958                 offset += sz;
1959         }
1960
1961         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1962
1963         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1964         offset += elf_core_extra_data_size();
1965         e_shoff = offset;
1966
1967         if (e_phnum == PN_XNUM) {
1968                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
1969                 if (!shdr4extnum)
1970                         goto end_coredump;
1971                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
1972         }
1973
1974         offset = dataoff;
1975
1976         size += sizeof(*elf);
1977         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
1978                 goto end_coredump;
1979
1980         size += sizeof(*phdr4note);
1981         if (size > cprm->limit
1982             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
1983                 goto end_coredump;
1984
1985         /* Write program headers for segments dump */
1986         for (vma = first_vma(current, gate_vma); vma != NULL;
1987                         vma = next_vma(vma, gate_vma)) {
1988                 struct elf_phdr phdr;
1989
1990                 phdr.p_type = PT_LOAD;
1991                 phdr.p_offset = offset;
1992                 phdr.p_vaddr = vma->vm_start;
1993                 phdr.p_paddr = 0;
1994                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
1995                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1996                 offset += phdr.p_filesz;
1997                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1998                 if (vma->vm_flags & VM_WRITE)
1999                         phdr.p_flags |= PF_W;
2000                 if (vma->vm_flags & VM_EXEC)
2001                         phdr.p_flags |= PF_X;
2002                 phdr.p_align = ELF_EXEC_PAGESIZE;
2003
2004                 size += sizeof(phdr);
2005                 if (size > cprm->limit
2006                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2007                         goto end_coredump;
2008         }
2009
2010         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2011                 goto end_coredump;
2012
2013         /* write out the notes section */
2014         if (!write_note_info(&info, cprm->file, &foffset))
2015                 goto end_coredump;
2016
2017         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2018                 goto end_coredump;
2019
2020         /* Align to page */
2021         if (!dump_seek(cprm->file, dataoff - foffset))
2022                 goto end_coredump;
2023
2024         for (vma = first_vma(current, gate_vma); vma != NULL;
2025                         vma = next_vma(vma, gate_vma)) {
2026                 unsigned long addr;
2027                 unsigned long end;
2028
2029                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2030
2031                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2032                         struct page *page;
2033                         int stop;
2034
2035                         page = get_dump_page(addr);
2036                         if (page) {
2037                                 void *kaddr = kmap(page);
2038                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2039                                         !dump_write(cprm->file, kaddr,
2040                                                     PAGE_SIZE);
2041                                 kunmap(page);
2042                                 page_cache_release(page);
2043                         } else
2044                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2045                         if (stop)
2046                                 goto end_coredump;
2047                 }
2048         }
2049
2050         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2051                 goto end_coredump;
2052
2053         if (e_phnum == PN_XNUM) {
2054                 size += sizeof(*shdr4extnum);
2055                 if (size > cprm->limit
2056                     || !dump_write(cprm->file, shdr4extnum,
2057                                    sizeof(*shdr4extnum)))
2058                         goto end_coredump;
2059         }
2060
2061 end_coredump:
2062         set_fs(fs);
2063
2064 cleanup:
2065         free_note_info(&info);
2066         kfree(shdr4extnum);
2067         kfree(phdr4note);
2068         kfree(elf);
2069 out:
2070         return has_dumped;
2071 }
2072
2073 #endif          /* CONFIG_ELF_CORE */
2074
2075 static int __init init_elf_binfmt(void)
2076 {
2077         return register_binfmt(&elf_format);
2078 }
2079
2080 static void __exit exit_elf_binfmt(void)
2081 {
2082         /* Remove the COFF and ELF loaders. */
2083         unregister_binfmt(&elf_format);
2084 }
2085
/*
 * Hook-up: init_elf_binfmt() is registered as a core initcall and
 * exit_elf_binfmt() as the module exit handler; the code is GPL licensed.
 */
core_initcall(init_elf_binfmt);
module_exit(exit_elf_binfmt);
MODULE_LICENSE("GPL");