/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: ANSI C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
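
/*
 * A worked example of the rounding macros above, assuming
 * ELF_MIN_ALIGN == 4096 (0x1000); the values are illustrative only:
 *
 *   ELF_PAGESTART(0x1234)  == 0x1000   (round down to the alignment)
 *   ELF_PAGEOFFSET(0x1234) == 0x234    (offset within the aligned unit)
 *   ELF_PAGEALIGN(0x1234)  == 0x2000   (round up to the alignment)
 */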

static struct linux_binfmt elf_format = {
                .module         = THIS_MODULE,
                .load_binary    = load_elf_binary,
                .load_shlib     = load_elf_library,
                .core_dump      = elf_core_dump,
                .min_coredump   = ELF_EXEC_PAGESIZE,
                .hasvdso        = 1
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                down_write(&current->mm->mmap_sem);
                addr = do_brk(start, end - start);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}
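
/*
 * Illustrative sketch of set_brk() with assumed addresses and
 * ELF_MIN_ALIGN == 0x1000: set_brk(0x0804a123, 0x0804c000) rounds the
 * start up to 0x0804b000, maps the anonymous region [0x0804b000,
 * 0x0804c000) via do_brk(), and leaves start_brk == brk == 0x0804c000.
 */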

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should not
   be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}
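
/*
 * For example (illustrative, assuming ELF_MIN_ALIGN == 0x1000): with
 * elf_bss == 0x08049f20, ELF_PAGEOFFSET() yields 0xf20, so padzero()
 * clears the 0xe0 bytes from 0x08049f20 up to the page end at 0x0804a000.
 */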

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
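
/*
 * A worked example (illustrative only; grows-down stack, 32-bit
 * elf_addr_t): with sp == 0xbffff010, STACK_ALLOC(sp, 16) lowers sp to
 * 0xbffff000 and returns that new address; STACK_ROUND(sp, 7) then
 * accounts for 7 entries (28 bytes) and rounds the result down to a
 * 16-byte boundary, giving 0xbfffefe0.
 */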

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)
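
/*
 * Each NEW_AUX_ENT() appends one auxiliary-vector entry as a pair of
 * consecutive elf_addr_t slots in saved_auxv; e.g. (values illustrative)
 * NEW_AUX_ENT(AT_PAGESZ, 4096) stores AT_PAGESZ and then 4096,
 * advancing ei_index by two.
 */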

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * Update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes.
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, cred->uid);
        NEW_AUX_ENT(AT_EUID, cred->euid);
        NEW_AUX_ENT(AT_GID, cred->gid);
        NEW_AUX_ENT(AT_EGID, cred->egid);
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
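
/*
 * The resulting initial user stack, from low to high addresses, looks
 * roughly like this (a sketch, not authoritative for every arch):
 *
 *   argc
 *   argv[0] ... argv[argc-1], NULL
 *   envp[0] ... envp[envc-1], NULL
 *   auxv pairs (AT_* id, value), terminated by an AT_NULL pair
 *   ... the argument/environment strings, the platform strings and the
 *       AT_RANDOM bytes live higher up, where they were STACK_ALLOC()ed
 *       from bprm->p above.
 */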

#ifndef elf_map

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        down_write(&current->mm->mmap_sem);
        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image (since size < total_size).
         * So we first map the 'big' image and then unmap the remainder
         * at the end (the unmap is needed for ELF images with holes).
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = do_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        do_munmap(current->mm, map_addr+size, total_size-size);
        } else
                map_addr = do_mmap(filep, addr, size, prot, type, off);

        up_write(&current->mm->mmap_sem);
        return map_addr;
}

#endif /* !elf_map */

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
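
/*
 * A worked example with made-up numbers: for two PT_LOAD headers at
 * p_vaddr 0x400000 and 0x600e10 (p_memsz 0x260 for the second),
 * total_mapping_size() returns 0x600e10 + 0x260 - 0x400000 == 0x201070,
 * i.e. the span from the first segment's page start to the end of the
 * last segment's memory image.
 */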


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header. */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        /*
         * Now fill out the bss section.  First pad the last page up
         * to the page boundary, and then perform a mmap to make sure
         * that there are zero-mapped pages up to and including the
         * last bss page.
         */
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_close;
        }

        /* What we have mapped so far */
        elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

        /* Map the last of the bss segment */
        if (last_bss > elf_bss) {
                down_write(&current->mm->mmap_sem);
                error = do_brk(elf_bss, last_bss - elf_bss);
                up_write(&current->mm->mmap_sem);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
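
/*
 * Illustrative numbers: with 4K pages (PAGE_SHIFT == 12) the default
 * STACK_RND_MASK is 0x7ff, so random_variable is at most
 * 0x7ff << 12 == 0x7ff000 bytes (just under 8 MiB), shifting the stack
 * top by up to that amount in page-sized steps.
 */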

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        /*
                         * The early SET_PERSONALITY here is so that the lookup
                         * for the interpreter happens in the namespace of the
                         * to-be-execed image.  SET_PERSONALITY can select an
                         * alternate root.
                         *
                         * However, SET_PERSONALITY is NOT allowed to switch
                         * this task into the new image's memory mapping
                         * policy - that is, TASK_SIZE must still evaluate to
                         * that which is appropriate to the execing application.
                         * This is because exit_mmap() needs to have TASK_SIZE
                         * evaluate to the size of the old image.
                         *
                         * So if (say) a 64-bit application is execing a 32-bit
                         * application it is the architecture's responsibility
                         * to defer changing the value of TASK_SIZE until the
                         * switch really is going to happen - do this in
                         * flush_thread().      - akpm
                         */
                        SET_PERSONALITY(loc->elf_ex);

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        if (file_permission(interpreter, MAY_READ) < 0)
                                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        } else {
                /* Executables without an interpreter also need a personality */
                SET_PERSONALITY(loc->elf_ex);
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->flags &= ~PF_FORKNOEXEC;
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;
        arch_pick_mmap_layout(current->mm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmaping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
             i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value.
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_X86
                        load_bias = 0;
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        current->flags &= ~PF_FORKNOEXEC;
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1))
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                down_write(&current->mm->mmap_sem);
                error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
                up_write(&current->mm->mmap_sem);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        down_write(&current->mm->mmap_sem);
        error = do_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        up_write(&current->mm->mmap_sem);
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len) {
                down_write(&current->mm->mmap_sem);
                do_brk(len, bss - len);
                up_write(&current->mm->mmap_sem);
        }
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * Note that some platforms still use traditional core dumps and not
 * the ELF core dump.  Each platform can select it as appropriate.
 */
#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)

/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */
/*
 * These are the only things you should do on a core-file: use only these
 * functions to write out all the necessary info.
 */
static int dump_write(struct file *file, const void *addr, int nr)
{
        return file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}

static int dump_seek(struct file *file, loff_t off)
{
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
                        return 0;
        } else {
                char *buf = (char *)get_zeroed_page(GFP_KERNEL);
                if (!buf)
                        return 0;
                while (off > 0) {
                        unsigned long n = off;
                        if (n > PAGE_SIZE)
                                n = PAGE_SIZE;
                        if (!dump_write(file, buf, n)) {
                                /* don't leak the zeroed page on failure */
                                free_page((unsigned long)buf);
                                return 0;
                        }
                        off -= n;
                }
                free_page((unsigned long)buf);
        }
        return 1;
}
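
/*
 * Illustrative numbers: skipping 10000 bytes on a file without a usable
 * llseek (assuming PAGE_SIZE == 4096) writes two full zeroed pages and
 * then 1808 zero bytes, so the resulting file offset matches a real seek.
 */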

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* The vma can be set up to tell us the answer directly.  */
        if (vma->vm_flags & VM_ALWAYSDUMP)
                goto whole;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}
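
/*
 * A worked example (sizes illustrative; sizeof(struct elf_note) is
 * typically 12 bytes): a note named "CORE" (5 bytes including the NUL,
 * rounded up to 8) with a 30-byte payload (rounded up to 32) occupies
 * 12 + 8 + 32 == 52 bytes in the core file.
 */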

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}
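
/*
 * For example (illustrative): if *foffset is 13, alignfile() emits three
 * zero bytes so the next note field starts at offset 16, keeping every
 * member 4-byte aligned as the ELF note format requires.
 */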

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

#define DUMP_WRITE(addr, nr)    \
        if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
                goto end_coredump;
#define DUMP_SEEK(off)  \
        if (!dump_seek(file, (off))) \
                goto end_coredump;

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}
1366
1367 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1368                        struct mm_struct *mm)
1369 {
1370         const struct cred *cred;
1371         unsigned int i, len;
1372         
1373         /* first copy the parameters from user space */
1374         memset(psinfo, 0, sizeof(struct elf_prpsinfo));
1375
1376         len = mm->arg_end - mm->arg_start;
1377         if (len >= ELF_PRARGSZ)
1378                 len = ELF_PRARGSZ-1;
1379         if (copy_from_user(&psinfo->pr_psargs,
1380                            (const char __user *)mm->arg_start, len))
1381                 return -EFAULT;
1382         for(i = 0; i < len; i++)
1383                 if (psinfo->pr_psargs[i] == 0)
1384                         psinfo->pr_psargs[i] = ' ';
1385         psinfo->pr_psargs[len] = 0;
1386
1387         rcu_read_lock();
1388         psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1389         rcu_read_unlock();
1390         psinfo->pr_pid = task_pid_vnr(p);
1391         psinfo->pr_pgrp = task_pgrp_vnr(p);
1392         psinfo->pr_sid = task_session_vnr(p);
1393
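        /*
         * Encode the task state as the conventional ps-style letter:
         * index 0 ('R') for a running task, otherwise the position of
         * the lowest set state bit selects from "RSDTZW"; anything past
         * 'W' is shown as '.'.
         */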
1394         i = p->state ? ffz(~p->state) + 1 : 0;
1395         psinfo->pr_state = i;
1396         psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1397         psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1398         psinfo->pr_nice = task_nice(p);
1399         psinfo->pr_flag = p->flags;
1400         rcu_read_lock();
1401         cred = __task_cred(p);
1402         SET_UID(psinfo->pr_uid, cred->uid);
1403         SET_GID(psinfo->pr_gid, cred->gid);
1404         rcu_read_unlock();
1405         strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
1406         
1407         return 0;
1408 }
1409
1410 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1411 {
1412         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1413         int i = 0;
        do {
                i += 2;
        } while (auxv[i - 2] != AT_NULL);
1417         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1418 }
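
/*
 * An illustrative sketch of the layout assumed above: mm->saved_auxv
 * holds the auxiliary vector as (type, value) word pairs terminated by
 * an AT_NULL entry, e.g.
 *
 *        { AT_PHDR, <phdr addr> }, { AT_PAGESZ, 4096 }, ..., { AT_NULL, 0 }
 *
 * so the loop above counts two words at a time until it has consumed
 * the AT_NULL terminator, which is deliberately included in the note.
 */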
1419
1420 #ifdef CORE_DUMP_USE_REGSET
1421 #include <linux/regset.h>
1422
1423 struct elf_thread_core_info {
1424         struct elf_thread_core_info *next;
1425         struct task_struct *task;
1426         struct elf_prstatus prstatus;
1427         struct memelfnote notes[0];
1428 };
1429
1430 struct elf_note_info {
1431         struct elf_thread_core_info *thread;
1432         struct memelfnote psinfo;
1433         struct memelfnote auxv;
1434         size_t size;
1435         int thread_notes;
1436 };
1437
1438 /*
1439  * When a regset has a writeback hook, we call it on each thread before
1440  * dumping user memory.  On register window machines, this makes sure the
1441  * user memory backing the register data is up to date before we read it.
1442  */
1443 static void do_thread_regset_writeback(struct task_struct *task,
1444                                        const struct user_regset *regset)
1445 {
1446         if (regset->writeback)
1447                 regset->writeback(task, regset, 1);
1448 }
1449
1450 static int fill_thread_core_info(struct elf_thread_core_info *t,
1451                                  const struct user_regset_view *view,
1452                                  long signr, size_t *total)
1453 {
1454         unsigned int i;
1455
        /*
         * NT_PRSTATUS is the one special case, because the regset data
         * goes into the pr_reg field inside the note contents, rather
         * than being the whole note contents.  We fill the rest in here.
         * We assume that regset 0 is NT_PRSTATUS.
         */
1462         fill_prstatus(&t->prstatus, t->task, signr);
1463         (void) view->regsets[0].get(t->task, &view->regsets[0],
1464                                     0, sizeof(t->prstatus.pr_reg),
1465                                     &t->prstatus.pr_reg, NULL);
1466
1467         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1468                   sizeof(t->prstatus), &t->prstatus);
1469         *total += notesize(&t->notes[0]);
1470
1471         do_thread_regset_writeback(t->task, &view->regsets[0]);
1472
1473         /*
1474          * Each other regset might generate a note too.  For each regset
1475          * that has no core_note_type or is inactive, we leave t->notes[i]
1476          * all zero and we'll know to skip writing it later.
1477          */
1478         for (i = 1; i < view->n; ++i) {
1479                 const struct user_regset *regset = &view->regsets[i];
1480                 do_thread_regset_writeback(t->task, regset);
1481                 if (regset->core_note_type &&
1482                     (!regset->active || regset->active(t->task, regset))) {
1483                         int ret;
1484                         size_t size = regset->n * regset->size;
1485                         void *data = kmalloc(size, GFP_KERNEL);
1486                         if (unlikely(!data))
1487                                 return 0;
1488                         ret = regset->get(t->task, regset,
1489                                           0, size, data, NULL);
                        if (unlikely(ret)) {
                                kfree(data);
                        } else {
                                if (regset->core_note_type != NT_PRFPREG) {
                                        fill_note(&t->notes[i], "LINUX",
                                                  regset->core_note_type,
                                                  size, data);
                                } else {
                                        t->prstatus.pr_fpvalid = 1;
                                        fill_note(&t->notes[i], "CORE",
                                                  NT_PRFPREG, size, data);
                                }
                                *total += notesize(&t->notes[i]);
                        }
1504                 }
1505         }
1506
1507         return 1;
1508 }
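
/*
 * A sketch of what fill_thread_core_info() leaves behind for one thread:
 *
 *        notes[0] = NT_PRSTATUS, with pr_reg filled in from regset 0
 *        notes[i] = one note per additional regset that is present and
 *                   active, or an all-zero entry that write_note_info()
 *                   knows to skip
 */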
1509
1510 static int fill_note_info(struct elfhdr *elf, int phdrs,
1511                           struct elf_note_info *info,
1512                           long signr, struct pt_regs *regs)
1513 {
1514         struct task_struct *dump_task = current;
1515         const struct user_regset_view *view = task_user_regset_view(dump_task);
1516         struct elf_thread_core_info *t;
1517         struct elf_prpsinfo *psinfo;
1518         struct core_thread *ct;
1519         unsigned int i;
1520
1521         info->size = 0;
1522         info->thread = NULL;
1523
1524         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
        if (psinfo == NULL) {
                info->psinfo.data = NULL; /* So we don't free this wrongly */
                return 0;
        }
1527
1528         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1529
1530         /*
1531          * Figure out how many notes we're going to need for each thread.
1532          */
1533         info->thread_notes = 0;
1534         for (i = 0; i < view->n; ++i)
1535                 if (view->regsets[i].core_note_type != 0)
1536                         ++info->thread_notes;
1537
        /*
         * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
         * since it is our one special case.
         */
1542         if (unlikely(info->thread_notes == 0) ||
1543             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1544                 WARN_ON(1);
1545                 return 0;
1546         }
1547
1548         /*
1549          * Initialize the ELF file header.
1550          */
1551         fill_elf_header(elf, phdrs,
1552                         view->e_machine, view->e_flags, view->ei_osabi);
1553
1554         /*
1555          * Allocate a structure for each thread.
1556          */
1557         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1558                 t = kzalloc(offsetof(struct elf_thread_core_info,
1559                                      notes[info->thread_notes]),
1560                             GFP_KERNEL);
1561                 if (unlikely(!t))
1562                         return 0;
1563
1564                 t->task = ct->task;
1565                 if (ct->task == dump_task || !info->thread) {
1566                         t->next = info->thread;
1567                         info->thread = t;
1568                 } else {
1569                         /*
1570                          * Make sure to keep the original task at
1571                          * the head of the list.
1572                          */
1573                         t->next = info->thread->next;
1574                         info->thread->next = t;
1575                 }
1576         }
1577
1578         /*
1579          * Now fill in each thread's information.
1580          */
1581         for (t = info->thread; t != NULL; t = t->next)
1582                 if (!fill_thread_core_info(t, view, signr, &info->size))
1583                         return 0;
1584
1585         /*
1586          * Fill in the two process-wide notes.
1587          */
1588         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1589         info->size += notesize(&info->psinfo);
1590
1591         fill_auxv_note(&info->auxv, current->mm);
1592         info->size += notesize(&info->auxv);
1593
1594         return 1;
1595 }
1596
1597 static size_t get_note_info_size(struct elf_note_info *info)
1598 {
1599         return info->size;
1600 }
1601
1602 /*
1603  * Write all the notes for each thread.  When writing the first thread, the
1604  * process-wide notes are interleaved after the first thread-specific note.
1605  */
1606 static int write_note_info(struct elf_note_info *info,
1607                            struct file *file, loff_t *foffset)
1608 {
        bool first = true;
1610         struct elf_thread_core_info *t = info->thread;
1611
1612         do {
1613                 int i;
1614
1615                 if (!writenote(&t->notes[0], file, foffset))
1616                         return 0;
1617
1618                 if (first && !writenote(&info->psinfo, file, foffset))
1619                         return 0;
1620                 if (first && !writenote(&info->auxv, file, foffset))
1621                         return 0;
1622
1623                 for (i = 1; i < info->thread_notes; ++i)
1624                         if (t->notes[i].data &&
1625                             !writenote(&t->notes[i], file, foffset))
1626                                 return 0;
1627
                first = false;
1629                 t = t->next;
1630         } while (t);
1631
1632         return 1;
1633 }
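
/*
 * The resulting on-file note order is therefore, as a sketch:
 *
 *        thread 1: NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, <other regset notes>
 *        thread 2: NT_PRSTATUS, <other regset notes>
 *        ...
 *
 * which matches the sizes accumulated into info->size by fill_note_info().
 */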
1634
1635 static void free_note_info(struct elf_note_info *info)
1636 {
1637         struct elf_thread_core_info *threads = info->thread;
1638         while (threads) {
1639                 unsigned int i;
1640                 struct elf_thread_core_info *t = threads;
1641                 threads = t->next;
1642                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1643                 for (i = 1; i < info->thread_notes; ++i)
1644                         kfree(t->notes[i].data);
1645                 kfree(t);
1646         }
1647         kfree(info->psinfo.data);
1648 }
1649
1650 #else
1651
1652 /* Here is the structure in which status of each thread is captured. */
1653 struct elf_thread_status
1654 {
1655         struct list_head list;
1656         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1657         elf_fpregset_t fpu;             /* NT_PRFPREG */
1658         struct task_struct *thread;
1659 #ifdef ELF_CORE_COPY_XFPREGS
1660         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1661 #endif
1662         struct memelfnote notes[3];
1663         int num_notes;
1664 };
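
/*
 * notes[] above is filled, in order, with NT_PRSTATUS, NT_PRFPREG and
 * (when ELF_CORE_COPY_XFPREGS is defined) ELF_CORE_XFPREG_TYPE;
 * num_notes records how many of the three were actually filled in.
 */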
1665
/*
 * In order to add the thread-specific information to the ELF core file,
 * we need to keep a linked list of every thread's pr_status and then create
 * a single section for them in the final core file.
 */
1671 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1672 {
1673         int sz = 0;
1674         struct task_struct *p = t->thread;
1675         t->num_notes = 0;
1676
1677         fill_prstatus(&t->prstatus, p, signr);
1678         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1679         
1680         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1681                   &(t->prstatus));
1682         t->num_notes++;
1683         sz += notesize(&t->notes[0]);
1684
1685         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1686                                                                 &t->fpu))) {
1687                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1688                           &(t->fpu));
1689                 t->num_notes++;
1690                 sz += notesize(&t->notes[1]);
1691         }
1692
1693 #ifdef ELF_CORE_COPY_XFPREGS
1694         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1695                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1696                           sizeof(t->xfpu), &t->xfpu);
1697                 t->num_notes++;
1698                 sz += notesize(&t->notes[2]);
1699         }
1700 #endif  
1701         return sz;
1702 }
1703
1704 struct elf_note_info {
1705         struct memelfnote *notes;
1706         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1707         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1708         struct list_head thread_list;
1709         elf_fpregset_t *fpu;
1710 #ifdef ELF_CORE_COPY_XFPREGS
1711         elf_fpxregset_t *xfpu;
1712 #endif
1713         int thread_status_size;
1714         int numnote;
1715 };
1716
1717 static int fill_note_info(struct elfhdr *elf, int phdrs,
1718                           struct elf_note_info *info,
1719                           long signr, struct pt_regs *regs)
1720 {
1721 #define NUM_NOTES       6
1722         struct list_head *t;
1723
1724         info->notes = NULL;
1725         info->prstatus = NULL;
1726         info->psinfo = NULL;
1727         info->fpu = NULL;
1728 #ifdef ELF_CORE_COPY_XFPREGS
1729         info->xfpu = NULL;
1730 #endif
1731         INIT_LIST_HEAD(&info->thread_list);
1732
1733         info->notes = kmalloc(NUM_NOTES * sizeof(struct memelfnote),
1734                               GFP_KERNEL);
1735         if (!info->notes)
1736                 return 0;
1737         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1738         if (!info->psinfo)
1739                 return 0;
1740         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1741         if (!info->prstatus)
1742                 return 0;
1743         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1744         if (!info->fpu)
1745                 return 0;
1746 #ifdef ELF_CORE_COPY_XFPREGS
1747         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1748         if (!info->xfpu)
1749                 return 0;
1750 #endif
1751
1752         info->thread_status_size = 0;
1753         if (signr) {
1754                 struct core_thread *ct;
1755                 struct elf_thread_status *ets;
1756
1757                 for (ct = current->mm->core_state->dumper.next;
1758                                                 ct; ct = ct->next) {
1759                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1760                         if (!ets)
1761                                 return 0;
1762
1763                         ets->thread = ct->task;
1764                         list_add(&ets->list, &info->thread_list);
1765                 }
1766
1767                 list_for_each(t, &info->thread_list) {
1768                         int sz;
1769
1770                         ets = list_entry(t, struct elf_thread_status, list);
1771                         sz = elf_dump_thread_status(signr, ets);
1772                         info->thread_status_size += sz;
1773                 }
1774         }
        /* now collect the dump for the current task */
1776         memset(info->prstatus, 0, sizeof(*info->prstatus));
1777         fill_prstatus(info->prstatus, current, signr);
1778         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1779
1780         /* Set up header */
1781         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1782
1783         /*
1784          * Set up the notes in similar form to SVR4 core dumps made
1785          * with info from their /proc.
1786          */
1787
1788         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1789                   sizeof(*info->prstatus), info->prstatus);
1790         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1791         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1792                   sizeof(*info->psinfo), info->psinfo);
1793
1794         info->numnote = 2;
1795
1796         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1797
1798         /* Try to dump the FPU. */
1799         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1800                                                                info->fpu);
1801         if (info->prstatus->pr_fpvalid)
1802                 fill_note(info->notes + info->numnote++,
1803                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1804 #ifdef ELF_CORE_COPY_XFPREGS
1805         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1806                 fill_note(info->notes + info->numnote++,
1807                           "LINUX", ELF_CORE_XFPREG_TYPE,
1808                           sizeof(*info->xfpu), info->xfpu);
1809 #endif
1810
1811         return 1;
1812
1813 #undef NUM_NOTES
1814 }
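
/*
 * On success, info->notes holds, in order: NT_PRSTATUS, NT_PRPSINFO and
 * NT_AUXV, optionally followed by NT_PRFPREG and ELF_CORE_XFPREG_TYPE;
 * info->numnote says how many slots are in use (NUM_NOTES is only an
 * upper bound used for the allocation).
 */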
1815
1816 static size_t get_note_info_size(struct elf_note_info *info)
1817 {
1818         int sz = 0;
1819         int i;
1820
1821         for (i = 0; i < info->numnote; i++)
1822                 sz += notesize(info->notes + i);
1823
1824         sz += info->thread_status_size;
1825
1826         return sz;
1827 }
1828
1829 static int write_note_info(struct elf_note_info *info,
1830                            struct file *file, loff_t *foffset)
1831 {
1832         int i;
1833         struct list_head *t;
1834
1835         for (i = 0; i < info->numnote; i++)
1836                 if (!writenote(info->notes + i, file, foffset))
1837                         return 0;
1838
1839         /* write out the thread status notes section */
1840         list_for_each(t, &info->thread_list) {
1841                 struct elf_thread_status *tmp =
1842                                 list_entry(t, struct elf_thread_status, list);
1843
1844                 for (i = 0; i < tmp->num_notes; i++)
1845                         if (!writenote(&tmp->notes[i], file, foffset))
1846                                 return 0;
1847         }
1848
1849         return 1;
1850 }
1851
1852 static void free_note_info(struct elf_note_info *info)
1853 {
1854         while (!list_empty(&info->thread_list)) {
1855                 struct list_head *tmp = info->thread_list.next;
1856                 list_del(tmp);
1857                 kfree(list_entry(tmp, struct elf_thread_status, list));
1858         }
1859
1860         kfree(info->prstatus);
1861         kfree(info->psinfo);
1862         kfree(info->notes);
1863         kfree(info->fpu);
1864 #ifdef ELF_CORE_COPY_XFPREGS
1865         kfree(info->xfpu);
1866 #endif
1867 }
1868
1869 #endif
1870
1871 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1872                                         struct vm_area_struct *gate_vma)
1873 {
1874         struct vm_area_struct *ret = tsk->mm->mmap;
1875
1876         if (ret)
1877                 return ret;
1878         return gate_vma;
1879 }
1880 /*
1881  * Helper function for iterating across a vma list.  It ensures that the caller
1882  * will visit `gate_vma' prior to terminating the search.
1883  */
1884 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1885                                         struct vm_area_struct *gate_vma)
1886 {
1887         struct vm_area_struct *ret;
1888
1889         ret = this_vma->vm_next;
1890         if (ret)
1891                 return ret;
1892         if (this_vma == gate_vma)
1893                 return NULL;
1894         return gate_vma;
1895 }
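
/*
 * Together these give the iteration idiom used by elf_core_dump() below,
 * which walks every vma in mm->mmap and then visits the gate vma exactly
 * once:
 *
 *        for (vma = first_vma(current, gate_vma); vma != NULL;
 *                        vma = next_vma(vma, gate_vma))
 *                ...
 */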
1896
/*
 * Actual dumper
 *
 * This is a two-pass process; first we compute the offsets of all the
 * pieces, and then they are actually written out.  If we run out of core
 * limit we just truncate.
 */
1904 static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
1905 {
1906         int has_dumped = 0;
1907         mm_segment_t fs;
1908         int segs;
1909         size_t size = 0;
1910         struct vm_area_struct *vma, *gate_vma;
1911         struct elfhdr *elf = NULL;
1912         loff_t offset = 0, dataoff, foffset;
1913         unsigned long mm_flags;
1914         struct elf_note_info info;
1915
        /*
         * We no longer stop all VM operations.
         *
         * This is because those processes that could possibly change map_count
         * or the mmap / vma pages are now blocked in do_exit on current
         * finishing this core dump.
         *
         * Only ptrace can touch these memory addresses, but it doesn't change
         * the map_count or the pages allocated. So no possibility of crashing
         * exists while dumping the mm->vm_next areas to the core file.
         */

1928         /* alloc memory for large data structures: too large to be on stack */
1929         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1930         if (!elf)
1931                 goto out;
        /*
         * The number of segs is recorded in the ELF header as a 16-bit value.
         * Please check the DEFAULT_MAX_MAP_COUNT definition when you modify here.
         */
1936         segs = current->mm->map_count;
1937 #ifdef ELF_CORE_EXTRA_PHDRS
1938         segs += ELF_CORE_EXTRA_PHDRS;
1939 #endif
1940
1941         gate_vma = get_gate_vma(current);
1942         if (gate_vma != NULL)
1943                 segs++;
1944
1945         /*
1946          * Collect all the non-memory information about the process for the
1947          * notes.  This also sets up the file header.
1948          */
1949         if (!fill_note_info(elf, segs + 1, /* including notes section */
1950                             &info, signr, regs))
1951                 goto cleanup;
1952
1953         has_dumped = 1;
1954         current->flags |= PF_DUMPCORE;
1955   
1956         fs = get_fs();
1957         set_fs(KERNEL_DS);
1958
1959         DUMP_WRITE(elf, sizeof(*elf));
1960         offset += sizeof(*elf);                         /* Elf header */
1961         offset += (segs + 1) * sizeof(struct elf_phdr); /* Program headers */
1962         foffset = offset;
1963
1964         /* Write notes phdr entry */
1965         {
1966                 struct elf_phdr phdr;
1967                 size_t sz = get_note_info_size(&info);
1968
1969                 sz += elf_coredump_extra_notes_size();
1970
1971                 fill_elf_note_phdr(&phdr, sz, offset);
1972                 offset += sz;
1973                 DUMP_WRITE(&phdr, sizeof(phdr));
1974         }
1975
1976         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
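
        /*
         * A sketch of the core file layout being built:
         *
         *        [ ELF header | program headers | note data | pad | vma data ]
         *          offset 0                       ^foffset          ^dataoff
         *
         * dataoff is page aligned so the PT_LOAD contents land at nicely
         * aligned file offsets.
         */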
1977
1978         /*
1979          * We must use the same mm->flags while dumping core to avoid
1980          * inconsistency between the program headers and bodies, otherwise an
1981          * unusable core file can be generated.
1982          */
1983         mm_flags = current->mm->flags;
1984
1985         /* Write program headers for segments dump */
1986         for (vma = first_vma(current, gate_vma); vma != NULL;
1987                         vma = next_vma(vma, gate_vma)) {
1988                 struct elf_phdr phdr;
1989
1990                 phdr.p_type = PT_LOAD;
1991                 phdr.p_offset = offset;
1992                 phdr.p_vaddr = vma->vm_start;
1993                 phdr.p_paddr = 0;
1994                 phdr.p_filesz = vma_dump_size(vma, mm_flags);
1995                 phdr.p_memsz = vma->vm_end - vma->vm_start;
1996                 offset += phdr.p_filesz;
1997                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
1998                 if (vma->vm_flags & VM_WRITE)
1999                         phdr.p_flags |= PF_W;
2000                 if (vma->vm_flags & VM_EXEC)
2001                         phdr.p_flags |= PF_X;
2002                 phdr.p_align = ELF_EXEC_PAGESIZE;
2003
2004                 DUMP_WRITE(&phdr, sizeof(phdr));
2005         }
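
        /*
         * Note that p_filesz may be smaller than p_memsz, or even zero,
         * when vma_dump_size() decides from mm_flags that some or all of
         * a vma should not land in the file; per the ELF spec, readers
         * treat the missing tail as zero-filled.
         */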
2006
2007 #ifdef ELF_CORE_WRITE_EXTRA_PHDRS
2008         ELF_CORE_WRITE_EXTRA_PHDRS;
2009 #endif
2010
2011         /* write out the notes section */
2012         if (!write_note_info(&info, file, &foffset))
2013                 goto end_coredump;
2014
2015         if (elf_coredump_extra_notes_write(file, &foffset))
2016                 goto end_coredump;
2017
2018         /* Align to page */
2019         DUMP_SEEK(dataoff - foffset);
2020
2021         for (vma = first_vma(current, gate_vma); vma != NULL;
2022                         vma = next_vma(vma, gate_vma)) {
2023                 unsigned long addr;
2024                 unsigned long end;
2025
2026                 end = vma->vm_start + vma_dump_size(vma, mm_flags);
2027
2028                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2029                         struct page *page;
2030                         struct vm_area_struct *tmp_vma;
2031
2032                         if (get_user_pages(current, current->mm, addr, 1, 0, 1,
2033                                                 &page, &tmp_vma) <= 0) {
2034                                 DUMP_SEEK(PAGE_SIZE);
2035                         } else {
2036                                 if (page == ZERO_PAGE(0)) {
2037                                         if (!dump_seek(file, PAGE_SIZE)) {
2038                                                 page_cache_release(page);
2039                                                 goto end_coredump;
2040                                         }
2041                                 } else {
2042                                         void *kaddr;
2043                                         flush_cache_page(tmp_vma, addr,
2044                                                          page_to_pfn(page));
2045                                         kaddr = kmap(page);
2046                                         if ((size += PAGE_SIZE) > limit ||
2047                                             !dump_write(file, kaddr,
2048                                             PAGE_SIZE)) {
2049                                                 kunmap(page);
2050                                                 page_cache_release(page);
2051                                                 goto end_coredump;
2052                                         }
2053                                         kunmap(page);
2054                                 }
2055                                 page_cache_release(page);
2056                         }
2057                 }
2058         }
2059
2060 #ifdef ELF_CORE_WRITE_EXTRA_DATA
2061         ELF_CORE_WRITE_EXTRA_DATA;
2062 #endif
2063
2064 end_coredump:
2065         set_fs(fs);
2066
2067 cleanup:
2068         free_note_info(&info);
2069         kfree(elf);
2070 out:
2071         return has_dumped;
2072 }
2073
2074 #endif          /* USE_ELF_CORE_DUMP */
2075
2076 static int __init init_elf_binfmt(void)
2077 {
2078         return register_binfmt(&elf_format);
2079 }
2080
2081 static void __exit exit_elf_binfmt(void)
2082 {
        /* Remove the ELF loader. */
2084         unregister_binfmt(&elf_format);
2085 }
2086
2087 core_initcall(init_elf_binfmt);
2088 module_exit(exit_elf_binfmt);
2089 MODULE_LICENSE("GPL");