/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

#ifdef CONFIG_USELIB
static int load_elf_library(struct file *);
#else
#define load_elf_library NULL
#endif

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

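/*
 * Round an address down to, take the offset within, or round a length
 * up to an ELF_MIN_ALIGN boundary.  ELF_MIN_ALIGN may be larger than
 * the hardware PAGE_SIZE, so these cannot simply use the generic page
 * macros.
 */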
#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

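/*
 * Extend the brk area to cover [start, end): if the page-aligned range
 * is non-empty, map anonymous zeroed pages with vm_brk(), then record
 * the new break in the mm.  Returns 0 on success or a vm_brk() error.
 */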
static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   otherwise contain junk from the file that should
   not be in memory.
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

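/*
 * Build the initial user stack image for the new program: the platform
 * strings and AT_RANDOM bytes, then argc, the argv and envp pointer
 * arrays, and the auxiliary vector.  Returns 0 on success or a
 * negative error if a user-space access fails.
 */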
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

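        /*
         * The auxiliary vector is built directly in mm->saved_auxv (so
         * it can be read back later via /proc/<pid>/auxv) and is copied
         * onto the user stack at the end of this function.  Each entry
         * is an (id, value) pair, terminated by AT_NULL.
         */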
#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
#ifdef ELF_HWCAP2
        NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
#endif
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

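        /*
         * With a downward-growing stack, the final image at bprm->p
         * looks like this (lowest addresses first):
         *
         *      argc
         *      argv[0] ... argv[argc-1], NULL
         *      envp[0] ... envp[envc-1], NULL
         *      auxv pairs, terminated by AT_NULL
         *
         * The argument/environment strings and the blocks carved out
         * with STACK_ALLOC() above live at higher addresses.
         */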
        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

#ifndef elf_map

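/*
 * Map one PT_LOAD segment described by @eppnt into the current mm at
 * @addr.  Offsets and sizes are adjusted so the mapping is aligned to
 * ELF_MIN_ALIGN; see below for how @total_size is handled.
 */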
static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero file size is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

#endif /* !elf_map */

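/*
 * Compute the size of the address range spanned by the first and last
 * PT_LOAD program headers: from the page containing the first
 * segment's start to the end of the last segment.  Returns 0 if there
 * are no PT_LOAD headers at all.
 */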
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

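/*
 * Subtract (or, with CONFIG_STACK_GROWSUP, add) a page-aligned random
 * offset of up to STACK_RND_MASK pages from the page-aligned stack
 * top, unless randomization is disabled for this task.
 */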
static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

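/*
 * Load an ELF executable: validate the headers, map the PT_LOAD
 * segments, set up the bss/brk area, load the PT_INTERP interpreter
 * if one is present, build the stack tables, and start the new thread.
 */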
static int load_elf_binary(struct linux_binprm *bprm)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        struct pt_regs *regs = current_pt_regs();
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely (elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * in runtime via sysctl or explicit setting of
                         * personality flags.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (eg, PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

#ifdef CONFIG_USELIB
/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}
#endif /* #ifdef CONFIG_USELIB */

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in
 * every core dump.  That way the core dump is fully interpretable
 * later without matching up the same kernel and hardware config to
 * see what PC values meant.  These special mappings include the vDSO,
 * vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;

        /*
         * Assume that all vmas with a .name op should always be dumped.
         * If this changes, a new vm_ops field can easily be added.
         */
        if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
                return true;

        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide how much of a segment to dump: all of it, part of it, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
                return 0;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (file_inode(vma->vm_file)->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

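/*
 * Total on-disk size of a note: the elf_note header plus the name and
 * descriptor data, each padded to a 4-byte boundary.
 */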
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

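/*
 * Emit one note into the core file: header, name, and data, with the
 * 4-byte padding the ELF note format requires.  Returns non-zero on
 * success, like the dump_emit() helpers it is built on.
 */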
static int writenote(struct memelfnote *men, struct coredump_params *cprm)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        return dump_emit(cprm, &en, sizeof(en)) &&
            dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
            dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
}

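/*
 * Initialize an ELF header for an ET_CORE file with @segs program
 * headers; everything not set explicitly here stays zero.
 */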
static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;
}

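/*
 * Describe the note segment with a PT_NOTE program header; notes have
 * no memory image, so p_vaddr and p_memsz are left zero.
 */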
static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_t utime, stime;

                task_cputime(p, &utime, &stime);
                cputime_to_timeval(utime, &prstatus->pr_utime);
                cputime_to_timeval(stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

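/*
 * Fill in the NT_PRPSINFO record: the command line (with NULs turned
 * into spaces), ids, state, nice value, and credentials of the task.
 */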
static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

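/*
 * The auxv saved at exec time in mm->saved_auxv is dumped verbatim;
 * scan for the terminating AT_NULL pair to find its length.
 */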
1374 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1375 {
1376         elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1377         int i = 0;
1378         do
1379                 i += 2;
1380         while (auxv[i - 2] != AT_NULL);
1381         fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
1382 }
1383
1384 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1385                 const siginfo_t *siginfo)
1386 {
1387         mm_segment_t old_fs = get_fs();
1388         set_fs(KERNEL_DS);
1389         copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1390         set_fs(old_fs);
1391         fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
1392 }
1393
1394 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1395 /*
1396  * Format of NT_FILE note:
1397  *
1398  * long count     -- how many files are mapped
1399  * long page_size -- units for file_ofs
1400  * array of [COUNT] elements of
1401  *   long start
1402  *   long end
1403  *   long file_ofs
1404  * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
1405  */
1406 static int fill_files_note(struct memelfnote *note)
1407 {
1408         struct vm_area_struct *vma;
1409         unsigned count, size, names_ofs, remaining, n;
1410         user_long_t *data;
1411         user_long_t *start_end_ofs;
1412         char *name_base, *name_curpos;
1413
1414         /* *Estimated* file count and total data size needed */
1415         count = current->mm->map_count;
1416         size = count * 64;
1417
1418         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1419  alloc:
1420         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1421                 return -EINVAL;
1422         size = round_up(size, PAGE_SIZE);
1423         data = vmalloc(size);
1424         if (!data)
1425                 return -ENOMEM;
1426
1427         start_end_ofs = data + 2;
1428         name_base = name_curpos = ((char *)data) + names_ofs;
1429         remaining = size - names_ofs;
1430         count = 0;
1431         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1432                 struct file *file;
1433                 const char *filename;
1434
1435                 file = vma->vm_file;
1436                 if (!file)
1437                         continue;
1438                 filename = d_path(&file->f_path, name_curpos, remaining);
1439                 if (IS_ERR(filename)) {
1440                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
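                                /* Name didn't fit: grow the buffer by 25% and retry. */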
1441                                 vfree(data);
1442                                 size = size * 5 / 4;
1443                                 goto alloc;
1444                         }
1445                         continue;
1446                 }
1447
1448                 /* d_path() fills in the name at the end of the buffer; move it down */
1449                 /* n = strlen(filename) + 1: */
1450                 n = (name_curpos + remaining) - filename;
1451                 remaining = filename - name_curpos;
1452                 memmove(name_curpos, filename, n);
1453                 name_curpos += n;
1454
1455                 *start_end_ofs++ = vma->vm_start;
1456                 *start_end_ofs++ = vma->vm_end;
1457                 *start_end_ofs++ = vma->vm_pgoff;
1458                 count++;
1459         }
1460
1461         /* Now we know the exact count of mapped files; store it */
1462         data[0] = count;
1463         data[1] = PAGE_SIZE;
1464         /*
1465          * The final count is usually less than current->mm->map_count,
1466          * so we need to move the filenames down to close the gap.
1467          */
1468         n = current->mm->map_count - count;
1469         if (n != 0) {
1470                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1471                 memmove(name_base - shift_bytes, name_base,
1472                         name_curpos - name_base);
1473                 name_curpos -= shift_bytes;
1474         }
1475
1476         size = name_curpos - (char *)data;
1477         fill_note(note, "CORE", NT_FILE, size, data);
1478         return 0;
1479 }
1480
1481 #ifdef CORE_DUMP_USE_REGSET
1482 #include <linux/regset.h>
1483
1484 struct elf_thread_core_info {
1485         struct elf_thread_core_info *next;
1486         struct task_struct *task;
1487         struct elf_prstatus prstatus;
1488         struct memelfnote notes[0];
1489 };
1490
1491 struct elf_note_info {
1492         struct elf_thread_core_info *thread;
1493         struct memelfnote psinfo;
1494         struct memelfnote signote;
1495         struct memelfnote auxv;
1496         struct memelfnote files;
1497         user_siginfo_t csigdata;
1498         size_t size;
1499         int thread_notes;
1500 };
1501
1502 /*
1503  * When a regset has a writeback hook, we call it on each thread before
1504  * dumping user memory.  On register window machines, this makes sure the
1505  * user memory backing the register data is up to date before we read it.
1506  */
1507 static void do_thread_regset_writeback(struct task_struct *task,
1508                                        const struct user_regset *regset)
1509 {
1510         if (regset->writeback)
1511                 regset->writeback(task, regset, 1);
1512 }
1513
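/*
 * Default layout helpers below; an architecture may override them, e.g.
 * when a compat dump layout differs from the native structures.
 */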
1514 #ifndef PR_REG_SIZE
1515 #define PR_REG_SIZE(S) sizeof(S)
1516 #endif
1517
1518 #ifndef PRSTATUS_SIZE
1519 #define PRSTATUS_SIZE(S) sizeof(S)
1520 #endif
1521
1522 #ifndef PR_REG_PTR
1523 #define PR_REG_PTR(S) (&((S)->pr_reg))
1524 #endif
1525
1526 #ifndef SET_PR_FPVALID
1527 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1528 #endif
1529
1530 static int fill_thread_core_info(struct elf_thread_core_info *t,
1531                                  const struct user_regset_view *view,
1532                                  long signr, size_t *total)
1533 {
1534         unsigned int i;
1535
1536         /*
1537          * NT_PRSTATUS is the one special case, because the regset data
1538          * goes into the pr_reg field inside the note contents, rather
1539          * than being the whole note contents.  We fill the rest in here.
1540          * We assume that regset 0 is NT_PRSTATUS.
1541          */
1542         fill_prstatus(&t->prstatus, t->task, signr);
1543         (void) view->regsets[0].get(t->task, &view->regsets[0],
1544                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1545                                     PR_REG_PTR(&t->prstatus), NULL);
1546
1547         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1548                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1549         *total += notesize(&t->notes[0]);
1550
1551         do_thread_regset_writeback(t->task, &view->regsets[0]);
1552
1553         /*
1554          * Each other regset might generate a note too.  For each regset
1555          * that has no core_note_type or is inactive, we leave t->notes[i]
1556          * all zero and we'll know to skip writing it later.
1557          */
1558         for (i = 1; i < view->n; ++i) {
1559                 const struct user_regset *regset = &view->regsets[i];
1560                 do_thread_regset_writeback(t->task, regset);
1561                 if (regset->core_note_type && regset->get &&
1562                     (!regset->active || regset->active(t->task, regset))) {
1563                         int ret;
1564                         size_t size = regset->n * regset->size;
1565                         void *data = kmalloc(size, GFP_KERNEL);
1566                         if (unlikely(!data))
1567                                 return 0;
1568                         ret = regset->get(t->task, regset,
1569                                           0, size, data, NULL);
1570                         if (unlikely(ret))
1571                                 kfree(data);
1572                         else {
1573                                 if (regset->core_note_type != NT_PRFPREG)
1574                                         fill_note(&t->notes[i], "LINUX",
1575                                                   regset->core_note_type,
1576                                                   size, data);
1577                                 else {
1578                                         SET_PR_FPVALID(&t->prstatus, 1);
1579                                         fill_note(&t->notes[i], "CORE",
1580                                                   NT_PRFPREG, size, data);
1581                                 }
1582                                 *total += notesize(&t->notes[i]);
1583                         }
1584                 }
1585         }
1586
1587         return 1;
1588 }
1589
1590 static int fill_note_info(struct elfhdr *elf, int phdrs,
1591                           struct elf_note_info *info,
1592                           const siginfo_t *siginfo, struct pt_regs *regs)
1593 {
1594         struct task_struct *dump_task = current;
1595         const struct user_regset_view *view = task_user_regset_view(dump_task);
1596         struct elf_thread_core_info *t;
1597         struct elf_prpsinfo *psinfo;
1598         struct core_thread *ct;
1599         unsigned int i;
1600
1601         info->size = 0;
1602         info->thread = NULL;
1603
1604         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1605         if (psinfo == NULL) {
1606                 info->psinfo.data = NULL; /* So we don't free this wrongly */
1607                 return 0;
1608         }
1609
1610         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1611
1612         /*
1613          * Figure out how many notes we're going to need for each thread.
1614          */
1615         info->thread_notes = 0;
1616         for (i = 0; i < view->n; ++i)
1617                 if (view->regsets[i].core_note_type != 0)
1618                         ++info->thread_notes;
1619
1620         /*
1621          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1622          * since it is our one special case.
1623          */
1624         if (unlikely(info->thread_notes == 0) ||
1625             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1626                 WARN_ON(1);
1627                 return 0;
1628         }
1629
1630         /*
1631          * Initialize the ELF file header.
1632          */
1633         fill_elf_header(elf, phdrs,
1634                         view->e_machine, view->e_flags);
1635
1636         /*
1637          * Allocate a structure for each thread.
1638          */
1639         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1640                 t = kzalloc(offsetof(struct elf_thread_core_info,
1641                                      notes[info->thread_notes]),
1642                             GFP_KERNEL);
1643                 if (unlikely(!t))
1644                         return 0;
1645
1646                 t->task = ct->task;
1647                 if (ct->task == dump_task || !info->thread) {
1648                         t->next = info->thread;
1649                         info->thread = t;
1650                 } else {
1651                         /*
1652                          * Make sure to keep the original task at
1653                          * the head of the list.
1654                          */
1655                         t->next = info->thread->next;
1656                         info->thread->next = t;
1657                 }
1658         }
1659
1660         /*
1661          * Now fill in each thread's information.
1662          */
1663         for (t = info->thread; t != NULL; t = t->next)
1664                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1665                         return 0;
1666
1667         /*
1668          * Fill in the two process-wide notes.
1669          */
1670         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1671         info->size += notesize(&info->psinfo);
1672
1673         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1674         info->size += notesize(&info->signote);
1675
1676         fill_auxv_note(&info->auxv, current->mm);
1677         info->size += notesize(&info->auxv);
1678
1679         if (fill_files_note(&info->files) == 0)
1680                 info->size += notesize(&info->files);
1681
1682         return 1;
1683 }
1684
1685 static size_t get_note_info_size(struct elf_note_info *info)
1686 {
1687         return info->size;
1688 }
1689
1690 /*
1691  * Write all the notes for each thread.  When writing the first thread, the
1692  * process-wide notes are interleaved after the first thread-specific note.
1693  */
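/*
 * The resulting order is thus roughly:
 *
 *	PRSTATUS (first thread), PRPSINFO, SIGINFO, AUXV, [FILE,]
 *	remaining notes of the first thread,
 *	PRSTATUS of the next thread, its remaining notes, and so on.
 */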
1694 static int write_note_info(struct elf_note_info *info,
1695                            struct coredump_params *cprm)
1696 {
1697         bool first = true;
1698         struct elf_thread_core_info *t = info->thread;
1699
1700         do {
1701                 int i;
1702
1703                 if (!writenote(&t->notes[0], cprm))
1704                         return 0;
1705
1706                 if (first && !writenote(&info->psinfo, cprm))
1707                         return 0;
1708                 if (first && !writenote(&info->signote, cprm))
1709                         return 0;
1710                 if (first && !writenote(&info->auxv, cprm))
1711                         return 0;
1712                 if (first && info->files.data &&
1713                                 !writenote(&info->files, cprm))
1714                         return 0;
1715
1716                 for (i = 1; i < info->thread_notes; ++i)
1717                         if (t->notes[i].data &&
1718                             !writenote(&t->notes[i], cprm))
1719                                 return 0;
1720
1721                 first = false;
1722                 t = t->next;
1723         } while (t);
1724
1725         return 1;
1726 }
1727
1728 static void free_note_info(struct elf_note_info *info)
1729 {
1730         struct elf_thread_core_info *threads = info->thread;
1731         while (threads) {
1732                 unsigned int i;
1733                 struct elf_thread_core_info *t = threads;
1734                 threads = t->next;
1735                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1736                 for (i = 1; i < info->thread_notes; ++i)
1737                         kfree(t->notes[i].data);
1738                 kfree(t);
1739         }
1740         kfree(info->psinfo.data);
1741         vfree(info->files.data);
1742 }
1743
1744 #else
1745
1746 /* Here is the structure in which status of each thread is captured. */
1747 struct elf_thread_status
1748 {
1749         struct list_head list;
1750         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1751         elf_fpregset_t fpu;             /* NT_PRFPREG */
1752         struct task_struct *thread;
1753 #ifdef ELF_CORE_COPY_XFPREGS
1754         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1755 #endif
1756         struct memelfnote notes[3];
1757         int num_notes;
1758 };
1759
1760 /*
1761  * In order to add the specific thread information for the elf file format,
1762  * we need to keep a linked list of every thread's pr_status and then create
1763  * a single section for them in the final core file.
1764  */
1765 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1766 {
1767         int sz = 0;
1768         struct task_struct *p = t->thread;
1769         t->num_notes = 0;
1770
1771         fill_prstatus(&t->prstatus, p, signr);
1772         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1773
1774         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1775                   &(t->prstatus));
1776         t->num_notes++;
1777         sz += notesize(&t->notes[0]);
1778
1779         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1780                                                                 &t->fpu))) {
1781                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1782                           &(t->fpu));
1783                 t->num_notes++;
1784                 sz += notesize(&t->notes[1]);
1785         }
1786
1787 #ifdef ELF_CORE_COPY_XFPREGS
1788         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1789                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1790                           sizeof(t->xfpu), &t->xfpu);
1791                 t->num_notes++;
1792                 sz += notesize(&t->notes[2]);
1793         }
1794 #endif
1795         return sz;
1796 }
1797
1798 struct elf_note_info {
1799         struct memelfnote *notes;
1800         struct memelfnote *notes_files;
1801         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1802         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1803         struct list_head thread_list;
1804         elf_fpregset_t *fpu;
1805 #ifdef ELF_CORE_COPY_XFPREGS
1806         elf_fpxregset_t *xfpu;
1807 #endif
1808         user_siginfo_t csigdata;
1809         int thread_status_size;
1810         int numnote;
1811 };
1812
1813 static int elf_note_info_init(struct elf_note_info *info)
1814 {
1815         memset(info, 0, sizeof(*info));
1816         INIT_LIST_HEAD(&info->thread_list);
1817
1818         /* Allocate space for the ELF notes (the fixed notes plus optional files/FPU/XFPU notes) */
1819         info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1820         if (!info->notes)
1821                 return 0;
1822         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1823         if (!info->psinfo)
1824                 return 0;
1825         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1826         if (!info->prstatus)
1827                 return 0;
1828         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1829         if (!info->fpu)
1830                 return 0;
1831 #ifdef ELF_CORE_COPY_XFPREGS
1832         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1833         if (!info->xfpu)
1834                 return 0;
1835 #endif
1836         return 1;
1837 }
1838
1839 static int fill_note_info(struct elfhdr *elf, int phdrs,
1840                           struct elf_note_info *info,
1841                           const siginfo_t *siginfo, struct pt_regs *regs)
1842 {
1843         struct list_head *t;
1844         struct core_thread *ct;
1845         struct elf_thread_status *ets;
1846
1847         if (!elf_note_info_init(info))
1848                 return 0;
1849
1850         for (ct = current->mm->core_state->dumper.next;
1851                                         ct; ct = ct->next) {
1852                 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1853                 if (!ets)
1854                         return 0;
1855
1856                 ets->thread = ct->task;
1857                 list_add(&ets->list, &info->thread_list);
1858         }
1859
1860         list_for_each(t, &info->thread_list) {
1861                 int sz;
1862
1863                 ets = list_entry(t, struct elf_thread_status, list);
1864                 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1865                 info->thread_status_size += sz;
1866         }
1867         /* now collect the dump for the current */
1868         memset(info->prstatus, 0, sizeof(*info->prstatus));
1869         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1870         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1871
1872         /* Set up header */
1873         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1874
1875         /*
1876          * Set up the notes in similar form to SVR4 core dumps made
1877          * with info from their /proc.
1878          */
1879
1880         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1881                   sizeof(*info->prstatus), info->prstatus);
1882         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1883         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1884                   sizeof(*info->psinfo), info->psinfo);
1885
1886         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1887         fill_auxv_note(info->notes + 3, current->mm);
1888         info->numnote = 4;
1889
1890         if (fill_files_note(info->notes + info->numnote) == 0) {
1891                 info->notes_files = info->notes + info->numnote;
1892                 info->numnote++;
1893         }
1894
1895         /* Try to dump the FPU. */
1896         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1897                                                                info->fpu);
1898         if (info->prstatus->pr_fpvalid)
1899                 fill_note(info->notes + info->numnote++,
1900                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1901 #ifdef ELF_CORE_COPY_XFPREGS
1902         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1903                 fill_note(info->notes + info->numnote++,
1904                           "LINUX", ELF_CORE_XFPREG_TYPE,
1905                           sizeof(*info->xfpu), info->xfpu);
1906 #endif
1907
1908         return 1;
1909 }
1910
1911 static size_t get_note_info_size(struct elf_note_info *info)
1912 {
1913         int sz = 0;
1914         int i;
1915
1916         for (i = 0; i < info->numnote; i++)
1917                 sz += notesize(info->notes + i);
1918
1919         sz += info->thread_status_size;
1920
1921         return sz;
1922 }
1923
1924 static int write_note_info(struct elf_note_info *info,
1925                            struct coredump_params *cprm)
1926 {
1927         int i;
1928         struct list_head *t;
1929
1930         for (i = 0; i < info->numnote; i++)
1931                 if (!writenote(info->notes + i, cprm))
1932                         return 0;
1933
1934         /* write out the thread status notes section */
1935         list_for_each(t, &info->thread_list) {
1936                 struct elf_thread_status *tmp =
1937                                 list_entry(t, struct elf_thread_status, list);
1938
1939                 for (i = 0; i < tmp->num_notes; i++)
1940                         if (!writenote(&tmp->notes[i], cprm))
1941                                 return 0;
1942         }
1943
1944         return 1;
1945 }
1946
1947 static void free_note_info(struct elf_note_info *info)
1948 {
1949         while (!list_empty(&info->thread_list)) {
1950                 struct list_head *tmp = info->thread_list.next;
1951                 list_del(tmp);
1952                 kfree(list_entry(tmp, struct elf_thread_status, list));
1953         }
1954
1955         /* Free data possibly allocated by fill_files_note(): */
1956         if (info->notes_files)
1957                 vfree(info->notes_files->data);
1958
1959         kfree(info->prstatus);
1960         kfree(info->psinfo);
1961         kfree(info->notes);
1962         kfree(info->fpu);
1963 #ifdef ELF_CORE_COPY_XFPREGS
1964         kfree(info->xfpu);
1965 #endif
1966 }
1967
1968 #endif
1969
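/*
 * Return the first vma to dump; fall back to the gate vma (e.g. the
 * vsyscall page) when the mm has no mappings of its own.
 */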
1970 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1971                                         struct vm_area_struct *gate_vma)
1972 {
1973         struct vm_area_struct *ret = tsk->mm->mmap;
1974
1975         if (ret)
1976                 return ret;
1977         return gate_vma;
1978 }
1979 /*
1980  * Helper function for iterating across a vma list.  It ensures that the caller
1981  * will visit `gate_vma' prior to terminating the search.
1982  */
1983 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1984                                         struct vm_area_struct *gate_vma)
1985 {
1986         struct vm_area_struct *ret;
1987
1988         ret = this_vma->vm_next;
1989         if (ret)
1990                 return ret;
1991         if (this_vma == gate_vma)
1992                 return NULL;
1993         return gate_vma;
1994 }
1995
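/*
 * With extended numbering (e_phnum == PN_XNUM), the real counts live in
 * the otherwise-unused first section header: sh_info carries the program
 * header count and sh_size the section header count.
 */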
1996 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1997                              elf_addr_t e_shoff, int segs)
1998 {
1999         elf->e_shoff = e_shoff;
2000         elf->e_shentsize = sizeof(*shdr4extnum);
2001         elf->e_shnum = 1;
2002         elf->e_shstrndx = SHN_UNDEF;
2003
2004         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2005
2006         shdr4extnum->sh_type = SHT_NULL;
2007         shdr4extnum->sh_size = elf->e_shnum;
2008         shdr4extnum->sh_link = elf->e_shstrndx;
2009         shdr4extnum->sh_info = segs;
2010 }
2011
2012 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2013                                      unsigned long mm_flags)
2014 {
2015         struct vm_area_struct *vma;
2016         size_t size = 0;
2017
2018         for (vma = first_vma(current, gate_vma); vma != NULL;
2019              vma = next_vma(vma, gate_vma))
2020                 size += vma_dump_size(vma, mm_flags);
2021         return size;
2022 }
2023
2024 /*
2025  * Actual dumper
2026  *
2027  * This is a two-pass process; first we find the offsets of the bits,
2028  * and then they are actually written out.  If we run out of core limit
2029  * we just truncate.
2030  */
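/*
 * The resulting core file is laid out as:
 *
 *	ELF header
 *	program headers (a PT_NOTE entry, one PT_LOAD per vma, and any
 *	 extra arch phdrs)
 *	note data
 *	memory-segment data, starting on an ELF_EXEC_PAGESIZE boundary
 *	extra arch data and, with PN_XNUM, the extended-numbering
 *	 section header
 */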
2031 static int elf_core_dump(struct coredump_params *cprm)
2032 {
2033         int has_dumped = 0;
2034         mm_segment_t fs;
2035         int segs;
2036         struct vm_area_struct *vma, *gate_vma;
2037         struct elfhdr *elf = NULL;
2038         loff_t offset = 0, dataoff;
2039         struct elf_note_info info = { };
2040         struct elf_phdr *phdr4note = NULL;
2041         struct elf_shdr *shdr4extnum = NULL;
2042         Elf_Half e_phnum;
2043         elf_addr_t e_shoff;
2044
2045         /*
2046          * We no longer stop all VM operations.
2047          *
2048          * This is because those processes that could possibly change map_count
2049          * or the mmap / vma pages are now blocked in do_exit on current
2050          * finishing this core dump.
2051          *
2052          * Only ptrace can touch these memory addresses, but it doesn't change
2053          * the map_count or the pages allocated. So no possibility of crashing
2054          * exists while dumping the mm->vm_next areas to the core file.
2055          */
2056
2057         /* alloc memory for large data structures: too large to be on stack */
2058         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2059         if (!elf)
2060                 goto out;
2061         /*
2062          * The number of segs is recorded in the ELF header as a 16-bit value.
2063          * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying this.
2064          */
2065         segs = current->mm->map_count;
2066         segs += elf_core_extra_phdrs();
2067
2068         gate_vma = get_gate_vma(current->mm);
2069         if (gate_vma != NULL)
2070                 segs++;
2071
2072         /* for notes section */
2073         segs++;
2074
2075         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2076          * this, the kernel supports extended numbering. Have a look at
2077          * include/linux/elf.h for further information. */
2078         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2079
2080         /*
2081          * Collect all the non-memory information about the process for the
2082          * notes.  This also sets up the file header.
2083          */
2084         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2085                 goto cleanup;
2086
2087         has_dumped = 1;
2088
2089         fs = get_fs();
2090         set_fs(KERNEL_DS);
2091
2092         offset += sizeof(*elf);                         /* Elf header */
2093         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2094
2095         /* Write notes phdr entry */
2096         {
2097                 size_t sz = get_note_info_size(&info);
2098
2099                 sz += elf_coredump_extra_notes_size();
2100
2101                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2102                 if (!phdr4note)
2103                         goto end_coredump;
2104
2105                 fill_elf_note_phdr(phdr4note, sz, offset);
2106                 offset += sz;
2107         }
2108
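        /* The memory-segment data starts at the next ELF_EXEC_PAGESIZE boundary. */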
2109         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2110
2111         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2112         offset += elf_core_extra_data_size();
2113         e_shoff = offset;
2114
2115         if (e_phnum == PN_XNUM) {
2116                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2117                 if (!shdr4extnum)
2118                         goto end_coredump;
2119                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2120         }
2121
2122         offset = dataoff;
2123
2124         if (!dump_emit(cprm, elf, sizeof(*elf)))
2125                 goto end_coredump;
2126
2127         if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2128                 goto end_coredump;
2129
2130         /* Write program headers for segments dump */
2131         for (vma = first_vma(current, gate_vma); vma != NULL;
2132                         vma = next_vma(vma, gate_vma)) {
2133                 struct elf_phdr phdr;
2134
2135                 phdr.p_type = PT_LOAD;
2136                 phdr.p_offset = offset;
2137                 phdr.p_vaddr = vma->vm_start;
2138                 phdr.p_paddr = 0;
2139                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2140                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2141                 offset += phdr.p_filesz;
2142                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2143                 if (vma->vm_flags & VM_WRITE)
2144                         phdr.p_flags |= PF_W;
2145                 if (vma->vm_flags & VM_EXEC)
2146                         phdr.p_flags |= PF_X;
2147                 phdr.p_align = ELF_EXEC_PAGESIZE;
2148
2149                 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2150                         goto end_coredump;
2151         }
2152
2153         if (!elf_core_write_extra_phdrs(cprm, offset))
2154                 goto end_coredump;
2155
2156         /* write out the notes section */
2157         if (!write_note_info(&info, cprm))
2158                 goto end_coredump;
2159
2160         if (elf_coredump_extra_notes_write(cprm))
2161                 goto end_coredump;
2162
2163         /* Align to page */
2164         if (!dump_skip(cprm, dataoff - cprm->written))
2165                 goto end_coredump;
2166
2167         for (vma = first_vma(current, gate_vma); vma != NULL;
2168                         vma = next_vma(vma, gate_vma)) {
2169                 unsigned long addr;
2170                 unsigned long end;
2171
2172                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2173
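                /*
                 * Walk the dumpable part of the vma a page at a time:
                 * emit each page we can get and write a hole for each
                 * one we cannot.
                 */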
2174                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2175                         struct page *page;
2176                         int stop;
2177
2178                         page = get_dump_page(addr);
2179                         if (page) {
2180                                 void *kaddr = kmap(page);
2181                                 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2182                                 kunmap(page);
2183                                 page_cache_release(page);
2184                         } else
2185                                 stop = !dump_skip(cprm, PAGE_SIZE);
2186                         if (stop)
2187                                 goto end_coredump;
2188                 }
2189         }
2190
2191         if (!elf_core_write_extra_data(cprm))
2192                 goto end_coredump;
2193
2194         if (e_phnum == PN_XNUM) {
2195                 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2196                         goto end_coredump;
2197         }
2198
2199 end_coredump:
2200         set_fs(fs);
2201
2202 cleanup:
2203         free_note_info(&info);
2204         kfree(shdr4extnum);
2205         kfree(phdr4note);
2206         kfree(elf);
2207 out:
2208         return has_dumped;
2209 }
2210
2211 #endif          /* CONFIG_ELF_CORE */
2212
2213 static int __init init_elf_binfmt(void)
2214 {
2215         register_binfmt(&elf_format);
2216         return 0;
2217 }
2218
2219 static void __exit exit_elf_binfmt(void)
2220 {
2221         /* Remove the ELF loader. */
2222         unregister_binfmt(&elf_format);
2223 }
2224
2225 core_initcall(init_elf_binfmt);
2226 module_exit(exit_elf_binfmt);
2227 MODULE_LICENSE("GPL");