/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmer's Guide: ANSI C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>

#ifndef user_long_t
#define user_long_t long
#endif
#ifndef user_siginfo_t
#define user_siginfo_t siginfo_t
#endif

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
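
/*
 * Worked example (illustrative only, assuming ELF_MIN_ALIGN == 4096):
 * for _v == 0x08049123,
 *   ELF_PAGESTART(_v)  == 0x08049000   (round down to the page start)
 *   ELF_PAGEOFFSET(_v) == 0x123        (offset within the page)
 *   ELF_PAGEALIGN(_v)  == 0x0804a000   (round up to the next boundary)
 */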

static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  These would
   contain junk from the file that should not
   be in memory.
 */
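/*
 * Illustrative example (values invented): with ELF_MIN_ALIGN == 4096 and
 * elf_bss == 0x0804a123, ELF_PAGEOFFSET(elf_bss) == 0x123, so padzero()
 * clears the 0xedd bytes from 0x0804a123 up to the 0x0804b000 boundary.
 */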
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif
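
/*
 * Sketch of the initial process stack that create_elf_tables() below
 * builds with these macros (downward-growing case, low addresses first):
 *
 *   sp ->  argc
 *          argv[0..argc-1] pointers, NULL
 *          envp[0..n-1] pointers, NULL
 *          auxv (id, value) pairs, ending with AT_NULL
 *          ... padding, AT_RANDOM bytes, platform strings,
 *          argument and environment strings at the top
 */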

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)
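        /*
         * Each NEW_AUX_ENT() stores a flat (id, value) pair in
         * current->mm->saved_auxv, e.g. { AT_HWCAP, <hwcap>,
         * AT_PAGESZ, 4096, ... }; the terminating { AT_NULL, 0 }
         * pair is supplied by the memset() below.
         */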

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
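        /*
         * Illustrative numbers (invented): if total_size is 0x205000 but
         * this first segment's own size is only 0x1000, we map the full
         * 0x205000 so the range is reserved contiguously, then unmap the
         * 0x204000 bytes beyond this segment right away.
         */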
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}

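/*
 * Worked example for total_mapping_size() below (values invented):
 * PT_LOAD segments at p_vaddr 0x400000 (p_memsz 0x800) and 0x600000
 * (p_memsz 0x1000) span 0x600000 + 0x1000 - ELF_PAGESTART(0x400000)
 * = 0x201000 bytes.
 */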
static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}


/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

473                          * Check to see if the section's size will overflow the
474                          * allowed task size. Note that p_filesz must always be
475                          * <= p_memsize so it's only necessary to check p_memsz.
476                          */
477                         k = load_addr + eppnt->p_vaddr;
478                         if (BAD_ADDR(k) ||
479                             eppnt->p_filesz > eppnt->p_memsz ||
480                             eppnt->p_memsz > TASK_SIZE ||
481                             TASK_SIZE - eppnt->p_memsz < k) {
482                                 error = -ENOMEM;
483                                 goto out_close;
484                         }
485
486                         /*
487                          * Find the end of the file mapping for this phdr, and
488                          * keep track of the largest address we see for this.
489                          */
490                         k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
491                         if (k > elf_bss)
492                                 elf_bss = k;
493
494                         /*
495                          * Do the same thing for the memory mapping - between
496                          * elf_bss and last_bss is the bss section.
497                          */
498                         k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
499                         if (k > last_bss)
500                                 last_bss = k;
501                 }
502         }
503
504         if (last_bss > elf_bss) {
505                 /*
506                  * Now fill out the bss section.  First pad the last page up
507                  * to the page boundary, and then perform a mmap to make sure
508                  * that there are zero-mapped pages up to and including the
509                  * last bss page.
510                  */
511                 if (padzero(elf_bss)) {
512                         error = -EFAULT;
513                         goto out_close;
514                 }
515
516                 /* What we have mapped so far */
517                 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
518
519                 /* Map the last of the bss segment */
520                 error = vm_brk(elf_bss, last_bss - elf_bss);
521                 if (BAD_ADDR(error))
522                         goto out_close;
523         }
524
525         error = load_addr;
526
527 out_close:
528         kfree(elf_phdata);
529 out:
530         return error;
531 }
532
533 /*
534  * These are the functions used to load ELF style executables and shared
535  * libraries.  There is no binary dependent code anywhere else.
536  */
537
538 #define INTERPRETER_NONE 0
539 #define INTERPRETER_ELF 2
540
541 #ifndef STACK_RND_MASK
542 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
543 #endif
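
/*
 * Worked example, assuming 4 KiB pages (PAGE_SHIFT == 12): the default
 * STACK_RND_MASK is 0x7ff, and randomize_stack_top() below shifts a
 * value in [0, 0x7ff] left by PAGE_SHIFT, i.e. up to 0x7ff000 bytes
 * (just under 8 MiB) of stack-top randomization.
 */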

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char *elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure the path is NUL-terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for (i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
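                        /*
                         * Illustration (arch-dependent; on x86, for
                         * instance, ELF_ET_DYN_BASE is TASK_SIZE / 3 * 2):
                         * a non-randomized ET_DYN binary whose first
                         * PT_LOAD has p_vaddr 0 is biased up to that base,
                         * clear of the usual mmap and brk areas.
                         */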
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void *)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long interp_map_addr = 0;

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel
 * mappings that are useful for post-mortem analysis are included in every
 * core dump.  That way we ensure that the core dump is fully interpretable
 * later without having to match up the same kernel and hardware config to
 * see what the PC values meant.  These special mappings include the vDSO,
 * vsyscall, and other architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment, part, all or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_DONTDUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & VM_IO)
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

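/*
 * For reference, notesize() below accounts for the on-disk layout of one
 * note: a 12-byte header (n_namesz, n_descsz, n_type), then the name and
 * the data, each padded to a 4-byte boundary.  With invented numbers, name
 * "CORE" and datasz 13 give 12 + roundup(5, 4) + roundup(13, 4) = 36 bytes.
 */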
static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}

static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
                siginfo_t *siginfo)
{
        mm_segment_t old_fs = get_fs();
        set_fs(KERNEL_DS);
        copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
        set_fs(old_fs);
        fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
}

#define MAX_FILE_NOTE_SIZE (4*1024*1024)
/*
 * Format of NT_FILE note:
 *
 * long count     -- how many files are mapped
 * long page_size -- units for file_ofs
 * array of [COUNT] elements of
 *   long start
 *   long end
 *   long file_ofs
 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
 */
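/*
 * Illustrative encoding (all values invented): two mappings of one shared
 * library would be written as count = 2, page_size = PAGE_SIZE, then the
 * two (start, end, file_ofs) triples, then "/lib/libfoo.so" NUL
 * "/lib/libfoo.so" NUL.
 */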
1404 static void fill_files_note(struct memelfnote *note)
1405 {
1406         struct vm_area_struct *vma;
1407         unsigned count, size, names_ofs, remaining, n;
1408         user_long_t *data;
1409         user_long_t *start_end_ofs;
1410         char *name_base, *name_curpos;
1411
1412         /* *Estimated* file count and total data size needed */
1413         count = current->mm->map_count;
1414         size = count * 64;
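        /*
         * Rough per-mapping estimate: three longs plus an average
         * pathname; the buffer is regrown by 5/4 in the ENAMETOOLONG
         * path below when a name does not fit.
         */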
1415
1416         names_ofs = (2 + 3 * count) * sizeof(data[0]);
1417  alloc:
1418         if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1419                 goto err;
1420         size = round_up(size, PAGE_SIZE);
1421         data = vmalloc(size);
1422         if (!data)
1423                 goto err;
1424
1425         start_end_ofs = data + 2;
1426         name_base = name_curpos = ((char *)data) + names_ofs;
1427         remaining = size - names_ofs;
1428         count = 0;
1429         for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1430                 struct file *file;
1431                 const char *filename;
1432
1433                 file = vma->vm_file;
1434                 if (!file)
1435                         continue;
1436                 filename = d_path(&file->f_path, name_curpos, remaining);
1437                 if (IS_ERR(filename)) {
1438                         if (PTR_ERR(filename) == -ENAMETOOLONG) {
1439                                 vfree(data);
1440                                 size = size * 5 / 4;
1441                                 goto alloc;
1442                         }
1443                         continue;
1444                 }
1445
1446                 /* d_path() fills at the end, move name down */
1447                 /* n = strlen(filename) + 1: */
1448                 n = (name_curpos + remaining) - filename;
1449                 remaining = filename - name_curpos;
1450                 memmove(name_curpos, filename, n);
1451                 name_curpos += n;
1452
1453                 *start_end_ofs++ = vma->vm_start;
1454                 *start_end_ofs++ = vma->vm_end;
1455                 *start_end_ofs++ = vma->vm_pgoff;
1456                 count++;
1457         }
1458
1459         /* Now we know the exact count of files, so we can store it */
1460         data[0] = count;
1461         data[1] = PAGE_SIZE;
1462         /*
1463          * count is usually less than current->mm->map_count,
1464          * so we need to move the filenames down.
1465          */
1466         n = current->mm->map_count - count;
1467         if (n != 0) {
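                /*
                 * Each vma that contributed no entry has left an unused
                 * start/end/ofs triple in front of the name area; shift
                 * the names down over that gap.
                 */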
1468                 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1469                 memmove(name_base - shift_bytes, name_base,
1470                         name_curpos - name_base);
1471                 name_curpos -= shift_bytes;
1472         }
1473
1474         size = name_curpos - (char *)data;
1475         fill_note(note, "CORE", NT_FILE, size, data);
1476  err: ;
1477 }
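
/*
 * Illustrative sketch, not part of this file: one way a userspace
 * consumer could walk the NT_FILE payload built above, once it has
 * located the note inside a core file.  The function name is made up,
 * and a reader of the same word size as the dumped process is assumed.
 */
#if 0
#include <stdio.h>
#include <string.h>

static void walk_nt_file(const long *data)
{
        long count = data[0];                   /* number of mapped files */
        long page_size = data[1];               /* units for file_ofs */
        const long *ent = data + 2;             /* start/end/file_ofs triples */
        const char *name = (const char *)(data + 2 + 3 * count);
        long i;

        for (i = 0; i < count; i++, ent += 3) {
                /* file_ofs is in page_size units, not bytes */
                printf("%lx-%lx at file offset %lx: %s\n",
                       ent[0], ent[1], ent[2] * page_size, name);
                name += strlen(name) + 1;
        }
}
#endif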
1478
1479 #ifdef CORE_DUMP_USE_REGSET
1480 #include <linux/regset.h>
1481
1482 struct elf_thread_core_info {
1483         struct elf_thread_core_info *next;
1484         struct task_struct *task;
1485         struct elf_prstatus prstatus;
1486         struct memelfnote notes[0];
1487 };
1488
1489 struct elf_note_info {
1490         struct elf_thread_core_info *thread;
1491         struct memelfnote psinfo;
1492         struct memelfnote signote;
1493         struct memelfnote auxv;
1494         struct memelfnote files;
1495         user_siginfo_t csigdata;
1496         size_t size;
1497         int thread_notes;
1498 };
1499
1500 /*
1501  * When a regset has a writeback hook, we call it on each thread before
1502  * dumping user memory.  On register window machines, this makes sure the
1503  * user memory backing the register data is up to date before we read it.
1504  */
1505 static void do_thread_regset_writeback(struct task_struct *task,
1506                                        const struct user_regset *regset)
1507 {
1508         if (regset->writeback)
1509                 regset->writeback(task, regset, 1);
1510 }
1511
1512 #ifndef PR_REG_SIZE
1513 #define PR_REG_SIZE(S) sizeof(S)
1514 #endif
1515
1516 #ifndef PRSTATUS_SIZE
1517 #define PRSTATUS_SIZE(S) sizeof(S)
1518 #endif
1519
1520 #ifndef PR_REG_PTR
1521 #define PR_REG_PTR(S) (&((S)->pr_reg))
1522 #endif
1523
1524 #ifndef SET_PR_FPVALID
1525 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1526 #endif
1527
1528 static int fill_thread_core_info(struct elf_thread_core_info *t,
1529                                  const struct user_regset_view *view,
1530                                  long signr, size_t *total)
1531 {
1532         unsigned int i;
1533
1534         /*
1535          * NT_PRSTATUS is the one special case, because the regset data
1536          * goes into the pr_reg field inside the note contents, rather
1537          * than being the whole note contents.  We fill the rest in here.
1538          * We assume that regset 0 is NT_PRSTATUS.
1539          */
1540         fill_prstatus(&t->prstatus, t->task, signr);
1541         (void) view->regsets[0].get(t->task, &view->regsets[0],
1542                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1543                                     PR_REG_PTR(&t->prstatus), NULL);
1544
1545         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1546                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1547         *total += notesize(&t->notes[0]);
1548
1549         do_thread_regset_writeback(t->task, &view->regsets[0]);
1550
1551         /*
1552          * Each of the other regsets might generate a note too.  For each regset
1553          * that has no core_note_type or is inactive, we leave t->notes[i]
1554          * all zero and we'll know to skip writing it later.
1555          */
1556         for (i = 1; i < view->n; ++i) {
1557                 const struct user_regset *regset = &view->regsets[i];
1558                 do_thread_regset_writeback(t->task, regset);
1559                 if (regset->core_note_type && regset->get &&
1560                     (!regset->active || regset->active(t->task, regset))) {
1561                         int ret;
1562                         size_t size = regset->n * regset->size;
1563                         void *data = kmalloc(size, GFP_KERNEL);
1564                         if (unlikely(!data))
1565                                 return 0;
1566                         ret = regset->get(t->task, regset,
1567                                           0, size, data, NULL);
1568                         if (unlikely(ret))
1569                                 kfree(data);
1570                         else {
1571                                 if (regset->core_note_type != NT_PRFPREG)
1572                                         fill_note(&t->notes[i], "LINUX",
1573                                                   regset->core_note_type,
1574                                                   size, data);
1575                                 else {
1576                                         SET_PR_FPVALID(&t->prstatus, 1);
1577                                         fill_note(&t->notes[i], "CORE",
1578                                                   NT_PRFPREG, size, data);
1579                                 }
1580                                 *total += notesize(&t->notes[i]);
1581                         }
1582                 }
1583         }
1584
1585         return 1;
1586 }
1587
1588 static int fill_note_info(struct elfhdr *elf, int phdrs,
1589                           struct elf_note_info *info,
1590                           siginfo_t *siginfo, struct pt_regs *regs)
1591 {
1592         struct task_struct *dump_task = current;
1593         const struct user_regset_view *view = task_user_regset_view(dump_task);
1594         struct elf_thread_core_info *t;
1595         struct elf_prpsinfo *psinfo;
1596         struct core_thread *ct;
1597         unsigned int i;
1598
1599         info->size = 0;
1600         info->thread = NULL;
1601
1602         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1603         if (psinfo == NULL) {
1604                 info->psinfo.data = NULL;       /* So we don't free this wrongly */
1605                 return 0;
1606         }
1607
1608         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1609
1610         /*
1611          * Figure out how many notes we're going to need for each thread.
1612          */
1613         info->thread_notes = 0;
1614         for (i = 0; i < view->n; ++i)
1615                 if (view->regsets[i].core_note_type != 0)
1616                         ++info->thread_notes;
1617
1618         /*
1619          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1620          * since it is our one special case.
1621          */
1622         if (unlikely(info->thread_notes == 0) ||
1623             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1624                 WARN_ON(1);
1625                 return 0;
1626         }
1627
1628         /*
1629          * Initialize the ELF file header.
1630          */
1631         fill_elf_header(elf, phdrs,
1632                         view->e_machine, view->e_flags, view->ei_osabi);
1633
1634         /*
1635          * Allocate a structure for each thread.
1636          */
1637         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1638                 t = kzalloc(offsetof(struct elf_thread_core_info,
1639                                      notes[info->thread_notes]),
1640                             GFP_KERNEL);
1641                 if (unlikely(!t))
1642                         return 0;
1643
1644                 t->task = ct->task;
1645                 if (ct->task == dump_task || !info->thread) {
1646                         t->next = info->thread;
1647                         info->thread = t;
1648                 } else {
1649                         /*
1650                          * Make sure to keep the original task at
1651                          * the head of the list.
1652                          */
1653                         t->next = info->thread->next;
1654                         info->thread->next = t;
1655                 }
1656         }
1657
1658         /*
1659          * Now fill in each thread's information.
1660          */
1661         for (t = info->thread; t != NULL; t = t->next)
1662                 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1663                         return 0;
1664
1665         /*
1666          * Fill in the two process-wide notes.
1667          */
1668         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1669         info->size += notesize(&info->psinfo);
1670
1671         fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1672         info->size += notesize(&info->signote);
1673
1674         fill_auxv_note(&info->auxv, current->mm);
1675         info->size += notesize(&info->auxv);
1676
1677         fill_files_note(&info->files);
1678         info->size += notesize(&info->files);
1679
1680         return 1;
1681 }
1682
1683 static size_t get_note_info_size(struct elf_note_info *info)
1684 {
1685         return info->size;
1686 }
1687
1688 /*
1689  * Write all the notes for each thread.  When writing the first thread, the
1690  * process-wide notes are interleaved after the first thread-specific note.
1691  */
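
/*
 * With a dumping thread T0 and one sibling T1, for example, the order is:
 * T0 NT_PRSTATUS, the process-wide NT_PRPSINFO, NT_SIGINFO, NT_AUXV and
 * NT_FILE notes, T0's remaining per-regset notes, then T1 NT_PRSTATUS
 * followed by T1's remaining per-regset notes.
 */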
1692 static int write_note_info(struct elf_note_info *info,
1693                            struct file *file, loff_t *foffset)
1694 {
1695         bool first = true;
1696         struct elf_thread_core_info *t = info->thread;
1697
1698         do {
1699                 int i;
1700
1701                 if (!writenote(&t->notes[0], file, foffset))
1702                         return 0;
1703
1704                 if (first && !writenote(&info->psinfo, file, foffset))
1705                         return 0;
1706                 if (first && !writenote(&info->signote, file, foffset))
1707                         return 0;
1708                 if (first && !writenote(&info->auxv, file, foffset))
1709                         return 0;
1710                 if (first && !writenote(&info->files, file, foffset))
1711                         return 0;
1712
1713                 for (i = 1; i < info->thread_notes; ++i)
1714                         if (t->notes[i].data &&
1715                             !writenote(&t->notes[i], file, foffset))
1716                                 return 0;
1717
1718                 first = 0;
1719                 t = t->next;
1720         } while (t);
1721
1722         return 1;
1723 }
1724
1725 static void free_note_info(struct elf_note_info *info)
1726 {
1727         struct elf_thread_core_info *threads = info->thread;
1728         while (threads) {
1729                 unsigned int i;
1730                 struct elf_thread_core_info *t = threads;
1731                 threads = t->next;
1732                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1733                 for (i = 1; i < info->thread_notes; ++i)
1734                         kfree(t->notes[i].data);
1735                 kfree(t);
1736         }
1737         kfree(info->psinfo.data);
1738         vfree(info->files.data);
1739 }
1740
1741 #else
1742
1743 /* Here is the structure in which the status of each thread is captured. */
1744 struct elf_thread_status
1745 {
1746         struct list_head list;
1747         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1748         elf_fpregset_t fpu;             /* NT_PRFPREG */
1749         struct task_struct *thread;
1750 #ifdef ELF_CORE_COPY_XFPREGS
1751         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1752 #endif
1753         struct memelfnote notes[3];
1754         int num_notes;
1755 };
1756
1757 /*
1758  * To add per-thread information to the ELF core file, we keep a linked
1759  * list of every thread's pr_status and then create
1760  * a single section for them in the final core file.
1761  */
1762 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1763 {
1764         int sz = 0;
1765         struct task_struct *p = t->thread;
1766         t->num_notes = 0;
1767
1768         fill_prstatus(&t->prstatus, p, signr);
1769         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);        
1770         
1771         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1772                   &(t->prstatus));
1773         t->num_notes++;
1774         sz += notesize(&t->notes[0]);
1775
1776         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1777                                                                 &t->fpu))) {
1778                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1779                           &(t->fpu));
1780                 t->num_notes++;
1781                 sz += notesize(&t->notes[1]);
1782         }
1783
1784 #ifdef ELF_CORE_COPY_XFPREGS
1785         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1786                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1787                           sizeof(t->xfpu), &t->xfpu);
1788                 t->num_notes++;
1789                 sz += notesize(&t->notes[2]);
1790         }
1791 #endif  
1792         return sz;
1793 }
1794
1795 struct elf_note_info {
1796         struct memelfnote *notes;
1797         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1798         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1799         struct list_head thread_list;
1800         elf_fpregset_t *fpu;
1801 #ifdef ELF_CORE_COPY_XFPREGS
1802         elf_fpxregset_t *xfpu;
1803 #endif
1804         user_siginfo_t csigdata;
1805         int thread_status_size;
1806         int numnote;
1807 };
1808
1809 static int elf_note_info_init(struct elf_note_info *info)
1810 {
1811         memset(info, 0, sizeof(*info));
1812         INIT_LIST_HEAD(&info->thread_list);
1813
1814         /* Allocate zeroed space for ELF notes (see free_note_info()) */
1815         info->notes = kzalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1816         if (!info->notes)
1817                 return 0;
1818         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1819         if (!info->psinfo)
1820                 return 0;
1821         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1822         if (!info->prstatus)
1823                 return 0;
1824         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1825         if (!info->fpu)
1826                 return 0;
1827 #ifdef ELF_CORE_COPY_XFPREGS
1828         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1829         if (!info->xfpu)
1830                 return 0;
1831 #endif
1832         return 1;
1833 }
1834
1835 static int fill_note_info(struct elfhdr *elf, int phdrs,
1836                           struct elf_note_info *info,
1837                           siginfo_t *siginfo, struct pt_regs *regs)
1838 {
1839         struct list_head *t;
1840
1841         if (!elf_note_info_init(info))
1842                 return 0;
1843
1844         if (siginfo->si_signo) {
1845                 struct core_thread *ct;
1846                 struct elf_thread_status *ets;
1847
1848                 for (ct = current->mm->core_state->dumper.next;
1849                                                 ct; ct = ct->next) {
1850                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1851                         if (!ets)
1852                                 return 0;
1853
1854                         ets->thread = ct->task;
1855                         list_add(&ets->list, &info->thread_list);
1856                 }
1857
1858                 list_for_each(t, &info->thread_list) {
1859                         int sz;
1860
1861                         ets = list_entry(t, struct elf_thread_status, list);
1862                         sz = elf_dump_thread_status(siginfo->si_signo, ets);
1863                         info->thread_status_size += sz;
1864                 }
1865         }
1866         /* now collect the dump for the current task */
1867         memset(info->prstatus, 0, sizeof(*info->prstatus));
1868         fill_prstatus(info->prstatus, current, siginfo->si_signo);
1869         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1870
1871         /* Set up header */
1872         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1873
1874         /*
1875          * Set up the notes in similar form to SVR4 core dumps made
1876          * with info from their /proc.
1877          */
1878
1879         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1880                   sizeof(*info->prstatus), info->prstatus);
1881         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1882         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1883                   sizeof(*info->psinfo), info->psinfo);
1884
1885         fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1886         fill_auxv_note(info->notes + 3, current->mm);
1887         fill_files_note(info->notes + 4);
1888
1889         info->numnote = 5;
1890
1891         /* Try to dump the FPU. */
1892         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1893                                                                info->fpu);
1894         if (info->prstatus->pr_fpvalid)
1895                 fill_note(info->notes + info->numnote++,
1896                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1897 #ifdef ELF_CORE_COPY_XFPREGS
1898         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1899                 fill_note(info->notes + info->numnote++,
1900                           "LINUX", ELF_CORE_XFPREG_TYPE,
1901                           sizeof(*info->xfpu), info->xfpu);
1902 #endif
1903
1904         return 1;
1905 }
1906
1907 static size_t get_note_info_size(struct elf_note_info *info)
1908 {
1909         int sz = 0;
1910         int i;
1911
1912         for (i = 0; i < info->numnote; i++)
1913                 sz += notesize(info->notes + i);
1914
1915         sz += info->thread_status_size;
1916
1917         return sz;
1918 }
1919
1920 static int write_note_info(struct elf_note_info *info,
1921                            struct file *file, loff_t *foffset)
1922 {
1923         int i;
1924         struct list_head *t;
1925
1926         for (i = 0; i < info->numnote; i++)
1927                 if (!writenote(info->notes + i, file, foffset))
1928                         return 0;
1929
1930         /* write out the thread status notes section */
1931         list_for_each(t, &info->thread_list) {
1932                 struct elf_thread_status *tmp =
1933                                 list_entry(t, struct elf_thread_status, list);
1934
1935                 for (i = 0; i < tmp->num_notes; i++)
1936                         if (!writenote(&tmp->notes[i], file, foffset))
1937                                 return 0;
1938         }
1939
1940         return 1;
1941 }
1942
1943 static void free_note_info(struct elf_note_info *info)
1944 {
1945         while (!list_empty(&info->thread_list)) {
1946                 struct list_head *tmp = info->thread_list.next;
1947                 list_del(tmp);
1948                 kfree(list_entry(tmp, struct elf_thread_status, list));
1949         }
1950
1951         /* Free data allocated by fill_files_note(), if any: */
1952         if (info->notes)
1953                 vfree(info->notes[4].data);
1953
1954         kfree(info->prstatus);
1955         kfree(info->psinfo);
1956         kfree(info->notes);
1957         kfree(info->fpu);
1958 #ifdef ELF_CORE_COPY_XFPREGS
1959         kfree(info->xfpu);
1960 #endif
1961 }
1962
1963 #endif
1964
1965 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1966                                         struct vm_area_struct *gate_vma)
1967 {
1968         struct vm_area_struct *ret = tsk->mm->mmap;
1969
1970         if (ret)
1971                 return ret;
1972         return gate_vma;
1973 }
1974 /*
1975  * Helper function for iterating across a vma list.  It ensures that the caller
1976  * will visit `gate_vma' prior to terminating the search.
1977  */
1978 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1979                                         struct vm_area_struct *gate_vma)
1980 {
1981         struct vm_area_struct *ret;
1982
1983         ret = this_vma->vm_next;
1984         if (ret)
1985                 return ret;
1986         if (this_vma == gate_vma)
1987                 return NULL;
1988         return gate_vma;
1989 }
1990
1991 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1992                              elf_addr_t e_shoff, int segs)
1993 {
1994         elf->e_shoff = e_shoff;
1995         elf->e_shentsize = sizeof(*shdr4extnum);
1996         elf->e_shnum = 1;
1997         elf->e_shstrndx = SHN_UNDEF;
1998
1999         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2000
2001         shdr4extnum->sh_type = SHT_NULL;
2002         shdr4extnum->sh_size = elf->e_shnum;
2003         shdr4extnum->sh_link = elf->e_shstrndx;
2004         shdr4extnum->sh_info = segs;
2005 }
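
/*
 * Illustrative sketch, not part of this file: how a core-file reader
 * would recover the real segment count stashed above once e_phnum has
 * saturated.  The function name is made up; shdr0 is the first section
 * header of the core file.
 */
#if 0
static unsigned int core_real_phnum(const struct elfhdr *ehdr,
                                    const struct elf_shdr *shdr0)
{
        /* e_phnum saturates at PN_XNUM; the true count is in sh_info */
        if (ehdr->e_phnum == PN_XNUM)
                return shdr0->sh_info;
        return ehdr->e_phnum;
}
#endif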
2006
2007 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
2008                                      unsigned long mm_flags)
2009 {
2010         struct vm_area_struct *vma;
2011         size_t size = 0;
2012
2013         for (vma = first_vma(current, gate_vma); vma != NULL;
2014              vma = next_vma(vma, gate_vma))
2015                 size += vma_dump_size(vma, mm_flags);
2016         return size;
2017 }
2018
2019 /*
2020  * Actual dumper
2021  *
2022  * This is a two-pass process; first we find the offsets of the bits,
2023  * and then they are actually written out.  If we run out of core limit
2024  * we just truncate.
2025  */
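
/*
 * The resulting file layout, with all offsets computed in the first pass:
 *
 *   ELF header | program headers | notes | pad to ELF_EXEC_PAGESIZE |
 *   PT_LOAD segment data ... | extra data | extended-numbering section
 *   header (the last only when e_phnum == PN_XNUM)
 */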
2026 static int elf_core_dump(struct coredump_params *cprm)
2027 {
2028         int has_dumped = 0;
2029         mm_segment_t fs;
2030         int segs;
2031         size_t size = 0;
2032         struct vm_area_struct *vma, *gate_vma;
2033         struct elfhdr *elf = NULL;
2034         loff_t offset = 0, dataoff, foffset;
2035         struct elf_note_info info;
2036         struct elf_phdr *phdr4note = NULL;
2037         struct elf_shdr *shdr4extnum = NULL;
2038         Elf_Half e_phnum;
2039         elf_addr_t e_shoff;
2040
2041         /*
2042          * We no longer stop all VM operations.
2043          * 
2044          * This is because those processes that could possibly change map_count
2045          * or the mmap / vma pages are now blocked in do_exit until current
2046          * has finished this core dump.
2047          *
2048          * Only ptrace can touch these memory addresses, but it doesn't change
2049          * the map_count or the pages allocated. So no possibility of crashing
2050          * exists while dumping the mm->vm_next areas to the core file.
2051          */
2052   
2053         /* alloc memory for large data structures: too large to be on stack */
2054         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2055         if (!elf)
2056                 goto out;
2057         /*
2058          * The number of segments is recorded in the ELF header as a 16-bit
2059          * value.  Please check the DEFAULT_MAX_MAP_COUNT definition when
2059          * modifying this.
2060          */
2061         segs = current->mm->map_count;
2062         segs += elf_core_extra_phdrs();
2063
2064         gate_vma = get_gate_vma(current->mm);
2065         if (gate_vma != NULL)
2066                 segs++;
2067
2068         /* for notes section */
2069         segs++;
2070
2071         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2072          * this, the kernel supports extended numbering. Have a look at
2073          * include/linux/elf.h for further information. */
2074         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2075
2076         /*
2077          * Collect all the non-memory information about the process for the
2078          * notes.  This also sets up the file header.
2079          */
2080         if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
2081                 goto cleanup;
2082
2083         has_dumped = 1;
2084         current->flags |= PF_DUMPCORE;
2085   
2086         fs = get_fs();
2087         set_fs(KERNEL_DS);
2088
2089         offset += sizeof(*elf);                         /* Elf header */
2090         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
2091         foffset = offset;
2092
2093         /* Write notes phdr entry */
2094         {
2095                 size_t sz = get_note_info_size(&info);
2096
2097                 sz += elf_coredump_extra_notes_size();
2098
2099                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2100                 if (!phdr4note)
2101                         goto end_coredump;
2102
2103                 fill_elf_note_phdr(phdr4note, sz, offset);
2104                 offset += sz;
2105         }
2106
2107         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
2108
2109         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
2110         offset += elf_core_extra_data_size();
2111         e_shoff = offset;
2112
2113         if (e_phnum == PN_XNUM) {
2114                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2115                 if (!shdr4extnum)
2116                         goto end_coredump;
2117                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2118         }
2119
2120         offset = dataoff;
2121
2122         size += sizeof(*elf);
2123         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2124                 goto end_coredump;
2125
2126         size += sizeof(*phdr4note);
2127         if (size > cprm->limit
2128             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2129                 goto end_coredump;
2130
2131         /* Write program headers for segments dump */
2132         for (vma = first_vma(current, gate_vma); vma != NULL;
2133                         vma = next_vma(vma, gate_vma)) {
2134                 struct elf_phdr phdr;
2135
2136                 phdr.p_type = PT_LOAD;
2137                 phdr.p_offset = offset;
2138                 phdr.p_vaddr = vma->vm_start;
2139                 phdr.p_paddr = 0;
2140                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2141                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2142                 offset += phdr.p_filesz;
2143                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2144                 if (vma->vm_flags & VM_WRITE)
2145                         phdr.p_flags |= PF_W;
2146                 if (vma->vm_flags & VM_EXEC)
2147                         phdr.p_flags |= PF_X;
2148                 phdr.p_align = ELF_EXEC_PAGESIZE;
2149
2150                 size += sizeof(phdr);
2151                 if (size > cprm->limit
2152                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2153                         goto end_coredump;
2154         }
2155
2156         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2157                 goto end_coredump;
2158
2159         /* write out the notes section */
2160         if (!write_note_info(&info, cprm->file, &foffset))
2161                 goto end_coredump;
2162
2163         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2164                 goto end_coredump;
2165
2166         /* Align to page */
2167         if (!dump_seek(cprm->file, dataoff - foffset))
2168                 goto end_coredump;
2169
2170         for (vma = first_vma(current, gate_vma); vma != NULL;
2171                         vma = next_vma(vma, gate_vma)) {
2172                 unsigned long addr;
2173                 unsigned long end;
2174
2175                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2176
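                /*
                 * Pages that cannot be brought in are skipped with
                 * dump_seek(), which leaves a hole in the core file (or
                 * writes zeroes when the target cannot seek).
                 */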
2177                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2178                         struct page *page;
2179                         int stop;
2180
2181                         page = get_dump_page(addr);
2182                         if (page) {
2183                                 void *kaddr = kmap(page);
2184                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2185                                         !dump_write(cprm->file, kaddr,
2186                                                     PAGE_SIZE);
2187                                 kunmap(page);
2188                                 page_cache_release(page);
2189                         } else
2190                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2191                         if (stop)
2192                                 goto end_coredump;
2193                 }
2194         }
2195
2196         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2197                 goto end_coredump;
2198
2199         if (e_phnum == PN_XNUM) {
2200                 size += sizeof(*shdr4extnum);
2201                 if (size > cprm->limit
2202                     || !dump_write(cprm->file, shdr4extnum,
2203                                    sizeof(*shdr4extnum)))
2204                         goto end_coredump;
2205         }
2206
2207 end_coredump:
2208         set_fs(fs);
2209
2210 cleanup:
2211         free_note_info(&info);
2212         kfree(shdr4extnum);
2213         kfree(phdr4note);
2214         kfree(elf);
2215 out:
2216         return has_dumped;
2217 }
2218
2219 #endif          /* CONFIG_ELF_CORE */
2220
2221 static int __init init_elf_binfmt(void)
2222 {
2223         register_binfmt(&elf_format);
2224         return 0;
2225 }
2226
2227 static void __exit exit_elf_binfmt(void)
2228 {
2229         /* Remove the ELF loader. */
2230         unregister_binfmt(&elf_format);
2231 }
2232
2233 core_initcall(init_elf_binfmt);
2234 module_exit(exit_elf_binfmt);
2235 MODULE_LICENSE("GPL");