2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <asm/uaccess.h>
39 #include <asm/param.h>
43 #define user_long_t long
45 #ifndef user_siginfo_t
46 #define user_siginfo_t siginfo_t
49 static int load_elf_binary(struct linux_binprm *bprm);
50 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
51 int, int, unsigned long);
54 static int load_elf_library(struct file *);
56 #define load_elf_library NULL
60 * If we don't support core dumping, then supply a NULL so we
63 #ifdef CONFIG_ELF_CORE
64 static int elf_core_dump(struct coredump_params *cprm);
66 #define elf_core_dump NULL
69 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
70 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
72 #define ELF_MIN_ALIGN PAGE_SIZE
75 #ifndef ELF_CORE_EFLAGS
76 #define ELF_CORE_EFLAGS 0
/*
 * Address helpers that work in units of ELF_MIN_ALIGN bytes:
 *   ELF_PAGESTART(_v)  - clear the low bits of _v (round down).
 *   ELF_PAGEOFFSET(_v) - keep only the low bits of _v (offset in the unit).
 *   ELF_PAGEALIGN(_v)  - add ELF_MIN_ALIGN - 1, then clear the low bits
 *                        (round up; an aligned _v comes back unchanged).
 * NOTE(review): the masks assume ELF_MIN_ALIGN is a power of two - confirm.
 */
79 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
80 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
81 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
83 static struct linux_binfmt elf_format = {
84 .module = THIS_MODULE,
85 .load_binary = load_elf_binary,
86 .load_shlib = load_elf_library,
87 .core_dump = elf_core_dump,
88 .min_coredump = ELF_EXEC_PAGESIZE,
91 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
/*
 * set_brk() - map the range between two addresses and move the brk there.
 * @start: start of the range; rounded up with ELF_PAGEALIGN().
 * @end:   end of the range; rounded up with ELF_PAGEALIGN().
 *
 * The aligned range (end - start bytes from start) is passed to vm_brk(),
 * and both mm->start_brk and mm->brk of the current task are set to the
 * aligned end.
 * NOTE(review): the condition guarding the vm_brk() call, the handling of
 * its result and the return statements are not visible here - confirm.
 */
93 static int set_brk(unsigned long start, unsigned long end)
95 start = ELF_PAGEALIGN(start);
96 end = ELF_PAGEALIGN(end);
99 addr = vm_brk(start, end - start);
103 current->mm->start_brk = current->mm->brk = end;
107 /* We need to explicitly zero any fractional pages
108 after the data section (i.e. bss). This would
109 contain the junk from the file that should not
112 static int padzero(unsigned long elf_bss)
116 nbyte = ELF_PAGEOFFSET(elf_bss);
118 nbyte = ELF_MIN_ALIGN - nbyte;
119 if (clear_user((void __user *) elf_bss, nbyte))
125 /* Let's use some macros to make this stack manipulation a little clearer */
126 #ifdef CONFIG_STACK_GROWSUP
127 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
128 #define STACK_ROUND(sp, items) \
129 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
130 #define STACK_ALLOC(sp, len) ({ \
131 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
134 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
135 #define STACK_ROUND(sp, items) \
136 (((unsigned long) (sp - items)) &~ 15UL)
137 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
140 #ifndef ELF_BASE_PLATFORM
142 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
143 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
144 * will be copied to the user stack in the same manner as AT_PLATFORM.
146 #define ELF_BASE_PLATFORM NULL
150 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
151 unsigned long load_addr, unsigned long interp_load_addr)
153 unsigned long p = bprm->p;
154 int argc = bprm->argc;
155 int envc = bprm->envc;
156 elf_addr_t __user *argv;
157 elf_addr_t __user *envp;
158 elf_addr_t __user *sp;
159 elf_addr_t __user *u_platform;
160 elf_addr_t __user *u_base_platform;
161 elf_addr_t __user *u_rand_bytes;
162 const char *k_platform = ELF_PLATFORM;
163 const char *k_base_platform = ELF_BASE_PLATFORM;
164 unsigned char k_rand_bytes[16];
166 elf_addr_t *elf_info;
168 const struct cred *cred = current_cred();
169 struct vm_area_struct *vma;
172 * In some cases (e.g. Hyper-Threading), we want to avoid L1
173 * evictions by the processes running on the same package. One
174 * thing we can do is to shuffle the initial stack for them.
177 p = arch_align_stack(p);
180 * If this architecture has a platform capability string, copy it
181 * to userspace. In some cases (Sparc), this info is impossible
182 * for userspace to get any other way, in others (i386) it is
187 size_t len = strlen(k_platform) + 1;
189 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
190 if (__copy_to_user(u_platform, k_platform, len))
195 * If this architecture has a "base" platform capability
196 * string, copy it to userspace.
198 u_base_platform = NULL;
199 if (k_base_platform) {
200 size_t len = strlen(k_base_platform) + 1;
202 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
203 if (__copy_to_user(u_base_platform, k_base_platform, len))
208 * Generate 16 random bytes for userspace PRNG seeding.
210 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
211 u_rand_bytes = (elf_addr_t __user *)
212 STACK_ALLOC(p, sizeof(k_rand_bytes));
213 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
216 /* Create the ELF interpreter info */
217 elf_info = (elf_addr_t *)current->mm->saved_auxv;
218 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
219 #define NEW_AUX_ENT(id, val) \
221 elf_info[ei_index++] = id; \
222 elf_info[ei_index++] = val; \
227 * ARCH_DLINFO must come first so PPC can do its special alignment of
229 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
230 * ARCH_DLINFO changes
234 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
235 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
236 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
237 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
238 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
239 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
240 NEW_AUX_ENT(AT_BASE, interp_load_addr);
241 NEW_AUX_ENT(AT_FLAGS, 0);
242 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
243 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
244 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
245 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
246 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
247 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
248 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
250 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
252 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
254 NEW_AUX_ENT(AT_PLATFORM,
255 (elf_addr_t)(unsigned long)u_platform);
257 if (k_base_platform) {
258 NEW_AUX_ENT(AT_BASE_PLATFORM,
259 (elf_addr_t)(unsigned long)u_base_platform);
261 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
262 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
265 /* AT_NULL is zero; clear the rest too */
266 memset(&elf_info[ei_index], 0,
267 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
269 /* And advance past the AT_NULL entry. */
272 sp = STACK_ADD(p, ei_index);
274 items = (argc + 1) + (envc + 1) + 1;
275 bprm->p = STACK_ROUND(sp, items);
277 /* Point sp at the lowest address on the stack */
278 #ifdef CONFIG_STACK_GROWSUP
279 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
280 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
282 sp = (elf_addr_t __user *)bprm->p;
287 * Grow the stack manually; some architectures have a limit on how
288 * far ahead a user-space access may be in order to grow the stack.
290 vma = find_extend_vma(current->mm, bprm->p);
294 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
295 if (__put_user(argc, sp++))
298 envp = argv + argc + 1;
300 /* Populate argv and envp */
301 p = current->mm->arg_end = current->mm->arg_start;
304 if (__put_user((elf_addr_t)p, argv++))
306 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
307 if (!len || len > MAX_ARG_STRLEN)
311 if (__put_user(0, argv))
313 current->mm->arg_end = current->mm->env_start = p;
316 if (__put_user((elf_addr_t)p, envp++))
318 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
319 if (!len || len > MAX_ARG_STRLEN)
323 if (__put_user(0, envp))
325 current->mm->env_end = p;
327 /* Put the elf_info on the stack in the right place. */
328 sp = (elf_addr_t __user *)envp + 1;
329 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
/*
 * elf_map() - map the file-backed part of one program header via vm_mmap().
 * @filep:      file to map from.
 * @addr:       requested address; rounded down with ELF_PAGESTART().
 * @eppnt:      program header; p_filesz, p_vaddr and p_offset determine the
 *              mapping length and the file offset.
 * @prot:       protection bits, passed through to vm_mmap().
 * @type:       mapping flags, passed through to vm_mmap().
 * @total_size: size of the whole image; rounded up with ELF_PAGEALIGN()
 *              before use.
 *
 * The length is p_filesz plus the in-page offset of p_vaddr, rounded up;
 * the file offset is p_offset minus that same in-page offset, so the file
 * offset and the address stay congruent.
 * In the total_size path the full image span is mapped first and everything
 * past this segment's length is unmapped again; the other path maps just
 * the segment.
 * NOTE(review): the conditions choosing between the two vm_mmap() calls and
 * the return statement are not visible here - confirm.
 */
336 static unsigned long elf_map(struct file *filep, unsigned long addr,
337 struct elf_phdr *eppnt, int prot, int type,
338 unsigned long total_size)
340 unsigned long map_addr;
341 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
342 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
343 addr = ELF_PAGESTART(addr);
344 size = ELF_PAGEALIGN(size);
346 /* mmap() will return -EINVAL if given a zero size, but a
347 * segment with zero filesize is perfectly valid */
352 * total_size is the size of the ELF (interpreter) image.
353 * The _first_ mmap needs to know the full size, otherwise
354 * randomization might put this image into an overlapping
355 * position with the ELF binary image. (since size < total_size)
356 * So we first map the 'big' image - and unmap the remainder at
357 * the end. (which unmap is needed for ELF images with holes.)
360 total_size = ELF_PAGEALIGN(total_size);
361 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
362 if (!BAD_ADDR(map_addr))
363 vm_munmap(map_addr+size, total_size-size);
365 map_addr = vm_mmap(filep, addr, size, prot, type, off);
370 #endif /* !elf_map */
/*
 * total_mapping_size() - size of the address span covered by an image.
 * @cmds: array of program headers.
 * @nr:   number of entries in @cmds.
 *
 * Walks the headers looking at PT_LOAD entries. The value returned is the
 * end of the entry at last_idx (p_vaddr + p_memsz) minus the page start of
 * the p_vaddr of the entry at first_idx.
 * NOTE(review): presumably first_idx and last_idx record the first and last
 * PT_LOAD entries; the assignments and any handling of the "no PT_LOAD
 * found" case (both indices start at -1) are not visible here - confirm.
 */
372 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
374 int i, first_idx = -1, last_idx = -1;
376 for (i = 0; i < nr; i++) {
377 if (cmds[i].p_type == PT_LOAD) {
386 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
387 ELF_PAGESTART(cmds[first_idx].p_vaddr);
391 * load_elf_phdrs() - load ELF program headers
392 * @elf_ex: ELF header of the binary whose program headers should be loaded
393 * @elf_file: the opened ELF binary file
395 * Loads ELF program headers from the binary file elf_file, which has the ELF
396 * header pointed to by elf_ex, into a newly allocated array. The caller is
397 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
399 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
400 struct file *elf_file)
402 struct elf_phdr *elf_phdata = NULL;
403 int retval, size, err = -1;
406 * If the size of this structure has changed, then punt, since
407 * we will be doing the wrong thing.
409 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
412 /* Sanity check the number of program headers... */
413 if (elf_ex->e_phnum < 1 ||
414 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
417 /* ...and their total size. */
418 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
419 if (size > ELF_MIN_ALIGN)
422 elf_phdata = kmalloc(size, GFP_KERNEL);
426 /* Read in the program headers */
427 retval = kernel_read(elf_file, elf_ex->e_phoff,
428 (char *)elf_phdata, size);
429 if (retval != size) {
430 err = (retval < 0) ? retval : -EIO;
444 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
447 * struct arch_elf_state - arch-specific ELF loading state
449 * This structure is used to preserve architecture specific data during
450 * the loading of an ELF file, throughout the checking of architecture
451 * specific ELF headers & through to the point where the ELF load is
452 * known to be proceeding (ie. SET_PERSONALITY).
454 * This implementation is a dummy for architectures which require no
457 struct arch_elf_state {
460 #define INIT_ARCH_ELF_STATE {}
463 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
464 * @ehdr: The main ELF header
465 * @phdr: The program header to check
466 * @elf: The open ELF file
467 * @is_interp: True if the phdr is from the interpreter of the ELF being
468 * loaded, else false.
469 * @state: Architecture-specific state preserved throughout the process
470 * of loading the ELF.
472 * Inspects the program header phdr to validate its correctness and/or
473 * suitability for the system. Called once per ELF program header in the
474 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its
477 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
478 * with that return code.
480 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
481 struct elf_phdr *phdr,
482 struct file *elf, bool is_interp,
483 struct arch_elf_state *state)
485 /* Dummy implementation, always proceed */
490 * arch_check_elf() - check an ELF executable
491 * @ehdr: The main ELF header
492 * @has_interp: True if the ELF has an interpreter, else false.
493 * @state: Architecture-specific state preserved throughout the process
494 * of loading the ELF.
496 * Provides a final opportunity for architecture code to reject the loading
497 * of the ELF & cause an exec syscall to return an error. This is called after
498 * all program headers to be checked by arch_elf_pt_proc have been.
500 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
501 * with that return code.
503 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
504 struct arch_elf_state *state)
506 /* Dummy implementation, always proceed */
510 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
512 /* This is much more generalized than the library routine read function,
513 so we keep this separate. Technically the library read function
514 is only provided so that we can read a.out libraries that have
517 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
518 struct file *interpreter, unsigned long *interp_map_addr,
519 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
521 struct elf_phdr *eppnt;
522 unsigned long load_addr = 0;
523 int load_addr_set = 0;
524 unsigned long last_bss = 0, elf_bss = 0;
525 unsigned long error = ~0UL;
526 unsigned long total_size;
529 /* First of all, some simple consistency checks */
530 if (interp_elf_ex->e_type != ET_EXEC &&
531 interp_elf_ex->e_type != ET_DYN)
533 if (!elf_check_arch(interp_elf_ex))
535 if (!interpreter->f_op->mmap)
538 total_size = total_mapping_size(interp_elf_phdata,
539 interp_elf_ex->e_phnum);
545 eppnt = interp_elf_phdata;
546 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
547 if (eppnt->p_type == PT_LOAD) {
548 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
550 unsigned long vaddr = 0;
551 unsigned long k, map_addr;
553 if (eppnt->p_flags & PF_R)
554 elf_prot = PROT_READ;
555 if (eppnt->p_flags & PF_W)
556 elf_prot |= PROT_WRITE;
557 if (eppnt->p_flags & PF_X)
558 elf_prot |= PROT_EXEC;
559 vaddr = eppnt->p_vaddr;
560 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
561 elf_type |= MAP_FIXED;
562 else if (no_base && interp_elf_ex->e_type == ET_DYN)
565 map_addr = elf_map(interpreter, load_addr + vaddr,
566 eppnt, elf_prot, elf_type, total_size);
568 if (!*interp_map_addr)
569 *interp_map_addr = map_addr;
571 if (BAD_ADDR(map_addr))
574 if (!load_addr_set &&
575 interp_elf_ex->e_type == ET_DYN) {
576 load_addr = map_addr - ELF_PAGESTART(vaddr);
581 * Check to see if the section's size will overflow the
582 * allowed task size. Note that p_filesz must always be
583 * <= p_memsz so it's only necessary to check p_memsz.
585 k = load_addr + eppnt->p_vaddr;
587 eppnt->p_filesz > eppnt->p_memsz ||
588 eppnt->p_memsz > TASK_SIZE ||
589 TASK_SIZE - eppnt->p_memsz < k) {
595 * Find the end of the file mapping for this phdr, and
596 * keep track of the largest address we see for this.
598 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
603 * Do the same thing for the memory mapping - between
604 * elf_bss and last_bss is the bss section.
606 k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
612 if (last_bss > elf_bss) {
614 * Now fill out the bss section. First pad the last page up
615 * to the page boundary, and then perform a mmap to make sure
616 * that there are zero-mapped pages up to and including the
619 if (padzero(elf_bss)) {
624 /* What we have mapped so far */
625 elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
627 /* Map the last of the bss segment */
628 error = vm_brk(elf_bss, last_bss - elf_bss);
639 * These are the functions used to load ELF style executables and shared
640 * libraries. There is no binary dependent code anywhere else.
643 #ifndef STACK_RND_MASK
644 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
/*
 * randomize_stack_top() - apply stack randomization to a stack top address.
 * @stack_top: the unrandomized stack top.
 *
 * When the current task has PF_RANDOMIZE set and ADDR_NO_RANDOMIZE is not
 * in its personality, a value from get_random_int() is masked with
 * STACK_RND_MASK and shifted left by PAGE_SHIFT, giving an offset that is a
 * whole number of pages. Otherwise the offset stays 0.
 *
 * Return: PAGE_ALIGN(stack_top) plus the offset under CONFIG_STACK_GROWSUP,
 * PAGE_ALIGN(stack_top) minus the offset in the other branch.
 */
647 static unsigned long randomize_stack_top(unsigned long stack_top)
649 unsigned long random_variable = 0;
651 if ((current->flags & PF_RANDOMIZE) &&
652 !(current->personality & ADDR_NO_RANDOMIZE)) {
653 random_variable = (unsigned long) get_random_int();
654 random_variable &= STACK_RND_MASK;
655 random_variable <<= PAGE_SHIFT;
657 #ifdef CONFIG_STACK_GROWSUP
658 return PAGE_ALIGN(stack_top) + random_variable;
660 return PAGE_ALIGN(stack_top) - random_variable;
664 static int load_elf_binary(struct linux_binprm *bprm)
666 struct file *interpreter = NULL; /* to shut gcc up */
667 unsigned long load_addr = 0, load_bias = 0;
668 int load_addr_set = 0;
669 char * elf_interpreter = NULL;
671 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
672 unsigned long elf_bss, elf_brk;
674 unsigned long elf_entry;
675 unsigned long interp_load_addr = 0;
676 unsigned long start_code, end_code, start_data, end_data;
677 unsigned long reloc_func_desc __maybe_unused = 0;
678 int executable_stack = EXSTACK_DEFAULT;
679 struct pt_regs *regs = current_pt_regs();
681 struct elfhdr elf_ex;
682 struct elfhdr interp_elf_ex;
684 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
686 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
692 /* Get the exec-header */
693 loc->elf_ex = *((struct elfhdr *)bprm->buf);
696 /* First of all, some simple consistency checks */
697 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
700 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
702 if (!elf_check_arch(&loc->elf_ex))
704 if (!bprm->file->f_op->mmap)
707 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
711 elf_ppnt = elf_phdata;
720 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
721 if (elf_ppnt->p_type == PT_INTERP) {
722 /* This is the program interpreter used for
723 * shared libraries - for now assume that this
724 * is an a.out format binary
727 if (elf_ppnt->p_filesz > PATH_MAX ||
728 elf_ppnt->p_filesz < 2)
732 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
734 if (!elf_interpreter)
737 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
740 if (retval != elf_ppnt->p_filesz) {
743 goto out_free_interp;
745 /* make sure path is NUL terminated */
747 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
748 goto out_free_interp;
750 interpreter = open_exec(elf_interpreter);
751 retval = PTR_ERR(interpreter);
752 if (IS_ERR(interpreter))
753 goto out_free_interp;
756 * If the binary is not readable then enforce
757 * mm->dumpable = 0 regardless of the interpreter's
760 would_dump(bprm, interpreter);
762 retval = kernel_read(interpreter, 0, bprm->buf,
764 if (retval != BINPRM_BUF_SIZE) {
767 goto out_free_dentry;
770 /* Get the exec headers */
771 loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
777 elf_ppnt = elf_phdata;
778 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
779 switch (elf_ppnt->p_type) {
781 if (elf_ppnt->p_flags & PF_X)
782 executable_stack = EXSTACK_ENABLE_X;
784 executable_stack = EXSTACK_DISABLE_X;
787 case PT_LOPROC ... PT_HIPROC:
788 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
792 goto out_free_dentry;
796 /* Some simple consistency checks for the interpreter */
797 if (elf_interpreter) {
799 /* Not an ELF interpreter */
800 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
801 goto out_free_dentry;
802 /* Verify the interpreter has a valid arch */
803 if (!elf_check_arch(&loc->interp_elf_ex))
804 goto out_free_dentry;
806 /* Load the interpreter program headers */
807 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
809 if (!interp_elf_phdata)
810 goto out_free_dentry;
812 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
813 elf_ppnt = interp_elf_phdata;
814 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
815 switch (elf_ppnt->p_type) {
816 case PT_LOPROC ... PT_HIPROC:
817 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
818 elf_ppnt, interpreter,
821 goto out_free_dentry;
827 * Allow arch code to reject the ELF at this point, whilst it's
828 * still possible to return an error to the code that invoked
831 retval = arch_check_elf(&loc->elf_ex, !!interpreter, &arch_state);
833 goto out_free_dentry;
835 /* Flush all traces of the currently running executable */
836 retval = flush_old_exec(bprm);
838 goto out_free_dentry;
840 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
841 may depend on the personality. */
842 SET_PERSONALITY2(loc->elf_ex, &arch_state);
843 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
844 current->personality |= READ_IMPLIES_EXEC;
846 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
847 current->flags |= PF_RANDOMIZE;
849 setup_new_exec(bprm);
851 /* Do this so that we can load the interpreter, if need be. We will
852 change some of these later */
853 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
856 goto out_free_dentry;
858 current->mm->start_stack = bprm->p;
860 /* Now we do a little grungy work by mmapping the ELF image into
861 the correct location in memory. */
862 for(i = 0, elf_ppnt = elf_phdata;
863 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
864 int elf_prot = 0, elf_flags;
865 unsigned long k, vaddr;
866 unsigned long total_size = 0;
868 if (elf_ppnt->p_type != PT_LOAD)
871 if (unlikely (elf_brk > elf_bss)) {
874 /* There was a PT_LOAD segment with p_memsz > p_filesz
875 before this one. Map anonymous pages, if needed,
876 and clear the area. */
877 retval = set_brk(elf_bss + load_bias,
878 elf_brk + load_bias);
880 goto out_free_dentry;
881 nbyte = ELF_PAGEOFFSET(elf_bss);
883 nbyte = ELF_MIN_ALIGN - nbyte;
884 if (nbyte > elf_brk - elf_bss)
885 nbyte = elf_brk - elf_bss;
886 if (clear_user((void __user *)elf_bss +
889 * This bss-zeroing can fail if the ELF
890 * file specifies odd protections. So
891 * we don't check the return value
897 if (elf_ppnt->p_flags & PF_R)
898 elf_prot |= PROT_READ;
899 if (elf_ppnt->p_flags & PF_W)
900 elf_prot |= PROT_WRITE;
901 if (elf_ppnt->p_flags & PF_X)
902 elf_prot |= PROT_EXEC;
904 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
906 vaddr = elf_ppnt->p_vaddr;
907 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
908 elf_flags |= MAP_FIXED;
909 } else if (loc->elf_ex.e_type == ET_DYN) {
910 /* Try and get dynamic programs out of the way of the
911 * default mmap base, as well as whatever program they
912 * might try to exec. This is because the brk will
913 * follow the loader, and is not movable. */
914 load_bias = ELF_ET_DYN_BASE - vaddr;
915 if (current->flags & PF_RANDOMIZE)
916 load_bias += arch_mmap_rnd();
917 load_bias = ELF_PAGESTART(load_bias);
918 total_size = total_mapping_size(elf_phdata,
919 loc->elf_ex.e_phnum);
922 goto out_free_dentry;
926 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
927 elf_prot, elf_flags, total_size);
928 if (BAD_ADDR(error)) {
929 retval = IS_ERR((void *)error) ?
930 PTR_ERR((void*)error) : -EINVAL;
931 goto out_free_dentry;
934 if (!load_addr_set) {
936 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
937 if (loc->elf_ex.e_type == ET_DYN) {
939 ELF_PAGESTART(load_bias + vaddr);
940 load_addr += load_bias;
941 reloc_func_desc = load_bias;
944 k = elf_ppnt->p_vaddr;
951 * Check to see if the section's size will overflow the
952 * allowed task size. Note that p_filesz must always be
953 * <= p_memsz so it is only necessary to check p_memsz.
955 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
956 elf_ppnt->p_memsz > TASK_SIZE ||
957 TASK_SIZE - elf_ppnt->p_memsz < k) {
958 /* set_brk can never work. Avoid overflows. */
960 goto out_free_dentry;
963 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
967 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
971 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
976 loc->elf_ex.e_entry += load_bias;
977 elf_bss += load_bias;
978 elf_brk += load_bias;
979 start_code += load_bias;
980 end_code += load_bias;
981 start_data += load_bias;
982 end_data += load_bias;
984 /* Calling set_brk effectively mmaps the pages that we need
985 * for the bss and break sections. We must do this before
986 * mapping in the interpreter, to make sure it doesn't wind
987 * up getting placed where the bss needs to go.
989 retval = set_brk(elf_bss, elf_brk);
991 goto out_free_dentry;
992 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
993 retval = -EFAULT; /* Nobody gets to see this, but.. */
994 goto out_free_dentry;
997 if (elf_interpreter) {
998 unsigned long interp_map_addr = 0;
1000 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1003 load_bias, interp_elf_phdata);
1004 if (!IS_ERR((void *)elf_entry)) {
1006 * load_elf_interp() returns relocation
1009 interp_load_addr = elf_entry;
1010 elf_entry += loc->interp_elf_ex.e_entry;
1012 if (BAD_ADDR(elf_entry)) {
1013 retval = IS_ERR((void *)elf_entry) ?
1014 (int)elf_entry : -EINVAL;
1015 goto out_free_dentry;
1017 reloc_func_desc = interp_load_addr;
1019 allow_write_access(interpreter);
1021 kfree(elf_interpreter);
1023 elf_entry = loc->elf_ex.e_entry;
1024 if (BAD_ADDR(elf_entry)) {
1026 goto out_free_dentry;
1030 kfree(interp_elf_phdata);
1033 set_binfmt(&elf_format);
1035 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1036 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1039 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1041 install_exec_creds(bprm);
1042 retval = create_elf_tables(bprm, &loc->elf_ex,
1043 load_addr, interp_load_addr);
1046 /* N.B. passed_fileno might not be initialized? */
1047 current->mm->end_code = end_code;
1048 current->mm->start_code = start_code;
1049 current->mm->start_data = start_data;
1050 current->mm->end_data = end_data;
1051 current->mm->start_stack = bprm->p;
1053 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1054 current->mm->brk = current->mm->start_brk =
1055 arch_randomize_brk(current->mm);
1056 #ifdef compat_brk_randomized
1057 current->brk_randomized = 1;
1061 if (current->personality & MMAP_PAGE_ZERO) {
1062 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1063 and some applications "depend" upon this behavior.
1064 Since we do not have the power to recompile these, we
1065 emulate the SVr4 behavior. Sigh. */
1066 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1067 MAP_FIXED | MAP_PRIVATE, 0);
1070 #ifdef ELF_PLAT_INIT
1072 * The ABI may specify that certain registers be set up in special
1073 * ways (on i386 %edx is the address of a DT_FINI function, for
1074 * example). In addition, it may also specify (eg, PowerPC64 ELF)
1075 * that the e_entry field is the address of the function descriptor
1076 * for the startup routine, rather than the address of the startup
1077 * routine itself. This macro performs whatever initialization to
1078 * the regs structure is required as well as any relocations to the
1079 * function descriptor entries when executing dynamically linked apps.
1081 ELF_PLAT_INIT(regs, reloc_func_desc);
1084 start_thread(regs, elf_entry, bprm->p);
1093 kfree(interp_elf_phdata);
1094 allow_write_access(interpreter);
1098 kfree(elf_interpreter);
1104 #ifdef CONFIG_USELIB
1105 /* This is really simpleminded and specialized - we are loading an
1106 a.out library that is given an ELF header. */
1107 static int load_elf_library(struct file *file)
1109 struct elf_phdr *elf_phdata;
1110 struct elf_phdr *eppnt;
1111 unsigned long elf_bss, bss, len;
1112 int retval, error, i, j;
1113 struct elfhdr elf_ex;
1116 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1117 if (retval != sizeof(elf_ex))
1120 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1123 /* First of all, some simple consistency checks */
1124 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1125 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1128 /* Now read in all of the header information */
1130 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1131 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1134 elf_phdata = kmalloc(j, GFP_KERNEL);
1140 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1144 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1145 if ((eppnt + i)->p_type == PT_LOAD)
1150 while (eppnt->p_type != PT_LOAD)
1153 /* Now use mmap to map the library into memory. */
1154 error = vm_mmap(file,
1155 ELF_PAGESTART(eppnt->p_vaddr),
1157 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1158 PROT_READ | PROT_WRITE | PROT_EXEC,
1159 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1161 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1162 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1165 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1166 if (padzero(elf_bss)) {
1171 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1173 bss = eppnt->p_memsz + eppnt->p_vaddr;
1175 vm_brk(len, bss - len);
1183 #endif /* #ifdef CONFIG_USELIB */
1185 #ifdef CONFIG_ELF_CORE
1189 * Modelled on fs/exec.c:aout_core_dump()
1190 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1194 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1195 * that are useful for post-mortem analysis are included in every core dump.
1196 * In that way we ensure that the core dump is fully interpretable later
1197 * without matching up the same kernel and hardware config to see what PC values
1198 * meant. These special mappings include - vDSO, vsyscall, and other
1199 * architecture specific mappings
1201 static bool always_dump_vma(struct vm_area_struct *vma)
1203 /* Any vsyscall mappings? */
1204 if (vma == get_gate_vma(vma->vm_mm))
1208 * Assume that all vmas with a .name op should always be dumped.
1209 * If this changes, a new vm_ops field can easily be added.
1211 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1215 * arch_vma_name() returns non-NULL for special architecture mappings,
1216 * such as vDSO sections.
1218 if (arch_vma_name(vma))
1225 * Decide what to dump of a segment, part, all or none.
/*
 * vma_dump_size() - work out how many bytes of a mapping go into the core.
 * @vma:      mapping under consideration.
 * @mm_flags: coredump filter bits, tested through FILTER() below.
 *
 * Runs a sequence of tests on vma->vm_flags, the backing file and the
 * filter bits. The result visible at the end of the function is the full
 * length of the mapping (vm_end - vm_start).
 */
1227 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1228 unsigned long mm_flags)
/* FILTER(type) is non-zero when bit MMF_DUMP_<type> is set in mm_flags. */
1230 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1232 /* always dump the vdso and vsyscall sections */
1233 if (always_dump_vma(vma))
/* VM_DONTDUMP is checked before any of the per-type filter bits. */
1236 if (vma->vm_flags & VM_DONTDUMP)
1239 /* Hugetlb memory check */
1240 if (vma->vm_flags & VM_HUGETLB) {
/* Shared and private hugetlb mappings are governed by separate bits. */
1241 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1243 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1248 /* Do not dump I/O mapped devices or special mappings */
1249 if (vma->vm_flags & VM_IO)
1252 /* By default, dump shared memory if mapped from an anonymous file. */
1253 if (vma->vm_flags & VM_SHARED) {
/*
 * A backing inode with a link count of zero selects the ANON_SHARED
 * filter bit; any other link count selects MAPPED_SHARED.
 */
1254 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1255 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1260 /* Dump segments that have been written to. */
1261 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1263 if (vma->vm_file == NULL)
1266 if (FILTER(MAPPED_PRIVATE))
1270 * If this looks like the beginning of a DSO or executable mapping,
1271 * check for an ELF header. If we find one, dump the first page to
1272 * aid in determining what was mapped here.
/* Only readable mappings that start at file offset 0 are probed. */
1274 if (FILTER(ELF_HEADERS) &&
1275 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
/* The first 32-bit word of the mapping is the candidate ELF magic. */
1276 u32 __user *header = (u32 __user *) vma->vm_start;
1278 mm_segment_t fs = get_fs();
1280 * Doing it this way gets the constant folded by GCC.
1284 char elfmag[SELFMAG];
/* Compile-time guarantee that the magic is exactly one probe word wide. */
1286 BUILD_BUG_ON(SELFMAG != sizeof word);
1287 magic.elfmag[EI_MAG0] = ELFMAG0;
1288 magic.elfmag[EI_MAG1] = ELFMAG1;
1289 magic.elfmag[EI_MAG2] = ELFMAG2;
1290 magic.elfmag[EI_MAG3] = ELFMAG3;
1292 * Switch to the user "segment" for get_user(),
1293 * then put back what elf_core_dump() had in place.
/* A failing read of the user word is the unlikely path. */
1296 if (unlikely(get_user(word, header)))
/* The word read from user memory is compared against the built magic. */
1299 if (word == magic.cmp)
/* Result at the end of the function: the whole span of the mapping. */
1308 return vma->vm_end - vma->vm_start;
1311 /* An ELF note in memory */
/*
 * datasz: byte length of the note payload. writenote() stores it as
 * n_descsz and emits that many bytes from ->data.
 */
1316 unsigned int datasz;
/*
 * notesize() - number of bytes @en occupies once written out.
 * @en: in-memory note; ->name must be a NUL-terminated string.
 *
 * The total is the fixed struct elf_note header, plus the name and the
 * payload, each rounded up to a multiple of 4.
 */
1320 static int notesize(struct memelfnote *en)
1324 sz = sizeof(struct elf_note);
/* The name is counted with its terminating NUL, then padded to 4 bytes. */
1325 sz += roundup(strlen(en->name) + 1, 4);
/* The payload is padded to 4 bytes in the same way. */
1326 sz += roundup(en->datasz, 4);
/*
 * writenote() - emit one note to the core file.
 * @men:  in-memory note to write.
 * @cprm: coredump state handed to dump_emit()/dump_align().
 *
 * Writes the struct elf_note header, the name (including its NUL), padding
 * to a 4-byte boundary, the payload, and padding to a 4-byte boundary.
 *
 * Return: non-zero only if every dump_emit()/dump_align() call in the
 * chain returned non-zero; the && chain stops at the first failure.
 */
1331 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
/* n_namesz includes the terminating NUL, matching notesize(). */
1334 en.n_namesz = strlen(men->name) + 1;
1335 en.n_descsz = men->datasz;
1336 en.n_type = men->type;
1338 return dump_emit(cprm, &en, sizeof(en)) &&
1339 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1340 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
/*
 * fill_elf_header() - initialise the ELF file header of a core file.
 * @elf:     header to fill; it is zeroed first, so fields not set here are 0.
 * @segs:    value stored in e_phnum.
 * @machine: value stored in e_machine.
 * @flags:   value stored in e_flags.
 */
1343 static void fill_elf_header(struct elfhdr *elf, int segs,
1344 u16 machine, u32 flags)
1346 memset(elf, 0, sizeof(*elf));
/* Identification bytes: magic, class, data encoding, version, OS ABI. */
1348 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1349 elf->e_ident[EI_CLASS] = ELF_CLASS;
1350 elf->e_ident[EI_DATA] = ELF_DATA;
1351 elf->e_ident[EI_VERSION] = EV_CURRENT;
1352 elf->e_ident[EI_OSABI] = ELF_OSABI;
1354 elf->e_type = ET_CORE;
1355 elf->e_machine = machine;
1356 elf->e_version = EV_CURRENT;
/* The program header table starts directly after the ELF header. */
1357 elf->e_phoff = sizeof(struct elfhdr);
1358 elf->e_flags = flags;
1359 elf->e_ehsize = sizeof(struct elfhdr);
1360 elf->e_phentsize = sizeof(struct elf_phdr);
1361 elf->e_phnum = segs;
/*
 * fill_elf_note_phdr() - describe the notes area in a program header.
 * @phdr:   program header to fill.
 * @sz:     size of the notes area in the file, stored in p_filesz.
 * @offset: file offset of the notes area, stored in p_offset.
 */
1366 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1368 phdr->p_type = PT_NOTE;
1369 phdr->p_offset = offset;
1372 phdr->p_filesz = sz;
/*
 * fill_note() - describe one note in a struct memelfnote.
 * @note: note descriptor to fill.
 * @name: note owner name; callers in this file pass "CORE" or "LINUX".
 * @type: note type (NT_* value).
 * @sz:   payload size in bytes.
 * @data: payload pointer.
 *
 * NOTE(review): presumably the arguments are stored unchanged in @note,
 * since notesize() and writenote() later read ->name, ->type, ->datasz and
 * ->data. If so, @data is not copied and must stay valid until the note
 * has been written - confirm against the body.
 */
1379 static void fill_note(struct memelfnote *note, const char *name, int type,
1380 unsigned int sz, void *data)
1390 * fill up all the fields in prstatus from the given task struct, except
1391 * registers which need to be filled up separately.
/*
 * fill_prstatus() - fill the non-register part of an elf_prstatus.
 * @prstatus: record to fill; pr_reg is not touched here.
 * @p:        task being described.
 * @signr:    signal number, stored as both pr_info.si_signo and pr_cursig.
 */
1393 static void fill_prstatus(struct elf_prstatus *prstatus,
1394 struct task_struct *p, long signr)
1396 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
/* Only word 0 of the pending and blocked signal sets is recorded. */
1397 prstatus->pr_sigpend = p->pending.signal.sig[0];
1398 prstatus->pr_sighold = p->blocked.sig[0];
/* All ids come from the *_vnr() helpers. */
1400 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1402 prstatus->pr_pid = task_pid_vnr(p);
1403 prstatus->pr_pgrp = task_pgrp_vnr(p);
1404 prstatus->pr_sid = task_session_vnr(p);
1405 if (thread_group_leader(p)) {
1406 struct task_cputime cputime;
1409 * This is the record for the group leader. It shows the
1410 * group-wide total, not its individual thread total.
1412 thread_group_cputime(p, &cputime);
1413 cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
1414 cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
/* Not the group leader: report this task's own user and system time. */
1416 cputime_t utime, stime;
1418 task_cputime(p, &utime, &stime);
1419 cputime_to_timeval(utime, &prstatus->pr_utime);
1420 cputime_to_timeval(stime, &prstatus->pr_stime);
/* The cutime/cstime values are taken from the shared signal struct. */
1422 cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
1423 cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
/*
 * fill_psinfo() - fill an elf_prpsinfo record for a task.
 * @psinfo: record to fill; it is zeroed first.
 * @p:      task whose ids, state, nice value, flags, credentials and comm
 *          are recorded.
 * @mm:     address space whose argument area [arg_start, arg_end) supplies
 *          pr_psargs.
 */
1426 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1427 struct mm_struct *mm)
1429 const struct cred *cred;
1430 unsigned int i, len;
1432 /* first copy the parameters from user space */
1433 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
/* Clamp the argument length so a terminating NUL still fits in pr_psargs. */
1435 len = mm->arg_end - mm->arg_start;
1436 if (len >= ELF_PRARGSZ)
1437 len = ELF_PRARGSZ-1;
1438 if (copy_from_user(&psinfo->pr_psargs,
1439 (const char __user *)mm->arg_start, len))
/* NUL separators become spaces, giving one printable string. */
1441 for(i = 0; i < len; i++)
1442 if (psinfo->pr_psargs[i] == 0)
1443 psinfo->pr_psargs[i] = ' ';
1444 psinfo->pr_psargs[len] = 0;
1447 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1449 psinfo->pr_pid = task_pid_vnr(p);
1450 psinfo->pr_pgrp = task_pgrp_vnr(p);
1451 psinfo->pr_sid = task_session_vnr(p);
/*
 * Map p->state to a small index: 0 when state is 0, otherwise
 * ffz(~state) + 1 (presumably the position of the lowest set bit plus
 * one - confirm ffz() semantics). Indexes above 5 are shown as '.'.
 */
1453 i = p->state ? ffz(~p->state) + 1 : 0;
1454 psinfo->pr_state = i;
1455 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1456 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1457 psinfo->pr_nice = task_nice(p);
1458 psinfo->pr_flag = p->flags;
/* uid and gid are translated into the credential's own user namespace. */
1460 cred = __task_cred(p);
1461 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1462 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
/* The copy is bounded by the destination field size. */
1464 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
/*
 * fill_auxv_note() - describe the saved auxiliary vector as an NT_AUXV note.
 * @note: note descriptor to fill.
 * @mm:   address space whose saved_auxv array becomes the payload. The
 *        array is referenced, not copied.
 */
1469 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1471 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
/*
 * Loop until the entry two slots before index i is AT_NULL.
 * NOTE(review): presumably i advances by two (type/value pairs) per
 * iteration, so i ends just past the AT_NULL pair - confirm.
 */
1475 while (auxv[i - 2] != AT_NULL);
/* The payload covers i elements of the vector. */
1476 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
/*
 * fill_siginfo_note() - build the NT_SIGINFO note.
 * @note:     note descriptor to fill.
 * @csigdata: caller-provided buffer that receives the user-layout siginfo
 *            and becomes the note payload.
 * @siginfo:  siginfo to convert.
 */
1479 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1480 const siginfo_t *siginfo)
/*
 * The current address limit is saved first. NOTE(review): presumably it is
 * switched around the copy below, because @csigdata is passed where a
 * __user pointer is expected - confirm.
 */
1482 mm_segment_t old_fs = get_fs();
1484 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1486 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
/* Upper bound (4 MiB) on the buffer fill_files_note() will try to use. */
1489 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1491 * Format of NT_FILE note:
1493 * long count -- how many files are mapped
1494 * long page_size -- units for file_ofs
1495 * array of [COUNT] elements of
1499 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
/*
 * fill_files_note() - build the NT_FILE note for current's address space.
 * @note: note descriptor to fill; the payload is a vmalloc()ed buffer.
 *
 * Walks current->mm->mmap and records, per mapping, the start address, end
 * address and page offset, followed by the path names.
 *
 * Return: callers treat 0 as success (the note is only counted and written
 * when 0 is returned).
 */
1501 static int fill_files_note(struct memelfnote *note)
1503 struct vm_area_struct *vma;
1504 unsigned count, size, names_ofs, remaining, n;
1506 user_long_t *start_end_ofs;
1507 char *name_base, *name_curpos;
1509 /* *Estimated* file count and total data size needed */
1510 count = current->mm->map_count;
/* Names start after 2 header words plus 3 words per estimated mapping. */
1513 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1515 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1517 size = round_up(size, PAGE_SIZE);
1518 data = vmalloc(size);
/* Triples are written from data[2]; names are written from names_ofs. */
1522 start_end_ofs = data + 2;
1523 name_base = name_curpos = ((char *)data) + names_ofs;
1524 remaining = size - names_ofs;
1526 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1528 const char *filename;
1530 file = vma->vm_file;
1533 filename = d_path(&file->f_path, name_curpos, remaining);
1534 if (IS_ERR(filename)) {
/* The name did not fit in the remaining space: grow the size by 25%. */
1535 if (PTR_ERR(filename) == -ENAMETOOLONG) {
1537 size = size * 5 / 4;
1543 /* d_path() fills at the end, move name down */
1544 /* n = strlen(filename) + 1: */
1545 n = (name_curpos + remaining) - filename;
1546 remaining = filename - name_curpos;
1547 memmove(name_curpos, filename, n);
/* One (start, end, page offset) triple per recorded mapping. */
1550 *start_end_ofs++ = vma->vm_start;
1551 *start_end_ofs++ = vma->vm_end;
1552 *start_end_ofs++ = vma->vm_pgoff;
1556 /* Now we know exact count of files, can store it */
1558 data[1] = PAGE_SIZE;
1560 * Count usually is less than current->mm->map_count,
1561 * we need to move filenames down.
/* n is the number of estimated entries that were not used. */
1563 n = current->mm->map_count - count;
/* Close the gap left by the unused triples (3 words each). */
1565 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1566 memmove(name_base - shift_bytes, name_base,
1567 name_curpos - name_base);
1568 name_curpos -= shift_bytes;
/* The final payload size is everything up to the end of the last name. */
1571 size = name_curpos - (char *)data;
1572 fill_note(note, "CORE", NT_FILE, size, data);
1576 #ifdef CORE_DUMP_USE_REGSET
1577 #include <linux/regset.h>
/*
 * Per-thread core dump state, kept on a singly linked list headed by
 * elf_note_info.thread.
 */
1579 struct elf_thread_core_info {
/* Next thread on the list, or NULL. */
1580 struct elf_thread_core_info *next;
/* Task this record describes. */
1581 struct task_struct *task;
/* Storage for the NT_PRSTATUS payload; notes[0].data points at it. */
1582 struct elf_prstatus prstatus;
/*
 * Trailing array of notes. The real length is fixed when the structure is
 * allocated, using offsetof(..., notes[info->thread_notes]).
 */
1583 struct memelfnote notes[0];
/*
 * Note state for a whole core dump when CORE_DUMP_USE_REGSET is defined.
 */
1586 struct elf_note_info {
/* Head of the per-thread list. */
1587 struct elf_thread_core_info *thread;
/* Process-wide notes: NT_PRPSINFO, NT_SIGINFO, NT_AUXV, NT_FILE. */
1588 struct memelfnote psinfo;
1589 struct memelfnote signote;
1590 struct memelfnote auxv;
1591 struct memelfnote files;
/* Backing storage for the NT_SIGINFO payload. */
1592 user_siginfo_t csigdata;
1598 * When a regset has a writeback hook, we call it on each thread before
1599 * dumping user memory. On register window machines, this makes sure the
1600 * user memory backing the register data is up to date before we read it.
/*
 * do_thread_regset_writeback() - call a regset's writeback hook, if any.
 * @task:   thread whose register state is written back.
 * @regset: regset to process; nothing happens when ->writeback is NULL.
 *
 * The hook is called with 1 as its third argument.
 */
1602 static void do_thread_regset_writeback(struct task_struct *task,
1603 const struct user_regset *regset)
1605 if (regset->writeback)
1606 regset->writeback(task, regset, 1);
/*
 * Accessors used by fill_thread_core_info() for the prstatus record. The
 * #ifndef guards show that PRSTATUS_SIZE and SET_PR_FPVALID may already be
 * defined elsewhere; the definitions here are the plain defaults.
 */
/* Size of the register block handed to the regset ->get() call. */
1610 #define PR_REG_SIZE(S) sizeof(S)
1613 #ifndef PRSTATUS_SIZE
/* Size of the NT_PRSTATUS note payload. */
1614 #define PRSTATUS_SIZE(S) sizeof(S)
/* Address of the pr_reg member inside a prstatus record. */
1618 #define PR_REG_PTR(S) (&((S)->pr_reg))
1621 #ifndef SET_PR_FPVALID
/* Store V in the pr_fpvalid member of a prstatus record. */
1622 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
/*
 * fill_thread_core_info() - collect all register notes for one thread.
 * @t:     per-thread record; t->task selects the thread, and t->prstatus and
 *         t->notes[] are filled in.
 * @view:  regset view describing which register sets exist.
 * @signr: signal number recorded in the prstatus record.
 * @total: running byte count of all notes; the size of every note created
 *         here is added to it.
 *
 * Return: the caller treats a zero return as failure.
 */
1625 static int fill_thread_core_info(struct elf_thread_core_info *t,
1626 const struct user_regset_view *view,
1627 long signr, size_t *total)
1632 * NT_PRSTATUS is the one special case, because the regset data
1633 * goes into the pr_reg field inside the note contents, rather
1634 * than being the whole note contents. We fill the rest in here.
1635 * We assume that regset 0 is NT_PRSTATUS.
1637 fill_prstatus(&t->prstatus, t->task, signr);
/* The result of reading regset 0 is deliberately discarded. */
1638 (void) view->regsets[0].get(t->task, &view->regsets[0],
1639 0, PR_REG_SIZE(t->prstatus.pr_reg),
1640 PR_REG_PTR(&t->prstatus), NULL);
1642 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1643 PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1644 *total += notesize(&t->notes[0]);
1646 do_thread_regset_writeback(t->task, &view->regsets[0]);
1649 * Each other regset might generate a note too. For each regset
1650 * that has no core_note_type or is inactive, we leave t->notes[i]
1651 * all zero and we'll know to skip writing it later.
1653 for (i = 1; i < view->n; ++i) {
1654 const struct user_regset *regset = &view->regsets[i];
/* Writeback runs for every regset, including ones that yield no note. */
1655 do_thread_regset_writeback(t->task, regset);
1656 if (regset->core_note_type && regset->get &&
1657 (!regset->active || regset->active(t->task, regset))) {
1659 size_t size = regset->n * regset->size;
/*
 * Allocate zeroed memory: all `size` bytes become the note payload and
 * are written to the core file, so any byte that ->get() leaves
 * untouched must not expose stale kernel heap contents.
 */
1660 void *data = kzalloc(size, GFP_KERNEL);
1661 if (unlikely(!data))
1663 ret = regset->get(t->task, regset,
1664 0, size, data, NULL);
/* NT_PRFPREG uses the "CORE" owner name; other regsets use "LINUX". */
1668 if (regset->core_note_type != NT_PRFPREG)
1669 fill_note(&t->notes[i], "LINUX",
1670 regset->core_note_type,
1673 SET_PR_FPVALID(&t->prstatus, 1);
1674 fill_note(&t->notes[i], "CORE",
1675 NT_PRFPREG, size, data);
1677 *total += notesize(&t->notes[i]);
/*
 * fill_note_info() - gather every note for the dump (regset variant).
 * @elf:     ELF header to initialise.
 * @phdrs:   program header count stored in the ELF header.
 * @info:    note state to populate.
 * @siginfo: signal information; si_signo is recorded for every thread.
 * @regs:    not referenced by the code visible in this variant.
 *
 * Return: the caller treats a zero return as failure.
 */
1685 static int fill_note_info(struct elfhdr *elf, int phdrs,
1686 struct elf_note_info *info,
1687 const siginfo_t *siginfo, struct pt_regs *regs)
1689 struct task_struct *dump_task = current;
1690 const struct user_regset_view *view = task_user_regset_view(dump_task);
1691 struct elf_thread_core_info *t;
1692 struct elf_prpsinfo *psinfo;
1693 struct core_thread *ct;
1697 info->thread = NULL;
1699 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1700 if (psinfo == NULL) {
1701 info->psinfo.data = NULL; /* So we don't free this wrongly */
/* From here on the psinfo buffer is owned through info->psinfo.data. */
1705 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1708 * Figure out how many notes we're going to need for each thread.
/* Only regsets with a non-zero core_note_type are counted. */
1710 info->thread_notes = 0;
1711 for (i = 0; i < view->n; ++i)
1712 if (view->regsets[i].core_note_type != 0)
1713 ++info->thread_notes;
1716 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1717 * since it is our one special case.
1719 if (unlikely(info->thread_notes == 0) ||
1720 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1726 * Initialize the ELF file header.
/* The machine and flags values come from the regset view. */
1728 fill_elf_header(elf, phdrs,
1729 view->e_machine, view->e_flags);
1732 * Allocate a structure for each thread.
/* The walk starts at the dumper entry itself and follows ->next. */
1734 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
/* Zeroed allocation sized for thread_notes trailing notes. */
1735 t = kzalloc(offsetof(struct elf_thread_core_info,
1736 notes[info->thread_notes]),
/* The dumping task, or the first entry seen, becomes the list head. */
1742 if (ct->task == dump_task || !info->thread) {
1743 t->next = info->thread;
1747 * Make sure to keep the original task at
1748 * the head of the list.
/* Otherwise the entry is inserted directly after the head. */
1750 t->next = info->thread->next;
1751 info->thread->next = t;
1756 * Now fill in each thread's information.
/* info->size accumulates the byte size of every note created. */
1758 for (t = info->thread; t != NULL; t = t->next)
1759 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1763 * Fill in the two process-wide notes.
/* psinfo describes the thread-group leader, not the dumping thread. */
1765 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1766 info->size += notesize(&info->psinfo);
1768 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1769 info->size += notesize(&info->signote);
1771 fill_auxv_note(&info->auxv, current->mm);
1772 info->size += notesize(&info->auxv);
/* The NT_FILE note is optional: it is only counted when built successfully. */
1774 if (fill_files_note(&info->files) == 0)
1775 info->size += notesize(&info->files);
/*
 * get_note_info_size() - total size in bytes of all notes in @info.
 *
 * NOTE(review): presumably returns info->size, the running total that
 * fill_note_info() accumulates - confirm against the body.
 */
1780 static size_t get_note_info_size(struct elf_note_info *info)
1786 * Write all the notes for each thread. When writing the first thread, the
1787 * process-wide notes are interleaved after the first thread-specific note.
/*
 * write_note_info() - write every collected note (regset variant).
 * @info: note state built by fill_note_info().
 * @cprm: coredump state passed through to writenote().
 *
 * Each thread's notes[0] is written first. While `first` is set, the
 * process-wide notes follow it, and then the thread's remaining notes are
 * written.
 */
1789 static int write_note_info(struct elf_note_info *info,
1790 struct coredump_params *cprm)
1793 struct elf_thread_core_info *t = info->thread;
1798 if (!writenote(&t->notes[0], cprm))
1801 if (first && !writenote(&info->psinfo, cprm))
1803 if (first && !writenote(&info->signote, cprm))
1805 if (first && !writenote(&info->auxv, cprm))
/* The NT_FILE note is only written when its payload exists. */
1807 if (first && info->files.data &&
1808 !writenote(&info->files, cprm))
/* Slots with no payload are skipped. */
1811 for (i = 1; i < info->thread_notes; ++i)
1812 if (t->notes[i].data &&
1813 !writenote(&t->notes[i], cprm))
/*
 * free_note_info() - release what fill_note_info() allocated (regset variant).
 * @info: note state to tear down.
 */
1823 static void free_note_info(struct elf_note_info *info)
1825 struct elf_thread_core_info *threads = info->thread;
1828 struct elf_thread_core_info *t = threads;
/* notes[0] must be unset or point at the embedded prstatus; it is not freed. */
1830 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
/* Payloads of notes[1..] are kfree()d. */
1831 for (i = 1; i < info->thread_notes; ++i)
1832 kfree(t->notes[i].data);
/* The psinfo payload is kfree()d; the files payload is vfree()d. */
1835 kfree(info->psinfo.data);
1836 vfree(info->files.data);
1841 /* Here is the structure in which status of each thread is captured. */
/*
 * Per-thread state for the non-regset dump path. The note payloads are
 * embedded here and notes[] points back into this structure.
 */
1842 struct elf_thread_status
/* Link on elf_note_info.thread_list. */
1844 struct list_head list;
1845 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1846 elf_fpregset_t fpu; /* NT_PRFPREG */
/* Task this record describes. */
1847 struct task_struct *thread;
1848 #ifdef ELF_CORE_COPY_XFPREGS
1849 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
/* Up to three notes: prstatus, fpu and (optionally) xfpu. */
1851 struct memelfnote notes[3];
1856 * In order to add the specific thread information for the elf file format,
1857 we need to keep a linked list of every thread's pr_status and then create
1858 * a single section for them in the final core file.
/*
 * elf_dump_thread_status() - build the notes for one thread.
 * @signr: signal number recorded in the prstatus record.
 * @t:     per-thread record; t->thread selects the task.
 *
 * The sizes of the notes created are accumulated in sz.
 */
1860 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1863 struct task_struct *p = t->thread;
1866 fill_prstatus(&t->prstatus, p, signr);
1867 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1869 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1872 sz += notesize(&t->notes[0]);
/* pr_fpvalid records the result of copying the FP registers. */
1874 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1876 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1879 sz += notesize(&t->notes[1]);
1882 #ifdef ELF_CORE_COPY_XFPREGS
/* The extended FP note is added only when the copy helper returns non-zero. */
1883 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1884 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1885 sizeof(t->xfpu), &t->xfpu);
1887 sz += notesize(&t->notes[2]);
/*
 * Note state for a whole core dump on the non-regset path.
 */
1893 struct elf_note_info {
/* Array of process-wide notes, allocated by elf_note_info_init(). */
1894 struct memelfnote *notes;
/* Points at the NT_FILE entry in notes[] when one was built, else NULL. */
1895 struct memelfnote *notes_files;
1896 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1897 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
/* List of struct elf_thread_status, one per thread other than current. */
1898 struct list_head thread_list;
/* Buffer for current's FP registers (NT_PRFPREG payload). */
1899 elf_fpregset_t *fpu;
1900 #ifdef ELF_CORE_COPY_XFPREGS
1901 elf_fpxregset_t *xfpu;
/* Backing storage for the NT_SIGINFO payload. */
1903 user_siginfo_t csigdata;
/* Sum of the note sizes of all entries on thread_list. */
1904 int thread_status_size;
/*
 * elf_note_info_init() - zero @info and allocate its buffers.
 * @info: note state to initialise.
 *
 * Return: the caller treats a zero return as failure.
 */
1908 static int elf_note_info_init(struct elf_note_info *info)
/* Zeroing first leaves every pointer NULL and every counter 0. */
1910 memset(info, 0, sizeof(*info));
1911 INIT_LIST_HEAD(&info->thread_list);
1913 /* Allocate space for ELF notes */
/* Room for 8 process-wide notes. */
1914 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1917 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1920 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1921 if (!info->prstatus)
1923 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1926 #ifdef ELF_CORE_COPY_XFPREGS
1927 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
/*
 * fill_note_info() - gather every note for the dump (non-regset variant).
 * @elf:     ELF header to initialise.
 * @phdrs:   program header count stored in the ELF header.
 * @info:    note state to populate.
 * @siginfo: signal information; si_signo is recorded in each prstatus.
 * @regs:    register state of the dumping task.
 *
 * Return: the caller treats a zero return as failure.
 */
1934 static int fill_note_info(struct elfhdr *elf, int phdrs,
1935 struct elf_note_info *info,
1936 const siginfo_t *siginfo, struct pt_regs *regs)
1938 struct list_head *t;
1939 struct core_thread *ct;
1940 struct elf_thread_status *ets;
1942 if (!elf_note_info_init(info))
/*
 * The walk starts at dumper.next, so the dumper entry itself is not put
 * on thread_list; current is handled separately below.
 */
1945 for (ct = current->mm->core_state->dumper.next;
1946 ct; ct = ct->next) {
1947 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1951 ets->thread = ct->task;
1952 list_add(&ets->list, &info->thread_list);
/* Build each listed thread's notes and add up their sizes. */
1955 list_for_each(t, &info->thread_list) {
1958 ets = list_entry(t, struct elf_thread_status, list);
1959 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1960 info->thread_status_size += sz;
1962 /* now collect the dump for the current */
1963 memset(info->prstatus, 0, sizeof(*info->prstatus));
1964 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1965 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1968 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
1971 * Set up the notes in similar form to SVR4 core dumps made
1972 * with info from their /proc.
/* Slots 0-3 are fixed: prstatus, psinfo, siginfo, auxv. */
1975 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1976 sizeof(*info->prstatus), info->prstatus);
1977 fill_psinfo(info->psinfo, current->group_leader, current->mm);
1978 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1979 sizeof(*info->psinfo), info->psinfo);
1981 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
1982 fill_auxv_note(info->notes + 3, current->mm);
/* Later notes are placed at index numnote; NT_FILE is kept only on success. */
1985 if (fill_files_note(info->notes + info->numnote) == 0) {
1986 info->notes_files = info->notes + info->numnote;
1990 /* Try to dump the FPU. */
1991 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1993 if (info->prstatus->pr_fpvalid)
1994 fill_note(info->notes + info->numnote++,
1995 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1996 #ifdef ELF_CORE_COPY_XFPREGS
1997 if (elf_core_copy_task_xfpregs(current, info->xfpu))
1998 fill_note(info->notes + info->numnote++,
1999 "LINUX", ELF_CORE_XFPREG_TYPE,
2000 sizeof(*info->xfpu), info->xfpu);
/*
 * get_note_info_size() - total size in bytes of all notes in @info.
 *
 * Adds the size of each of the numnote process-wide notes to the
 * pre-computed per-thread total.
 */
2006 static size_t get_note_info_size(struct elf_note_info *info)
2011 for (i = 0; i < info->numnote; i++)
2012 sz += notesize(info->notes + i);
2014 sz += info->thread_status_size;
/*
 * write_note_info() - write every collected note (non-regset variant).
 * @info: note state built by fill_note_info().
 * @cprm: coredump state passed through to writenote().
 *
 * The process-wide notes go first, followed by each listed thread's notes.
 */
2019 static int write_note_info(struct elf_note_info *info,
2020 struct coredump_params *cprm)
2023 struct list_head *t;
2025 for (i = 0; i < info->numnote; i++)
2026 if (!writenote(info->notes + i, cprm))
2029 /* write out the thread status notes section */
2030 list_for_each(t, &info->thread_list) {
2031 struct elf_thread_status *tmp =
2032 list_entry(t, struct elf_thread_status, list);
/* Only the first num_notes entries of tmp->notes are written. */
2034 for (i = 0; i < tmp->num_notes; i++)
2035 if (!writenote(&tmp->notes[i], cprm))
/*
 * free_note_info() - release what fill_note_info() allocated (non-regset
 * variant).
 * @info: note state to tear down.
 */
2042 static void free_note_info(struct elf_note_info *info)
/* Free the per-thread records one at a time, taking the first list entry. */
2044 while (!list_empty(&info->thread_list)) {
2045 struct list_head *tmp = info->thread_list.next;
2047 kfree(list_entry(tmp, struct elf_thread_status, list));
2050 /* Free data possibly allocated by fill_files_note(): */
2051 if (info->notes_files)
2052 vfree(info->notes_files->data);
2054 kfree(info->prstatus);
2055 kfree(info->psinfo);
2058 #ifdef ELF_CORE_COPY_XFPREGS
/*
 * first_vma() - starting point for a walk over a task's mappings.
 * @tsk:      task whose mm->mmap list is walked.
 * @gate_vma: gate vma, or NULL.
 *
 * NOTE(review): presumably falls back to @gate_vma when the task's list is
 * empty - the code that uses @gate_vma is not shown here; confirm.
 */
2065 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2066 struct vm_area_struct *gate_vma)
2068 struct vm_area_struct *ret = tsk->mm->mmap;
2075 * Helper function for iterating across a vma list. It ensures that the caller
2076 * will visit `gate_vma' prior to terminating the search.
/*
 * next_vma() - step to the next mapping in a walk begun with first_vma().
 * @this_vma: mapping just visited.
 * @gate_vma: gate vma, or NULL.
 */
2078 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2079 struct vm_area_struct *gate_vma)
2081 struct vm_area_struct *ret;
/* The candidate successor is the next entry on the vma list. */
2083 ret = this_vma->vm_next;
/* The gate vma is special-cased when it is the one just visited. */
2086 if (this_vma == gate_vma)
/*
 * fill_extnum_info() - set up the extended-numbering section header.
 * @elf:         ELF header; its section header fields are updated.
 * @shdr4extnum: section header to fill; it is zeroed first.
 * @e_shoff:     file offset of the section header, stored in elf->e_shoff.
 * @segs:        real program header count, stored in sh_info.
 */
2091 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2092 elf_addr_t e_shoff, int segs)
2094 elf->e_shoff = e_shoff;
2095 elf->e_shentsize = sizeof(*shdr4extnum);
2097 elf->e_shstrndx = SHN_UNDEF;
2099 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
/* The section header mirrors e_shnum and e_shstrndx and carries segs. */
2101 shdr4extnum->sh_type = SHT_NULL;
2102 shdr4extnum->sh_size = elf->e_shnum;
2103 shdr4extnum->sh_link = elf->e_shstrndx;
2104 shdr4extnum->sh_info = segs;
2110 * This is a two-pass process; first we find the offsets of the bits,
2111 * and then they are actually written out. If we run out of core limit
/*
 * elf_core_dump() - write an ELF core file for the current task.
 * @cprm: coredump parameters. siginfo, regs and mm_flags are read here, and
 *        the structure is passed to every dump_emit()/dump_skip() call.
 *
 * Layout computed and written, in order: ELF header, the PT_NOTE program
 * header, one PT_LOAD program header per mapping, the notes, the mapping
 * contents starting at dataoff, and finally the extended-numbering section
 * header when e_phnum had to be clamped to PN_XNUM.
 */
2114 static int elf_core_dump(struct coredump_params *cprm)
2119 size_t vma_data_size = 0;
2120 struct vm_area_struct *vma, *gate_vma;
2121 struct elfhdr *elf = NULL;
2122 loff_t offset = 0, dataoff;
2123 struct elf_note_info info = { };
2124 struct elf_phdr *phdr4note = NULL;
2125 struct elf_shdr *shdr4extnum = NULL;
2128 elf_addr_t *vma_filesz = NULL;
2131 * We no longer stop all VM operations.
2133 * This is because those proceses that could possibly change map_count
2134 * or the mmap / vma pages are now blocked in do_exit on current
2135 * finishing this core dump.
2137 * Only ptrace can touch these memory addresses, but it doesn't change
2138 * the map_count or the pages allocated. So no possibility of crashing
2139 * exists while dumping the mm->vm_next areas to the core file.
2142 /* alloc memory for large data structures: too large to be on stack */
2143 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2147 * The number of segs are recored into ELF header as 16bit value.
2148 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
/* segs starts from the mapping count plus any extra program headers. */
2150 segs = current->mm->map_count;
2151 segs += elf_core_extra_phdrs();
2153 gate_vma = get_gate_vma(current->mm);
2154 if (gate_vma != NULL)
2157 /* for notes section */
2160 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2161 * this, kernel supports extended numbering. Have a look at
2162 * include/linux/elf.h for further information. */
2163 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2166 * Collect all the non-memory information about the process for the
2167 * notes. This also sets up the file header.
2169 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
/* First pass: work out file offsets without writing anything. */
2177 offset += sizeof(*elf); /* Elf header */
2178 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2180 /* Write notes phdr entry */
2182 size_t sz = get_note_info_size(&info);
2184 sz += elf_coredump_extra_notes_size();
2186 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2190 fill_elf_note_phdr(phdr4note, sz, offset);
/* The mapping data starts at the next ELF_EXEC_PAGESIZE boundary. */
2194 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
/*
 * vma_filesz caches each mapping's dump size so that the program-header
 * pass and the data pass below use the same values.
 */
2196 vma_filesz = kmalloc_array(segs - 1, sizeof(*vma_filesz), GFP_KERNEL);
2200 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2201 vma = next_vma(vma, gate_vma)) {
2202 unsigned long dump_size;
2204 dump_size = vma_dump_size(vma, cprm->mm_flags);
2205 vma_filesz[i++] = dump_size;
2206 vma_data_size += dump_size;
2209 offset += vma_data_size;
2210 offset += elf_core_extra_data_size();
/* Extended numbering is only set up when e_phnum was clamped. */
2213 if (e_phnum == PN_XNUM) {
2214 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2217 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
/* Second pass: emit the headers, the notes and then the memory contents. */
2222 if (!dump_emit(cprm, elf, sizeof(*elf)))
2225 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2228 /* Write program headers for segments dump */
2229 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2230 vma = next_vma(vma, gate_vma)) {
2231 struct elf_phdr phdr;
2233 phdr.p_type = PT_LOAD;
2234 phdr.p_offset = offset;
2235 phdr.p_vaddr = vma->vm_start;
/* p_filesz is the cached dump size; p_memsz is the full mapping length. */
2237 phdr.p_filesz = vma_filesz[i++];
2238 phdr.p_memsz = vma->vm_end - vma->vm_start;
2239 offset += phdr.p_filesz;
/* Translate the VM_READ/VM_WRITE/VM_EXEC bits into PF_R/PF_W/PF_X. */
2240 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2241 if (vma->vm_flags & VM_WRITE)
2242 phdr.p_flags |= PF_W;
2243 if (vma->vm_flags & VM_EXEC)
2244 phdr.p_flags |= PF_X;
2245 phdr.p_align = ELF_EXEC_PAGESIZE;
2247 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2251 if (!elf_core_write_extra_phdrs(cprm, offset))
2254 /* write out the notes section */
2255 if (!write_note_info(&info, cprm))
/* Unlike the `!`-tested calls around it, this call is tested for non-zero. */
2258 if (elf_coredump_extra_notes_write(cprm))
/* Skip forward so that the mapping data begins exactly at dataoff. */
2262 if (!dump_skip(cprm, dataoff - cprm->written))
2265 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2266 vma = next_vma(vma, gate_vma)) {
/* Only the first vma_filesz[i] bytes of each mapping are visited. */
2270 end = vma->vm_start + vma_filesz[i++];
2272 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
/*
 * NOTE(review): presumably a page returned by get_dump_page() is mapped
 * and emitted, and a missing page is skipped - confirm.
 */
2276 page = get_dump_page(addr);
2278 void *kaddr = kmap(page);
2279 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2281 page_cache_release(page);
2283 stop = !dump_skip(cprm, PAGE_SIZE);
2289 if (!elf_core_write_extra_data(cprm))
2292 if (e_phnum == PN_XNUM) {
2293 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2301 free_note_info(&info);
2310 #endif /* CONFIG_ELF_CORE */
/* Initcall: register the ELF binary format handler (elf_format). */
2312 static int __init init_elf_binfmt(void)
2314 register_binfmt(&elf_format);
/* Module exit: undo the registration done by init_elf_binfmt(). */
2318 static void __exit exit_elf_binfmt(void)
2320 /* Remove the ELF loader. */
2321 unregister_binfmt(&elf_format);
/* init_elf_binfmt() runs as a core initcall; exit_elf_binfmt() at module exit. */
2324 core_initcall(init_elf_binfmt);
2325 module_exit(exit_elf_binfmt);
2326 MODULE_LICENSE("GPL");