2 * linux/fs/binfmt_elf.c
4 * These are the functions used to load ELF format executables as used
5 * on SVr4 machines. Information on the format may be found in the book
6 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support Tools".
9 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
12 #include <linux/module.h>
13 #include <linux/kernel.h>
16 #include <linux/mman.h>
17 #include <linux/errno.h>
18 #include <linux/signal.h>
19 #include <linux/binfmts.h>
20 #include <linux/string.h>
21 #include <linux/file.h>
22 #include <linux/slab.h>
23 #include <linux/personality.h>
24 #include <linux/elfcore.h>
25 #include <linux/init.h>
26 #include <linux/highuid.h>
27 #include <linux/compiler.h>
28 #include <linux/highmem.h>
29 #include <linux/pagemap.h>
30 #include <linux/vmalloc.h>
31 #include <linux/security.h>
32 #include <linux/random.h>
33 #include <linux/elf.h>
34 #include <linux/elf-randomize.h>
35 #include <linux/utsname.h>
36 #include <linux/coredump.h>
37 #include <linux/sched.h>
38 #include <linux/sched/coredump.h>
39 #include <linux/sched/task_stack.h>
40 #include <linux/cred.h>
41 #include <linux/dax.h>
42 #include <linux/uaccess.h>
43 #include <asm/param.h>
/*
 * File-scope type aliases and forward declarations.
 *
 * user_long_t is defined as long; user_siginfo_t falls back to siginfo_t when
 * nothing has defined it earlier. load_elf_binary(), elf_map(),
 * load_elf_library() and elf_core_dump() are declared here before their
 * definitions. load_elf_library and elf_core_dump each also have a NULL macro
 * fallback; the elf_core_dump prototype sits under CONFIG_ELF_CORE.
 * NOTE(review): the directive lines separating each prototype from its NULL
 * fallback are not visible in this excerpt - presumably the prototype and the
 * fallback are alternative branches of one conditional; confirm.
 */
47 #define user_long_t long
49 #ifndef user_siginfo_t
50 #define user_siginfo_t siginfo_t
53 static int load_elf_binary(struct linux_binprm *bprm);
54 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
55 int, int, unsigned long);
58 static int load_elf_library(struct file *);
60 #define load_elf_library NULL
64 * If we don't support core dumping, then supply a NULL so we
67 #ifdef CONFIG_ELF_CORE
68 static int elf_core_dump(struct coredump_params *cprm);
70 #define elf_core_dump NULL
/*
 * Alignment helpers for ELF mappings.
 *
 * ELF_MIN_ALIGN is the mapping granularity: ELF_EXEC_PAGESIZE when that is
 * larger than PAGE_SIZE; the PAGE_SIZE definition below is presumably the
 * alternative branch (its directive line is not visible here - confirm).
 * ELF_CORE_EFLAGS defaults to 0 when not already defined.
 *
 * ELF_PAGESTART(v)  rounds v down to a multiple of ELF_MIN_ALIGN.
 * ELF_PAGEOFFSET(v) yields the offset of v within its ELF_MIN_ALIGN unit.
 * ELF_PAGEALIGN(v)  rounds v up to a multiple of ELF_MIN_ALIGN.
 * The mask arithmetic only works when ELF_MIN_ALIGN is a power of two.
 */
73 #if ELF_EXEC_PAGESIZE > PAGE_SIZE
74 #define ELF_MIN_ALIGN ELF_EXEC_PAGESIZE
76 #define ELF_MIN_ALIGN PAGE_SIZE
79 #ifndef ELF_CORE_EFLAGS
80 #define ELF_CORE_EFLAGS 0
83 #define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
84 #define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
85 #define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))
/*
 * Binary-format descriptor for ELF: names the owning module and the handlers
 * for loading an executable, loading a shared library and writing a core dump,
 * with ELF_EXEC_PAGESIZE as the min_coredump value. load_elf_binary() passes
 * it to set_binfmt().
 */
87 static struct linux_binfmt elf_format = {
88 .module = THIS_MODULE,
89 .load_binary = load_elf_binary,
90 .load_shlib = load_elf_library,
91 .core_dump = elf_core_dump,
92 .min_coredump = ELF_EXEC_PAGESIZE,
/* True when x, converted to unsigned long, is at or above TASK_SIZE. */
95 #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
/*
 * set_brk() - map the bss/brk range and record the initial program break
 * @start: start address of the range; rounded up with ELF_PAGEALIGN()
 * @end:   end address of the range; rounded up with ELF_PAGEALIGN()
 * @prot:  PROT_* bits of the segment; only PROT_EXEC is examined
 *
 * Calls vm_brk_flags() on the aligned range, passing VM_EXEC when @prot
 * contains PROT_EXEC, and sets both mm->start_brk and mm->brk of the current
 * task to the aligned @end.
 * NOTE(review): the guard around the vm_brk_flags() call, its error handling
 * and the return statements are not visible in this excerpt.
 */
97 static int set_brk(unsigned long start, unsigned long end, int prot)
99 start = ELF_PAGEALIGN(start);
100 end = ELF_PAGEALIGN(end);
103 * Map the last of the bss segment.
104 * If the header is requesting these pages to be
105 * executable, honour that (ppc32 needs this).
107 int error = vm_brk_flags(start, end - start,
108 prot & PROT_EXEC ? VM_EXEC : 0);
112 current->mm->start_brk = current->mm->brk = end;
/*
 * padzero() - zero user memory from elf_bss to the end of its ELF_MIN_ALIGN unit
 * @elf_bss: user address where the file-backed data ends
 *
 * Takes the offset of @elf_bss within its ELF_MIN_ALIGN unit, converts it to
 * the number of bytes remaining in that unit and clears them with
 * clear_user().
 * NOTE(review): the zero-offset guard and the return statements are not
 * visible in this excerpt; callers in this file treat a non-zero result as
 * failure.
 */
116 /* We need to explicitly zero any fractional pages
117 after the data section (i.e. bss). This would
118 contain the junk from the file that should not
121 static int padzero(unsigned long elf_bss)
125 nbyte = ELF_PAGEOFFSET(elf_bss);
127 nbyte = ELF_MIN_ALIGN - nbyte;
128 if (clear_user((void __user *) elf_bss, nbyte))
134 /* Let's use some macros to make this stack manipulation a little clearer */
/*
 * Stack-pointer helpers used by create_elf_tables(), with one variant per
 * stack growth direction (the first set is under CONFIG_STACK_GROWSUP).
 *
 * STACK_ADD(sp, items):   sp moved by items elf_addr_t slots in the growth
 *                         direction (plus when growing up, minus otherwise).
 * STACK_ROUND(sp, items): address after reserving items slots, rounded to a
 *                         16-byte boundary (up when growing up, down otherwise).
 * STACK_ALLOC(sp, len):   moves sp by len in place; the downward variant
 *                         evaluates to the new sp.
 * NOTE(review): the tail of the upward STACK_ALLOC and the directive line
 * between the two variants are not visible in this excerpt.
 */
135 #ifdef CONFIG_STACK_GROWSUP
136 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
137 #define STACK_ROUND(sp, items) \
138 ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
139 #define STACK_ALLOC(sp, len) ({ \
140 elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
143 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
144 #define STACK_ROUND(sp, items) \
145 (((unsigned long) (sp - items)) &~ 15UL)
146 #define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
/* Default ELF_BASE_PLATFORM to NULL when nothing has defined it earlier. */
149 #ifndef ELF_BASE_PLATFORM
151 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
152 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
153 * will be copied to the user stack in the same manner as AT_PLATFORM.
155 #define ELF_BASE_PLATFORM NULL
/*
 * create_elf_tables() - build the initial user stack contents for a new image
 * @bprm:             binprm of the exec in progress; supplies p, argc, envc,
 *                    exec and interp_flags/interp_data, and receives the
 *                    final stack position in bprm->p
 * @exec:             ELF header of the executable (e_phoff, e_phnum and
 *                    e_entry feed the auxiliary vector)
 * @load_addr:        added to e_phoff to form AT_PHDR
 * @interp_load_addr: stored as AT_BASE
 *
 * Steps, in order of appearance:
 *  - adjust the stack position with arch_align_stack();
 *  - copy the platform string, the optional base platform string and 16
 *    random bytes to the user stack via STACK_ALLOC() and __copy_to_user();
 *  - fill mm->saved_auxv with AT_* id/value pairs through NEW_AUX_ENT() and
 *    zero the unused remainder;
 *  - reserve room for the auxiliary vector, argc, argv[] and envp[] (each
 *    array with one extra terminating slot), round with STACK_ROUND() and
 *    store the result in bprm->p;
 *  - extend the stack vma with find_extend_vma();
 *  - write argc, then walk the argument and environment strings starting at
 *    mm->arg_start, storing each string address followed by a terminating 0,
 *    and record arg_end, env_start and env_end in the mm;
 *  - copy the auxiliary vector to the slot after the envp terminator.
 *
 * Each string length comes from strnlen_user() and is checked against 0 and
 * MAX_ARG_STRLEN.
 * NOTE(review): several short lines (loop headers, error returns, some
 * declarations such as ei_index and items) are not visible in this excerpt,
 * so the exact error codes are not documented here.
 */
159 create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
160 unsigned long load_addr, unsigned long interp_load_addr)
162 unsigned long p = bprm->p;
163 int argc = bprm->argc;
164 int envc = bprm->envc;
165 elf_addr_t __user *argv;
166 elf_addr_t __user *envp;
167 elf_addr_t __user *sp;
168 elf_addr_t __user *u_platform;
169 elf_addr_t __user *u_base_platform;
170 elf_addr_t __user *u_rand_bytes;
171 const char *k_platform = ELF_PLATFORM;
172 const char *k_base_platform = ELF_BASE_PLATFORM;
173 unsigned char k_rand_bytes[16];
175 elf_addr_t *elf_info;
177 const struct cred *cred = current_cred();
178 struct vm_area_struct *vma;
181 * In some cases (e.g. Hyper-Threading), we want to avoid L1
182 * evictions by the processes running on the same package. One
183 * thing we can do is to shuffle the initial stack for them.
186 p = arch_align_stack(p);
189 * If this architecture has a platform capability string, copy it
190 * to userspace. In some cases (Sparc), this info is impossible
191 * for userspace to get any other way, in others (i386) it is
196 size_t len = strlen(k_platform) + 1;
198 u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
199 if (__copy_to_user(u_platform, k_platform, len))
204 * If this architecture has a "base" platform capability
205 * string, copy it to userspace.
207 u_base_platform = NULL;
208 if (k_base_platform) {
209 size_t len = strlen(k_base_platform) + 1;
211 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
212 if (__copy_to_user(u_base_platform, k_base_platform, len))
217 * Generate 16 random bytes for userspace PRNG seeding.
219 get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
220 u_rand_bytes = (elf_addr_t __user *)
221 STACK_ALLOC(p, sizeof(k_rand_bytes));
222 if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
225 /* Create the ELF interpreter info */
226 elf_info = (elf_addr_t *)current->mm->saved_auxv;
227 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
228 #define NEW_AUX_ENT(id, val) \
230 elf_info[ei_index++] = id; \
231 elf_info[ei_index++] = val; \
236 * ARCH_DLINFO must come first so PPC can do its special alignment of
238 * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
239 * ARCH_DLINFO changes
243 NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
244 NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
245 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
246 NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
247 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
248 NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
249 NEW_AUX_ENT(AT_BASE, interp_load_addr);
250 NEW_AUX_ENT(AT_FLAGS, 0);
251 NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
252 NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
253 NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
254 NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
255 NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
256 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
257 NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
259 NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
261 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
263 NEW_AUX_ENT(AT_PLATFORM,
264 (elf_addr_t)(unsigned long)u_platform);
266 if (k_base_platform) {
267 NEW_AUX_ENT(AT_BASE_PLATFORM,
268 (elf_addr_t)(unsigned long)u_base_platform);
270 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
271 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
274 /* AT_NULL is zero; clear the rest too */
275 memset(&elf_info[ei_index], 0,
276 sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);
278 /* And advance past the AT_NULL entry. */
281 sp = STACK_ADD(p, ei_index);
283 items = (argc + 1) + (envc + 1) + 1;
284 bprm->p = STACK_ROUND(sp, items);
286 /* Point sp at the lowest address on the stack */
287 #ifdef CONFIG_STACK_GROWSUP
288 sp = (elf_addr_t __user *)bprm->p - items - ei_index;
289 bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
291 sp = (elf_addr_t __user *)bprm->p;
296 * Grow the stack manually; some architectures have a limit on how
297 * far ahead a user-space access may be in order to grow the stack.
299 vma = find_extend_vma(current->mm, bprm->p);
303 /* Now, let's put argc (and argv, envp if appropriate) on the stack */
304 if (__put_user(argc, sp++))
307 envp = argv + argc + 1;
309 /* Populate argv and envp */
310 p = current->mm->arg_end = current->mm->arg_start;
313 if (__put_user((elf_addr_t)p, argv++))
315 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
316 if (!len || len > MAX_ARG_STRLEN)
320 if (__put_user(0, argv))
322 current->mm->arg_end = current->mm->env_start = p;
325 if (__put_user((elf_addr_t)p, envp++))
327 len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
328 if (!len || len > MAX_ARG_STRLEN)
332 if (__put_user(0, envp))
334 current->mm->env_end = p;
336 /* Put the elf_info on the stack in the right place. */
337 sp = (elf_addr_t __user *)envp + 1;
338 if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
/*
 * elf_map() - mmap one program header segment of filep
 * @filep:      file to map
 * @addr:       requested address; rounded down with ELF_PAGESTART()
 * @eppnt:      program header describing the segment
 * @prot:       protection bits passed to vm_mmap()
 * @type:       mapping flags passed to vm_mmap()
 * @total_size: size of the whole image, used for the first mapping
 *
 * The mapping length is p_filesz plus the in-page offset of p_vaddr, rounded
 * up with ELF_PAGEALIGN(); the file offset is p_offset minus that same
 * in-page offset. In the total_size path the full image span is mapped first
 * and everything past this segment is unmapped again when the mapping
 * succeeded; the other vm_mmap() call maps just the segment.
 * NOTE(review): the zero-size early exit, the branch on total_size and the
 * return statement are not visible in this excerpt.
 */
345 static unsigned long elf_map(struct file *filep, unsigned long addr,
346 struct elf_phdr *eppnt, int prot, int type,
347 unsigned long total_size)
349 unsigned long map_addr;
350 unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
351 unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
352 addr = ELF_PAGESTART(addr);
353 size = ELF_PAGEALIGN(size);
355 /* mmap() will return -EINVAL if given a zero size, but a
356 * segment with zero filesize is perfectly valid */
361 * total_size is the size of the ELF (interpreter) image.
362 * The _first_ mmap needs to know the full size, otherwise
363 * randomization might put this image into an overlapping
364 * position with the ELF binary image. (since size < total_size)
365 * So we first map the 'big' image - and unmap the remainder at
366 * the end. (which unmap is needed for ELF images with holes.)
369 total_size = ELF_PAGEALIGN(total_size);
370 map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
371 if (!BAD_ADDR(map_addr))
372 vm_munmap(map_addr+size, total_size-size);
374 map_addr = vm_mmap(filep, addr, size, prot, type, off);
379 #endif /* !elf_map */
/*
 * total_mapping_size() - span covered by the PT_LOAD entries of cmds
 * @cmds: array of program headers
 * @nr:   number of entries in @cmds
 *
 * Scans for PT_LOAD entries, tracking a first and a last index, and returns
 * the end of the last one (p_vaddr + p_memsz) minus the page start of the
 * first one.
 * NOTE(review): the index bookkeeping inside the loop and the handling of
 * the case without any PT_LOAD entry are not visible in this excerpt.
 */
381 static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
383 int i, first_idx = -1, last_idx = -1;
385 for (i = 0; i < nr; i++) {
386 if (cmds[i].p_type == PT_LOAD) {
395 return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
396 ELF_PAGESTART(cmds[first_idx].p_vaddr);
400 * load_elf_phdrs() - load ELF program headers
401 * @elf_ex: ELF header of the binary whose program headers should be loaded
402 * @elf_file: the opened ELF binary file
404 * Loads ELF program headers from the binary file elf_file, which has the ELF
405 * header pointed to by elf_ex, into a newly allocated array. The caller is
406 * responsible for freeing the allocated data. Returns an ERR_PTR upon failure.
/*
 * Implementation notes for load_elf_phdrs():
 * The header is checked for e_phentsize equal to sizeof(struct elf_phdr),
 * for e_phnum between 1 and 65536 / sizeof(struct elf_phdr), and for a total
 * table size of at most ELF_MIN_ALIGN bytes. The table is allocated with
 * kmalloc(GFP_KERNEL) and filled by kernel_read() at e_phoff; a short read
 * is turned into -EIO and a negative read result is kept as the error.
 * NOTE(review): the jump targets, the cleanup and the return lines are not
 * visible in this excerpt.
 */
408 static struct elf_phdr *load_elf_phdrs(struct elfhdr *elf_ex,
409 struct file *elf_file)
411 struct elf_phdr *elf_phdata = NULL;
412 int retval, size, err = -1;
415 * If the size of this structure has changed, then punt, since
416 * we will be doing the wrong thing.
418 if (elf_ex->e_phentsize != sizeof(struct elf_phdr))
421 /* Sanity check the number of program headers... */
422 if (elf_ex->e_phnum < 1 ||
423 elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
426 /* ...and their total size. */
427 size = sizeof(struct elf_phdr) * elf_ex->e_phnum;
428 if (size > ELF_MIN_ALIGN)
431 elf_phdata = kmalloc(size, GFP_KERNEL);
435 /* Read in the program headers */
436 retval = kernel_read(elf_file, elf_ex->e_phoff,
437 (char *)elf_phdata, size);
438 if (retval != size) {
439 err = (retval < 0) ? retval : -EIO;
453 #ifndef CONFIG_ARCH_BINFMT_ELF_STATE
456 * struct arch_elf_state - arch-specific ELF loading state
458 * This structure is used to preserve architecture specific data during
459 * the loading of an ELF file, throughout the checking of architecture
460 * specific ELF headers & through to the point where the ELF load is
461 * known to be proceeding (ie. SET_PERSONALITY).
463 * This implementation is a dummy for architectures which require no specific state.
/*
 * Generic state type and its empty initializer, used when
 * CONFIG_ARCH_BINFMT_ELF_STATE is not defined.
 */
466 struct arch_elf_state {
469 #define INIT_ARCH_ELF_STATE {}
472 * arch_elf_pt_proc() - check a PT_LOPROC..PT_HIPROC ELF program header
473 * @ehdr: The main ELF header
474 * @phdr: The program header to check
475 * @elf: The open ELF file
476 * @is_interp: True if the phdr is from the interpreter of the ELF being
477 * loaded, else false.
478 * @state: Architecture-specific state preserved throughout the process
479 * of loading the ELF.
481 * Inspects the program header phdr to validate its correctness and/or
482 * suitability for the system. Called once per ELF program header in the
483 * range PT_LOPROC to PT_HIPROC, for both the ELF being loaded and its interpreter.
486 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
487 * with that return code.
/*
 * Generic stub for the case where CONFIG_ARCH_BINFMT_ELF_STATE is not
 * defined; the visible body performs no checks on its arguments.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
489 static inline int arch_elf_pt_proc(struct elfhdr *ehdr,
490 struct elf_phdr *phdr,
491 struct file *elf, bool is_interp,
492 struct arch_elf_state *state)
494 /* Dummy implementation, always proceed */
499 * arch_check_elf() - check an ELF executable
500 * @ehdr: The main ELF header
501 * @has_interp: True if the ELF has an interpreter, else false.
502 * @interp_ehdr: The interpreter's ELF header
503 * @state: Architecture-specific state preserved throughout the process
504 * of loading the ELF.
506 * Provides a final opportunity for architecture code to reject the loading
507 * of the ELF & cause an exec syscall to return an error. This is called after
508 * all program headers to be checked by arch_elf_pt_proc have been.
510 * Return: Zero to proceed with the ELF load, non-zero to fail the ELF load
511 * with that return code.
/*
 * Generic stub for the case where CONFIG_ARCH_BINFMT_ELF_STATE is not
 * defined; the visible body performs no checks on its arguments.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
513 static inline int arch_check_elf(struct elfhdr *ehdr, bool has_interp,
514 struct elfhdr *interp_ehdr,
515 struct arch_elf_state *state)
517 /* Dummy implementation, always proceed */
521 #endif /* !CONFIG_ARCH_BINFMT_ELF_STATE */
/*
 * load_elf_interp() - map the ELF interpreter into the current address space
 * @interp_elf_ex:     ELF header of the interpreter
 * @interpreter:       open file of the interpreter
 * @interp_map_addr:   in/out; receives the mapping address while still 0
 * @no_base:           when non-zero, selects an extra ET_DYN branch whose body
 *                     is not visible here; load_elf_binary() passes load_bias
 * @interp_elf_phdata: program headers of the interpreter
 *
 * After checking the ELF type (ET_EXEC or ET_DYN), the architecture and that
 * the file supports mmap, every PT_LOAD header is mapped with elf_map().
 * Protection bits are derived from PF_R, PF_W and PF_X. MAP_FIXED is used
 * for ET_EXEC images and for every segment once the load address is set; for
 * ET_DYN the load address is derived from the first mapping address.
 * Each segment is range-checked against TASK_SIZE, and the end of the file
 * data (elf_bss) and of the memory image (last_bss) are computed per segment.
 *
 * Afterwards the tail of the last file page is cleared with padzero() and
 * any remaining bss between the page-aligned elf_bss and last_bss is mapped
 * with vm_brk_flags().
 *
 * NOTE(review): the error paths, the updates of elf_bss/last_bss/bss_prot
 * and the return statement are not visible in this excerpt;
 * load_elf_binary() uses a non-error result as the interpreter load address.
 */
523 /* This is much more generalized than the library routine read function,
524 so we keep this separate. Technically the library read function
525 is only provided so that we can read a.out libraries that have
528 static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
529 struct file *interpreter, unsigned long *interp_map_addr,
530 unsigned long no_base, struct elf_phdr *interp_elf_phdata)
532 struct elf_phdr *eppnt;
533 unsigned long load_addr = 0;
534 int load_addr_set = 0;
535 unsigned long last_bss = 0, elf_bss = 0;
537 unsigned long error = ~0UL;
538 unsigned long total_size;
541 /* First of all, some simple consistency checks */
542 if (interp_elf_ex->e_type != ET_EXEC &&
543 interp_elf_ex->e_type != ET_DYN)
545 if (!elf_check_arch(interp_elf_ex))
547 if (!interpreter->f_op->mmap)
550 total_size = total_mapping_size(interp_elf_phdata,
551 interp_elf_ex->e_phnum);
557 eppnt = interp_elf_phdata;
558 for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
559 if (eppnt->p_type == PT_LOAD) {
560 int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
562 unsigned long vaddr = 0;
563 unsigned long k, map_addr;
565 if (eppnt->p_flags & PF_R)
566 elf_prot = PROT_READ;
567 if (eppnt->p_flags & PF_W)
568 elf_prot |= PROT_WRITE;
569 if (eppnt->p_flags & PF_X)
570 elf_prot |= PROT_EXEC;
571 vaddr = eppnt->p_vaddr;
572 if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
573 elf_type |= MAP_FIXED;
574 else if (no_base && interp_elf_ex->e_type == ET_DYN)
577 map_addr = elf_map(interpreter, load_addr + vaddr,
578 eppnt, elf_prot, elf_type, total_size);
580 if (!*interp_map_addr)
581 *interp_map_addr = map_addr;
583 if (BAD_ADDR(map_addr))
586 if (!load_addr_set &&
587 interp_elf_ex->e_type == ET_DYN) {
588 load_addr = map_addr - ELF_PAGESTART(vaddr);
593 * Check to see if the section's size will overflow the
594 * allowed task size. Note that p_filesz must always be
595 * <= p_memsize so it's only necessary to check p_memsz.
597 k = load_addr + eppnt->p_vaddr;
599 eppnt->p_filesz > eppnt->p_memsz ||
600 eppnt->p_memsz > TASK_SIZE ||
601 TASK_SIZE - eppnt->p_memsz < k) {
607 * Find the end of the file mapping for this phdr, and
608 * keep track of the largest address we see for this.
610 k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
615 * Do the same thing for the memory mapping - between
616 * elf_bss and last_bss is the bss section.
618 k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
627 * Now fill out the bss section: first pad the last page from
628 * the file up to the page boundary, and zero it from elf_bss
629 * up to the end of the page.
631 if (padzero(elf_bss)) {
636 * Next, align both the file and mem bss up to the page size,
637 * since this is where elf_bss was just zeroed up to, and where
638 * last_bss will end after the vm_brk_flags() below.
640 elf_bss = ELF_PAGEALIGN(elf_bss);
641 last_bss = ELF_PAGEALIGN(last_bss);
642 /* Finally, if there is still more bss to allocate, do it. */
643 if (last_bss > elf_bss) {
644 error = vm_brk_flags(elf_bss, last_bss - elf_bss,
645 bss_prot & PROT_EXEC ? VM_EXEC : 0);
656 * These are the functions used to load ELF style executables and shared
657 * libraries. There is no binary dependent code anywhere else.
/*
 * Default mask applied to the random page count in randomize_stack_top(),
 * used when nothing has defined STACK_RND_MASK earlier.
 */
660 #ifndef STACK_RND_MASK
661 #define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
/*
 * randomize_stack_top() - apply a random page offset to the stack top
 * @stack_top: nominal top of the stack
 *
 * When the current task has PF_RANDOMIZE set and its personality lacks
 * ADDR_NO_RANDOMIZE, a random value is masked with STACK_RND_MASK and
 * shifted left by PAGE_SHIFT; otherwise the offset stays 0. The page-aligned
 * @stack_top is moved up by that amount under CONFIG_STACK_GROWSUP; the
 * subtracting return is presumably the alternative branch (its directive
 * line is not visible in this excerpt - confirm).
 */
664 static unsigned long randomize_stack_top(unsigned long stack_top)
666 unsigned long random_variable = 0;
668 if ((current->flags & PF_RANDOMIZE) &&
669 !(current->personality & ADDR_NO_RANDOMIZE)) {
670 random_variable = get_random_long();
671 random_variable &= STACK_RND_MASK;
672 random_variable <<= PAGE_SHIFT;
674 #ifdef CONFIG_STACK_GROWSUP
675 return PAGE_ALIGN(stack_top) + random_variable;
677 return PAGE_ALIGN(stack_top) - random_variable;
/*
 * load_elf_binary() - load_binary handler of elf_format
 * @bprm: binprm of the exec in progress; bprm->buf holds the start of the
 *        file and bprm->file the open executable
 *
 * Phases, in order of appearance:
 *  1. Copy the ELF header out of bprm->buf and check magic, type (ET_EXEC or
 *     ET_DYN), architecture and mmap support; read the program headers with
 *     load_elf_phdrs().
 *  2. On PT_INTERP: check the path length (2..PATH_MAX), read the path and
 *     check that it ends in a NUL byte, open it with open_exec(), call
 *     would_dump() and read the ELF header of the interpreter.
 *  3. Second pass over the headers: PF_X on one header type (its case label
 *     is not visible here) selects EXSTACK_ENABLE_X or EXSTACK_DISABLE_X,
 *     and PT_LOPROC..PT_HIPROC entries go to arch_elf_pt_proc().
 *  4. With an interpreter: check its magic and architecture, load its
 *     program headers and pass its processor-specific entries to
 *     arch_elf_pt_proc(). Then arch_check_elf() may still reject the image.
 *  5. flush_old_exec(), then personality, PF_RANDOMIZE, setup_new_exec(),
 *     install_exec_creds() and setup_arg_pages() with a randomized stack top.
 *  6. Map every PT_LOAD segment with elf_map(). The first ET_DYN segment
 *     gets a load bias based on ELF_ET_DYN_BASE, plus arch_mmap_rnd() when
 *     PF_RANDOMIZE is set. A bss gap left by an earlier segment is mapped
 *     with set_brk() and cleared first. Segment bounds are range-checked
 *     against TASK_SIZE.
 *  7. Add load_bias to the entry point and to the code, data, bss and brk
 *     bounds, map the final bss/brk with set_brk() and clear the partial
 *     page with padzero().
 *  8. With an interpreter, load it via load_elf_interp() and use its entry
 *     point; otherwise use e_entry of the executable.
 *  9. set_binfmt(), optional arch_setup_additional_pages(),
 *     create_elf_tables(), publish the code, data and stack bounds in the
 *     mm, optionally randomize brk, optionally map page 0 for
 *     MMAP_PAGE_ZERO, run ELF_PLAT_INIT when defined and call start_thread().
 *
 * NOTE(review): many short lines (labels, returns, some conditions and
 * declarations, including that of loc) are not visible in this excerpt, so
 * return values and the exact cleanup order are not documented here.
 */
681 static int load_elf_binary(struct linux_binprm *bprm)
683 struct file *interpreter = NULL; /* to shut gcc up */
684 unsigned long load_addr = 0, load_bias = 0;
685 int load_addr_set = 0;
686 char * elf_interpreter = NULL;
688 struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata = NULL;
689 unsigned long elf_bss, elf_brk;
692 unsigned long elf_entry;
693 unsigned long interp_load_addr = 0;
694 unsigned long start_code, end_code, start_data, end_data;
695 unsigned long reloc_func_desc __maybe_unused = 0;
696 int executable_stack = EXSTACK_DEFAULT;
697 struct pt_regs *regs = current_pt_regs();
699 struct elfhdr elf_ex;
700 struct elfhdr interp_elf_ex;
702 struct arch_elf_state arch_state = INIT_ARCH_ELF_STATE;
704 loc = kmalloc(sizeof(*loc), GFP_KERNEL);
710 /* Get the exec-header */
711 loc->elf_ex = *((struct elfhdr *)bprm->buf);
714 /* First of all, some simple consistency checks */
715 if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
718 if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
720 if (!elf_check_arch(&loc->elf_ex))
722 if (!bprm->file->f_op->mmap)
725 elf_phdata = load_elf_phdrs(&loc->elf_ex, bprm->file);
729 elf_ppnt = elf_phdata;
738 for (i = 0; i < loc->elf_ex.e_phnum; i++) {
739 if (elf_ppnt->p_type == PT_INTERP) {
740 /* This is the program interpreter used for
741 * shared libraries - for now assume that this
742 * is an a.out format binary
745 if (elf_ppnt->p_filesz > PATH_MAX ||
746 elf_ppnt->p_filesz < 2)
750 elf_interpreter = kmalloc(elf_ppnt->p_filesz,
752 if (!elf_interpreter)
755 retval = kernel_read(bprm->file, elf_ppnt->p_offset,
758 if (retval != elf_ppnt->p_filesz) {
761 goto out_free_interp;
763 /* make sure path is NULL terminated */
765 if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
766 goto out_free_interp;
768 interpreter = open_exec(elf_interpreter);
769 retval = PTR_ERR(interpreter);
770 if (IS_ERR(interpreter))
771 goto out_free_interp;
774 * If the binary is not readable then enforce
775 * mm->dumpable = 0 regardless of the interpreter's
778 would_dump(bprm, interpreter);
780 /* Get the exec headers */
781 retval = kernel_read(interpreter, 0,
782 (void *)&loc->interp_elf_ex,
783 sizeof(loc->interp_elf_ex));
784 if (retval != sizeof(loc->interp_elf_ex)) {
787 goto out_free_dentry;
795 elf_ppnt = elf_phdata;
796 for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
797 switch (elf_ppnt->p_type) {
799 if (elf_ppnt->p_flags & PF_X)
800 executable_stack = EXSTACK_ENABLE_X;
802 executable_stack = EXSTACK_DISABLE_X;
805 case PT_LOPROC ... PT_HIPROC:
806 retval = arch_elf_pt_proc(&loc->elf_ex, elf_ppnt,
810 goto out_free_dentry;
814 /* Some simple consistency checks for the interpreter */
815 if (elf_interpreter) {
817 /* Not an ELF interpreter */
818 if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
819 goto out_free_dentry;
820 /* Verify the interpreter has a valid arch */
821 if (!elf_check_arch(&loc->interp_elf_ex))
822 goto out_free_dentry;
824 /* Load the interpreter program headers */
825 interp_elf_phdata = load_elf_phdrs(&loc->interp_elf_ex,
827 if (!interp_elf_phdata)
828 goto out_free_dentry;
830 /* Pass PT_LOPROC..PT_HIPROC headers to arch code */
831 elf_ppnt = interp_elf_phdata;
832 for (i = 0; i < loc->interp_elf_ex.e_phnum; i++, elf_ppnt++)
833 switch (elf_ppnt->p_type) {
834 case PT_LOPROC ... PT_HIPROC:
835 retval = arch_elf_pt_proc(&loc->interp_elf_ex,
836 elf_ppnt, interpreter,
839 goto out_free_dentry;
845 * Allow arch code to reject the ELF at this point, whilst it's
846 * still possible to return an error to the code that invoked
849 retval = arch_check_elf(&loc->elf_ex,
850 !!interpreter, &loc->interp_elf_ex,
853 goto out_free_dentry;
855 /* Flush all traces of the currently running executable */
856 retval = flush_old_exec(bprm);
858 goto out_free_dentry;
860 /* Do this immediately, since STACK_TOP as used in setup_arg_pages
861 may depend on the personality. */
862 SET_PERSONALITY2(loc->elf_ex, &arch_state);
863 if (elf_read_implies_exec(loc->elf_ex, executable_stack))
864 current->personality |= READ_IMPLIES_EXEC;
866 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
867 current->flags |= PF_RANDOMIZE;
869 setup_new_exec(bprm);
870 install_exec_creds(bprm);
872 /* Do this so that we can load the interpreter, if need be. We will
873 change some of these later */
874 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
877 goto out_free_dentry;
879 current->mm->start_stack = bprm->p;
881 /* Now we do a little grungy work by mmapping the ELF image into
882 the correct location in memory. */
883 for(i = 0, elf_ppnt = elf_phdata;
884 i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
885 int elf_prot = 0, elf_flags;
886 unsigned long k, vaddr;
887 unsigned long total_size = 0;
889 if (elf_ppnt->p_type != PT_LOAD)
892 if (unlikely (elf_brk > elf_bss)) {
895 /* There was a PT_LOAD segment with p_memsz > p_filesz
896 before this one. Map anonymous pages, if needed,
897 and clear the area. */
898 retval = set_brk(elf_bss + load_bias,
902 goto out_free_dentry;
903 nbyte = ELF_PAGEOFFSET(elf_bss);
905 nbyte = ELF_MIN_ALIGN - nbyte;
906 if (nbyte > elf_brk - elf_bss)
907 nbyte = elf_brk - elf_bss;
908 if (clear_user((void __user *)elf_bss +
911 * This bss-zeroing can fail if the ELF
912 * file specifies odd protections. So
913 * we don't check the return value
919 if (elf_ppnt->p_flags & PF_R)
920 elf_prot |= PROT_READ;
921 if (elf_ppnt->p_flags & PF_W)
922 elf_prot |= PROT_WRITE;
923 if (elf_ppnt->p_flags & PF_X)
924 elf_prot |= PROT_EXEC;
926 elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
928 vaddr = elf_ppnt->p_vaddr;
929 if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
930 elf_flags |= MAP_FIXED;
931 } else if (loc->elf_ex.e_type == ET_DYN) {
932 /* Try and get dynamic programs out of the way of the
933 * default mmap base, as well as whatever program they
934 * might try to exec. This is because the brk will
935 * follow the loader, and is not movable. */
936 load_bias = ELF_ET_DYN_BASE - vaddr;
937 if (current->flags & PF_RANDOMIZE)
938 load_bias += arch_mmap_rnd();
939 load_bias = ELF_PAGESTART(load_bias);
940 total_size = total_mapping_size(elf_phdata,
941 loc->elf_ex.e_phnum);
944 goto out_free_dentry;
948 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
949 elf_prot, elf_flags, total_size);
950 if (BAD_ADDR(error)) {
951 retval = IS_ERR((void *)error) ?
952 PTR_ERR((void*)error) : -EINVAL;
953 goto out_free_dentry;
956 if (!load_addr_set) {
958 load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
959 if (loc->elf_ex.e_type == ET_DYN) {
961 ELF_PAGESTART(load_bias + vaddr);
962 load_addr += load_bias;
963 reloc_func_desc = load_bias;
966 k = elf_ppnt->p_vaddr;
973 * Check to see if the section's size will overflow the
974 * allowed task size. Note that p_filesz must always be
975 * <= p_memsz so it is only necessary to check p_memsz.
977 if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
978 elf_ppnt->p_memsz > TASK_SIZE ||
979 TASK_SIZE - elf_ppnt->p_memsz < k) {
980 /* set_brk can never work. Avoid overflows. */
982 goto out_free_dentry;
985 k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;
989 if ((elf_ppnt->p_flags & PF_X) && end_code < k)
993 k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
1000 loc->elf_ex.e_entry += load_bias;
1001 elf_bss += load_bias;
1002 elf_brk += load_bias;
1003 start_code += load_bias;
1004 end_code += load_bias;
1005 start_data += load_bias;
1006 end_data += load_bias;
1008 /* Calling set_brk effectively mmaps the pages that we need
1009 * for the bss and break sections. We must do this before
1010 * mapping in the interpreter, to make sure it doesn't wind
1011 * up getting placed where the bss needs to go.
1013 retval = set_brk(elf_bss, elf_brk, bss_prot);
1015 goto out_free_dentry;
1016 if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
1017 retval = -EFAULT; /* Nobody gets to see this, but.. */
1018 goto out_free_dentry;
1021 if (elf_interpreter) {
1022 unsigned long interp_map_addr = 0;
1024 elf_entry = load_elf_interp(&loc->interp_elf_ex,
1027 load_bias, interp_elf_phdata);
1028 if (!IS_ERR((void *)elf_entry)) {
1030 * load_elf_interp() returns relocation
1033 interp_load_addr = elf_entry;
1034 elf_entry += loc->interp_elf_ex.e_entry;
1036 if (BAD_ADDR(elf_entry)) {
1037 retval = IS_ERR((void *)elf_entry) ?
1038 (int)elf_entry : -EINVAL;
1039 goto out_free_dentry;
1041 reloc_func_desc = interp_load_addr;
1043 allow_write_access(interpreter);
1045 kfree(elf_interpreter);
1047 elf_entry = loc->elf_ex.e_entry;
1048 if (BAD_ADDR(elf_entry)) {
1050 goto out_free_dentry;
1054 kfree(interp_elf_phdata);
1057 set_binfmt(&elf_format);
1059 #ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
1060 retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
1063 #endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */
1065 retval = create_elf_tables(bprm, &loc->elf_ex,
1066 load_addr, interp_load_addr);
1069 /* N.B. passed_fileno might not be initialized? */
1070 current->mm->end_code = end_code;
1071 current->mm->start_code = start_code;
1072 current->mm->start_data = start_data;
1073 current->mm->end_data = end_data;
1074 current->mm->start_stack = bprm->p;
1076 if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
1077 current->mm->brk = current->mm->start_brk =
1078 arch_randomize_brk(current->mm);
1079 #ifdef compat_brk_randomized
1080 current->brk_randomized = 1;
1084 if (current->personality & MMAP_PAGE_ZERO) {
1085 /* Why this, you ask??? Well SVr4 maps page 0 as read-only,
1086 and some applications "depend" upon this behavior.
1087 Since we do not have the power to recompile these, we
1088 emulate the SVr4 behavior. Sigh. */
1089 error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
1090 MAP_FIXED | MAP_PRIVATE, 0);
1093 #ifdef ELF_PLAT_INIT
1095 * The ABI may specify that certain registers be set up in special
1096 * ways (on i386 %edx is the address of a DT_FINI function, for
1097 * example. In addition, it may also specify (eg, PowerPC64 ELF)
1098 * that the e_entry field is the address of the function descriptor
1099 * for the startup routine, rather than the address of the startup
1100 * routine itself. This macro performs whatever initialization to
1101 * the regs structure is required as well as any relocations to the
1102 * function descriptor entries when executing dynamically links apps.
1104 ELF_PLAT_INIT(regs, reloc_func_desc);
1107 start_thread(regs, elf_entry, bprm->p);
1116 kfree(interp_elf_phdata);
1117 allow_write_access(interpreter);
1121 kfree(elf_interpreter);
1127 #ifdef CONFIG_USELIB
1128 /* This is really simpleminded and specialized - we are loading an
1129 a.out library that is given an ELF header. */
/*
 * load_elf_library() - load_shlib handler of elf_format
 * @file: open library file
 *
 * Reads the ELF header and checks the magic, then checks for ET_EXEC, at most
 * two program headers, a matching architecture and mmap support. The program
 * headers are read into a kmalloc() buffer and scanned for PT_LOAD entries
 * (presumably counted in j). The first PT_LOAD entry is mapped at its
 * page-aligned p_vaddr with MAP_FIXED and read/write/exec protection, and
 * the mapping must land exactly at that address. The rest of the last file
 * page is cleared with padzero(), and vm_brk() covers the range from len up
 * to p_vaddr + p_memsz.
 * NOTE(review): the error paths, the check on the PT_LOAD count, the
 * comparison that guards vm_brk() and the return statement are not visible
 * in this excerpt.
 */
1130 static int load_elf_library(struct file *file)
1132 struct elf_phdr *elf_phdata;
1133 struct elf_phdr *eppnt;
1134 unsigned long elf_bss, bss, len;
1135 int retval, error, i, j;
1136 struct elfhdr elf_ex;
1139 retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
1140 if (retval != sizeof(elf_ex))
1143 if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
1146 /* First of all, some simple consistency checks */
1147 if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
1148 !elf_check_arch(&elf_ex) || !file->f_op->mmap)
1151 /* Now read in all of the header information */
1153 j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
1154 /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */
1157 elf_phdata = kmalloc(j, GFP_KERNEL);
1163 retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
1167 for (j = 0, i = 0; i<elf_ex.e_phnum; i++)
1168 if ((eppnt + i)->p_type == PT_LOAD)
1173 while (eppnt->p_type != PT_LOAD)
1176 /* Now use mmap to map the library into memory. */
1177 error = vm_mmap(file,
1178 ELF_PAGESTART(eppnt->p_vaddr),
1180 ELF_PAGEOFFSET(eppnt->p_vaddr)),
1181 PROT_READ | PROT_WRITE | PROT_EXEC,
1182 MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
1184 ELF_PAGEOFFSET(eppnt->p_vaddr)));
1185 if (error != ELF_PAGESTART(eppnt->p_vaddr))
1188 elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
1189 if (padzero(elf_bss)) {
1194 len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
1196 bss = eppnt->p_memsz + eppnt->p_vaddr;
1198 error = vm_brk(len, bss - len);
1209 #endif /* #ifdef CONFIG_USELIB */
1211 #ifdef CONFIG_ELF_CORE
1215 * Modelled on fs/exec.c:aout_core_dump()
1216 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
1220 * The purpose of always_dump_vma() is to make sure that special kernel mappings
1221 * that are useful for post-mortem analysis are included in every core dump.
1222 * In that way we ensure that the core dump is fully interpretable later
1223 * without matching up the same kernel and hardware config to see what PC values
1224 * meant. These special mappings include the vDSO, vsyscall, and other
1225 * architecture-specific mappings.
/*
 * always_dump_vma() - test whether vma is a special mapping for core dumps
 * @vma: mapping under consideration
 *
 * Checks, in order: whether @vma is the gate vma of its mm, whether its
 * vm_ops provide a name callback that yields a non-NULL name, and whether
 * arch_vma_name() yields a non-NULL name.
 * NOTE(review): the return statements are not visible in this excerpt;
 * vma_dump_size() consults this function before any other test.
 */
1227 static bool always_dump_vma(struct vm_area_struct *vma)
1229 /* Any vsyscall mappings? */
1230 if (vma == get_gate_vma(vma->vm_mm))
1234 * Assume that all vmas with a .name op should always be dumped.
1235 * If this changes, a new vm_ops field can easily be added.
1237 if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
1241 * arch_vma_name() returns non-NULL for special architecture mappings,
1242 * such as vDSO sections.
1244 if (arch_vma_name(vma))
1251 * Decide what to dump of a segment, part, all or none.
/*
 * vma_dump_size() - decide how much of a vma goes into the core dump
 * @vma:      mapping under consideration
 * @mm_flags: dump filter bits, tested through FILTER() as
 *            1UL << MMF_DUMP_<type>
 *
 * Order of the checks: always_dump_vma(), VM_DONTDUMP, DAX mappings
 * (shared or private filter), hugetlb mappings (shared or private filter),
 * VM_IO, shared mappings (ANON_SHARED when the backing inode has a link
 * count of 0, else MAPPED_SHARED), private mappings with an anon_vma
 * (ANON_PRIVATE), mappings without a file, MAPPED_PRIVATE, and finally the
 * ELF_HEADERS filter, which reads the first 32-bit word of a readable
 * mapping at page offset 0 with get_user() and compares it with the ELF
 * magic.
 * The last visible return yields the whole vma length, vm_end - vm_start.
 * NOTE(review): the jump targets and the other return values are not
 * visible in this excerpt.
 */
1253 static unsigned long vma_dump_size(struct vm_area_struct *vma,
1254 unsigned long mm_flags)
1256 #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
1258 /* always dump the vdso and vsyscall sections */
1259 if (always_dump_vma(vma))
1262 if (vma->vm_flags & VM_DONTDUMP)
1265 /* support for DAX */
1266 if (vma_is_dax(vma)) {
1267 if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
1269 if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
1274 /* Hugetlb memory check */
1275 if (vma->vm_flags & VM_HUGETLB) {
1276 if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
1278 if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
1283 /* Do not dump I/O mapped devices or special mappings */
1284 if (vma->vm_flags & VM_IO)
1287 /* By default, dump shared memory if mapped from an anonymous file. */
1288 if (vma->vm_flags & VM_SHARED) {
1289 if (file_inode(vma->vm_file)->i_nlink == 0 ?
1290 FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
1295 /* Dump segments that have been written to. */
1296 if (vma->anon_vma && FILTER(ANON_PRIVATE))
1298 if (vma->vm_file == NULL)
1301 if (FILTER(MAPPED_PRIVATE))
1305 * If this looks like the beginning of a DSO or executable mapping,
1306 * check for an ELF header. If we find one, dump the first page to
1307 * aid in determining what was mapped here.
1309 if (FILTER(ELF_HEADERS) &&
1310 vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
1311 u32 __user *header = (u32 __user *) vma->vm_start;
1313 mm_segment_t fs = get_fs();
1315 * Doing it this way gets the constant folded by GCC.
1319 char elfmag[SELFMAG];
1321 BUILD_BUG_ON(SELFMAG != sizeof word);
1322 magic.elfmag[EI_MAG0] = ELFMAG0;
1323 magic.elfmag[EI_MAG1] = ELFMAG1;
1324 magic.elfmag[EI_MAG2] = ELFMAG2;
1325 magic.elfmag[EI_MAG3] = ELFMAG3;
1327 * Switch to the user "segment" for get_user(),
1328 * then put back what elf_core_dump() had in place.
1331 if (unlikely(get_user(word, header)))
1334 if (word == magic.cmp)
1343 return vma->vm_end - vma->vm_start;
1346 /* An ELF note in memory */
/*
 * Payload length in bytes; notesize() and writenote() read it as ->datasz.
 * NOTE(review): the enclosing declaration and its other members are not
 * visible in this view -- presumably struct memelfnote, whose name, type and
 * data members are used by writenote().
 */
1351 unsigned int datasz;
/*
 * notesize() - compute the number of bytes a note occupies in the core file.
 *
 * The total is the fixed struct elf_note header, the name including its NUL
 * terminator rounded up to a multiple of 4, and the payload rounded up to a
 * multiple of 4.  This matches the padding writenote() emits.
 *
 * NOTE(review): the declaration of sz and the return statement are not
 * visible here; callers add the result to running size totals.
 */
1355 static int notesize(struct memelfnote *en)
1359 sz = sizeof(struct elf_note);
1360 sz += roundup(strlen(en->name) + 1, 4);
1361 sz += roundup(en->datasz, 4);
/*
 * writenote() - emit one in-memory note to the core file.
 *
 * Builds the on-disk note header from @men (name length including the NUL,
 * payload length, type), then emits header, name, padding to 4 bytes,
 * payload, and padding to 4 bytes.
 *
 * Returns non-zero only if every dump_emit()/dump_align() step succeeded;
 * the && chain stops at the first failure.
 */
1366 static int writenote(struct memelfnote *men, struct coredump_params *cprm)
1369 en.n_namesz = strlen(men->name) + 1;
1370 en.n_descsz = men->datasz;
1371 en.n_type = men->type;
1373 return dump_emit(cprm, &en, sizeof(en)) &&
1374 dump_emit(cprm, men->name, en.n_namesz) && dump_align(cprm, 4) &&
1375 dump_emit(cprm, men->data, men->datasz) && dump_align(cprm, 4);
/*
 * fill_elf_header() - initialise an ELF file header for a core file.
 *
 * @elf:     header to fill; it is zeroed first, so fields not set below
 *           stay 0.
 * @segs:    value stored in e_phnum.
 * @machine: value stored in e_machine.
 * @flags:   value stored in e_flags.
 *
 * The program header table is declared to start immediately after the ELF
 * header (e_phoff = sizeof(struct elfhdr)).
 */
1378 static void fill_elf_header(struct elfhdr *elf, int segs,
1379 u16 machine, u32 flags)
1381 memset(elf, 0, sizeof(*elf));
/* Identification bytes: magic, class, data encoding, version, OS ABI. */
1383 memcpy(elf->e_ident, ELFMAG, SELFMAG);
1384 elf->e_ident[EI_CLASS] = ELF_CLASS;
1385 elf->e_ident[EI_DATA] = ELF_DATA;
1386 elf->e_ident[EI_VERSION] = EV_CURRENT;
1387 elf->e_ident[EI_OSABI] = ELF_OSABI;
1389 elf->e_type = ET_CORE;
1390 elf->e_machine = machine;
1391 elf->e_version = EV_CURRENT;
1392 elf->e_phoff = sizeof(struct elfhdr);
1393 elf->e_flags = flags;
1394 elf->e_ehsize = sizeof(struct elfhdr);
1395 elf->e_phentsize = sizeof(struct elf_phdr);
1396 elf->e_phnum = segs;
/*
 * fill_elf_note_phdr() - describe the notes segment in a program header.
 *
 * @phdr:   program header to fill.
 * @sz:     size of the notes data in the file, stored in p_filesz.
 * @offset: file offset of the notes data, stored in p_offset.
 *
 * NOTE(review): assignments to the remaining program header fields are not
 * visible in this view.
 */
1401 static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
1403 phdr->p_type = PT_NOTE;
1404 phdr->p_offset = offset;
1407 phdr->p_filesz = sz;
/*
 * fill_note() - set up an in-memory note descriptor.
 *
 * Callers in this file pass the note owner name ("CORE" or "LINUX"), an NT_*
 * type, the payload size in bytes and a pointer to the payload.
 *
 * NOTE(review): the body is not visible in this view; presumably it stores
 * the four values into @note without copying the payload -- confirm.
 */
1414 static void fill_note(struct memelfnote *note, const char *name, int type,
1415 unsigned int sz, void *data)
1425 * fill up all the fields in prstatus from the given task struct, except
1426 * registers which need to be filled up separately.
/*
 * fill_prstatus() - fill the non-register fields of an elf_prstatus.
 *
 * @prstatus: record to fill.
 * @p:        task being described.
 * @signr:    signal number, stored in both pr_info.si_signo and pr_cursig.
 *
 * Only the first word of the pending and blocked signal sets is recorded.
 * All IDs are translated with the *_vnr() helpers.
 *
 * NOTE(review): p->real_parent is read through rcu_dereference(); the
 * matching RCU lock/unlock lines are not visible in this view.
 */
1428 static void fill_prstatus(struct elf_prstatus *prstatus,
1429 struct task_struct *p, long signr)
1431 prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
1432 prstatus->pr_sigpend = p->pending.signal.sig[0];
1433 prstatus->pr_sighold = p->blocked.sig[0];
1435 prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1437 prstatus->pr_pid = task_pid_vnr(p);
1438 prstatus->pr_pgrp = task_pgrp_vnr(p);
1439 prstatus->pr_sid = task_session_vnr(p);
1440 if (thread_group_leader(p)) {
1441 struct task_cputime cputime;
1444 * This is the record for the group leader. It shows the
1445 * group-wide total, not its individual thread total.
1447 thread_group_cputime(p, &cputime);
1448 prstatus->pr_utime = ns_to_timeval(cputime.utime);
1449 prstatus->pr_stime = ns_to_timeval(cputime.stime);
/* The other visible path records this task's own user/system time. */
1453 task_cputime(p, &utime, &stime);
1454 prstatus->pr_utime = ns_to_timeval(utime);
1455 prstatus->pr_stime = ns_to_timeval(stime);
/* The cumulative times come from the shared signal struct. */
1458 prstatus->pr_cutime = ns_to_timeval(p->signal->cutime);
1459 prstatus->pr_cstime = ns_to_timeval(p->signal->cstime);
/*
 * fill_psinfo() - fill an elf_prpsinfo record for a process.
 *
 * @psinfo: record to fill; it is zeroed first.
 * @p:      task whose IDs, state, nice value, flags, credentials and comm
 *          are recorded.
 * @mm:     address space whose argument area [arg_start, arg_end) is copied
 *          from user space into pr_psargs.
 *
 * NOTE(review): the failure handling after copy_from_user() and the return
 * statements are not visible in this view.
 */
1462 static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
1463 struct mm_struct *mm)
1465 const struct cred *cred;
1466 unsigned int i, len;
1468 /* first copy the parameters from user space */
1469 memset(psinfo, 0, sizeof(struct elf_prpsinfo));
/* Clamp to ELF_PRARGSZ - 1 so the terminator stored below still fits. */
1471 len = mm->arg_end - mm->arg_start;
1472 if (len >= ELF_PRARGSZ)
1473 len = ELF_PRARGSZ-1;
1474 if (copy_from_user(&psinfo->pr_psargs,
1475 (const char __user *)mm->arg_start, len))
/* NUL separators between arguments become spaces, giving one printable string. */
1477 for(i = 0; i < len; i++)
1478 if (psinfo->pr_psargs[i] == 0)
1479 psinfo->pr_psargs[i] = ' ';
1480 psinfo->pr_psargs[len] = 0;
1483 psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
1485 psinfo->pr_pid = task_pid_vnr(p);
1486 psinfo->pr_pgrp = task_pgrp_vnr(p);
1487 psinfo->pr_sid = task_session_vnr(p);
/*
 * State index: 0 when p->state is 0, otherwise one more than the position
 * of the lowest set bit.  Indices above 5 have no letter and are shown as '.'.
 */
1489 i = p->state ? ffz(~p->state) + 1 : 0;
1490 psinfo->pr_state = i;
1491 psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
1492 psinfo->pr_zomb = psinfo->pr_sname == 'Z';
1493 psinfo->pr_nice = task_nice(p);
1494 psinfo->pr_flag = p->flags;
/* uid/gid are translated into the credential's own user namespace. */
1496 cred = __task_cred(p);
1497 SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
1498 SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
/*
 * strncpy() adds no terminator when the source fills the whole field, so
 * pr_fname is NUL-terminated only if comm is shorter than the field.
 */
1500 strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));
/*
 * fill_auxv_note() - build the NT_AUXV note from the saved auxiliary vector.
 *
 * The note payload points directly at mm->saved_auxv (no copy is made here)
 * and is i * sizeof(elf_addr_t) bytes long.  The loop condition stops once
 * the entry two slots before index i is AT_NULL.
 *
 * NOTE(review): the declaration of i and the loop body are not visible in
 * this view.
 */
1505 static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
1507 elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
1511 while (auxv[i - 2] != AT_NULL);
1512 fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
/*
 * fill_siginfo_note() - build the NT_SIGINFO note.
 *
 * @note:     note descriptor to fill.
 * @csigdata: caller-provided buffer that receives the converted siginfo and
 *            becomes the note payload.
 * @siginfo:  signal information to convert.
 *
 * copy_siginfo_to_user() is called with @csigdata cast to a __user pointer.
 *
 * NOTE(review): the current address limit is saved in old_fs, but the
 * set_fs() calls that would switch and restore it are not visible in this
 * view -- presumably they bracket the copy; confirm.
 */
1515 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
1516 const siginfo_t *siginfo)
1518 mm_segment_t old_fs = get_fs();
1520 copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
1522 fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
/* Limit (4 MiB) that fill_files_note() checks its estimated note size against. */
1525 #define MAX_FILE_NOTE_SIZE (4*1024*1024)
1527 * Format of NT_FILE note:
1529 * long count -- how many files are mapped
1530 * long page_size -- units for file_ofs
1531 * array of [COUNT] elements of
1535 * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
/*
 * fill_files_note() - build the NT_FILE note listing file-backed mappings.
 *
 * The buffer holds two header words (data[0] and data[1]), then a table of
 * three words per entry (vm_start, vm_end, vm_pgoff), then the file names
 * packed back to back.  The table is first sized for
 * current->mm->map_count entries; once the VMA walk is finished the names
 * are moved down to close the gap left by the unused table slots.
 *
 * The payload is vmalloc()ed; the free_note_info() variants release it with
 * vfree().  Callers treat a return value of 0 as success.
 *
 * NOTE(review): the size estimate, the allocation-failure path, the retry
 * after -ENAMETOOLONG, the skip for VMAs without a file and the return
 * statements are not visible in this view.
 */
1537 static int fill_files_note(struct memelfnote *note)
1539 struct vm_area_struct *vma;
1540 unsigned count, size, names_ofs, remaining, n;
1542 user_long_t *start_end_ofs;
1543 char *name_base, *name_curpos;
1545 /* *Estimated* file count and total data size needed */
1546 count = current->mm->map_count;
/* Names start after the 2 header words and 3 words per estimated entry. */
1549 names_ofs = (2 + 3 * count) * sizeof(data[0]);
1551 if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
1553 size = round_up(size, PAGE_SIZE);
1554 data = vmalloc(size);
1558 start_end_ofs = data + 2;
1559 name_base = name_curpos = ((char *)data) + names_ofs;
1560 remaining = size - names_ofs;
1562 for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
1564 const char *filename;
1566 file = vma->vm_file;
1569 filename = file_path(file, name_curpos, remaining);
1570 if (IS_ERR(filename)) {
1571 if (PTR_ERR(filename) == -ENAMETOOLONG) {
/* The buffer was too small for this path: grow the size by a quarter. */
1573 size = size * 5 / 4;
1579 /* file_path() fills at the end, move name down */
1580 /* n = strlen(filename) + 1: */
1581 n = (name_curpos + remaining) - filename;
1582 remaining = filename - name_curpos;
1583 memmove(name_curpos, filename, n);
1586 *start_end_ofs++ = vma->vm_start;
1587 *start_end_ofs++ = vma->vm_end;
1588 *start_end_ofs++ = vma->vm_pgoff;
1592 /* Now we know exact count of files, can store it */
1594 data[1] = PAGE_SIZE;
1596 * Count usually is less than current->mm->map_count,
1597 * we need to move filenames down.
1599 n = current->mm->map_count - count;
1601 unsigned shift_bytes = n * 3 * sizeof(data[0]);
1602 memmove(name_base - shift_bytes, name_base,
1603 name_curpos - name_base);
1604 name_curpos -= shift_bytes;
/* The final payload size runs from the start of the buffer to the end of the last name. */
1607 size = name_curpos - (char *)data;
1608 fill_note(note, "CORE", NT_FILE, size, data);
1612 #ifdef CORE_DUMP_USE_REGSET
1613 #include <linux/regset.h>
/*
 * Per-thread state collected by the regset-based core dump path.
 * NOTE(review): the end of this declaration is not visible in this view.
 */
1615 struct elf_thread_core_info {
/* Next entry in the singly linked list headed by elf_note_info.thread. */
1616 struct elf_thread_core_info *next;
/* Task whose status and registers fill_thread_core_info() reads. */
1617 struct task_struct *task;
/* Storage that notes[0] (the NT_PRSTATUS note) points at. */
1618 struct elf_prstatus prstatus;
/* Trailing array; fill_note_info() sizes the allocation for info->thread_notes entries. */
1619 struct memelfnote notes[0];
/*
 * Note data gathered for one core dump (regset-based variant).
 * NOTE(review): further members (the code below uses info->size and
 * info->thread_notes) are not visible in this view.
 */
1622 struct elf_note_info {
/* Head of the per-thread list. */
1623 struct elf_thread_core_info *thread;
/* Process-wide notes written once, alongside the first thread's notes. */
1624 struct memelfnote psinfo;
1625 struct memelfnote signote;
1626 struct memelfnote auxv;
1627 struct memelfnote files;
/* Buffer that fill_siginfo_note() fills and uses as the signote payload. */
1628 user_siginfo_t csigdata;
1634 * When a regset has a writeback hook, we call it on each thread before
1635 * dumping user memory. On register window machines, this makes sure the
1636 * user memory backing the register data is up to date before we read it.
/*
 * do_thread_regset_writeback() - call a regset's optional writeback hook.
 *
 * Does nothing when the regset has no writeback hook; otherwise calls it for
 * @task with a final argument of 1.
 */
1638 static void do_thread_regset_writeback(struct task_struct *task,
1639 const struct user_regset *regset)
1641 if (regset->writeback)
1642 regset->writeback(task, regset, 1);
/*
 * Defaults used only when these macros are not already defined.
 * PRSTATUS_SIZE ignores R and yields sizeof(S).
 * SET_PR_FPVALID ignores R and stores V in (S)->pr_fpvalid.
 */
1645 #ifndef PRSTATUS_SIZE
1646 #define PRSTATUS_SIZE(S, R) sizeof(S)
1649 #ifndef SET_PR_FPVALID
1650 #define SET_PR_FPVALID(S, V, R) ((S)->pr_fpvalid = (V))
/*
 * fill_thread_core_info() - collect all notes for one thread.
 *
 * @t:     per-thread record; t->task selects the thread.
 * @view:  regset view that enumerates the register sets.
 * @signr: signal number recorded in the prstatus.
 * @total: running byte total; the size of every note created is added.
 *
 * notes[0] is always the NT_PRSTATUS note backed by t->prstatus.  Each
 * further regset that has a core_note_type, a get hook, and either no
 * active hook or an active hook that returns non-zero gets a kmalloc()ed
 * buffer of n * size bytes.  The writeback hook is called for every regset
 * regardless.
 *
 * The caller treats a return value of 0 as failure.
 *
 * NOTE(review): the handling of allocation failure and of the get() result,
 * and the return statements, are not visible in this view.
 */
1653 static int fill_thread_core_info(struct elf_thread_core_info *t,
1654 const struct user_regset_view *view,
1655 long signr, size_t *total)
1658 unsigned int regset_size = view->regsets[0].n * view->regsets[0].size;
1661 * NT_PRSTATUS is the one special case, because the regset data
1662 * goes into the pr_reg field inside the note contents, rather
1663 * than being the whole note contents. We fill the reset in here.
1664 * We assume that regset 0 is NT_PRSTATUS.
1666 fill_prstatus(&t->prstatus, t->task, signr);
/* The result of get() for regset 0 is deliberately discarded. */
1667 (void) view->regsets[0].get(t->task, &view->regsets[0], 0, regset_size,
1668 &t->prstatus.pr_reg, NULL);
1670 fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1671 PRSTATUS_SIZE(t->prstatus, regset_size), &t->prstatus);
1672 *total += notesize(&t->notes[0]);
1674 do_thread_regset_writeback(t->task, &view->regsets[0]);
1677 * Each other regset might generate a note too. For each regset
1678 * that has no core_note_type or is inactive, we leave t->notes[i]
1679 * all zero and we'll know to skip writing it later.
1681 for (i = 1; i < view->n; ++i) {
1682 const struct user_regset *regset = &view->regsets[i];
1683 do_thread_regset_writeback(t->task, regset);
1684 if (regset->core_note_type && regset->get &&
1685 (!regset->active || regset->active(t->task, regset))) {
1687 size_t size = regset->n * regset->size;
1688 void *data = kmalloc(size, GFP_KERNEL);
1689 if (unlikely(!data))
1691 ret = regset->get(t->task, regset,
1692 0, size, data, NULL);
/* NT_PRFPREG is named "CORE" and also updates pr_fpvalid; other types are named "LINUX". */
1696 if (regset->core_note_type != NT_PRFPREG)
1697 fill_note(&t->notes[i], "LINUX",
1698 regset->core_note_type,
1701 SET_PR_FPVALID(&t->prstatus,
1703 fill_note(&t->notes[i], "CORE",
1704 NT_PRFPREG, size, data);
1706 *total += notesize(&t->notes[i]);
/*
 * fill_note_info() - gather all note data for a core dump (regset variant).
 *
 * @elf:     ELF header, initialised here from the regset view's e_machine
 *           and e_flags.
 * @phdrs:   program header count passed on to fill_elf_header().
 * @info:    receives the per-thread list, the process-wide notes and the
 *           accumulated size.
 * @siginfo: signal information; si_signo is recorded for every thread.
 * @regs:    not referenced by any line visible in this view.
 *
 * One elf_thread_core_info is allocated per entry on the core_state dumper
 * list, with the dumping task kept at the head.  The psinfo, siginfo, auxv
 * and, when fill_files_note() returns 0, file-mapping notes follow.
 * elf_core_dump() treats a return value of 0 as failure.
 *
 * NOTE(review): the error returns, several declarations and the assignments
 * that link each new entry into the list are not visible in this view.
 */
1714 static int fill_note_info(struct elfhdr *elf, int phdrs,
1715 struct elf_note_info *info,
1716 const siginfo_t *siginfo, struct pt_regs *regs)
1718 struct task_struct *dump_task = current;
1719 const struct user_regset_view *view = task_user_regset_view(dump_task);
1720 struct elf_thread_core_info *t;
1721 struct elf_prpsinfo *psinfo;
1722 struct core_thread *ct;
1726 info->thread = NULL;
1728 psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1729 if (psinfo == NULL) {
1730 info->psinfo.data = NULL; /* So we don't free this wrongly */
1734 fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1737 * Figure out how many notes we're going to need for each thread.
/* Only regsets with a non-zero core_note_type are counted. */
1739 info->thread_notes = 0;
1740 for (i = 0; i < view->n; ++i)
1741 if (view->regsets[i].core_note_type != 0)
1742 ++info->thread_notes;
1745 * Sanity check. We rely on regset 0 being in NT_PRSTATUS,
1746 * since it is our one special case.
1748 if (unlikely(info->thread_notes == 0) ||
1749 unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1755 * Initialize the ELF file header.
1757 fill_elf_header(elf, phdrs,
1758 view->e_machine, view->e_flags);
1761 * Allocate a structure for each thread.
1763 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
/* Each allocation is sized to include thread_notes trailing notes[] entries. */
1764 t = kzalloc(offsetof(struct elf_thread_core_info,
1765 notes[info->thread_notes]),
1771 if (ct->task == dump_task || !info->thread) {
1772 t->next = info->thread;
1776 * Make sure to keep the original task at
1777 * the head of the list.
1779 t->next = info->thread->next;
1780 info->thread->next = t;
1785 * Now fill in each thread's information.
1787 for (t = info->thread; t != NULL; t = t->next)
1788 if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
1792 * Fill in the two process-wide notes.
/* The psinfo note describes the thread group leader, not the dumping thread. */
1794 fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1795 info->size += notesize(&info->psinfo);
1797 fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
1798 info->size += notesize(&info->signote);
1800 fill_auxv_note(&info->auxv, current->mm);
1801 info->size += notesize(&info->auxv);
/* The files note is counted only when fill_files_note() returns 0. */
1803 if (fill_files_note(&info->files) == 0)
1804 info->size += notesize(&info->files);
/*
 * get_note_info_size() - total size of the notes (regset variant).
 * NOTE(review): the body is not visible in this view; presumably it returns
 * the info->size total accumulated by fill_note_info() -- confirm.
 */
1809 static size_t get_note_info_size(struct elf_note_info *info)
1815 * Write all the notes for each thread. When writing the first thread, the
1816 * process-wide notes are interleaved after the first thread-specific note.
/*
 * write_note_info() - emit all collected notes (regset variant).
 *
 * For a thread, notes[0] is written first.  While "first" is set, the
 * psinfo, siginfo and auxv notes follow, and the files note if it has data.
 * Then come the thread's remaining notes, skipping slots whose data pointer
 * is NULL.
 *
 * NOTE(review): the loop over threads, the declaration and clearing of
 * "first", and the return statements are not visible in this view;
 * elf_core_dump() treats a return value of 0 as failure.
 */
1818 static int write_note_info(struct elf_note_info *info,
1819 struct coredump_params *cprm)
1822 struct elf_thread_core_info *t = info->thread;
1827 if (!writenote(&t->notes[0], cprm))
1830 if (first && !writenote(&info->psinfo, cprm))
1832 if (first && !writenote(&info->signote, cprm))
1834 if (first && !writenote(&info->auxv, cprm))
1836 if (first && info->files.data &&
1837 !writenote(&info->files, cprm))
1840 for (i = 1; i < info->thread_notes; ++i)
1841 if (t->notes[i].data &&
1842 !writenote(&t->notes[i], cprm))
/*
 * free_note_info() - release what fill_note_info() allocated (regset variant).
 *
 * Per thread: notes[0] is expected to be unset or to point at the embedded
 * prstatus (a warning is raised otherwise) and is not freed; the data of
 * notes[1..thread_notes) is kfree()d.  The psinfo payload is kfree()d and
 * the files payload is vfree()d.
 *
 * NOTE(review): the list walk and the freeing of each per-thread record are
 * not visible in this view.
 */
1852 static void free_note_info(struct elf_note_info *info)
1854 struct elf_thread_core_info *threads = info->thread;
1857 struct elf_thread_core_info *t = threads;
1859 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1860 for (i = 1; i < info->thread_notes; ++i)
1861 kfree(t->notes[i].data);
1864 kfree(info->psinfo.data);
/* The files payload came from vmalloc() in fill_files_note(), hence vfree(). */
1865 vfree(info->files.data);
1870 /* Here is the structure in which status of each thread is captured. */
/*
 * Used by the non-regset path; entries are linked on
 * elf_note_info.thread_list.
 * NOTE(review): at least one member (num_notes, used by write_note_info())
 * and the end of the declaration are not visible in this view.
 */
1871 struct elf_thread_status
1873 struct list_head list;
1874 struct elf_prstatus prstatus; /* NT_PRSTATUS */
1875 elf_fpregset_t fpu; /* NT_PRFPREG */
/* Task this record describes. */
1876 struct task_struct *thread;
1877 #ifdef ELF_CORE_COPY_XFPREGS
1878 elf_fpxregset_t xfpu; /* ELF_CORE_XFPREG_TYPE */
/* Slots: [0] prstatus, [1] FP registers, [2] extended FP registers. */
1880 struct memelfnote notes[3];
1885 * In order to add the specific thread information for the elf file format,
1886 * we need to keep a linked list of every threads pr_status and then create
1887 * a single section for them in the final core file.
/*
 * elf_dump_thread_status() - fill the notes for one non-dumping thread.
 *
 * @signr: signal number recorded in the thread's prstatus.
 * @t:     per-thread record; t->thread selects the task.
 *
 * notes[0] always becomes the NT_PRSTATUS note.  notes[1] (NT_PRFPREG) and,
 * under ELF_CORE_COPY_XFPREGS, notes[2] are filled only when the matching
 * copy helper reports success.  sz accumulates notesize() of every note
 * filled.
 *
 * NOTE(review): the initialisation of sz, the num_notes updates and the
 * return statement are not visible in this view; the caller adds the result
 * to info->thread_status_size.
 */
1889 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1892 struct task_struct *p = t->thread;
1895 fill_prstatus(&t->prstatus, p, signr);
1896 elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1898 fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1901 sz += notesize(&t->notes[0]);
/* pr_fpvalid records whether the FP register copy succeeded. */
1903 if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1905 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1908 sz += notesize(&t->notes[1]);
1911 #ifdef ELF_CORE_COPY_XFPREGS
1912 if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1913 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1914 sizeof(t->xfpu), &t->xfpu);
1916 sz += notesize(&t->notes[2]);
/*
 * Note data gathered for one core dump (non-regset variant).
 * NOTE(review): at least one member (numnote, used by the functions below)
 * and the end of the declaration are not visible in this view.
 */
1922 struct elf_note_info {
/* Array of notes for the dumping task, allocated by elf_note_info_init(). */
1923 struct memelfnote *notes;
/* Points at the NT_FILE entry inside notes[] when one was built, else NULL. */
1924 struct memelfnote *notes_files;
1925 struct elf_prstatus *prstatus; /* NT_PRSTATUS */
1926 struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
/* List of struct elf_thread_status, one per other thread. */
1927 struct list_head thread_list;
1928 elf_fpregset_t *fpu;
1929 #ifdef ELF_CORE_COPY_XFPREGS
1930 elf_fpxregset_t *xfpu;
/* Buffer that fill_siginfo_note() fills and uses as the NT_SIGINFO payload. */
1932 user_siginfo_t csigdata;
/* Sum of the sizes returned by elf_dump_thread_status(). */
1933 int thread_status_size;
/*
 * elf_note_info_init() - zero @info and allocate its fixed buffers.
 *
 * Allocates the notes array (8 entries), psinfo, prstatus, fpu and, under
 * ELF_CORE_COPY_XFPREGS, xfpu.  Because @info is zeroed first, pointers that
 * were never allocated stay NULL.
 *
 * fill_note_info() treats a return value of 0 as failure.
 *
 * NOTE(review): most of the allocation-failure checks and the return
 * statements are not visible in this view.
 */
1937 static int elf_note_info_init(struct elf_note_info *info)
1939 memset(info, 0, sizeof(*info));
1940 INIT_LIST_HEAD(&info->thread_list);
1942 /* Allocate space for ELF notes */
/* fill_note_info() uses slots 0-3 plus up to three optional notes. */
1943 info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
1946 info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1949 info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1950 if (!info->prstatus)
1952 info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1955 #ifdef ELF_CORE_COPY_XFPREGS
1956 info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
/*
 * fill_note_info() - gather all note data for a core dump (non-regset variant).
 *
 * @elf:     ELF header, initialised here with ELF_ARCH and ELF_CORE_EFLAGS.
 * @phdrs:   program header count passed on to fill_elf_header().
 * @info:    initialised by elf_note_info_init() and then filled.
 * @siginfo: signal information; si_signo is recorded for every thread.
 * @regs:    registers of the dumping task, copied into its prstatus.
 *
 * Threads other than the dumper (the walk starts at core_state->dumper.next)
 * each get an elf_thread_status on info->thread_list.  The dumping task's
 * own notes occupy fixed slots 0-3 (prstatus, psinfo, siginfo, auxv),
 * followed by the optional files, FPU and extended-FPU notes at
 * info->numnote.
 *
 * elf_core_dump() treats a return value of 0 as failure.
 *
 * NOTE(review): the error returns and the numnote updates around the fixed
 * slots and the files note are not visible in this view.
 */
1963 static int fill_note_info(struct elfhdr *elf, int phdrs,
1964 struct elf_note_info *info,
1965 const siginfo_t *siginfo, struct pt_regs *regs)
1967 struct list_head *t;
1968 struct core_thread *ct;
1969 struct elf_thread_status *ets;
1971 if (!elf_note_info_init(info))
1974 for (ct = current->mm->core_state->dumper.next;
1975 ct; ct = ct->next) {
1976 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1980 ets->thread = ct->task;
1981 list_add(&ets->list, &info->thread_list);
1984 list_for_each(t, &info->thread_list) {
1987 ets = list_entry(t, struct elf_thread_status, list);
1988 sz = elf_dump_thread_status(siginfo->si_signo, ets);
1989 info->thread_status_size += sz;
1991 /* now collect the dump for the current */
1992 memset(info->prstatus, 0, sizeof(*info->prstatus));
1993 fill_prstatus(info->prstatus, current, siginfo->si_signo);
1994 elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1997 fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS);
2000 * Set up the notes in similar form to SVR4 core dumps made
2001 * with info from their /proc.
2004 fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
2005 sizeof(*info->prstatus), info->prstatus);
/* The psinfo note describes the thread group leader, not the dumping thread. */
2006 fill_psinfo(info->psinfo, current->group_leader, current->mm);
2007 fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
2008 sizeof(*info->psinfo), info->psinfo);
2010 fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
2011 fill_auxv_note(info->notes + 3, current->mm);
/* notes_files records the slot so free_note_info() can vfree() its payload. */
2014 if (fill_files_note(info->notes + info->numnote) == 0) {
2015 info->notes_files = info->notes + info->numnote;
2019 /* Try to dump the FPU. */
2020 info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
2022 if (info->prstatus->pr_fpvalid)
2023 fill_note(info->notes + info->numnote++,
2024 "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
2025 #ifdef ELF_CORE_COPY_XFPREGS
2026 if (elf_core_copy_task_xfpregs(current, info->xfpu))
2027 fill_note(info->notes + info->numnote++,
2028 "LINUX", ELF_CORE_XFPREG_TYPE,
2029 sizeof(*info->xfpu), info->xfpu);
/*
 * get_note_info_size() - total size of the notes (non-regset variant).
 *
 * Adds notesize() of each of the info->numnote notes of the dumping task to
 * the total already accumulated for the other threads.
 *
 * NOTE(review): the declarations and the return statement are not visible
 * in this view.
 */
2035 static size_t get_note_info_size(struct elf_note_info *info)
2040 for (i = 0; i < info->numnote; i++)
2041 sz += notesize(info->notes + i);
2043 sz += info->thread_status_size;
/*
 * write_note_info() - emit all collected notes (non-regset variant).
 *
 * The dumping task's notes are written first, in array order, followed by
 * the notes of every entry on info->thread_list.
 *
 * NOTE(review): the failure branches and the return statements are not
 * visible in this view; elf_core_dump() treats a return value of 0 as
 * failure.
 */
2048 static int write_note_info(struct elf_note_info *info,
2049 struct coredump_params *cprm)
2052 struct list_head *t;
2054 for (i = 0; i < info->numnote; i++)
2055 if (!writenote(info->notes + i, cprm))
2058 /* write out the thread status notes section */
2059 list_for_each(t, &info->thread_list) {
2060 struct elf_thread_status *tmp =
2061 list_entry(t, struct elf_thread_status, list);
2063 for (i = 0; i < tmp->num_notes; i++)
2064 if (!writenote(&tmp->notes[i], cprm))
/*
 * free_note_info() - release what fill_note_info() allocated (non-regset
 * variant).
 *
 * Frees every elf_thread_status on the thread list, the NT_FILE payload if
 * one was recorded, and the prstatus and psinfo buffers.
 *
 * NOTE(review): the removal of each entry from the list before it is freed,
 * and the remaining frees (notes array, fpu, xfpu), are not visible in this
 * view.
 */
2071 static void free_note_info(struct elf_note_info *info)
2073 while (!list_empty(&info->thread_list)) {
2074 struct list_head *tmp = info->thread_list.next;
2076 kfree(list_entry(tmp, struct elf_thread_status, list));
2079 /* Free data possibly allocated by fill_files_note(): */
2080 if (info->notes_files)
2081 vfree(info->notes_files->data);
2083 kfree(info->prstatus);
2084 kfree(info->psinfo);
2087 #ifdef ELF_CORE_COPY_XFPREGS
/*
 * first_vma() - starting point for a walk over a task's VMAs plus the gate
 * VMA; it begins with the head of tsk->mm->mmap.
 *
 * NOTE(review): the remaining lines are not visible in this view --
 * presumably @gate_vma is returned when the task has no VMAs; confirm.
 */
2094 static struct vm_area_struct *first_vma(struct task_struct *tsk,
2095 struct vm_area_struct *gate_vma)
2097 struct vm_area_struct *ret = tsk->mm->mmap;
2104 * Helper function for iterating across a vma list. It ensures that the caller
2105 * will visit `gate_vma' prior to terminating the search.
/*
 * next_vma() - advance a walk started with first_vma().
 *
 * The candidate successor is this_vma->vm_next; a separate test recognises
 * the case where @this_vma is already the gate VMA.
 *
 * NOTE(review): the return statements are not visible in this view --
 * presumably the gate VMA is returned after the last regular VMA, and NULL
 * after the gate VMA itself; confirm.
 */
2107 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
2108 struct vm_area_struct *gate_vma)
2110 struct vm_area_struct *ret;
2112 ret = this_vma->vm_next;
2115 if (this_vma == gate_vma)
/*
 * fill_extnum_info() - set up the section header that carries the real
 * segment count under extended numbering.
 *
 * @elf:         ELF header; receives e_shoff, e_shentsize and e_shstrndx.
 * @shdr4extnum: section header to fill; it is zeroed first.
 * @e_shoff:     file offset at which the section header will be written.
 * @segs:        real segment count, stored in sh_info.
 *
 * NOTE(review): the assignment to elf->e_shnum, which sh_size copies, is
 * not visible in this view.
 */
2120 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
2121 elf_addr_t e_shoff, int segs)
2123 elf->e_shoff = e_shoff;
2124 elf->e_shentsize = sizeof(*shdr4extnum);
2126 elf->e_shstrndx = SHN_UNDEF;
2128 memset(shdr4extnum, 0, sizeof(*shdr4extnum));
2130 shdr4extnum->sh_type = SHT_NULL;
2131 shdr4extnum->sh_size = elf->e_shnum;
2132 shdr4extnum->sh_link = elf->e_shstrndx;
2133 shdr4extnum->sh_info = segs;
2139 * This is a two-pass process; first we find the offsets of the bits,
2140 * and then they are actually written out. If we run out of core limit
/*
 * elf_core_dump() - write an ELF core file for the current process.
 *
 * Visible sequence:
 *  1. Allocate the ELF header and count segments: current->mm->map_count
 *     plus elf_core_extra_phdrs().  The gate VMA and the notes segment are
 *     handled at the points marked below.
 *  2. fill_note_info() gathers the note data and initialises the ELF
 *     header, with e_phnum capped at PN_XNUM.
 *  3. Lay out file offsets: ELF header, program headers, notes, then VMA
 *     data starting at an ELF_EXEC_PAGESIZE-aligned offset (dataoff).
 *  4. Evaluate vma_dump_size() once per VMA and cache it in vma_filesz[],
 *     so the program headers and the data pass use the same sizes.
 *  5. Emit the ELF header, the note program header, one PT_LOAD header per
 *     VMA, extra headers, the notes, and then the page contents.
 *  6. When e_phnum == PN_XNUM, emit the extended-numbering section header
 *     last.
 *  7. free_note_info() releases the note data.
 *
 * NOTE(review): the error branches and labels, the remaining frees, several
 * declarations and the return value are not visible in this view.
 */
2143 static int elf_core_dump(struct coredump_params *cprm)
2148 size_t vma_data_size = 0;
2149 struct vm_area_struct *vma, *gate_vma;
2150 struct elfhdr *elf = NULL;
2151 loff_t offset = 0, dataoff;
2152 struct elf_note_info info = { };
2153 struct elf_phdr *phdr4note = NULL;
2154 struct elf_shdr *shdr4extnum = NULL;
2157 elf_addr_t *vma_filesz = NULL;
2160 * We no longer stop all VM operations.
2162 * This is because those proceses that could possibly change map_count
2163 * or the mmap / vma pages are now blocked in do_exit on current
2164 * finishing this core dump.
2166 * Only ptrace can touch these memory addresses, but it doesn't change
2167 * the map_count or the pages allocated. So no possibility of crashing
2168 * exists while dumping the mm->vm_next areas to the core file.
2171 /* alloc memory for large data structures: too large to be on stack */
2172 elf = kmalloc(sizeof(*elf), GFP_KERNEL);
2176 * The number of segs are recored into ELF header as 16bit value.
2177 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
2179 segs = current->mm->map_count;
2180 segs += elf_core_extra_phdrs();
/* NOTE(review): presumably segs is incremented when a gate VMA exists; that line is not visible. */
2182 gate_vma = get_gate_vma(current->mm);
2183 if (gate_vma != NULL)
2186 /* for notes section */
2189 /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
2190 * this, kernel supports extended numbering. Have a look at
2191 * include/linux/elf.h for further information. */
2192 e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
2195 * Collect all the non-memory information about the process for the
2196 * notes. This also sets up the file header.
2198 if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
/* The offsets use the real segment count (segs), not the capped e_phnum. */
2206 offset += sizeof(*elf); /* Elf header */
2207 offset += segs * sizeof(struct elf_phdr); /* Program headers */
2209 /* Write notes phdr entry */
2211 size_t sz = get_note_info_size(&info);
2213 sz += elf_coredump_extra_notes_size();
2215 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
2219 fill_elf_note_phdr(phdr4note, sz, offset);
/* Segment data starts at the next ELF_EXEC_PAGESIZE boundary. */
2223 dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
/* Guard the (segs - 1) * sizeof(*vma_filesz) multiplication below against overflow. */
2225 if (segs - 1 > ULONG_MAX / sizeof(*vma_filesz))
2227 vma_filesz = vmalloc((segs - 1) * sizeof(*vma_filesz));
/* First pass: record the dump size of every VMA and add them up. */
2231 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2232 vma = next_vma(vma, gate_vma)) {
2233 unsigned long dump_size;
2235 dump_size = vma_dump_size(vma, cprm->mm_flags);
2236 vma_filesz[i++] = dump_size;
2237 vma_data_size += dump_size;
2240 offset += vma_data_size;
2241 offset += elf_core_extra_data_size();
2244 if (e_phnum == PN_XNUM) {
2245 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2248 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2253 if (!dump_emit(cprm, elf, sizeof(*elf)))
2256 if (!dump_emit(cprm, phdr4note, sizeof(*phdr4note)))
2259 /* Write program headers for segments dump */
2260 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2261 vma = next_vma(vma, gate_vma)) {
2262 struct elf_phdr phdr;
2264 phdr.p_type = PT_LOAD;
2265 phdr.p_offset = offset;
2266 phdr.p_vaddr = vma->vm_start;
/* p_filesz is the cached dump size; p_memsz is always the full VMA length. */
2268 phdr.p_filesz = vma_filesz[i++];
2269 phdr.p_memsz = vma->vm_end - vma->vm_start;
2270 offset += phdr.p_filesz;
2271 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2272 if (vma->vm_flags & VM_WRITE)
2273 phdr.p_flags |= PF_W;
2274 if (vma->vm_flags & VM_EXEC)
2275 phdr.p_flags |= PF_X;
2276 phdr.p_align = ELF_EXEC_PAGESIZE;
2278 if (!dump_emit(cprm, &phdr, sizeof(phdr)))
2282 if (!elf_core_write_extra_phdrs(cprm, offset))
2285 /* write out the notes section */
2286 if (!write_note_info(&info, cprm))
/* Unlike the checks above, this one has no '!': a non-zero result takes the branch. */
2289 if (elf_coredump_extra_notes_write(cprm))
/* Pad the file up to dataoff, where the segment data begins. */
2293 if (!dump_skip(cprm, dataoff - cprm->pos))
2296 for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
2297 vma = next_vma(vma, gate_vma)) {
/* Only the first vma_filesz[i] bytes of each VMA are written. */
2301 end = vma->vm_start + vma_filesz[i++];
2303 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
/*
 * NOTE(review): presumably a page returned by get_dump_page() is emitted
 * and a missing one becomes a PAGE_SIZE hole via dump_skip(); the test
 * that selects between them is not visible.
 */
2307 page = get_dump_page(addr);
2309 void *kaddr = kmap(page);
2310 stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
2314 stop = !dump_skip(cprm, PAGE_SIZE);
2319 dump_truncate(cprm);
2321 if (!elf_core_write_extra_data(cprm))
2324 if (e_phnum == PN_XNUM) {
2325 if (!dump_emit(cprm, shdr4extnum, sizeof(*shdr4extnum)))
2333 free_note_info(&info);
2342 #endif /* CONFIG_ELF_CORE */
/*
 * init_elf_binfmt() - register the ELF binary format handler; run through
 * core_initcall().
 * NOTE(review): the return statement is not visible in this view.
 */
2344 static int __init init_elf_binfmt(void)
2346 register_binfmt(&elf_format);
/* exit_elf_binfmt() - undo init_elf_binfmt(); hooked up through module_exit(). */
2350 static void __exit exit_elf_binfmt(void)
2352 /* Unregister the ELF binary format handler (the only one registered here). */
2353 unregister_binfmt(&elf_format);
/* Hook the init and exit functions into the kernel and declare the license. */
2356 core_initcall(init_elf_binfmt);
2357 module_exit(exit_elf_binfmt);
2358 MODULE_LICENSE("GPL");