subsystem. Also has support for calculating CPU cycle events
to determine how many clock cycles in a given period.
+config HAVE_PERF_REGS
+ bool
+ help
+ Support selective register dumps for perf events. This includes
+ bit-mapping of each registers and a unique architecture id.
+
+config HAVE_PERF_USER_STACK_DUMP
+ bool
+ help
+ Support user stack dumps for perf event samples. This needs
+ access to the user stack pointer which is not unified across
+ architectures.
+
config HAVE_ARCH_JUMP_LABEL
bool
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
bool
+ config GENERIC_KERNEL_THREAD
+ bool
+
config HAVE_ARCH_SECCOMP_FILTER
bool
help
See Documentation/prctl/seccomp_filter.txt for details.
+config HAVE_RCU_USER_QS
+ bool
+ help
+ Provide kernel entry/exit hooks necessary for userspace
+ RCU extended quiescent state. Syscalls need to be wrapped inside
+ rcu_user_exit()-rcu_user_enter() through the slow path using
+ TIF_NOHZ flag. Exceptions handlers must be wrapped as well. Irqs
+ are already protected inside rcu_irq_enter/rcu_irq_exit() but
+ preemption or signal handling on irq exit still need to be protected.
+
+config HAVE_VIRT_CPU_ACCOUNTING
+ bool
+
+config HAVE_IRQ_TIME_ACCOUNTING
+ bool
+ help
+ Archs need to ensure they use a high enough resolution clock to
+ support irq time accounting and then call enable_sched_clock_irqtime().
+
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ bool
+
+config HAVE_MOD_ARCH_SPECIFIC
+ bool
+ help
+ The arch uses struct mod_arch_specific to store data. Many arches
+ just need a simple module loader without arch specific data - those
+ should not enable this.
+
+config MODULES_USE_ELF_RELA
+ bool
+ help
+ Modules only use ELF RELA relocations. Modules with ELF REL
+ relocations will give an error.
+
+config MODULES_USE_ELF_REL
+ bool
+ help
+ Modules only use ELF REL relocations. Modules with ELF RELA
+ relocations will give an error.
+
source "kernel/gcov/Kconfig"
select GENERIC_CMOS_UPDATE
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
+ select HAVE_MOD_ARCH_SPECIFIC
+ select MODULES_USE_ELF_RELA
+ select GENERIC_KERNEL_THREAD
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
include include/asm-generic/Kbuild.asm
+generic-y += clkdev.h
+
header-y += compiler.h
header-y += console.h
header-y += fpu.h
header-y += reg.h
header-y += regdef.h
header-y += sysinfo.h
+ generic-y += exec.h
#include <linux/tty.h>
#include <linux/console.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/reg.h>
#include <asm/uaccess.h>
/* FIXME -- EV6 and LCA45 know how to power down
the CPU. */
+ rcu_idle_enter();
while (!need_resched())
cpu_relax();
- schedule();
+
+ rcu_idle_exit();
+ schedule_preempt_disabled();
}
}
/*
* Copy an alpha thread..
- *
- * Note the "stack_offset" stuff: when returning to kernel mode, we need
- * to have some extra stack-space for the kernel stack that still exists
- * after the "ret_from_fork". When returning to user mode, we only want
- * the space needed by the syscall stack frame (ie "struct pt_regs").
- * Use the passed "regs" pointer to determine how much space we need
- * for a kernel fork().
*/
int
copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct * p, struct pt_regs * regs)
{
extern void ret_from_fork(void);
+ extern void ret_from_kernel_thread(void);
struct thread_info *childti = task_thread_info(p);
- struct pt_regs * childregs;
- struct switch_stack * childstack, *stack;
- unsigned long stack_offset, settls;
-
- stack_offset = PAGE_SIZE - sizeof(struct pt_regs);
- if (!(regs->ps & 8))
- stack_offset = (PAGE_SIZE-1) & (unsigned long) regs;
- childregs = (struct pt_regs *)
- (stack_offset + PAGE_SIZE + task_stack_page(p));
-
+ struct pt_regs *childregs = task_pt_regs(p);
+ struct switch_stack *childstack, *stack;
+ unsigned long settls;
+
+ childstack = ((struct switch_stack *) childregs) - 1;
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childstack, 0,
+ sizeof(struct switch_stack) + sizeof(struct pt_regs));
+ childstack->r26 = (unsigned long) ret_from_kernel_thread;
+ childstack->r9 = usp; /* function */
+ childstack->r10 = arg;
+ childregs->hae = alpha_mv.hae_cache,
+ childti->pcb.usp = 0;
+ childti->pcb.ksp = (unsigned long) childstack;
+ childti->pcb.flags = 1; /* set FEN, clear everything else */
+ return 0;
+ }
*childregs = *regs;
settls = regs->r20;
childregs->r0 = 0;
childregs->r20 = 1; /* OSF/1 has some strange fork() semantics. */
regs->r20 = 0;
stack = ((struct switch_stack *) regs) - 1;
- childstack = ((struct switch_stack *) childregs) - 1;
*childstack = *stack;
childstack->r26 = (unsigned long) ret_from_fork;
childti->pcb.usp = usp;
}
EXPORT_SYMBOL(dump_elf_task_fp);
- /*
- * sys_execve() executes a new program.
- */
- asmlinkage int
- do_sys_execve(const char __user *ufilename,
- const char __user *const __user *argv,
- const char __user *const __user *envp, struct pt_regs *regs)
- {
- int error;
- char *filename;
-
- filename = getname(ufilename);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- error = do_execve(filename, argv, envp, regs);
- putname(filename);
- out:
- return error;
- }
-
/*
* Return saved PC of a blocked thread. This assumes the frame
* pointer is the 6th saved long on the kernel stack and that the
}
return pc;
}
-
- int kernel_execve(const char *path, const char *const argv[], const char *const envp[])
- {
- /* Avoid the HAE being gratuitously wrong, which would cause us
- to do the whole turn off interrupts thing and restore it. */
- struct pt_regs regs = {.hae = alpha_mv.hae_cache};
- int err = do_execve(path, argv, envp, ®s);
- if (!err) {
- struct pt_regs *p = current_pt_regs();
- /* copy regs to normal position and off to userland we go... */
- *p = regs;
- __asm__ __volatile__ (
- "mov %0, $sp;"
- "br $31, ret_from_sys_call"
- : : "r"(p));
- }
- return err;
- }
- EXPORT_SYMBOL(kernel_execve);
select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES if !XIP_KERNEL
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
select ARCH_BINFMT_ELF_RANDOMIZE_PIE
select HAVE_GENERIC_DMA_COHERENT
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HARDIRQS_SW_RESEND
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
+ select HAVE_UID16
select ARCH_WANT_IPC_PARSE_VERSION
select HARDIRQS_SW_RESEND
select CPU_PM if (SUSPEND || CPU_IDLE)
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN
+ select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
+ select MODULES_USE_ELF_REL
+ select GENERIC_KERNEL_THREAD
help
The ARM series is a line of low-power-consumption RISC chip designs
licensed by ARM Ltd and targeted at embedded applications and
this feature (eg, building a kernel for a single machine) and
you need to shrink the kernel to the minimal size.
+config NEED_MACH_GPIO_H
+ bool
+ help
+ Select this when mach/gpio.h is required to provide special
+ definitions for this platform. The need for mach/gpio.h should
+ be avoided when possible.
+
config NEED_MACH_IO_H
bool
help
#
choice
prompt "ARM system type"
- default ARCH_VERSATILE
+ default ARCH_MULTIPLATFORM
-config ARCH_SOCFPGA
- bool "Altera SOCFPGA family"
- select ARCH_WANT_OPTIONAL_GPIOLIB
- select ARM_AMBA
- select ARM_GIC
- select CACHE_L2X0
- select CLKDEV_LOOKUP
+config ARCH_MULTIPLATFORM
+ bool "Allow multiple platforms to be selected"
+ select ARM_PATCH_PHYS_VIRT
+ select AUTO_ZRELADDR
select COMMON_CLK
- select CPU_V7
- select DW_APB_TIMER
- select DW_APB_TIMER_OF
- select GENERIC_CLOCKEVENTS
- select GPIO_PL061 if GPIOLIB
- select HAVE_ARM_SCU
+ select MULTI_IRQ_HANDLER
select SPARSE_IRQ
select USE_OF
- help
- This enables support for Altera SOCFPGA Cyclone V platform
+ depends on MMU
config ARCH_INTEGRATOR
bool "ARM Ltd. Integrator family"
select ARM_AMBA
select ARCH_HAS_CPUFREQ
select COMMON_CLK
- select CLK_VERSATILE
+ select COMMON_CLK_VERSATILE
select HAVE_TCM
select ICST
select GENERIC_CLOCKEVENTS
select PLAT_VERSATILE
select PLAT_VERSATILE_FPGA_IRQ
- select NEED_MACH_IO_H
select NEED_MACH_MEMORY_H
select SPARSE_IRQ
select MULTI_IRQ_HANDLER
config ARCH_REALVIEW
bool "ARM Ltd. RealView family"
select ARM_AMBA
- select CLKDEV_LOOKUP
- select HAVE_MACH_CLKDEV
+ select COMMON_CLK
+ select COMMON_CLK_VERSATILE
select ICST
select GENERIC_CLOCKEVENTS
select ARCH_WANT_OPTIONAL_GPIOLIB
select PLAT_VERSATILE
- select PLAT_VERSATILE_CLOCK
select PLAT_VERSATILE_CLCD
select ARM_TIMER_SP804
select GPIO_PL061 if GPIOLIB
select ICST
select GENERIC_CLOCKEVENTS
select ARCH_WANT_OPTIONAL_GPIOLIB
- select NEED_MACH_IO_H if PCI
select PLAT_VERSATILE
select PLAT_VERSATILE_CLOCK
select PLAT_VERSATILE_CLCD
help
This enables support for ARM Ltd Versatile board.
-config ARCH_VEXPRESS
- bool "ARM Ltd. Versatile Express family"
- select ARCH_WANT_OPTIONAL_GPIOLIB
- select ARM_AMBA
- select ARM_TIMER_SP804
- select CLKDEV_LOOKUP
- select COMMON_CLK
- select GENERIC_CLOCKEVENTS
- select HAVE_CLK
- select HAVE_PATA_PLATFORM
- select ICST
- select NO_IOPORT
- select PLAT_VERSATILE
- select PLAT_VERSATILE_CLCD
- select REGULATOR_FIXED_VOLTAGE if REGULATOR
- help
- This enables support for the ARM Ltd Versatile Express boards.
-
config ARCH_AT91
bool "Atmel AT91"
select ARCH_REQUIRE_GPIOLIB
select HAVE_CLK
select CLKDEV_LOOKUP
select IRQ_DOMAIN
+ select NEED_MACH_GPIO_H
select NEED_MACH_IO_H if PCCARD
help
This enables support for systems based on Atmel
AT91RM9200 and AT91SAM9* processors.
-config ARCH_BCMRING
- bool "Broadcom BCMRING"
- depends on MMU
- select CPU_V6
- select ARM_AMBA
- select ARM_TIMER_SP804
- select CLKDEV_LOOKUP
- select GENERIC_CLOCKEVENTS
- select ARCH_WANT_OPTIONAL_GPIOLIB
- help
- Support for Broadcom's BCMRing platform.
-
-config ARCH_HIGHBANK
- bool "Calxeda Highbank-based"
+config ARCH_BCM2835
+ bool "Broadcom BCM2835 family"
select ARCH_WANT_OPTIONAL_GPIOLIB
select ARM_AMBA
- select ARM_GIC
+ select ARM_ERRATA_411920
select ARM_TIMER_SP804
- select CACHE_L2X0
select CLKDEV_LOOKUP
select COMMON_CLK
- select CPU_V7
+ select CPU_V6
select GENERIC_CLOCKEVENTS
- select HAVE_ARM_SCU
- select HAVE_SMP
+ select MULTI_IRQ_HANDLER
select SPARSE_IRQ
select USE_OF
help
- Support for the Calxeda Highbank SoC based boards.
+ This enables support for the Broadcom BCM2835 SoC. This SoC is
+ use in the Raspberry Pi, and Roku 2 devices.
config ARCH_CLPS711X
bool "Cirrus Logic CLPS711x/EP721x/EP731x-based"
select CPU_ARM720T
select ARCH_USES_GETTIMEOFFSET
+ select COMMON_CLK
+ select CLKDEV_LOOKUP
select NEED_MACH_MEMORY_H
help
Support for Cirrus Logic 711x/721x/731x based boards.
help
Support for the Cortina Systems Gemini family SoCs
-config ARCH_PRIMA2
- bool "CSR SiRFSoC PRIMA2 ARM Cortex A9 Platform"
- select CPU_V7
+config ARCH_SIRF
+ bool "CSR SiRF"
select NO_IOPORT
select ARCH_REQUIRE_GPIOLIB
select GENERIC_CLOCKEVENTS
- select CLKDEV_LOOKUP
+ select COMMON_CLK
select GENERIC_IRQ_CHIP
select MIGHT_HAVE_CACHE_L2X0
select PINCTRL
select PINCTRL_SIRF
select USE_OF
- select ZONE_DMA
help
- Support for CSR SiRFSoC ARM Cortex A9 Platform
+ Support for CSR SiRFprimaII/Marco/Polo platforms
config ARCH_EBSA110
bool "EBSA-110"
select FOOTBRIDGE
select GENERIC_CLOCKEVENTS
select HAVE_IDE
- select NEED_MACH_IO_H
+ select NEED_MACH_IO_H if !MMU
select NEED_MACH_MEMORY_H
help
Support for systems based on the DC21285 companion chip
select CLKSRC_MMIO
select COMMON_CLK
select HAVE_CLK_PREPARE
+ select MULTI_IRQ_HANDLER
select PINCTRL
+ select SPARSE_IRQ
select USE_OF
help
Support for Freescale MXS-based family of processors
select PCI
select ARCH_SUPPORTS_MSI
select VMSPLIT_1G
- select NEED_MACH_IO_H
select NEED_MACH_MEMORY_H
select NEED_RET_TO_USER
help
bool "IOP32x-based"
depends on MMU
select CPU_XSCALE
- select NEED_MACH_IO_H
+ select NEED_MACH_GPIO_H
select NEED_RET_TO_USER
select PLAT_IOP
select PCI
bool "IOP33x-based"
depends on MMU
select CPU_XSCALE
- select NEED_MACH_IO_H
+ select NEED_MACH_GPIO_H
select NEED_RET_TO_USER
select PLAT_IOP
select PCI
help
Support for Intel's IXP4XX (XScale) family of processors.
-config ARCH_MVEBU
- bool "Marvell SOCs with Device Tree support"
- select GENERIC_CLOCKEVENTS
- select MULTI_IRQ_HANDLER
- select SPARSE_IRQ
- select CLKSRC_MMIO
- select GENERIC_IRQ_CHIP
- select IRQ_DOMAIN
- select COMMON_CLK
- help
- Support for the Marvell SoC Family with device tree support
-
config ARCH_DOVE
bool "Marvell Dove"
select CPU_V7
- select PCI
select ARCH_REQUIRE_GPIOLIB
select GENERIC_CLOCKEVENTS
- select NEED_MACH_IO_H
- select PLAT_ORION
+ select MIGHT_HAVE_PCI
+ select PLAT_ORION_LEGACY
+ select USB_ARCH_HAS_EHCI
help
Support for the Marvell Dove SoC 88AP510
select PCI
select ARCH_REQUIRE_GPIOLIB
select GENERIC_CLOCKEVENTS
- select NEED_MACH_IO_H
- select PLAT_ORION
+ select PLAT_ORION_LEGACY
help
Support for the following Marvell Kirkwood series SoCs:
88F6180, 88F6192 and 88F6281.
select PCI
select ARCH_REQUIRE_GPIOLIB
select GENERIC_CLOCKEVENTS
- select NEED_MACH_IO_H
- select PLAT_ORION
+ select PLAT_ORION_LEGACY
help
Support for the following Marvell MV78xx0 series SoCs:
MV781x0, MV782x0.
select PCI
select ARCH_REQUIRE_GPIOLIB
select GENERIC_CLOCKEVENTS
- select NEED_MACH_IO_H
- select PLAT_ORION
+ select PLAT_ORION_LEGACY
help
Support for the following Marvell Orion 5x series SoCs:
Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182),
select PLAT_PXA
select SPARSE_IRQ
select GENERIC_ALLOCATOR
+ select NEED_MACH_GPIO_H
help
Support for Marvell's PXA168/PXA910(MMP) and MMP2 processor line.
bool "Micrel/Kendin KS8695"
select CPU_ARM922T
select ARCH_REQUIRE_GPIOLIB
- select ARCH_USES_GETTIMEOFFSET
select NEED_MACH_MEMORY_H
+ select CLKSRC_MMIO
+ select GENERIC_CLOCKEVENTS
help
Support for Micrel/Kendin KS8695 "Centaur" (ARM922T) based
System-on-Chip devices.
select HAVE_CLK
select HAVE_SMP
select MIGHT_HAVE_CACHE_L2X0
- select NEED_MACH_IO_H if PCI
select ARCH_HAS_CPUFREQ
select USE_OF
+ select COMMON_CLK
help
This enables support for NVIDIA Tegra based systems (Tegra APX,
Tegra 6xx and Tegra 2 series).
-config ARCH_PICOXCELL
- bool "Picochip picoXcell"
- select ARCH_REQUIRE_GPIOLIB
- select ARM_PATCH_PHYS_VIRT
- select ARM_VIC
- select CPU_V6K
- select DW_APB_TIMER
- select DW_APB_TIMER_OF
- select GENERIC_CLOCKEVENTS
- select GENERIC_GPIO
- select HAVE_TCM
- select NO_IOPORT
- select SPARSE_IRQ
- select USE_OF
- help
- This enables support for systems based on the Picochip picoXcell
- family of Femtocell devices. The picoxcell support requires device tree
- for all boards.
-
-config ARCH_PNX4008
- bool "Philips Nexperia PNX4008 Mobile"
- select CPU_ARM926T
- select CLKDEV_LOOKUP
- select ARCH_USES_GETTIMEOFFSET
- help
- This enables support for Philips PNX4008 mobile platform.
-
config ARCH_PXA
bool "PXA2xx/PXA3xx-based"
depends on MMU
select MULTI_IRQ_HANDLER
select ARM_CPU_SUSPEND if PM
select HAVE_IDE
+ select NEED_MACH_GPIO_H
help
Support for Intel/Marvell's PXA2xx/PXA3xx processor line.
select CLKDEV_LOOKUP
select ARCH_REQUIRE_GPIOLIB
select HAVE_IDE
+ select NEED_MACH_GPIO_H
select NEED_MACH_MEMORY_H
select SPARSE_IRQ
help
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C_RTC if RTC_CLASS
select HAVE_S3C2410_WATCHDOG if WATCHDOG
+ select NEED_MACH_GPIO_H
select NEED_MACH_IO_H
help
Samsung S3C2410, S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443
select SAMSUNG_GPIOLIB_4BIT
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C2410_WATCHDOG if WATCHDOG
+ select NEED_MACH_GPIO_H
help
Samsung S3C64XX series based systems
select GENERIC_CLOCKEVENTS
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C_RTC if RTC_CLASS
+ select NEED_MACH_GPIO_H
help
Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440,
SMDK6450.
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C_RTC if RTC_CLASS
select HAVE_S3C2410_WATCHDOG if WATCHDOG
+ select NEED_MACH_GPIO_H
help
Samsung S5PC100 series based systems
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C_RTC if RTC_CLASS
select HAVE_S3C2410_WATCHDOG if WATCHDOG
+ select NEED_MACH_GPIO_H
select NEED_MACH_MEMORY_H
help
Samsung S5PV210/S5PC110 series based systems
select HAVE_S3C_RTC if RTC_CLASS
select HAVE_S3C2410_I2C if I2C
select HAVE_S3C2410_WATCHDOG if WATCHDOG
+ select NEED_MACH_GPIO_H
select NEED_MACH_MEMORY_H
help
Support for SAMSUNG's EXYNOS SoCs (EXYNOS4/5)
select PCI
select ARCH_USES_GETTIMEOFFSET
select NEED_MACH_MEMORY_H
- select NEED_MACH_IO_H
help
Support for the StrongARM based Digital DNARD machine, also known
as "Shark" (<http://www.shark-linux.de/shark.html>).
select COMMON_CLK
select GENERIC_GPIO
select ARCH_REQUIRE_GPIOLIB
+ select SPARSE_IRQ
help
Support for ST-Ericsson U300 series mobile platforms.
select COMMON_CLK
select GENERIC_CLOCKEVENTS
select PINCTRL
+ select PINCTRL_STN8815
select MIGHT_HAVE_CACHE_L2X0
select ARCH_REQUIRE_GPIOLIB
help
select GENERIC_ALLOCATOR
select GENERIC_IRQ_CHIP
select ARCH_HAS_HOLES_MEMORYMODEL
+ select NEED_MACH_GPIO_H
help
Support for TI's DaVinci platform.
select CLKSRC_MMIO
select GENERIC_CLOCKEVENTS
select ARCH_HAS_HOLES_MEMORYMODEL
+ select NEED_MACH_GPIO_H
help
Support for TI's OMAP platform (OMAP1/2/3/4).
select ARCH_HAS_CPUFREQ
select GENERIC_CLOCKEVENTS
select ARCH_REQUIRE_GPIOLIB
+ select USE_OF
+ select COMMON_CLK
+ select HAVE_CLK
+ select CLKDEV_LOOKUP
help
Support for VIA/WonderMedia VT8500/WM85xx System-on-Chip.
Support for Xilinx Zynq ARM Cortex A9 Platform
endchoice
+menu "Multiple platform selection"
+ depends on ARCH_MULTIPLATFORM
+
+comment "CPU Core family selection"
+
+config ARCH_MULTI_V4
+ bool "ARMv4 based platforms (FA526, StrongARM)"
+ select ARCH_MULTI_V4_V5
+ depends on !ARCH_MULTI_V6_V7
+
+config ARCH_MULTI_V4T
+ bool "ARMv4T based platforms (ARM720T, ARM920T, ...)"
+ select ARCH_MULTI_V4_V5
+ depends on !ARCH_MULTI_V6_V7
+
+config ARCH_MULTI_V5
+ bool "ARMv5 based platforms (ARM926T, XSCALE, PJ1, ...)"
+ select ARCH_MULTI_V4_V5
+ depends on !ARCH_MULTI_V6_V7
+
+config ARCH_MULTI_V4_V5
+ bool
+
+config ARCH_MULTI_V6
+ bool "ARMv6 based platforms (ARM11, Scorpion, ...)"
+ select CPU_V6
+ select ARCH_MULTI_V6_V7
+
+config ARCH_MULTI_V7
+ bool "ARMv7 based platforms (Cortex-A, PJ4, Krait)"
+ select CPU_V7
+ select ARCH_VEXPRESS
+ default y
+ select ARCH_MULTI_V6_V7
+
+config ARCH_MULTI_V6_V7
+ bool
+
+config ARCH_MULTI_CPU_AUTO
+ def_bool !(ARCH_MULTI_V4 || ARCH_MULTI_V4T || ARCH_MULTI_V6_V7)
+ select ARCH_MULTI_V5
+
+endmenu
+
#
# This is sorted alphabetically by mach-* pathname. However, plat-*
# Kconfigs may be included either alphabetically (according to the
source "arch/arm/mach-at91/Kconfig"
-source "arch/arm/mach-bcmring/Kconfig"
-
source "arch/arm/mach-clps711x/Kconfig"
source "arch/arm/mach-cns3xxx/Kconfig"
source "arch/arm/mach-h720x/Kconfig"
+source "arch/arm/mach-highbank/Kconfig"
+
source "arch/arm/mach-integrator/Kconfig"
source "arch/arm/mach-iop32x/Kconfig"
source "arch/arm/mach-orion5x/Kconfig"
+source "arch/arm/mach-picoxcell/Kconfig"
+
source "arch/arm/mach-pxa/Kconfig"
source "arch/arm/plat-pxa/Kconfig"
source "arch/arm/plat-samsung/Kconfig"
source "arch/arm/plat-s3c24xx/Kconfig"
+source "arch/arm/mach-socfpga/Kconfig"
+
source "arch/arm/plat-spear/Kconfig"
source "arch/arm/mach-s3c24xx/Kconfig"
source "arch/arm/mach-shmobile/Kconfig"
+source "arch/arm/mach-prima2/Kconfig"
+
source "arch/arm/mach-tegra/Kconfig"
source "arch/arm/mach-u300/Kconfig"
source "arch/arm/mach-vexpress/Kconfig"
source "arch/arm/plat-versatile/Kconfig"
-source "arch/arm/mach-vt8500/Kconfig"
-
source "arch/arm/mach-w90x900/Kconfig"
# Definitions to make life easier
select IRQ_DOMAIN
select COMMON_CLK
+config PLAT_ORION_LEGACY
+ bool
+ select PLAT_ORION
+
config PLAT_PXA
bool
depends on CPU_XSCALE
default y
-config CPU_HAS_PMU
- depends on (CPU_V6 || CPU_V6K || CPU_V7 || XSCALE_PMU) && \
- (!ARCH_OMAP3 || OMAP3_EMU)
- default y
- bool
-
config MULTI_IRQ_HANDLER
bool
help
on systems with an outer cache, the store buffer is drained
explicitly.
+config ARM_ERRATA_775420
+ bool "ARM errata: A data cache maintenance operation which aborts, might lead to deadlock"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 775420 Cortex-A9 (r2p2,
+ r2p6,r2p8,r2p10,r3p0) erratum. In case a date cache maintenance
+ operation aborts with MMU exception, it might cause the processor
+ to deadlock. This workaround puts DSB before executing ISB if
+ an abort may occur on cache maintenance.
+
+config ARM_ERRATA_782773
+ bool "ARM errata: Updating a translation entry might cause an unexpected translation fault"
+ depends on CPU_V7
+ help
+ This option enables the workaround for the 782773 Cortex-A9 (all r0,
+ r2 and r3 revisions) erratum. It might cause MMU exception in case
+ page table walk happens just after updating the existing
+ with setting page table in L1 data cache.
+
endmenu
source "arch/arm/common/Kconfig"
default 355 if ARCH_U8500
default 264 if MACH_H4700
default 512 if SOC_OMAP5
+ default 288 if ARCH_VT8500
default 0
help
Maximum number of GPIOs in the system.
config HW_PERF_EVENTS
bool "Enable hardware performance counter support for perf events"
- depends on PERF_EVENTS && CPU_HAS_PMU
+ depends on PERF_EVENTS
default y
help
Enable hardware performance counter support for perf events. If
This config option is actually maximum order plus one. For example,
a value of 11 means that the largest free memory block is 2^10 pages.
-config LEDS
- bool "Timer and CPU usage LEDs"
- depends on ARCH_CDB89712 || ARCH_EBSA110 || \
- ARCH_EBSA285 || ARCH_INTEGRATOR || \
- ARCH_LUBBOCK || MACH_MAINSTONE || ARCH_NETWINDER || \
- ARCH_OMAP || ARCH_P720T || ARCH_PXA_IDP || \
- ARCH_SA1100 || ARCH_SHARK || ARCH_VERSATILE || \
- ARCH_AT91 || ARCH_DAVINCI || \
- ARCH_KS8695 || MACH_RD88F5182 || ARCH_REALVIEW
- help
- If you say Y here, the LEDs on your machine will be used
- to provide useful information about your current system status.
-
- If you are compiling a kernel for a NetWinder or EBSA-285, you will
- be able to select which LEDs are active using the options below. If
- you are compiling a kernel for the EBSA-110 or the LART however, the
- red LED will simply flash regularly to indicate that the system is
- still functional. It is safe to say Y here if you have a CATS
- system, but the driver will do nothing.
-
-config LEDS_TIMER
- bool "Timer LED" if (!ARCH_CDB89712 && !ARCH_OMAP) || \
- OMAP_OSK_MISTRAL || MACH_OMAP_H2 \
- || MACH_OMAP_PERSEUS2
- depends on LEDS
- depends on !GENERIC_CLOCKEVENTS
- default y if ARCH_EBSA110
- help
- If you say Y here, one of the system LEDs (the green one on the
- NetWinder, the amber one on the EBSA285, or the red one on the LART)
- will flash regularly to indicate that the system is still
- operational. This is mainly useful to kernel hackers who are
- debugging unstable kernels.
-
- The LART uses the same LED for both Timer LED and CPU usage LED
- functions. You may choose to use both, but the Timer LED function
- will overrule the CPU usage LED.
-
-config LEDS_CPU
- bool "CPU usage LED" if (!ARCH_CDB89712 && !ARCH_EBSA110 && \
- !ARCH_OMAP) \
- || OMAP_OSK_MISTRAL || MACH_OMAP_H2 \
- || MACH_OMAP_PERSEUS2
- depends on LEDS
- help
- If you say Y here, the red LED will be used to give a good real
- time indication of CPU usage, by lighting whenever the idle task
- is not currently executing.
-
- The LART uses the same LED for both Timer LED and CPU usage LED
- functions. You may choose to use both, but the Timer LED function
- will overrule the CPU usage LED.
-
config ALIGNMENT_TRAP
bool
depends on CPU_CP15_MMU
configuration it is safe to say N, otherwise say Y.
config UACCESS_WITH_MEMCPY
- bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user() (EXPERIMENTAL)"
- depends on MMU && EXPERIMENTAL
+ bool "Use kernel mem{cpy,set}() for {copy_to,clear}_user()"
+ depends on MMU
default y if CPU_FEROCEON
help
Implement faster copy_to_user and clear_user methods for CPU
neutralized via a kernel panic.
This feature requires gcc version 4.2 or above.
-config DEPRECATED_PARAM_STRUCT
- bool "Provide old way to pass kernel parameters"
+config XEN_DOM0
+ def_bool y
+ depends on XEN
+
+config XEN
+ bool "Xen guest support on ARM (EXPERIMENTAL)"
+ depends on EXPERIMENTAL && ARM && OF
help
- This was deprecated in 2001 and announced to live on for 5 years.
- Some old boot loaders still use this way.
+ Say Y if you want to run Linux in a Virtual Machine on Xen on ARM.
endmenu
help
Include support for flattened device tree machine descriptions.
+config ATAGS
+ bool "Support for the traditional ATAGS boot data passing" if USE_OF
+ default y
+ help
+ This is the traditional way of passing data to the kernel at boot
+ time. If you are solely relying on the flattened device tree (or
+ the ARM_ATAG_DTB_COMPAT option) then you may unselect this option
+ to remove ATAGS support from your kernel binary. If unsure,
+ leave this to y.
+
+config DEPRECATED_PARAM_STRUCT
+ bool "Provide old way to pass kernel parameters"
+ depends on ATAGS
+ help
+ This was deprecated in 2001 and announced to live on for 5 years.
+ Some old boot loaders still use this way.
+
# Compressed boot loader in ROM. Yes, we really want to ask about
# TEXT and BSS so we preserve their values in the config files.
config ZBOOT_ROM_TEXT
choice
prompt "Kernel command line type" if CMDLINE != ""
default CMDLINE_FROM_BOOTLOADER
+ depends on ATAGS
config CMDLINE_FROM_BOOTLOADER
bool "Use bootloader kernel arguments if available"
config XIP_KERNEL
bool "Kernel Execute-In-Place from ROM"
- depends on !ZBOOT_ROM && !ARM_LPAE
+ depends on !ZBOOT_ROM && !ARM_LPAE && !ARCH_MULTIPLATFORM
help
Execute-In-Place allows the kernel to run from non-volatile storage
directly addressable by the CPU, such as NOR flash. This saves RAM
config ATAGS_PROC
bool "Export atags in procfs"
- depends on KEXEC
+ depends on ATAGS && KEXEC
default y
help
Should the atags used to boot the kernel be exported in an "atags"
source "kernel/power/Kconfig"
config ARCH_SUSPEND_POSSIBLE
- depends on !ARCH_S5PC100 && !ARCH_TEGRA
+ depends on !ARCH_S5PC100
depends on CPU_ARM920T || CPU_ARM926T || CPU_SA1100 || \
CPU_V6 || CPU_V6K || CPU_V7 || CPU_XSC3 || CPU_XSCALE || CPU_MOHAWK
def_bool y
#define IRQ_MODE 0x00000012
#define SVC_MODE 0x00000013
#define ABT_MODE 0x00000017
+#define HYP_MODE 0x0000001a
#define UND_MODE 0x0000001b
#define SYSTEM_MODE 0x0000001f
#define MODE32_BIT 0x00000010
return regs->ARM_sp;
}
+ #define current_pt_regs(void) ({ \
+ register unsigned long sp asm ("sp"); \
+ (struct pt_regs *)((sp | (THREAD_SIZE - 1)) - 7) - 1; \
+ })
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
__u32 syscall; /* syscall number */
__u8 used_cp[16]; /* thread used copro */
unsigned long tp_value;
+#ifdef CONFIG_CRUNCH
struct crunch_state crunchstate;
+#endif
union fp_state fpstate __attribute__((aligned(8)));
union vfp_state vfpstate;
#ifdef CONFIG_ARM_THUMBEE
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
#define TIF_SYSCALL_TRACE 8
#define TIF_SYSCALL_AUDIT 9
- #define TIF_POLLING_NRFLAG 16
+#define TIF_SYSCALL_TRACEPOINT 10
#define TIF_USING_IWMMXT 17
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 20
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
- #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
/* Checks for any syscall work in entry-common.S */
-#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT)
+#define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SYSCALL_TRACEPOINT)
/*
* Change these and you break ASM code in entry-common.S
#define __NR_setns (__NR_SYSCALL_BASE+375)
#define __NR_process_vm_readv (__NR_SYSCALL_BASE+376)
#define __NR_process_vm_writev (__NR_SYSCALL_BASE+377)
+ /* 378 for kcmp */
+
+/*
+ * This may need to be greater than __NR_last_syscall+1 in order to
+ * account for the padding in the syscall table
+ */
+#ifdef __KERNEL__
+#define __NR_syscalls (380)
+#endif /* __KERNEL__ */
/*
* The following SWIs are ARM private.
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_SYS_SOCKETCALL
#endif
+ #define __ARCH_WANT_SYS_EXECVE
+ #define __ARCH_WANT_KERNEL_EXECVE
/*
* "Conditional" syscalls
*/
#define __IGNORE_fadvise64_64
#define __IGNORE_migrate_pages
+#define __IGNORE_kcmp
#endif /* __KERNEL__ */
#endif /* __ASM_ARM_UNISTD_H */
CALL(sys_creat)
CALL(sys_link)
/* 10 */ CALL(sys_unlink)
- CALL(sys_execve_wrapper)
+ CALL(sys_execve)
CALL(sys_chdir)
CALL(OBSOLETE(sys_time)) /* used by libc4 */
CALL(sys_mknod)
/* 375 */ CALL(sys_setns)
CALL(sys_process_vm_readv)
CALL(sys_process_vm_writev)
+ CALL(sys_ni_syscall) /* reserved for sys_kcmp */
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
b ret_slow_syscall
ENDPROC(ret_from_fork)
+ ENTRY(ret_from_kernel_thread)
+ UNWIND(.fnstart)
+ UNWIND(.cantunwind)
+ bl schedule_tail
+ mov r0, r4
+ adr lr, BSYM(1f) @ kernel threads should not exit
+ mov pc, r5
+ 1: bl do_exit
+ nop
+ UNWIND(.fnend)
+ ENDPROC(ret_from_kernel_thread)
+
+ /*
+ * turn a kernel thread into userland process
+ * use: ret_from_kernel_execve(struct pt_regs *normal)
+ */
+ ENTRY(ret_from_kernel_execve)
+ mov why, #0 @ not a syscall
+ str why, [r0, #S_R0] @ ... and we want 0 in ->ARM_r0 as well
+ get_thread_info tsk @ thread structure
+ mov sp, r0 @ stack pointer just under pt_regs
+ b ret_slow_syscall
+ ENDPROC(ret_from_kernel_execve)
+
.equ NR_syscalls,0
#define CALL(x) .equ NR_syscalls,NR_syscalls+1
#include "calls.S"
+
+/*
+ * Ensure that the system call table is equal to __NR_syscalls,
+ * which is the value the rest of the system sees
+ */
+.ifne NR_syscalls - __NR_syscalls
+.error "__NR_syscalls is not equal to the size of the syscall table"
+.endif
+
#undef CALL
#define CALL(x) .long x
b sys_vfork
ENDPROC(sys_vfork_wrapper)
- sys_execve_wrapper:
- add r3, sp, #S_OFF
- b sys_execve
- ENDPROC(sys_execve_wrapper)
-
sys_clone_wrapper:
add ip, sp, #S_OFF
str ip, [sp, #4]
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
#include <linux/cpuidle.h>
+#include <linux/leds.h>
#include <asm/cacheflush.h>
-#include <asm/leds.h>
#include <asm/processor.h>
#include <asm/thread_notify.h>
#include <asm/stacktrace.h>
while (1) {
tick_nohz_idle_enter();
rcu_idle_enter();
- leds_event(led_idle_start);
+ ledtrig_cpu(CPU_LED_IDLE_START);
while (!need_resched()) {
#ifdef CONFIG_HOTPLUG_CPU
if (cpu_is_offline(smp_processor_id()))
} else
local_irq_enable();
}
- leds_event(led_idle_end);
+ ledtrig_cpu(CPU_LED_IDLE_END);
rcu_idle_exit();
tick_nohz_idle_exit();
schedule_preempt_disabled();
}
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+ asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
int
copy_thread(unsigned long clone_flags, unsigned long stack_start,
struct thread_info *thread = task_thread_info(p);
struct pt_regs *childregs = task_pt_regs(p);
- *childregs = *regs;
- childregs->ARM_r0 = 0;
- childregs->ARM_sp = stack_start;
-
memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
+
+ if (likely(regs)) {
+ *childregs = *regs;
+ childregs->ARM_r0 = 0;
+ childregs->ARM_sp = stack_start;
+ thread->cpu_context.pc = (unsigned long)ret_from_fork;
+ } else {
+ thread->cpu_context.r4 = stk_sz;
+ thread->cpu_context.r5 = stack_start;
+ thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread;
+ childregs->ARM_cpsr = SVC_MODE;
+ }
thread->cpu_context.sp = (unsigned long)childregs;
- thread->cpu_context.pc = (unsigned long)ret_from_fork;
clear_ptrace_hw_breakpoint(p);
}
EXPORT_SYMBOL(dump_fpu);
- /*
- * Shuffle the argument into the correct register before calling the
- * thread function. r4 is the thread argument, r5 is the pointer to
- * the thread function, and r6 points to the exit function.
- */
- extern void kernel_thread_helper(void);
- asm( ".pushsection .text\n"
- " .align\n"
- " .type kernel_thread_helper, #function\n"
- "kernel_thread_helper:\n"
- #ifdef CONFIG_TRACE_IRQFLAGS
- " bl trace_hardirqs_on\n"
- #endif
- " msr cpsr_c, r7\n"
- " mov r0, r4\n"
- " mov lr, r6\n"
- " mov pc, r5\n"
- " .size kernel_thread_helper, . - kernel_thread_helper\n"
- " .popsection");
-
- #ifdef CONFIG_ARM_UNWIND
- extern void kernel_thread_exit(long code);
- asm( ".pushsection .text\n"
- " .align\n"
- " .type kernel_thread_exit, #function\n"
- "kernel_thread_exit:\n"
- " .fnstart\n"
- " .cantunwind\n"
- " bl do_exit\n"
- " nop\n"
- " .fnend\n"
- " .size kernel_thread_exit, . - kernel_thread_exit\n"
- " .popsection");
- #else
- #define kernel_thread_exit do_exit
- #endif
-
- /*
- * Create a kernel thread.
- */
- pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
- {
- struct pt_regs regs;
-
- memset(®s, 0, sizeof(regs));
-
- regs.ARM_r4 = (unsigned long)arg;
- regs.ARM_r5 = (unsigned long)fn;
- regs.ARM_r6 = (unsigned long)kernel_thread_exit;
- regs.ARM_r7 = SVC_MODE | PSR_ENDSTATE | PSR_ISETSTATE;
- regs.ARM_pc = (unsigned long)kernel_thread_helper;
- regs.ARM_cpsr = regs.ARM_r7 | PSR_I_BIT;
-
- return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL);
- }
- EXPORT_SYMBOL(kernel_thread);
-
unsigned long get_wchan(struct task_struct *p)
{
struct stackframe frame;
include include/asm-generic/Kbuild.asm
+generic-y += clkdev.h
+ generic-y += exec.h
+
header-y += cachectl.h
select OF
select OF_EARLY_FLATTREE
select GENERIC_CLOCKEVENTS
+ select MODULES_USE_ELF_RELA
+ select GENERIC_KERNEL_THREAD
config MMU
def_bool n
-include include/asm-generic/Kbuild.asm
generic-y += atomic.h
generic-y += auxvec.h
+generic-y += barrier.h
generic-y += bitsperlong.h
generic-y += bugs.h
generic-y += cputime.h
generic-y += dma.h
generic-y += emergency-restart.h
generic-y += errno.h
+ generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
generic-y += futex.h
generic-y += shmbuf.h
generic-y += shmparam.h
generic-y += siginfo.h
+generic-y += signal.h
generic-y += socket.h
generic-y += sockios.h
generic-y += stat.h
--- /dev/null
+/*
+ * Copyright (C) 2011 Texas Instruments Incorporated
+ *
+ * Based on arch/tile version.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
++#define __ARCH_WANT_KERNEL_EXECVE
++#define __ARCH_WANT_SYS_EXECVE
++
+/* Use the standard ABI for syscalls. */
+#include <asm-generic/unistd.h>
+
+/* C6X-specific syscalls. */
+#define __NR_cache_sync (__NR_arch_specific_syscall + 0)
+__SYSCALL(__NR_cache_sync, sys_cache_sync)
header-y += rs485.h
header-y += sync_serial.h
+generic-y += module.h
+generic-y += clkdev.h
+ generic-y += exec.h
select HAVE_ARCH_TRACEHOOK
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
+ select HAVE_UID16
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_SHOW
+ select HAVE_DEBUG_BUGVERBOSE
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CPU_DEVICES
select ARCH_WANT_IPC_PARSE_VERSION
+ select GENERIC_KERNEL_THREAD
config ZONE_DMA
bool
header-y += registers.h
header-y += termios.h
++
+generic-y += clkdev.h
+ generic-y += exec.h
#include <linux/reboot.h>
#include <linux/interrupt.h>
#include <linux/pagemap.h>
+#include <linux/rcupdate.h>
#include <asm/asm-offsets.h>
#include <asm/uaccess.h>
#include "local.h"
asmlinkage void ret_from_fork(void);
+ asmlinkage void ret_from_kernel_thread(void);
#include <asm/pgalloc.h>
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched()) {
check_pgt_cache();
if (!frv_dma_inprogress && idle)
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
* set up the kernel stack and exception frames for a new process
*/
int copy_thread(unsigned long clone_flags,
- unsigned long usp, unsigned long topstk,
+ unsigned long usp, unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
- struct pt_regs *childregs0, *childregs, *regs0;
+ struct pt_regs *childregs;
- regs0 = __kernel_frame0_ptr;
- childregs0 = (struct pt_regs *)
+ childregs = (struct pt_regs *)
(task_stack_page(p) + THREAD_SIZE - FRV_FRAME0_SIZE);
- childregs = childregs0;
-
- /* set up the userspace frame (the only place that the USP is stored) */
- *childregs0 = *regs0;
-
- childregs0->gr8 = 0;
- childregs0->sp = usp;
- childregs0->next_frame = NULL;
-
- /* set up the return kernel frame if called from kernel_thread() */
- if (regs != regs0) {
- childregs--;
- *childregs = *regs;
- childregs->sp = (unsigned long) childregs0;
- childregs->next_frame = childregs0;
- childregs->gr15 = (unsigned long) task_thread_info(p);
- childregs->gr29 = (unsigned long) p;
- }
p->set_child_tid = p->clear_child_tid = NULL;
p->thread.sp = (unsigned long) childregs;
p->thread.fp = 0;
p->thread.lr = 0;
- p->thread.pc = (unsigned long) ret_from_fork;
- p->thread.frame0 = childregs0;
+ p->thread.frame0 = childregs;
+
+ if (unlikely(!regs)) {
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->gr9 = usp; /* function */
+ childregs->gr8 = arg;
+ chilregs->psr = PSR_S;
+ p->thread.pc = (unsigned long) ret_from_kernel_thread;
+ save_user_regs(p->thread.user);
+ return 0;
+ }
+
+ /* set up the userspace frame (the only place that the USP is stored) */
+ *childregs = *regs;
+
+ childregs->sp = usp;
+ childregs->next_frame = NULL;
+
+ p->thread.pc = (unsigned long) ret_from_fork;
/* the new TLS pointer is passed in as arg #5 to sys_clone() */
if (clone_flags & CLONE_SETTLS)
return 0;
} /* end copy_thread() */
- /*
- * sys_execve() executes a new program.
- */
- asmlinkage int sys_execve(const char __user *name,
- const char __user *const __user *argv,
- const char __user *const __user *envp)
- {
- int error;
- char * filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, __frame);
- putname(filename);
- return error;
- }
-
unsigned long get_wchan(struct task_struct *p)
{
struct pt_regs *regs0;
include include/asm-generic/Kbuild.asm
+
+generic-y += module.h
+generic-y += clkdev.h
+ generic-y += exec.h
-include include/asm-generic/Kbuild.asm
-
-header-y += break.h
-header-y += cmpxchg.h
-header-y += fpu.h
-header-y += gcc_intrin.h
-header-y += ia64regs.h
-header-y += intel_intrin.h
-header-y += intrinsics.h
-header-y += perfmon.h
-header-y += perfmon_default_smpl.h
-header-y += ptrace_offsets.h
-header-y += rse.h
-header-y += ucontext.h
-header-y += ustack.h
+generic-y += clkdev.h
+ generic-y += exec.h
si.si_errno = 0;
si.si_code = SI_KERNEL;
si.si_pid = task_pid_vnr(current);
- si.si_uid = current_uid();
+ si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
si.si_addr = sc;
force_sig_info(SIGSEGV, &si, current);
return retval;
si.si_errno = 0;
si.si_code = SI_KERNEL;
si.si_pid = task_pid_vnr(current);
- si.si_uid = current_uid();
+ si.si_uid = from_kuid_munged(current_user_ns(), current_uid());
si.si_addr = addr;
force_sig_info(SIGSEGV, &si, current);
return 0;
long restart = in_syscall;
long errno = scr->pt.r8;
- /*
- * In the ia64_leave_kernel code path, we want the common case to go fast, which
- * is why we may in certain cases get here from kernel mode. Just return without
- * doing anything if so.
- */
- if (!user_mode(&scr->pt))
- return;
-
/*
* This only loops in the rare cases of handle_signal() failing, in which case we
* need to push through a forced SIGSEGV.
include include/asm-generic/Kbuild.asm
+generic-y += module.h
+generic-y += clkdev.h
+ generic-y += exec.h
default y
select HAVE_IDE
select HAVE_AOUT if MMU
+ select HAVE_DEBUG_BUGVERBOSE
select HAVE_GENERIC_HARDIRQS
select GENERIC_IRQ_SHOW
select GENERIC_ATOMIC64
+ select HAVE_UID16
select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
select GENERIC_CPU_DEVICES
select GENERIC_STRNCPY_FROM_USER if MMU
select FPU if MMU
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
+ select HAVE_MOD_ARCH_SPECIFIC
+ select MODULES_USE_ELF_REL
+ select MODULES_USE_ELF_RELA
+ select GENERIC_KERNEL_THREAD
config RWSEM_GENERIC_SPINLOCK
bool
header-y += cachectl.h
generic-y += bitsperlong.h
+generic-y += clkdev.h
generic-y += cputime.h
generic-y += device.h
generic-y += emergency-restart.h
generic-y += errno.h
+ generic-y += exec.h
generic-y += futex.h
generic-y += ioctl.h
generic-y += ipcbuf.h
#include <linux/reboot.h>
#include <linux/init_task.h>
#include <linux/mqueue.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/traps.h>
asmlinkage void ret_from_fork(void);
+ asmlinkage void ret_from_kernel_thread(void);
/*
{
/* endless idle loop with no priority at all */
while (1) {
+ rcu_idle_enter();
while (!need_resched())
idle();
+ rcu_idle_exit();
schedule_preempt_disabled();
}
}
printk("USP: %08lx\n", rdusp());
}
- /*
- * Create a kernel thread
- */
- int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- {
- int pid;
- mm_segment_t fs;
-
- fs = get_fs();
- set_fs (KERNEL_DS);
-
- {
- register long retval __asm__ ("d0");
- register long clone_arg __asm__ ("d1") = flags | CLONE_VM | CLONE_UNTRACED;
-
- retval = __NR_clone;
- __asm__ __volatile__
- ("clrl %%d2\n\t"
- "trap #0\n\t" /* Linux/m68k system call */
- "tstl %0\n\t" /* child or parent */
- "jne 1f\n\t" /* parent - jump */
- #ifdef CONFIG_MMU
- "lea %%sp@(%c7),%6\n\t" /* reload current */
- "movel %6@,%6\n\t"
- #endif
- "movel %3,%%sp@-\n\t" /* push argument */
- "jsr %4@\n\t" /* call fn */
- "movel %0,%%d1\n\t" /* pass exit value */
- "movel %2,%%d0\n\t" /* exit */
- "trap #0\n"
- "1:"
- : "+d" (retval)
- : "i" (__NR_clone), "i" (__NR_exit),
- "r" (arg), "a" (fn), "d" (clone_arg), "r" (current),
- "i" (-THREAD_SIZE)
- : "d2");
-
- pid = retval;
- }
-
- set_fs (fs);
- return pid;
- }
- EXPORT_SYMBOL(kernel_thread);
-
void flush_thread(void)
{
current->thread.fs = __USER_DS;
}
int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct * p, struct pt_regs * regs)
{
struct pt_regs * childregs;
- struct switch_stack * childstack, *stack;
- unsigned long *retp;
+ struct switch_stack *childstack;
childregs = (struct pt_regs *) (task_stack_page(p) + THREAD_SIZE) - 1;
-
- *childregs = *regs;
- childregs->d0 = 0;
-
- retp = ((unsigned long *) regs);
- stack = ((struct switch_stack *) retp) - 1;
-
childstack = ((struct switch_stack *) childregs) - 1;
- *childstack = *stack;
- childstack->retpc = (unsigned long)ret_from_fork;
p->thread.usp = usp;
p->thread.ksp = (unsigned long)childstack;
-
- if (clone_flags & CLONE_SETTLS)
- task_thread_info(p)->tp_value = regs->d5;
+ p->thread.esp0 = (unsigned long)childregs;
/*
* Must save the current SFC/DFC value, NOT the value when
*/
p->thread.fs = get_fs().seg;
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childstack, 0,
+ sizeof(struct switch_stack) + sizeof(struct pt_regs));
+ childregs->sr = PS_S;
+ childstack->a3 = usp; /* function */
+ childstack->d7 = arg;
+ childstack->retpc = (unsigned long)ret_from_kernel_thread;
+ p->thread.usp = 0;
+ return 0;
+ }
+ *childregs = *regs;
+ childregs->d0 = 0;
+
+ *childstack = ((struct switch_stack *) regs)[-1];
+ childstack->retpc = (unsigned long)ret_from_fork;
+
+ if (clone_flags & CLONE_SETTLS)
+ task_thread_info(p)->tp_value = regs->d5;
+
#ifdef CONFIG_FPU
if (!FPU_IS_EMU) {
/* Copy the current fpu state */
EXPORT_SYMBOL(dump_fpu);
#endif /* CONFIG_FPU */
- /*
- * sys_execve() executes a new program.
- */
- asmlinkage int sys_execve(const char __user *name,
- const char __user *const __user *argv,
- const char __user *const __user *envp)
- {
- int error;
- char * filename;
- struct pt_regs *regs = (struct pt_regs *) &name;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, regs);
- putname(filename);
- return error;
- }
-
unsigned long get_wchan(struct task_struct *p)
{
unsigned long fp, pc;
-include include/asm-generic/Kbuild.asm
--
-header-y += elf.h
+generic-y += clkdev.h
+ generic-y += exec.h
set_fs(USER_DS);
- /* the tracer may want to single-step inside the handler */
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
#ifdef DEBUG_SIG
printk(KERN_INFO "SIG deliver (%s:%d): sp=%p pc=%08lx\n",
current->comm, current->pid, frame, regs->pc);
case -ERESTARTNOINTR:
do_restart:
/* offset of 4 bytes to re-execute trap (brki) instruction */
-#ifndef CONFIG_MMU
regs->pc -= 4;
-#else
- /* offset of 8 bytes required = 4 for rtbd
- offset, plus 4 for size of
- "brki r14,8"
- instruction. */
- regs->pc -= 8;
-#endif
break;
}
}
if (ret)
return;
- signal_delivered(sig, info, ka, regs, 0);
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
}
/*
select HAVE_ARCH_KGDB
select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
select GENERIC_CLOCKEVENTS
+ select MODULES_USE_ELF_RELA
+ select GENERIC_KERNEL_THREAD
config AM33_2
def_bool n
include include/asm-generic/Kbuild.asm
+generic-y += clkdev.h
+ generic-y += exec.h
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/io.h>
{
/* endless idle loop with no priority at all */
for (;;) {
+ rcu_idle_enter();
while (!need_resched()) {
void (*idle)(void);
}
idle();
}
+ rcu_idle_exit();
schedule_preempt_disabled();
}
{
}
- /*
- * create a kernel thread
- */
- int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
- {
- struct pt_regs regs;
-
- memset(®s, 0, sizeof(regs));
-
- regs.a2 = (unsigned long) fn;
- regs.d2 = (unsigned long) arg;
- regs.pc = (unsigned long) kernel_thread_helper;
- local_save_flags(regs.epsw);
- regs.epsw |= EPSW_IE | EPSW_IM_7;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0,
- NULL, NULL);
- }
- EXPORT_SYMBOL(kernel_thread);
-
/*
* free current thread data structures etc..
*/
struct task_struct *p, struct pt_regs *kregs)
{
struct thread_info *ti = task_thread_info(p);
- struct pt_regs *c_uregs, *c_kregs, *uregs;
+ struct pt_regs *c_regs;
unsigned long c_ksp;
- uregs = current->thread.uregs;
-
c_ksp = (unsigned long) task_stack_page(p) + THREAD_SIZE;
/* allocate the userspace exception frame and set it up */
c_ksp -= sizeof(struct pt_regs);
- c_uregs = (struct pt_regs *) c_ksp;
+ c_regs = (struct pt_regs *) c_ksp;
+ c_ksp -= 12; /* allocate function call ABI slack */
- p->thread.uregs = c_uregs;
- *c_uregs = *uregs;
- c_uregs->sp = c_usp;
- c_uregs->epsw &= ~EPSW_FE; /* my FPU */
+ /* set up things up so the scheduler can start the new task */
+ p->thread.uregs = c_regs;
+ ti->frame = c_regs;
+ p->thread.a3 = (unsigned long) c_regs;
+ p->thread.sp = c_ksp;
+ p->thread.wchan = p->thread.pc;
+ p->thread.usp = c_usp;
- c_ksp -= 12; /* allocate function call ABI slack */
+ if (unlikely(!kregs)) {
+ memset(c_regs, 0, sizeof(struct pt_regs));
+ c_regs->a0 = c_usp; /* function */
+ c_regs->d0 = ustk_size; /* argument */
+ local_save_flags(c_regs->epsw);
+ c_regs->epsw |= EPSW_IE | EPSW_IM_7;
+ p->thread.pc = (unsigned long) ret_from_kernel_thread;
+ return 0;
+ }
+ *c_regs = *kregs;
+ c_regs->sp = c_usp;
+ c_regs->epsw &= ~EPSW_FE; /* my FPU */
/* the new TLS pointer is passed in as arg #5 to sys_clone() */
if (clone_flags & CLONE_SETTLS)
- c_uregs->e2 = current_frame()->d3;
-
- /* set up the return kernel frame if called from kernel_thread() */
- c_kregs = c_uregs;
- if (kregs != uregs) {
- c_ksp -= sizeof(struct pt_regs);
- c_kregs = (struct pt_regs *) c_ksp;
- *c_kregs = *kregs;
- c_kregs->sp = c_usp;
- c_kregs->next = c_uregs;
- #ifdef CONFIG_MN10300_CURRENT_IN_E2
- c_kregs->e2 = (unsigned long) p; /* current */
- #endif
-
- c_ksp -= 12; /* allocate function call ABI slack */
- }
+ c_regs->e2 = current_frame()->d3;
- /* set up things up so the scheduler can start the new task */
- ti->frame = c_kregs;
- p->thread.a3 = (unsigned long) c_kregs;
- p->thread.sp = c_ksp;
p->thread.pc = (unsigned long) ret_from_fork;
- p->thread.wchan = (unsigned long) ret_from_fork;
- p->thread.usp = c_usp;
return 0;
}
current_frame(), 0, NULL, NULL);
}
- asmlinkage long sys_execve(const char __user *name,
- const char __user *const __user *argv,
- const char __user *const __user *envp)
- {
- char *filename;
- int error;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, current_frame());
- putname(filename);
- return error;
- }
-
unsigned long get_wchan(struct task_struct *p)
{
return p->thread.wchan;
include include/asm-generic/Kbuild.asm
header-y += pdc.h
+generic-y += clkdev.h
generic-y += word-at-a-time.h
+ generic-y += exec.h
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select SYSCTL_EXCEPTION_TRACE
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_DMA_API_DEBUG
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_OPROFILE
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_SYSCALL_WRAPPERS if PPC64
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select GENERIC_CLOCKEVENTS
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
+ select HAVE_MOD_ARCH_SPECIFIC
+ select MODULES_USE_ELF_RELA
+ select GENERIC_KERNEL_THREAD
config EARLY_PRINTK
bool
config ARCH_SUSPEND_POSSIBLE
def_bool y
depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
- (PPC_85xx && !SMP) || PPC_86xx || PPC_PSERIES || 44x || 40x
+ (PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
+ || 44x || 40x
config PPC_DCR_NATIVE
bool
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
config PPC_ADV_DEBUG_REGS
bool
depends on 40x || BOOKE
config HOTPLUG_CPU
bool "Support for enabling/disabling CPUs"
- depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC || PPC_POWERNV)
+ depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || \
+ PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC))
---help---
Say Y here to be able to disable and re-enable individual
CPUs at runtime on SMP machines.
when dealing with POWER5 cpus at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PPC_DENORMALISATION
+ bool "PowerPC denormalisation exception handling"
+ depends on PPC_BOOK3S_64
+ default "n"
+ ---help---
+ Add support for handling denormalisation of single precision
+ values. Useful for bare metal only. If unsure say Y here.
+
config CMDLINE_BOOL
bool "Default bootloader kernel arguments"
void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
void release_thread(struct task_struct *);
- /* Create a new kernel thread. */
- extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
/* Lazy FPU handling on uni-processor */
extern struct task_struct *last_task_used_math;
extern struct task_struct *last_task_used_altivec;
#endif
#ifdef CONFIG_PPC64
-/* 64-bit user address space is 44-bits (16TB user VM) */
-#define TASK_SIZE_USER64 (0x0000100000000000UL)
+/* 64-bit user address space is 46-bits (64TB user VM) */
+#define TASK_SIZE_USER64 (0x0000400000000000UL)
/*
* 32-bit user address space is 4GB - 1 page
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
unsigned long dabr; /* Data address breakpoint register */
+ unsigned long dabrx; /* ... extension */
+ unsigned long trap_nr; /* last trap # on this thread */
#ifdef CONFIG_ALTIVEC
/* Complete AltiVec register set */
vector128 vr[32] __attribute__((aligned(16)));
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
#define TIF_NOTIFY_RESUME 13 /* callback before returning to user */
+#define TIF_UPROBE 14 /* breakpointed or single-stepping */
#define TIF_SYSCALL_TRACEPOINT 15 /* syscall tracepoint instrumentation */
+#define TIF_EMULATE_STACK_STORE 16 /* Is an instruction emulation
+ for stack store? */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_RESTOREALL (1<<TIF_RESTOREALL)
#define _TIF_NOERROR (1<<TIF_NOERROR)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_NOTIFY_RESUME)
+ _TIF_NOTIFY_RESUME | _TIF_UPROBE)
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
/* Bits in local_flags */
#define is_32bit_task() (1)
#endif
+ #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#define __ARCH_WANT_COMPAT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_SYS_NEWFSTATAT
+#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
+ #define __ARCH_WANT_SYS_EXECVE
+ #define __ARCH_WANT_KERNEL_EXECVE
/*
* "Conditional" syscalls
bl schedule_tail
li r3,0
b ret_from_syscall
+
+ .globl ret_from_kernel_thread
+ ret_from_kernel_thread:
+ REST_NVGPRS(r1)
+ bl schedule_tail
+ mtlr r14
+ mr r3,r15
+ PPC440EP_ERR42
+ blrl
+ li r3,0
+ b do_exit # no return
+
+ .globl __ret_from_kernel_execve
+ __ret_from_kernel_execve:
+ addi r1,r3,-STACK_FRAME_OVERHEAD
+ b ret_from_syscall
/* Traced system call support */
syscall_dotrace:
bnel- load_dbcr0
#endif
-#ifdef CONFIG_PREEMPT
b restore
/* N.B. the only way to get here is from the beq following ret_from_except. */
resume_kernel:
- /* check current_thread_info->preempt_count */
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
CURRENT_THREAD_INFO(r9, r1)
+ lwz r8,TI_FLAGS(r9)
+ andis. r8,r8,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ lwz r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: lwzx r0,r6,r4
+ stwx r0,r6,r3
+ addi r6,r6,4
+ bdnz 2b
+
+ /* Do real store operation to complete stwu */
+ lwz r5,GPR1(r1)
+ stw r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+0: lwarx r8,0,r5
+ andc r8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+ dcbt 0,r5
+#endif
+ stwcx. r8,0,r5
+ bne- 0b
+1:
+
+#ifdef CONFIG_PREEMPT
+ /* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r9)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
bne restore
- lwz r0,TI_FLAGS(r9)
- andi. r0,r0,_TIF_NEED_RESCHED
+ andi. r8,r8,_TIF_NEED_RESCHED
beq+ restore
+ lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
beq restore /* don't schedule if so */
#ifdef CONFIG_TRACE_IRQFLAGS
*/
bl trace_hardirqs_on
#endif
-#else
-resume_kernel:
#endif /* CONFIG_PREEMPT */
/* interrupts are hard-disabled at this point */
li r3,0
b syscall_exit
+ _GLOBAL(ret_from_kernel_thread)
+ bl .schedule_tail
+ REST_NVGPRS(r1)
+ REST_GPR(2,r1)
+ mtlr r14
+ mr r3,r15
+ blrl
+ li r3,0
+ b .do_exit # no return
+
+ _GLOBAL(__ret_from_kernel_execve)
+ addi r1,r3,-STACK_FRAME_OVERHEAD
+ li r10,1
+ std r10,SOFTE(r1)
+ b syscall_exit
+
.section ".toc","aw"
DSCR_DEFAULT:
.tc dscr_default[TC],dscr_default
b .ret_from_except
resume_kernel:
+ /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
+ CURRENT_THREAD_INFO(r9, r1)
+ ld r8,TI_FLAGS(r9)
+ andis. r8,r8,_TIF_EMULATE_STACK_STORE@h
+ beq+ 1f
+
+ addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+
+ lwz r3,GPR1(r1)
+ subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
+ mr r4,r1 /* src: current exception frame */
+ mr r1,r3 /* Reroute the trampoline frame to r1 */
+
+ /* Copy from the original to the trampoline. */
+ li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
+ li r6,0 /* start offset: 0 */
+ mtctr r5
+2: ldx r0,r6,r4
+ stdx r0,r6,r3
+ addi r6,r6,8
+ bdnz 2b
+
+ /* Do real store operation to complete stwu */
+ lwz r5,GPR1(r1)
+ std r8,0(r5)
+
+ /* Clear _TIF_EMULATE_STACK_STORE flag */
+ lis r11,_TIF_EMULATE_STACK_STORE@h
+ addi r5,r9,TI_FLAGS
+ ldarx r4,0,r5
+ andc r4,r4,r11
+ stdcx. r4,0,r5
+ bne- 0b
+1:
+
#ifdef CONFIG_PREEMPT
/* Check if we need to preempt */
andi. r0,r4,_TIF_NEED_RESCHED
#include <asm/dcr.h>
#include <asm/ftrace.h>
#include <asm/switch_to.h>
+#include <asm/epapr_hcalls.h>
#ifdef CONFIG_PPC32
extern void transfer_to_handler(void);
#endif /* CONFIG_PCI */
EXPORT_SYMBOL(start_thread);
- EXPORT_SYMBOL(kernel_thread);
EXPORT_SYMBOL(giveup_fpu);
#ifdef CONFIG_ALTIVEC
#ifdef CONFIG_PPC_BOOK3S_64
EXPORT_SYMBOL_GPL(mmu_psize_defs);
#endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+EXPORT_SYMBOL(epapr_hypercall_start);
+#endif
{
siginfo_t info;
+ current->thread.trap_nr = signal_code;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return;
{
siginfo_t info;
+ current->thread.trap_nr = TRAP_HWBKPT;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return;
return;
/* Clear the DABR */
- set_dabr(0);
+ set_dabr(0, 0);
/* Deliver the signal to userspace */
info.si_signo = SIGTRAP;
{
if (thread->dabr) {
thread->dabr = 0;
- set_dabr(0);
+ thread->dabrx = 0;
+ set_dabr(0, 0);
}
}
#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-int set_dabr(unsigned long dabr)
+int set_dabr(unsigned long dabr, unsigned long dabrx)
{
__get_cpu_var(current_dabr) = dabr;
if (ppc_md.set_dabr)
- return ppc_md.set_dabr(dabr);
+ return ppc_md.set_dabr(dabr, dabrx);
/* XXX should we have a CPU_FTR_HAS_DABR ? */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
#endif
#elif defined(CONFIG_PPC_BOOK3S)
mtspr(SPRN_DABR, dabr);
+ mtspr(SPRN_DABRX, dabrx);
#endif
-
-
return 0;
}
*/
#ifndef CONFIG_HAVE_HW_BREAKPOINT
if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
- set_dabr(new->thread.dabr);
+ set_dabr(new->thread.dabr, new->thread.dabrx);
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
local_irq_save(flags);
- account_system_vtime(current);
- account_process_vtime(current);
-
/*
* We can't take a PMU exception inside _switch() since there is a
* window where the kernel stack SLB and the kernel stack are out
extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long unused, struct task_struct *p,
+ unsigned long arg, struct task_struct *p,
struct pt_regs *regs)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
+ extern void ret_from_kernel_thread(void);
+ void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
- CHECK_FULL_REGS(regs);
/* Copy registers */
sp -= sizeof(struct pt_regs);
childregs = (struct pt_regs *) sp;
- *childregs = *regs;
- if ((childregs->msr & MSR_PR) == 0) {
+ if (!regs) {
/* for kernel thread, set `current' and stackptr in new task */
+ memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
- #ifdef CONFIG_PPC32
- childregs->gpr[2] = (unsigned long) p;
- #else
+ #ifdef CONFIG_PPC64
+ childregs->gpr[14] = *(unsigned long *)usp;
+ childregs->gpr[2] = ((unsigned long *)usp)[1],
clear_tsk_thread_flag(p, TIF_32BIT);
+ #else
+ childregs->gpr[14] = usp; /* function */
+ childregs->gpr[2] = (unsigned long) p;
#endif
+ childregs->gpr[15] = arg;
p->thread.regs = NULL; /* no user register state */
+ f = ret_from_kernel_thread;
} else {
+ CHECK_FULL_REGS(regs);
+ *childregs = *regs;
childregs->gpr[1] = usp;
p->thread.regs = childregs;
+ childregs->gpr[3] = 0; /* Result from fork() */
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
#endif
childregs->gpr[2] = childregs->gpr[6];
}
+
+ f = ret_from_fork;
}
- childregs->gpr[3] = 0; /* Result from fork() */
sp -= STACK_FRAME_OVERHEAD;
/*
p->thread.dscr = current->thread.dscr;
}
#endif
-
/*
* The PPC64 ABI makes use of a TOC to contain function
* pointers. The function (ret_from_except) is actually a pointer
* to the TOC entry. The first entry is a pointer to the actual
* function.
- */
+ */
#ifdef CONFIG_PPC64
- kregs->nip = *((unsigned long *)ret_from_fork);
+ kregs->nip = *((unsigned long *)f);
#else
- kregs->nip = (unsigned long)ret_from_fork;
+ kregs->nip = (unsigned long)f;
#endif
-
return 0;
}
regs, 0, NULL, NULL);
}
- int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
- unsigned long a3, unsigned long a4, unsigned long a5,
- struct pt_regs *regs)
+ void __ret_from_kernel_execve(struct pt_regs *normal)
+ __noreturn;
+
+ void ret_from_kernel_execve(struct pt_regs *normal)
{
- int error;
- char *filename;
-
- filename = getname((const char __user *) a0);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_spe_to_thread(current);
- error = do_execve(filename,
- (const char __user *const __user *) a1,
- (const char __user *const __user *) a2, regs);
- putname(filename);
- out:
- return error;
+ set_thread_flag(TIF_RESTOREALL);
+ __ret_from_kernel_execve(normal);
}
static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
* proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
* and the register representation of a signed int (msr in 64-bit mode) is performed.
*/
-asmlinkage long compat_sys_sendfile(u32 out_fd, u32 in_fd, compat_off_t __user * offset, u32 count)
+asmlinkage long compat_sys_sendfile_wrapper(u32 out_fd, u32 in_fd,
+ compat_off_t __user *offset, u32 count)
{
- mm_segment_t old_fs = get_fs();
- int ret;
- off_t of;
- off_t __user *up;
-
- if (offset && get_user(of, offset))
- return -EFAULT;
-
- /* The __user pointer cast is valid because of the set_fs() */
- set_fs(KERNEL_DS);
- up = offset ? (off_t __user *) &of : NULL;
- ret = sys_sendfile((int)out_fd, (int)in_fd, up, count);
- set_fs(old_fs);
-
- if (offset && put_user(of, offset))
- return -EFAULT;
-
- return ret;
+ return compat_sys_sendfile((int)out_fd, (int)in_fd, offset, count);
}
-asmlinkage int compat_sys_sendfile64(int out_fd, int in_fd, compat_loff_t __user *offset, s32 count)
+asmlinkage long compat_sys_sendfile64_wrapper(u32 out_fd, u32 in_fd,
+ compat_loff_t __user *offset, u32 count)
{
- mm_segment_t old_fs = get_fs();
- int ret;
- loff_t lof;
- loff_t __user *up;
-
- if (offset && get_user(lof, offset))
- return -EFAULT;
-
- /* The __user pointer cast is valid because of the set_fs() */
- set_fs(KERNEL_DS);
- up = offset ? (loff_t __user *) &lof : NULL;
- ret = sys_sendfile64(out_fd, in_fd, up, count);
- set_fs(old_fs);
-
- if (offset && put_user(lof, offset))
- return -EFAULT;
-
- return ret;
+ return sys_sendfile((int)out_fd, (int)in_fd,
+ (off_t __user *)offset, count);
}
- long compat_sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
- unsigned long a3, unsigned long a4, unsigned long a5,
- struct pt_regs *regs)
- {
- int error;
- char * filename;
-
- filename = getname((char __user *) a0);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
-
- error = compat_do_execve(filename, compat_ptr(a1), compat_ptr(a2), regs);
-
- putname(filename);
-
- out:
- return error;
- }
-
/* Note: it is necessary to treat option as an unsigned int,
* with the corresponding cast to a signed int to insure that the
* proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
config PGSTE
def_bool y if KVM
-config VIRT_CPU_ACCOUNTING
+config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
+config KEXEC
+ def_bool y
+
+config AUDIT_ARCH
def_bool y
config S390
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_C_RECORDMCOUNT
select HAVE_SYSCALL_TRACEPOINTS
+ select SYSCTL_EXCEPTION_TRACE
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_XZ
select HAVE_ARCH_MUTEX_CPU_RELAX
select HAVE_ARCH_JUMP_LABEL if !MARCH_G5
+ select HAVE_BPF_JIT if 64BIT && PACK_STACK
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_MEMBLOCK
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_CMPXCHG_LOCAL
+ select HAVE_CMPXCHG_DOUBLE
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select VIRT_CPU_ACCOUNTING
select ARCH_DISCARD_MEMBLOCK
select BUILDTIME_EXTABLE_SORT
select ARCH_INLINE_SPIN_TRYLOCK
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select HAVE_UID16 if 32BIT
select ARCH_WANT_IPC_PARSE_VERSION
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select GENERIC_CLOCKEVENTS
select KTIME_SCALAR if 32BIT
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_MOD_ARCH_SPECIFIC
+ select MODULES_USE_ELF_RELA
+ select GENERIC_KERNEL_THREAD
config SCHED_OMIT_FRAME_POINTER
def_bool y
source "kernel/Kconfig.freezer"
-menu "Base setup"
+menu "Processor type and features"
+
+config HAVE_MARCH_Z900_FEATURES
+ def_bool n
+
+config HAVE_MARCH_Z990_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z900_FEATURES
+
+config HAVE_MARCH_Z9_109_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z990_FEATURES
+
+config HAVE_MARCH_Z10_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z9_109_FEATURES
+
+config HAVE_MARCH_Z196_FEATURES
+ def_bool n
+ select HAVE_MARCH_Z10_FEATURES
+
+choice
+ prompt "Processor type"
+ default MARCH_G5
+
+config MARCH_G5
+ bool "System/390 model G5 and G6"
+ depends on !64BIT
+ help
+ Select this to build a 31 bit kernel that works
+ on all ESA/390 and z/Architecture machines.
-comment "Processor type and features"
+config MARCH_Z900
+ bool "IBM zSeries model z800 and z900"
+ select HAVE_MARCH_Z900_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for model z800/z900 (2064 and
+ 2066 series). This will enable some optimizations that are not
+ available on older ESA/390 (31 Bit) only CPUs.
+
+config MARCH_Z990
+ bool "IBM zSeries model z890 and z990"
+ select HAVE_MARCH_Z990_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for model z890/z990 (2084 and
+ 2086 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z9_109
+ bool "IBM System z9"
+ select HAVE_MARCH_Z9_109_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM System z9 (2094 and
+ 2096 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z10
+ bool "IBM System z10"
+ select HAVE_MARCH_Z10_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM System z10 (2097 and
+ 2098 series). The kernel will be slightly faster but will not work
+ on older machines.
+
+config MARCH_Z196
+ bool "IBM zEnterprise 114 and 196"
+ select HAVE_MARCH_Z196_FEATURES if 64BIT
+ help
+ Select this to enable optimizations for IBM zEnterprise 114 and 196
+ (2818 and 2817 series). The kernel will be slightly faster but will
+ not work on older machines.
+
+endchoice
config 64BIT
def_bool y
config 32BIT
def_bool y if !64BIT
+config COMPAT
+ def_bool y
+ prompt "Kernel support for 31 bit emulation"
+ depends on 64BIT
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
+ select ARCH_WANT_OLD_COMPAT_IPC
+ help
+ Select this option if you want to enable your system kernel to
+ handle system-calls from ELF binaries for 31 bit ESA. This option
+ (and some other stuff like libraries and such) is needed for
+ executing 31 bit applications. It is safe to say "Y".
+
+config SYSVIPC_COMPAT
+ def_bool y if COMPAT && SYSVIPC
+
+config KEYS_COMPAT
+ def_bool y if COMPAT && KEYS
+
config SMP
def_bool y
prompt "Symmetric multi-processing support"
Book scheduler support improves the CPU scheduler's decision making
when dealing with machines that have several books.
+source kernel/Kconfig.preempt
+
config MATHEMU
def_bool y
prompt "IEEE FPU emulation"
on older ESA/390 machines. Say Y unless you know your machine doesn't
need this.
-config COMPAT
- def_bool y
- prompt "Kernel support for 31 bit emulation"
- depends on 64BIT
- select COMPAT_BINFMT_ELF if BINFMT_ELF
- select ARCH_WANT_OLD_COMPAT_IPC
- help
- Select this option if you want to enable your system kernel to
- handle system-calls from ELF binaries for 31 bit ESA. This option
- (and some other stuff like libraries and such) is needed for
- executing 31 bit applications. It is safe to say "Y".
+source kernel/Kconfig.hz
-config SYSVIPC_COMPAT
- def_bool y if COMPAT && SYSVIPC
+endmenu
-config KEYS_COMPAT
- def_bool y if COMPAT && KEYS
+menu "Memory setup"
-config AUDIT_ARCH
+config ARCH_SPARSEMEM_ENABLE
def_bool y
+ select SPARSEMEM_VMEMMAP_ENABLE
+ select SPARSEMEM_VMEMMAP
+ select SPARSEMEM_STATIC if !64BIT
-config HAVE_MARCH_Z900_FEATURES
- def_bool n
-
-config HAVE_MARCH_Z990_FEATURES
- def_bool n
- select HAVE_MARCH_Z900_FEATURES
-
-config HAVE_MARCH_Z9_109_FEATURES
- def_bool n
- select HAVE_MARCH_Z990_FEATURES
-
-config HAVE_MARCH_Z10_FEATURES
- def_bool n
- select HAVE_MARCH_Z9_109_FEATURES
-
-config HAVE_MARCH_Z196_FEATURES
- def_bool n
- select HAVE_MARCH_Z10_FEATURES
-
-comment "Code generation options"
-
-choice
- prompt "Processor type"
- default MARCH_G5
-
-config MARCH_G5
- bool "System/390 model G5 and G6"
- depends on !64BIT
- help
- Select this to build a 31 bit kernel that works
- on all ESA/390 and z/Architecture machines.
-
-config MARCH_Z900
- bool "IBM zSeries model z800 and z900"
- select HAVE_MARCH_Z900_FEATURES if 64BIT
- help
- Select this to enable optimizations for model z800/z900 (2064 and
- 2066 series). This will enable some optimizations that are not
- available on older ESA/390 (31 Bit) only CPUs.
+config ARCH_SPARSEMEM_DEFAULT
+ def_bool y
-config MARCH_Z990
- bool "IBM zSeries model z890 and z990"
- select HAVE_MARCH_Z990_FEATURES if 64BIT
- help
- Select this to enable optimizations for model z890/z990 (2084 and
- 2086 series). The kernel will be slightly faster but will not work
- on older machines.
+config ARCH_SELECT_MEMORY_MODEL
+ def_bool y
-config MARCH_Z9_109
- bool "IBM System z9"
- select HAVE_MARCH_Z9_109_FEATURES if 64BIT
- help
- Select this to enable optimizations for IBM System z9 (2094 and
- 2096 series). The kernel will be slightly faster but will not work
- on older machines.
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ def_bool y if SPARSEMEM
-config MARCH_Z10
- bool "IBM System z10"
- select HAVE_MARCH_Z10_FEATURES if 64BIT
- help
- Select this to enable optimizations for IBM System z10 (2097 and
- 2098 series). The kernel will be slightly faster but will not work
- on older machines.
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ def_bool y
-config MARCH_Z196
- bool "IBM zEnterprise 114 and 196"
- select HAVE_MARCH_Z196_FEATURES if 64BIT
- help
- Select this to enable optimizations for IBM zEnterprise 114 and 196
- (2818 and 2817 series). The kernel will be slightly faster but will
- not work on older machines.
+config FORCE_MAX_ZONEORDER
+ int
+ default "9"
-endchoice
+source "mm/Kconfig"
config PACK_STACK
def_bool y
Say N if you are unsure.
-comment "Kernel preemption"
-
-source "kernel/Kconfig.preempt"
-
-config ARCH_SPARSEMEM_ENABLE
- def_bool y
- select SPARSEMEM_VMEMMAP_ENABLE
- select SPARSEMEM_VMEMMAP
- select SPARSEMEM_STATIC if !64BIT
-
-config ARCH_SPARSEMEM_DEFAULT
- def_bool y
-
-config ARCH_SELECT_MEMORY_MODEL
- def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y if SPARSEMEM
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
- def_bool y
-
-config ARCH_HIBERNATION_POSSIBLE
- def_bool y if 64BIT
-
-source "mm/Kconfig"
+endmenu
-comment "I/O subsystem configuration"
+menu "I/O subsystem"
config QDIO
def_tristate y
If unsure, say N.
-comment "Misc"
+config SCM_BUS
+ def_bool y
+ depends on 64BIT
+ prompt "SCM bus driver"
+ help
+ Bus driver for Storage Class Memory.
+
+config EADM_SCH
+ def_tristate m
+ prompt "Support for EADM subchannels"
+ depends on SCM_BUS
+ help
+ This driver allows usage of EADM subchannels. EADM subchannels act
+ as a communication vehicle for SCM increments.
+
+ To compile this driver as a module, choose M here: the
+ module will be called eadm_sch.
+
+endmenu
+
+menu "Dump support"
+
+config CRASH_DUMP
+ bool "kernel crash dumps"
+ depends on 64BIT && SMP
+ select KEXEC
+ help
+ Generate crash dump after being started by kexec.
+ Crash dump kernels are loaded in the main kernel with kexec-tools
+ into a specially reserved region and then later executed after
+ a crash by kdump/kexec.
+ For more details see Documentation/kdump/kdump.txt
+
+config ZFCPDUMP
+ def_bool n
+ prompt "zfcpdump support"
+ select SMP
+ help
+ Select this option if you want to build an zfcpdump enabled kernel.
+ Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+
+endmenu
+
+menu "Executable file formats / Emulations"
source "fs/Kconfig.binfmt"
-config FORCE_MAX_ZONEORDER
- int
- default "9"
+config SECCOMP
+ def_bool y
+ prompt "Enable seccomp to safely compute untrusted bytecode"
+ depends on PROC_FS
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via /proc/<pid>/seccomp, it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
+ If unsure, say Y.
+
+endmenu
+
+menu "Power Management"
+
+config ARCH_HIBERNATION_POSSIBLE
+ def_bool y if 64BIT
+
+source "kernel/power/Kconfig"
+
+endmenu
+
+source "net/Kconfig"
+
+config PCMCIA
+ def_bool n
+
+config CCW
+ def_bool y
+
+source "drivers/Kconfig"
+
+source "fs/Kconfig"
+
+source "arch/s390/Kconfig.debug"
+
+source "security/Kconfig"
+
+source "crypto/Kconfig"
+
+source "lib/Kconfig"
+
+menu "Virtualization"
config PFAULT
def_bool y
this option.
config SHARED_KERNEL
- def_bool y
- prompt "VM shared kernel support"
+ bool "VM shared kernel support"
+ depends on !JUMP_LABEL
help
Select this option, if you want to share the text segment of the
Linux kernel between different VM guests. This reduces memory
This can also be compiled as a module, which will be called
appldata_net_sum.o.
-source kernel/Kconfig.hz
-
config S390_HYPFS_FS
def_bool y
prompt "s390 hypervisor file system support"
This is a virtual file system intended to provide accounting
information in an s390 hypervisor environment.
-config KEXEC
- def_bool n
- prompt "kexec system call"
- help
- kexec is a system call that implements the ability to shutdown your
- current kernel, and to start another kernel. It is like a reboot
- but is independent of hardware/microcode support.
-
-config CRASH_DUMP
- bool "kernel crash dumps"
- depends on 64BIT && SMP
- select KEXEC
- help
- Generate crash dump after being started by kexec.
- Crash dump kernels are loaded in the main kernel with kexec-tools
- into a specially reserved region and then later executed after
- a crash by kdump/kexec.
- For more details see Documentation/kdump/kdump.txt
-
-config ZFCPDUMP
- def_bool n
- prompt "zfcpdump support"
- select SMP
- help
- Select this option if you want to build an zfcpdump enabled kernel.
- Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
+source "arch/s390/kvm/Kconfig"
config S390_GUEST
def_bool y
- prompt "s390 guest support for KVM (EXPERIMENTAL)"
+ prompt "s390 support for virtio devices (EXPERIMENTAL)"
depends on 64BIT && EXPERIMENTAL
select VIRTUALIZATION
select VIRTIO
- select VIRTIO_RING
select VIRTIO_CONSOLE
help
- Select this option if you want to run the kernel as a guest under
- the KVM hypervisor. This will add detection for KVM as well as a
- virtio transport. If KVM is detected, the virtio console will be
- the default console.
-
-config SECCOMP
- def_bool y
- prompt "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
- help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y.
-
-endmenu
+ Enabling this option adds support for virtio based paravirtual device
+ drivers on s390.
-menu "Power Management"
-
-source "kernel/power/Kconfig"
+ Select this option if you want to run the kernel as a guest under
+ the KVM hypervisor.
endmenu
-
-source "net/Kconfig"
-
-config PCMCIA
- def_bool n
-
-config CCW
- def_bool y
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "arch/s390/Kconfig.debug"
-
-source "security/Kconfig"
-
-source "crypto/Kconfig"
-
-source "lib/Kconfig"
-
-source "arch/s390/kvm/Kconfig"
#ifndef __ASM_S390_PROCESSOR_H
#define __ASM_S390_PROCESSOR_H
+#ifndef __ASSEMBLY__
+
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/setup.h>
+#include <asm/runtime_instr.h>
/*
* Default implementation of macro that returns current
extern void s390_adjust_jiffies(void);
extern const struct seq_operations cpuinfo_op;
extern int sysctl_ieee_emulation_warnings;
+ extern void execve_tail(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
unsigned long gmap_addr; /* address of last gmap fault. */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
+ unsigned long per_flags; /* Flags to control debug behavior */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
+ /* cpu runtime instrumentation */
+ struct runtime_instr_cb *ri_cb;
+ int ri_signum;
+#ifdef CONFIG_64BIT
+ unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
+#endif
};
+#define PER_FLAG_NO_TE 1UL /* Flag to disable transactions. */
+
typedef struct thread_struct thread_struct;
/*
regs->psw.mask = psw_user_bits | PSW_MASK_EA | PSW_MASK_BA; \
regs->psw.addr = new_psw | PSW_ADDR_AMODE; \
regs->gprs[15] = new_stackp; \
+ execve_tail(); \
} while (0)
#define start_thread31(regs, new_psw, new_stackp) do { \
__tlb_flush_mm(current->mm); \
crst_table_downgrade(current->mm, 1UL << 31); \
update_mm(current->mm, current); \
+ execve_tail(); \
} while (0)
/* Forward declaration, a strange C thing */
struct mm_struct;
struct seq_file;
+#ifdef CONFIG_64BIT
+extern void show_cacheinfo(struct seq_file *m);
+#else
+static inline void show_cacheinfo(struct seq_file *m) { }
+#endif
+
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
- extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
/*
* Return saved PC of a blocked thread.
extern unsigned long thread_saved_pc(struct task_struct *t);
extern void show_code(struct pt_regs *regs);
+extern void print_fn_code(unsigned char *code, unsigned long len);
+extern int insn_to_mnemonic(unsigned char *instruction, char buf[8]);
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
-/*
- * Helper macro for exception table entries
- */
-#ifndef CONFIG_64BIT
-#define EX_TABLE(_fault,_target) \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long " #_fault "," #_target "\n" \
- ".previous\n"
-#else
-#define EX_TABLE(_fault,_target) \
- ".section __ex_table,\"a\"\n" \
- " .align 8\n" \
- " .quad " #_fault "," #_target "\n" \
- ".previous\n"
-#endif
-
extern int memcpy_real(void *, void *, size_t);
extern void memcpy_absolute(void *, void *, size_t);
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
}
-#endif /* __ASM_S390_PROCESSOR_H */
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".long (" #_fault ") - .\n" \
+ ".long (" #_target ") - .\n" \
+ ".previous\n"
+
+#else /* __ASSEMBLY__ */
+
+#define EX_TABLE(_fault, _target) \
+ .section __ex_table,"a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_S390_PROCESSOR_H */
*
* Derived from "include/asm-i386/unistd.h"
*/
-
#ifndef _ASM_S390_UNISTD_H_
#define _ASM_S390_UNISTD_H_
-/*
- * This file contains the system call numbers.
- */
-
-#define __NR_exit 1
-#define __NR_fork 2
-#define __NR_read 3
-#define __NR_write 4
-#define __NR_open 5
-#define __NR_close 6
-#define __NR_restart_syscall 7
-#define __NR_creat 8
-#define __NR_link 9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir 12
-#define __NR_mknod 14
-#define __NR_chmod 15
-#define __NR_lseek 19
-#define __NR_getpid 20
-#define __NR_mount 21
-#define __NR_umount 22
-#define __NR_ptrace 26
-#define __NR_alarm 27
-#define __NR_pause 29
-#define __NR_utime 30
-#define __NR_access 33
-#define __NR_nice 34
-#define __NR_sync 36
-#define __NR_kill 37
-#define __NR_rename 38
-#define __NR_mkdir 39
-#define __NR_rmdir 40
-#define __NR_dup 41
-#define __NR_pipe 42
-#define __NR_times 43
-#define __NR_brk 45
-#define __NR_signal 48
-#define __NR_acct 51
-#define __NR_umount2 52
-#define __NR_ioctl 54
-#define __NR_fcntl 55
-#define __NR_setpgid 57
-#define __NR_umask 60
-#define __NR_chroot 61
-#define __NR_ustat 62
-#define __NR_dup2 63
-#define __NR_getppid 64
-#define __NR_getpgrp 65
-#define __NR_setsid 66
-#define __NR_sigaction 67
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname 74
-#define __NR_setrlimit 75
-#define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
-#define __NR_symlink 83
-#define __NR_readlink 85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir 89
-#define __NR_mmap 90
-#define __NR_munmap 91
-#define __NR_truncate 92
-#define __NR_ftruncate 93
-#define __NR_fchmod 94
-#define __NR_getpriority 96
-#define __NR_setpriority 97
-#define __NR_statfs 99
-#define __NR_fstatfs 100
-#define __NR_socketcall 102
-#define __NR_syslog 103
-#define __NR_setitimer 104
-#define __NR_getitimer 105
-#define __NR_stat 106
-#define __NR_lstat 107
-#define __NR_fstat 108
-#define __NR_lookup_dcookie 110
-#define __NR_vhangup 111
-#define __NR_idle 112
-#define __NR_wait4 114
-#define __NR_swapoff 115
-#define __NR_sysinfo 116
-#define __NR_ipc 117
-#define __NR_fsync 118
-#define __NR_sigreturn 119
-#define __NR_clone 120
-#define __NR_setdomainname 121
-#define __NR_uname 122
-#define __NR_adjtimex 124
-#define __NR_mprotect 125
-#define __NR_sigprocmask 126
-#define __NR_create_module 127
-#define __NR_init_module 128
-#define __NR_delete_module 129
-#define __NR_get_kernel_syms 130
-#define __NR_quotactl 131
-#define __NR_getpgid 132
-#define __NR_fchdir 133
-#define __NR_bdflush 134
-#define __NR_sysfs 135
-#define __NR_personality 136
-#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
-#define __NR_getdents 141
-#define __NR_flock 143
-#define __NR_msync 144
-#define __NR_readv 145
-#define __NR_writev 146
-#define __NR_getsid 147
-#define __NR_fdatasync 148
-#define __NR__sysctl 149
-#define __NR_mlock 150
-#define __NR_munlock 151
-#define __NR_mlockall 152
-#define __NR_munlockall 153
-#define __NR_sched_setparam 154
-#define __NR_sched_getparam 155
-#define __NR_sched_setscheduler 156
-#define __NR_sched_getscheduler 157
-#define __NR_sched_yield 158
-#define __NR_sched_get_priority_max 159
-#define __NR_sched_get_priority_min 160
-#define __NR_sched_rr_get_interval 161
-#define __NR_nanosleep 162
-#define __NR_mremap 163
-#define __NR_query_module 167
-#define __NR_poll 168
-#define __NR_nfsservctl 169
-#define __NR_prctl 172
-#define __NR_rt_sigreturn 173
-#define __NR_rt_sigaction 174
-#define __NR_rt_sigprocmask 175
-#define __NR_rt_sigpending 176
-#define __NR_rt_sigtimedwait 177
-#define __NR_rt_sigqueueinfo 178
-#define __NR_rt_sigsuspend 179
-#define __NR_pread64 180
-#define __NR_pwrite64 181
-#define __NR_getcwd 183
-#define __NR_capget 184
-#define __NR_capset 185
-#define __NR_sigaltstack 186
-#define __NR_sendfile 187
-#define __NR_getpmsg 188
-#define __NR_putpmsg 189
-#define __NR_vfork 190
-#define __NR_pivot_root 217
-#define __NR_mincore 218
-#define __NR_madvise 219
-#define __NR_getdents64 220
-#define __NR_readahead 222
-#define __NR_setxattr 224
-#define __NR_lsetxattr 225
-#define __NR_fsetxattr 226
-#define __NR_getxattr 227
-#define __NR_lgetxattr 228
-#define __NR_fgetxattr 229
-#define __NR_listxattr 230
-#define __NR_llistxattr 231
-#define __NR_flistxattr 232
-#define __NR_removexattr 233
-#define __NR_lremovexattr 234
-#define __NR_fremovexattr 235
-#define __NR_gettid 236
-#define __NR_tkill 237
-#define __NR_futex 238
-#define __NR_sched_setaffinity 239
-#define __NR_sched_getaffinity 240
-#define __NR_tgkill 241
-/* Number 242 is reserved for tux */
-#define __NR_io_setup 243
-#define __NR_io_destroy 244
-#define __NR_io_getevents 245
-#define __NR_io_submit 246
-#define __NR_io_cancel 247
-#define __NR_exit_group 248
-#define __NR_epoll_create 249
-#define __NR_epoll_ctl 250
-#define __NR_epoll_wait 251
-#define __NR_set_tid_address 252
-#define __NR_fadvise64 253
-#define __NR_timer_create 254
-#define __NR_timer_settime (__NR_timer_create+1)
-#define __NR_timer_gettime (__NR_timer_create+2)
-#define __NR_timer_getoverrun (__NR_timer_create+3)
-#define __NR_timer_delete (__NR_timer_create+4)
-#define __NR_clock_settime (__NR_timer_create+5)
-#define __NR_clock_gettime (__NR_timer_create+6)
-#define __NR_clock_getres (__NR_timer_create+7)
-#define __NR_clock_nanosleep (__NR_timer_create+8)
-/* Number 263 is reserved for vserver */
-#define __NR_statfs64 265
-#define __NR_fstatfs64 266
-#define __NR_remap_file_pages 267
-/* Number 268 is reserved for new sys_mbind */
-/* Number 269 is reserved for new sys_get_mempolicy */
-/* Number 270 is reserved for new sys_set_mempolicy */
-#define __NR_mq_open 271
-#define __NR_mq_unlink 272
-#define __NR_mq_timedsend 273
-#define __NR_mq_timedreceive 274
-#define __NR_mq_notify 275
-#define __NR_mq_getsetattr 276
-#define __NR_kexec_load 277
-#define __NR_add_key 278
-#define __NR_request_key 279
-#define __NR_keyctl 280
-#define __NR_waitid 281
-#define __NR_ioprio_set 282
-#define __NR_ioprio_get 283
-#define __NR_inotify_init 284
-#define __NR_inotify_add_watch 285
-#define __NR_inotify_rm_watch 286
-/* Number 287 is reserved for new sys_migrate_pages */
-#define __NR_openat 288
-#define __NR_mkdirat 289
-#define __NR_mknodat 290
-#define __NR_fchownat 291
-#define __NR_futimesat 292
-#define __NR_unlinkat 294
-#define __NR_renameat 295
-#define __NR_linkat 296
-#define __NR_symlinkat 297
-#define __NR_readlinkat 298
-#define __NR_fchmodat 299
-#define __NR_faccessat 300
-#define __NR_pselect6 301
-#define __NR_ppoll 302
-#define __NR_unshare 303
-#define __NR_set_robust_list 304
-#define __NR_get_robust_list 305
-#define __NR_splice 306
-#define __NR_sync_file_range 307
-#define __NR_tee 308
-#define __NR_vmsplice 309
-/* Number 310 is reserved for new sys_move_pages */
-#define __NR_getcpu 311
-#define __NR_epoll_pwait 312
-#define __NR_utimes 313
-#define __NR_fallocate 314
-#define __NR_utimensat 315
-#define __NR_signalfd 316
-#define __NR_timerfd 317
-#define __NR_eventfd 318
-#define __NR_timerfd_create 319
-#define __NR_timerfd_settime 320
-#define __NR_timerfd_gettime 321
-#define __NR_signalfd4 322
-#define __NR_eventfd2 323
-#define __NR_inotify_init1 324
-#define __NR_pipe2 325
-#define __NR_dup3 326
-#define __NR_epoll_create1 327
-#define __NR_preadv 328
-#define __NR_pwritev 329
-#define __NR_rt_tgsigqueueinfo 330
-#define __NR_perf_event_open 331
-#define __NR_fanotify_init 332
-#define __NR_fanotify_mark 333
-#define __NR_prlimit64 334
-#define __NR_name_to_handle_at 335
-#define __NR_open_by_handle_at 336
-#define __NR_clock_adjtime 337
-#define __NR_syncfs 338
-#define __NR_setns 339
-#define __NR_process_vm_readv 340
-#define __NR_process_vm_writev 341
-#define NR_syscalls 342
-
-/*
- * There are some system calls that are not present on 64 bit, some
- * have a different name although they do the same (e.g. __NR_chown32
- * is __NR_chown on 64 bit).
- */
-#ifndef __s390x__
-
-#define __NR_time 13
-#define __NR_lchown 16
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime 25
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_geteuid 49
-#define __NR_getegid 50
-#define __NR_setreuid 70
-#define __NR_setregid 71
-#define __NR_getrlimit 76
-#define __NR_getgroups 80
-#define __NR_setgroups 81
-#define __NR_fchown 95
-#define __NR_ioperm 101
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#define __NR__llseek 140
-#define __NR__newselect 142
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_setresgid 170
-#define __NR_getresgid 171
-#define __NR_chown 182
-#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */
-#define __NR_mmap2 192
-#define __NR_truncate64 193
-#define __NR_ftruncate64 194
-#define __NR_stat64 195
-#define __NR_lstat64 196
-#define __NR_fstat64 197
-#define __NR_lchown32 198
-#define __NR_getuid32 199
-#define __NR_getgid32 200
-#define __NR_geteuid32 201
-#define __NR_getegid32 202
-#define __NR_setreuid32 203
-#define __NR_setregid32 204
-#define __NR_getgroups32 205
-#define __NR_setgroups32 206
-#define __NR_fchown32 207
-#define __NR_setresuid32 208
-#define __NR_getresuid32 209
-#define __NR_setresgid32 210
-#define __NR_getresgid32 211
-#define __NR_chown32 212
-#define __NR_setuid32 213
-#define __NR_setgid32 214
-#define __NR_setfsuid32 215
-#define __NR_setfsgid32 216
-#define __NR_fcntl64 221
-#define __NR_sendfile64 223
-#define __NR_fadvise64_64 264
-#define __NR_fstatat64 293
-
-#else
-
-#define __NR_select 142
-#define __NR_getrlimit 191 /* SuS compliant getrlimit */
-#define __NR_lchown 198
-#define __NR_getuid 199
-#define __NR_getgid 200
-#define __NR_geteuid 201
-#define __NR_getegid 202
-#define __NR_setreuid 203
-#define __NR_setregid 204
-#define __NR_getgroups 205
-#define __NR_setgroups 206
-#define __NR_fchown 207
-#define __NR_setresuid 208
-#define __NR_getresuid 209
-#define __NR_setresgid 210
-#define __NR_getresgid 211
-#define __NR_chown 212
-#define __NR_setuid 213
-#define __NR_setgid 214
-#define __NR_setfsuid 215
-#define __NR_setfsgid 216
-#define __NR_newfstatat 293
-
-#endif
+#include <uapi/asm/unistd.h>
-#ifdef __KERNEL__
#ifndef CONFIG_64BIT
#define __IGNORE_select
# define __ARCH_WANT_COMPAT_SYS_TIME
# define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
# endif
+ #define __ARCH_WANT_SYS_EXECVE
+ #define __ARCH_WANT_KERNEL_EXECVE
/*
* "Conditional" syscalls
*/
#define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-#endif /* __KERNEL__ */
#endif /* _ASM_S390_UNISTD_H_ */
low2highuid(suid));
}
-asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user *suid)
+asmlinkage long sys32_getresuid16(u16 __user *ruidp, u16 __user *euidp, u16 __user *suidp)
{
+ const struct cred *cred = current_cred();
int retval;
+ u16 ruid, euid, suid;
- if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) &&
- !(retval = put_user(high2lowuid(current->cred->euid), euid)))
- retval = put_user(high2lowuid(current->cred->suid), suid);
+ ruid = high2lowuid(from_kuid_munged(cred->user_ns, cred->uid));
+ euid = high2lowuid(from_kuid_munged(cred->user_ns, cred->euid));
+ suid = high2lowuid(from_kuid_munged(cred->user_ns, cred->suid));
+
+ if (!(retval = put_user(ruid, ruidp)) &&
+ !(retval = put_user(euid, euidp)))
+ retval = put_user(suid, suidp);
return retval;
}
low2highgid(sgid));
}
-asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user *sgid)
+asmlinkage long sys32_getresgid16(u16 __user *rgidp, u16 __user *egidp, u16 __user *sgidp)
{
+ const struct cred *cred = current_cred();
int retval;
+ u16 rgid, egid, sgid;
+
+ rgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->gid));
+ egid = high2lowgid(from_kgid_munged(cred->user_ns, cred->egid));
+ sgid = high2lowgid(from_kgid_munged(cred->user_ns, cred->sgid));
- if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) &&
- !(retval = put_user(high2lowgid(current->cred->egid), egid)))
- retval = put_user(high2lowgid(current->cred->sgid), sgid);
+ if (!(retval = put_user(rgid, rgidp)) &&
+ !(retval = put_user(egid, egidp)))
+ retval = put_user(sgid, sgidp);
return retval;
}
asmlinkage long sys32_getuid16(void)
{
- return high2lowuid(current->cred->uid);
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_uid()));
}
asmlinkage long sys32_geteuid16(void)
{
- return high2lowuid(current->cred->euid);
+ return high2lowuid(from_kuid_munged(current_user_ns(), current_euid()));
}
asmlinkage long sys32_getgid16(void)
{
- return high2lowgid(current->cred->gid);
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_gid()));
}
asmlinkage long sys32_getegid16(void)
{
- return high2lowgid(current->cred->egid);
+ return high2lowgid(from_kgid_munged(current_user_ns(), current_egid()));
}
/*
return ret;
}
- /*
- * sys32_execve() executes a new program after the asm stub has set
- * things up for us. This should basically do what I want it to.
- */
- asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp)
- {
- struct pt_regs *regs = task_pt_regs(current);
- char *filename;
- long rc;
-
- filename = getname(name);
- rc = PTR_ERR(filename);
- if (IS_ERR(filename))
- return rc;
- rc = compat_do_execve(filename, argv, envp, regs);
- if (rc)
- goto out;
- current->thread.fp_regs.fpc=0;
- asm volatile("sfpc %0,0" : : "d" (0));
- rc = regs->gprs[2];
- out:
- putname(filename);
- return rc;
- }
-
asmlinkage long sys32_pread64(unsigned int fd, char __user *ubuf,
size_t count, u32 poshi, u32 poslo)
{
__u32 sa_flags;
__u32 sa_restorer; /* Another 32 bit pointer */
};
-
-typedef struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[((128/sizeof(int)) - 3)];
-
- /* kill() */
- struct {
- pid_t _pid; /* sender's pid */
- uid_t _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- pid_t _pid; /* sender's pid */
- uid_t _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- pid_t _pid; /* which child */
- uid_t _uid; /* sender's uid */
- int _status;/* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
- struct {
- __u32 _addr; /* faulting insn/memory ref. - pointer */
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-} compat_siginfo_t;
-
-/*
- * How these fields are to be accessed.
- */
-#define si_pid _sifields._kill._pid
-#define si_uid _sifields._kill._uid
-#define si_status _sifields._sigchld._status
-#define si_utime _sifields._sigchld._utime
-#define si_stime _sifields._sigchld._stime
-#define si_value _sifields._rt._sigval
-#define si_int _sifields._rt._sigval.sival_int
-#define si_ptr _sifields._rt._sigval.sival_ptr
-#define si_addr _sifields._sigfault._addr
-#define si_band _sifields._sigpoll._band
-#define si_fd _sifields._sigpoll._fd
-#define si_tid _sifields._timer._tid
-#define si_overrun _sifields._timer._overrun
/* asm/sigcontext.h */
typedef union
compat_sigset_t __user *oset, size_t sigsetsize);
long sys32_rt_sigpending(compat_sigset_t __user *set, size_t sigsetsize);
long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo);
- long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp);
long sys32_init_module(void __user *umod, unsigned long len,
const char __user *uargs);
long sys32_delete_module(const char __user *name_user, unsigned int flags);
llgtr %r2,%r2 # char *
llgtr %r3,%r3 # compat_uptr_t *
llgtr %r4,%r4 # compat_uptr_t *
- jg sys32_execve # branch to system call
+ jg compat_sys_execve # branch to system call
ENTRY(sys_fanotify_init_wrapper)
llgfr %r2,%r2 # unsigned int
llgf %r0,164(%r15) # unsigned long
stg %r0,160(%r15)
jg compat_sys_process_vm_writev
+
+ENTRY(sys_s390_runtime_instr_wrapper)
+ lgfr %r2,%r2 # int
+ lgfr %r3,%r3 # int
+ jg sys_s390_runtime_instr
+
+ENTRY(sys_kcmp_wrapper)
+ lgfr %r2,%r2 # pid_t
+ lgfr %r3,%r3 # pid_t
+ lgfr %r4,%r4 # int
+ llgfr %r5,%r5 # unsigned long
+ llgfr %r6,%r6 # unsigned long
+ jg sys_kcmp
#include <linux/init.h>
#include <linux/linkage.h>
+#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ptrace.h>
jno sysc_return
lmg %r2,%r7,__PT_R2(%r11) # load svc arguments
lghi %r8,0 # svc 0 returns -ENOSYS
- lh %r1,__PT_INT_CODE+2(%r11) # load new svc number
+ llgh %r1,__PT_INT_CODE+2(%r11) # load new svc number
cghi %r1,NR_syscalls
jnl sysc_nr_ok # invalid svc number -> do svc 0
slag %r8,%r1,2
la %r11,STACK_FRAME_OVERHEAD(%r15)
lg %r12,__LC_THREAD_INFO
tm __PT_PSW+1(%r11),0x01 # forking a kernel thread ?
- jo 0f
- stg %r15,__PT_R15(%r11) # store stack pointer for new kthread
- 0: brasl %r14,schedule_tail
+ je 1f
+ brasl %r14,schedule_tail
TRACE_IRQS_ON
ssm __LC_SVC_NEW_PSW # reenable interrupts
j sysc_tracenogo
-
- #
- # kernel_execve function needs to deal with pt_regs that is not
- # at the usual place
- #
- ENTRY(kernel_execve)
- stmg %r12,%r15,96(%r15)
- lgr %r14,%r15
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- stg %r14,__SF_BACKCHAIN(%r15)
- la %r12,STACK_FRAME_OVERHEAD(%r15)
- xc 0(__PT_SIZE,%r12),0(%r12)
- lgr %r5,%r12
- brasl %r14,do_execve
- ltgfr %r2,%r2
- je 0f
- aghi %r15,(STACK_FRAME_OVERHEAD + __PT_SIZE)
- lmg %r12,%r15,96(%r15)
- br %r14
- # execve succeeded.
- 0: ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
- lg %r15,__LC_KERNEL_STACK # load ksp
- aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
- la %r11,STACK_FRAME_OVERHEAD(%r15)
- mvc 0(__PT_SIZE,%r11),0(%r12) # copy pt_regs
- lg %r12,__LC_THREAD_INFO
+ 1: # it's a kernel thread
+ stg %r15,__PT_R15(%r11) # store stack pointer for new kthread
+ brasl %r14,schedule_tail
+ TRACE_IRQS_ON
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lmg %r9,%r11,__PT_R9(%r11) # load gprs
+ ENTRY(kernel_thread_starter)
+ la %r2,0(%r10)
+ basr %r14,%r9
+ la %r2,0
+ br %r11 # do_exit
+
+ ENTRY(ret_from_kernel_execve)
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ lgr %r15,%r2
+ lgr %r11,%r2
+ aghi %r15,-STACK_FRAME_OVERHEAD
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
+ lg %r12,__LC_THREAD_INFO
ssm __LC_SVC_NEW_PSW # reenable interrupts
- brasl %r14,execve_tail
j sysc_return
/*
1: UPDATE_VTIME %r14,__LC_SYNC_ENTER_TIMER
LAST_BREAK %r14
lg %r15,__LC_KERNEL_STACK
+ lg %r14,__TI_task(%r12)
+ lghi %r13,__LC_PGM_TDB
+ tm __LC_PGM_ILC+2,0x02 # check for transaction abort
+ jz 2f
+ mvc __THREAD_trap_tdb(256,%r14),0(%r13)
2: aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stg %r10,__PT_ARGS(%r11)
tm __LC_PGM_ILC+3,0x80 # check for per exception
jz 0f
- lg %r1,__TI_task(%r12)
tmhh %r8,0x0001 # kernel per event ?
jz pgm_kprobe
oi __TI_flags+7(%r12),_TIF_PER_TRAP
- mvc __THREAD_per_address(8,%r1),__LC_PER_ADDRESS
- mvc __THREAD_per_cause(2,%r1),__LC_PER_CAUSE
- mvc __THREAD_per_paid(1,%r1),__LC_PER_PAID
+ mvc __THREAD_per_address(8,%r14),__LC_PER_ADDRESS
+ mvc __THREAD_per_cause(2,%r14),__LC_PER_CAUSE
+ mvc __THREAD_per_paid(1,%r14),__LC_PER_PAID
0: REENABLE_IRQS
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
larl %r1,pgm_check_table
.Lhost_id:
.quad 0
- .section __ex_table,"a"
- .quad sie_loop,sie_fault
- .previous
+ EX_TABLE(sie_loop,sie_fault)
#endif
.section .rodata, "a"
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/vtimer.h>
+#include <asm/exec.h>
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/switch_to.h>
+#include <asm/runtime_instr.h>
#include "entry.h"
asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
extern void __kprobes kernel_thread_starter(void);
- asm(
- ".section .kprobes.text, \"ax\"\n"
- ".global kernel_thread_starter\n"
- "kernel_thread_starter:\n"
- " la 2,0(10)\n"
- " basr 14,9\n"
- " la 2,0\n"
- " br 11\n"
- ".previous\n");
-
- int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- {
- struct pt_regs regs;
-
- memset(®s, 0, sizeof(regs));
- regs.psw.mask = psw_kernel_bits |
- PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- regs.psw.addr = (unsigned long) kernel_thread_starter | PSW_ADDR_AMODE;
- regs.gprs[9] = (unsigned long) fn;
- regs.gprs[10] = (unsigned long) arg;
- regs.gprs[11] = (unsigned long) do_exit;
- regs.orig_gpr2 = -1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
- 0, ®s, 0, NULL, NULL);
- }
- EXPORT_SYMBOL(kernel_thread);
-
/*
* Free current thread data structures etc..
*/
void exit_thread(void)
{
+ exit_thread_runtime_instr();
}
void flush_thread(void)
}
int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
struct thread_info *ti;
frame = container_of(task_pt_regs(p), struct fake_frame, childregs);
p->thread.ksp = (unsigned long) frame;
- /* Store access registers to kernel stack of new process. */
- frame->childregs = *regs;
- frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
- frame->childregs.gprs[15] = new_stackp;
- frame->sf.back_chain = 0;
+ /* Save access registers to new thread structure. */
+ save_access_regs(&p->thread.acrs[0]);
+ /* start new process with ar4 pointing to the correct address space */
+ p->thread.mm_segment = get_fs();
+ /* Don't copy debug registers */
+ memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+ memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
+ clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+ clear_tsk_thread_flag(p, TIF_PER_TRAP);
+ /* Initialize per thread user and system timer values */
+ ti = task_thread_info(p);
+ ti->user_timer = 0;
+ ti->system_timer = 0;
+ frame->sf.back_chain = 0;
/* new return point is ret_from_fork */
frame->sf.gprs[8] = (unsigned long) ret_from_fork;
-
/* fake return stack for resume(), don't go back to schedule */
frame->sf.gprs[9] = (unsigned long) frame;
- /* Save access registers to new thread structure. */
- save_access_regs(&p->thread.acrs[0]);
+ /* Store access registers to kernel stack of new process. */
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(&frame->childregs, 0, sizeof(struct pt_regs));
+ frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.addr = PSW_ADDR_AMODE |
+ (unsigned long) kernel_thread_starter;
+ frame->childregs.gprs[9] = new_stackp; /* function */
+ frame->childregs.gprs[10] = arg;
+ frame->childregs.gprs[11] = (unsigned long) do_exit;
+ frame->childregs.orig_gpr2 = -1;
+
+ return 0;
+ }
+ frame->childregs = *regs;
+ frame->childregs.gprs[2] = 0; /* child returns 0 on fork. */
+ frame->childregs.gprs[15] = new_stackp;
+ /* Don't copy runtime instrumentation info */
+ p->thread.ri_cb = NULL;
+ p->thread.ri_signum = 0;
+ frame->childregs.psw.mask &= ~PSW_MASK_RI;
+
#ifndef CONFIG_64BIT
/*
* save fprs to current->thread.fp_regs to merge them with
}
}
#endif /* CONFIG_64BIT */
- /* start new process with ar4 pointing to the correct address space */
- p->thread.mm_segment = get_fs();
- /* Don't copy debug registers */
- memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
- memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
- clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
- clear_tsk_thread_flag(p, TIF_PER_TRAP);
- /* Initialize per thread user and system timer values */
- ti = task_thread_info(p);
- ti->user_timer = 0;
- ti->system_timer = 0;
return 0;
}
asm volatile("sfpc %0,%0" : : "d" (0));
}
- /*
- * sys_execve() executes a new program.
- */
- SYSCALL_DEFINE3(execve, const char __user *, name,
- const char __user *const __user *, argv,
- const char __user *const __user *, envp)
- {
- struct pt_regs *regs = task_pt_regs(current);
- char *filename;
- long rc;
-
- filename = getname(name);
- rc = PTR_ERR(filename);
- if (IS_ERR(filename))
- return rc;
- rc = do_execve(filename, argv, envp, regs);
- if (rc)
- goto out;
- execve_tail();
- rc = regs->gprs[2];
- out:
- putname(filename);
- return rc;
- }
-
/*
* fill in the FPU structure for a core dump.
*/
# User exported sparc header files
-include include/asm-generic/Kbuild.asm
-header-y += apc.h
-header-y += asi.h
-header-y += display7seg.h
-header-y += envctrl.h
-header-y += fbio.h
-header-y += jsflash.h
-header-y += openpromio.h
-header-y += perfctr.h
-header-y += psrcompat.h
-header-y += psr.h
-header-y += pstate.h
-header-y += traps.h
-header-y += uctx.h
-header-y += utrap.h
-header-y += watchdog.h
+generic-y += clkdev.h
generic-y += div64.h
+ generic-y += exec.h
generic-y += local64.h
generic-y += irq_regs.h
generic-y += local.h
+generic-y += module.h
generic-y += word-at-a-time.h
generic-y += bug.h
generic-y += bugs.h
+generic-y += clkdev.h
generic-y += cputime.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
+ generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
generic-y += ioctl.h
sigset_t uc_sigmask; /* mask last for extensibility */
};
-#define COMPAT_SI_PAD_SIZE ((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int))
-
-struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[COMPAT_SI_PAD_SIZE];
-
- /* kill() */
- struct {
- unsigned int _pid; /* sender's pid */
- unsigned int _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- int _overrun_incr; /* amount to add to overrun */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- unsigned int _pid; /* sender's pid */
- unsigned int _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- unsigned int _pid; /* which child */
- unsigned int _uid; /* sender's uid */
- int _status; /* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
- struct {
- unsigned int _addr; /* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
- int _trapno; /* TRAP # which caused the signal */
-#endif
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-};
-
struct compat_rt_sigframe {
unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
struct compat_siginfo info;
regs->regs[1] = ptr_to_compat_reg(&frame->info);
regs->regs[2] = ptr_to_compat_reg(&frame->uc);
regs->flags |= PT_FLAGS_CALLER_SAVES;
-
- /*
- * Notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
generic-y += auxvec.h
generic-y += bitsperlong.h
generic-y += bugs.h
+generic-y += clkdev.h
generic-y += cputime.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
+ generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
generic-y += ftrace.h
Say no to build a 32-bit kernel - formerly known as i386
config X86_32
- def_bool !64BIT
+ def_bool y
+ depends on !64BIT
select CLKSRC_I8253
+ select HAVE_UID16
config X86_64
- def_bool 64BIT
+ def_bool y
+ depends on 64BIT
select X86_DEV_DMA_OPS
### Arch settings
select HAVE_KRETPROBES
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
select HAVE_FUNCTION_TRACE_MCOUNT_TEST
select HAVE_SYSCALL_TRACEPOINTS
+ select SYSCTL_EXCEPTION_TRACE
select HAVE_KVM
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_DEBUG_KMEMLEAK
select ANON_INODES
select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
select HAVE_CMPXCHG_LOCAL if !M386
select IRQ_FORCED_THREADING
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_BPF_JIT if X86_64
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select CLKEVT_I8253
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_IOMAP
select KTIME_SCALAR if X86_32
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
+ select MODULES_USE_ELF_REL if X86_32
+ select MODULES_USE_ELF_RELA if X86_64
+ select HAVE_RCU_USER_QS if X86_64
+ select HAVE_IRQ_TIME_ACCOUNTING
+ select GENERIC_KERNEL_THREAD
config INSTRUCTION_DECODER
- def_bool (KPROBES || PERF_EVENTS || UPROBES)
+ def_bool y
+ depends on KPROBES || PERF_EVENTS || UPROBES
config OUTPUT_FORMAT
string
bool
config NEED_DMA_MAP_STATE
- def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
+ def_bool y
+ depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
config NEED_SG_DMA_LENGTH
def_bool y
config GENERIC_ISA_DMA
- def_bool ISA_DMA_API
+ def_bool y
+ depends on ISA_DMA_API
config GENERIC_BUG
def_bool y
bool
config ARCH_MAY_HAVE_PC_FDC
- def_bool ISA_DMA_API
+ def_bool y
+ depends on ISA_DMA_API
config RWSEM_GENERIC_SPINLOCK
- def_bool !X86_XADD
+ def_bool y
+ depends on !X86_XADD
config RWSEM_XCHGADD_ALGORITHM
- def_bool X86_XADD
+ def_bool y
+ depends on X86_XADD
config GENERIC_CALIBRATE_DELAY
def_bool y
If in doubt, say "Y".
+config KVMTOOL_TEST_ENABLE
+ bool "Enable options to create a bootable tools/kvm/ kernel"
+ select NET
+ select NETDEVICES
+ select PCI
+ select BLOCK
+ select BLK_DEV
+ select NETWORK_FILESYSTEMS
+ select INET
+ select EXPERIMENTAL
+ select SERIAL_8250
+ select SERIAL_8250_CONSOLE
+ select IP_PNP
+ select IP_PNP_DHCP
+ select BINFMT_ELF
+ select PCI_MSI
+ select HAVE_ARCH_KGDB
+ select DEBUG_KERNEL
+ select KGDB
+ select KGDB_SERIAL_CONSOLE
+ select VIRTUALIZATION
+ select VIRTIO
+ select VIRTIO_RING
+ select VIRTIO_PCI
+ select VIRTIO_BLK
+ select VIRTIO_CONSOLE
+ select VIRTIO_NET
+ select 9P_FS
+ select NET_9P
+ select NET_9P_VIRTIO
+
menuconfig PARAVIRT_GUEST
bool "Paravirtualized guest support"
---help---
source "arch/x86/xen/Kconfig"
-config KVM_CLOCK
- bool "KVM paravirtualized clock"
- select PARAVIRT
- select PARAVIRT_CLOCK
- ---help---
- Turning on this option will allow you to run a paravirtualized clock
- when running over the KVM hypervisor. Instead of relying on a PIT
- (or probably other) emulation by the underlying device model, the host
- provides the guest with timing infrastructure such as time of day, and
- system time
-
config KVM_GUEST
- bool "KVM Guest support"
+ bool "KVM Guest support (including kvmclock)"
select PARAVIRT
+ select PARAVIRT
+ select PARAVIRT_CLOCK
+ default y if PARAVIRT_GUEST
---help---
This option enables various optimizations for running under the KVM
- hypervisor.
+ hypervisor. It includes a paravirtualized clock, so that instead
+ of relying on a PIT (or probably other) emulation by the
+ underlying device model, the host provides the guest with
+ timing infrastructure such as time of day, and system time
source "arch/x86/lguest/Kconfig"
def_bool y if X86_64
---help---
Support for software bounce buffers used on x86-64 systems
- which don't have a hardware IOMMU (e.g. the current generation
- of Intel's x86-64 CPUs). Using this PCI devices which can only
- access 32-bits of memory can be used on systems with more than
- 3 GB of memory. If unsure, say Y.
+ which don't have a hardware IOMMU. Using this PCI devices
+ which can only access 32-bits of memory can be used on systems
+ with more than 3 GB of memory.
+ If unsure, say Y.
config IOMMU_HELPER
- def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+ def_bool y
+ depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
-config IRQ_TIME_ACCOUNTING
- bool "Fine granularity task level IRQ time accounting"
- default n
- ---help---
- Select this option to enable fine granularity task irq time
- accounting. This is done by reading a timestamp on each
- transitions between softirq and hardirq state, so there can be a
- small performance impact.
-
- If in doubt, say N here.
-
source "kernel/Kconfig.preempt"
config X86_UP_APIC
config X86_MCE
bool "Machine Check / overheating reporting"
+ default y
---help---
Machine Check support allows the processor to notify the
kernel if it detects a problem (e.g. overheating, data corruption).
Say N otherwise.
config MICROCODE
- tristate "/dev/cpu/microcode - microcode support"
+ tristate "CPU microcode loading support"
select FW_LOADER
---help---
+
If you say Y here, you will be able to update the microcode on
certain Intel and AMD processors. The Intel support is for the
- IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
- Pentium 4, Xeon etc. The AMD support is for family 0x10 and
- 0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
- You will obviously need the actual microcode binary data itself
- which is not shipped with the Linux kernel.
+ IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
+ Xeon etc. The AMD support is for families 0x10 and later. You will
+ obviously need the actual microcode binary data itself which is not
+ shipped with the Linux kernel.
This option selects the general module only, you need to select
at least one vendor specific module as well.
- To compile this driver as a module, choose M here: the
- module will be called microcode.
+ To compile this driver as a module, choose M here: the module
+ will be called microcode.
config MICROCODE_INTEL
- bool "Intel microcode patch loading support"
+ bool "Intel microcode loading support"
depends on MICROCODE
default MICROCODE
select FW_LOADER
<http://www.urbanmyth.org/microcode/>.
config MICROCODE_AMD
- bool "AMD microcode patch loading support"
+ bool "AMD microcode loading support"
depends on MICROCODE
select FW_LOADER
---help---
consumes more pagetable space per process.
config ARCH_PHYS_ADDR_T_64BIT
- def_bool X86_64 || X86_PAE
+ def_bool y
+ depends on X86_64 || X86_PAE
config ARCH_DMA_ADDR_T_64BIT
- def_bool X86_64 || HIGHMEM64G
+ def_bool y
+ depends on X86_64 || HIGHMEM64G
config DIRECT_GBPAGES
bool "Enable 1GB pages for kernel pagetables" if EXPERT
depends on ARCH_SPARSEMEM_ENABLE
config ARCH_MEMORY_PROBE
- def_bool X86_64
- depends on MEMORY_HOTPLUG
+ def_bool y
+ depends on X86_64 && MEMORY_HOTPLUG
config ARCH_PROC_KCORE_TEXT
def_bool y
If supported, this is a high bandwidth, cryptographically
secure hardware random number generator.
+config X86_SMAP
+ def_bool y
+ prompt "Supervisor Mode Access Prevention" if EXPERT
+ ---help---
+ Supervisor Mode Access Prevention (SMAP) is a security
+ feature in newer Intel processors. There is a small
+ performance cost if this enabled and turned on; there is
+ also a small increase in the kernel size if this is enabled.
+
+ If unsure, say Y.
+
config EFI
bool "EFI runtime service support"
depends on ACPI
config PCI_CNB20LE_QUIRK
bool "Read CNB20LE Host Bridge Windows" if EXPERT
- default n
depends on PCI && EXPERIMENTAL
help
Read the PCI windows out of the CNB20LE host bridge. This allows
bool "IA32 Emulation"
depends on X86_64
select COMPAT_BINFMT_ELF
+ select HAVE_UID16
---help---
Include code to run legacy 32-bit programs under a
64-bit kernel. You should likely turn this on, unless you're
depends on IA32_EMULATION || X86_X32
select ARCH_WANT_OLD_COMPAT_IPC
+if COMPAT
config COMPAT_FOR_U64_ALIGNMENT
- def_bool COMPAT
- depends on X86_64
+ def_bool y
config SYSVIPC_COMPAT
def_bool y
- depends on COMPAT && SYSVIPC
+ depends on SYSVIPC
config KEYS_COMPAT
- bool
- depends on COMPAT && KEYS
- default y
+ def_bool y
+ depends on KEYS
+endif
endmenu
#include <asm/segment.h>
#include <asm/irqflags.h>
#include <asm/asm.h>
+#include <asm/smap.h>
#include <linux/linkage.h>
#include <linux/err.h>
SAVE_ARGS 0,1,0
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
+ ASM_STAC
1: movl (%rbp),%ebp
_ASM_EXTABLE(1b,ia32_badarg)
+ ASM_CLAC
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
CFI_REMEMBER_STATE
/* no need to do an access_ok check here because r8 has been
32bit zero extended */
/* hardware stack frame is complete now */
+ ASM_STAC
1: movl (%r8),%r9d
_ASM_EXTABLE(1b,ia32_badarg)
+ ASM_CLAC
orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
CFI_REMEMBER_STATE
END(ia32_cstar_target)
ia32_badarg:
+ ASM_CLAC
movq $-EFAULT,%rax
jmp ia32_sysret
CFI_ENDPROC
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
- PTREGSCALL stub32_execve, sys32_execve, %rcx
+ PTREGSCALL stub32_execve, compat_sys_execve, %rcx
PTREGSCALL stub32_fork, sys_fork, %rdi
PTREGSCALL stub32_clone, sys32_clone, %rdx
PTREGSCALL stub32_vfork, sys_vfork, %rdi
return ret;
}
-asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
+asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr,
int options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
return ret;
}
- asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp, struct pt_regs *regs)
- {
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = compat_do_execve(filename, argv, envp, regs);
- putname(filename);
- return error;
- }
-
asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
struct pt_regs *regs)
{
DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
-extern unsigned long kernel_eflags;
extern asmlinkage void ignore_sysret(void);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
} mm_segment_t;
- /*
- * create a kernel thread without removing it from tasklists
- */
- extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
+extern void set_task_blockstep(struct task_struct *task, bool on);
+
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
struct old_sigaction32 __user *);
asmlinkage long sys32_alarm(unsigned int);
-asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int);
asmlinkage long sys32_sysfs(int, u32, u32);
asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
asmlinkage long sys32_personality(unsigned long);
asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
- asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *,
- compat_uptr_t __user *, struct pt_regs *);
asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
long sys32_lseek(unsigned int, int, unsigned int);
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
- #define TIF_IRET 5 /* force IRET */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_FORK 18 /* ret_from_fork */
+#define TIF_NOHZ 19 /* in adaptive nohz mode */
#define TIF_MEMDIE 20 /* is terminating due to OOM killer */
#define TIF_DEBUG 21 /* uses debug registers */
#define TIF_IO_BITMAP 22 /* uses I/O bitmap */
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
- #define _TIF_IRET (1 << TIF_IRET)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_FORK (1 << TIF_FORK)
+#define _TIF_NOHZ (1 << TIF_NOHZ)
#define _TIF_DEBUG (1 << TIF_DEBUG)
#define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP)
#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_NOHZ)
/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
- _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
- ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+ ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_NOHZ)
/* Only used for 64 bit */
#define _TIF_DO_NOTIFY_MASK \
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
- obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
+ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-y += syscall_$(BITS).o
obj-$(CONFIG_X86_64) += vsyscall_64.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
-obj-$(CONFIG_KVM_GUEST) += kvm.o
-obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
obj-$(CONFIG_OF) += devicetree.o
obj-$(CONFIG_UPROBES) += uprobes.o
+obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
+#include <asm/smap.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
CFI_ENDPROC
END(ret_from_fork)
+ ENTRY(ret_from_kernel_execve)
+ movl %eax, %esp
+ movl $0,PT_EAX(%esp)
+ GET_THREAD_INFO(%ebp)
+ jmp syscall_exit
+ END(ret_from_kernel_execve)
+
/*
* Interrupt exit functions should be protected against kprobes
*/
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
/*
- * We can be coming here from a syscall done in the kernel space,
- * e.g. a failed kernel_execve().
+ * We can be coming here from child spawned by kernel_thread().
*/
movl PT_CS(%esp), %eax
andl $SEGMENT_RPL_MASK, %eax
*/
cmpl $__PAGE_OFFSET-3,%ebp
jae syscall_fault
+ ASM_STAC
1: movl (%ebp),%ebp
+ ASM_CLAC
movl %ebp,PT_EBP(%esp)
_ASM_EXTABLE(1b,syscall_fault)
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
+ ASM_CLAC
pushl_cfi %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
+ 1:
+ #else
+ movl %esp, %eax
+ #endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
movb PT_CS(%esp), %bl
call do_notify_resume
jmp resume_userspace
+ #ifdef CONFIG_VM86
ALIGN
work_notifysig_v86:
pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
popl_cfi %ecx
movl %eax, %esp
- #else
- movl %esp, %eax
+ jmp 1b
#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movb PT_CS(%esp), %bl
- andb $SEGMENT_RPL_MASK, %bl
- cmpb $USER_RPL, %bl
- jb resume_kernel
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
END(work_pending)
# perform syscall exit tracing
RING0_INT_FRAME # can't unwind into user space anyway
syscall_fault:
+ ASM_CLAC
GET_THREAD_INFO(%ebp)
movl $-EFAULT,PT_EAX(%esp)
jmp resume_userspace
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
- PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
+ ASM_CLAC
addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
SAVE_ALL
TRACE_IRQS_OFF
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
+ ASM_CLAC; \
pushl_cfi $~(nr); \
SAVE_ALL; \
TRACE_IRQS_OFF \
ENTRY(coprocessor_error)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi $do_coprocessor_error
jmp error_code
ENTRY(simd_coprocessor_error)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
ENTRY(device_not_available)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $-1 # mark this as an int
pushl_cfi $do_device_not_available
jmp error_code
ENTRY(overflow)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi $do_overflow
jmp error_code
ENTRY(bounds)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi $do_bounds
jmp error_code
ENTRY(invalid_op)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi $do_invalid_op
jmp error_code
ENTRY(coprocessor_segment_overrun)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi $do_coprocessor_segment_overrun
jmp error_code
ENTRY(invalid_TSS)
RING0_EC_FRAME
+ ASM_CLAC
pushl_cfi $do_invalid_TSS
jmp error_code
CFI_ENDPROC
ENTRY(segment_not_present)
RING0_EC_FRAME
+ ASM_CLAC
pushl_cfi $do_segment_not_present
jmp error_code
CFI_ENDPROC
ENTRY(stack_segment)
RING0_EC_FRAME
+ ASM_CLAC
pushl_cfi $do_stack_segment
jmp error_code
CFI_ENDPROC
ENTRY(alignment_check)
RING0_EC_FRAME
+ ASM_CLAC
pushl_cfi $do_alignment_check
jmp error_code
CFI_ENDPROC
ENTRY(divide_error)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0 # no error code
pushl_cfi $do_divide_error
jmp error_code
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi machine_check_vector
jmp error_code
ENTRY(spurious_interrupt_bug)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $0
pushl_cfi $do_spurious_interrupt_bug
jmp error_code
*/
.popsection
- ENTRY(kernel_thread_helper)
- pushl $0 # fake return address for unwinder
+ ENTRY(ret_from_kernel_thread)
CFI_STARTPROC
- movl %edi,%eax
- call *%esi
+ pushl_cfi %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl_cfi %eax
+ pushl_cfi $0x0202 # Reset kernel eflags
+ popfl_cfi
+ movl PT_EBP(%esp),%eax
+ call *PT_EBX(%esp)
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
- ENDPROC(kernel_thread_helper)
+ ENDPROC(ret_from_kernel_thread)
#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
pushl %eax
pushl %ecx
pushl %edx
- movl 0xc(%esp), %eax
+ pushl $0 /* Pass NULL as regs pointer */
+ movl 4*4(%esp), %eax
movl 0x4(%ebp), %edx
+ leal function_trace_op, %ecx
subl $MCOUNT_INSN_SIZE, %eax
.globl ftrace_call
ftrace_call:
call ftrace_stub
+ addl $4,%esp /* skip NULL pointer */
popl %edx
popl %ecx
popl %eax
+ftrace_ret:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
.globl ftrace_graph_call
ftrace_graph_call:
ret
END(ftrace_caller)
+ENTRY(ftrace_regs_caller)
+ pushf /* push flags before compare (in cs location) */
+ cmpl $0, function_trace_stop
+ jne ftrace_restore_flags
+
+ /*
+ * i386 does not save SS and ESP when coming from kernel.
+ * Instead, to get sp, ®s->sp is used (see ptrace.h).
+ * Unfortunately, that means eflags must be at the same location
+ * as the current return ip is. We move the return ip into the
+ * ip location, and move flags into the return ip location.
+ */
+ pushl 4(%esp) /* save return ip into ip slot */
+
+ pushl $0 /* Load 0 into orig_ax */
+ pushl %gs
+ pushl %fs
+ pushl %es
+ pushl %ds
+ pushl %eax
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %edx
+ pushl %ecx
+ pushl %ebx
+
+ movl 13*4(%esp), %eax /* Get the saved flags */
+ movl %eax, 14*4(%esp) /* Move saved flags into regs->flags location */
+ /* clobbering return ip */
+ movl $__KERNEL_CS,13*4(%esp)
+
+ movl 12*4(%esp), %eax /* Load ip (1st parameter) */
+ subl $MCOUNT_INSN_SIZE, %eax /* Adjust ip */
+ movl 0x4(%ebp), %edx /* Load parent ip (2nd parameter) */
+ leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+ pushl %esp /* Save pt_regs as 4th parameter */
+
+GLOBAL(ftrace_regs_call)
+ call ftrace_stub
+
+ addl $4, %esp /* Skip pt_regs */
+ movl 14*4(%esp), %eax /* Move flags back into cs */
+ movl %eax, 13*4(%esp) /* Needed to keep addl from modifying flags */
+ movl 12*4(%esp), %eax /* Get return ip from regs->ip */
+ movl %eax, 14*4(%esp) /* Put return ip back for ret */
+
+ popl %ebx
+ popl %ecx
+ popl %edx
+ popl %esi
+ popl %edi
+ popl %ebp
+ popl %eax
+ popl %ds
+ popl %es
+ popl %fs
+ popl %gs
+ addl $8, %esp /* Skip orig_ax and ip */
+ popf /* Pop flags at end (no addl to corrupt flags) */
+ jmp ftrace_ret
+
+ftrace_restore_flags:
+ popf
+ jmp ftrace_stub
#else /* ! CONFIG_DYNAMIC_FTRACE */
ENTRY(mcount)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
pushl %eax
pushl %ecx
pushl %edx
ENTRY(page_fault)
RING0_EC_FRAME
+ ASM_CLAC
pushl_cfi $do_page_fault
ALIGN
error_code:
ENTRY(debug)
RING0_INT_FRAME
+ ASM_CLAC
cmpl $ia32_sysenter_target,(%esp)
jne debug_stack_correct
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
*/
ENTRY(nmi)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
ENTRY(int3)
RING0_INT_FRAME
+ ASM_CLAC
pushl_cfi $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
RING0_EC_FRAME
+ ASM_CLAC
pushl_cfi $do_async_page_fault
jmp error_code
CFI_ENDPROC
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
+#include <asm/rcu.h>
+#include <asm/smap.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
.section .entry.text, "ax"
#ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook __fentry__
+#else
+# define function_hook mcount
+#endif
+
#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+
+ENTRY(function_hook)
retq
-END(mcount)
+END(function_hook)
+
+/* skip is set if stack has been adjusted */
+.macro ftrace_caller_setup skip=0
+ MCOUNT_SAVE_FRAME \skip
+
+ /* Load the ftrace_ops into the 3rd parameter */
+ leaq function_trace_op, %rdx
+
+ /* Load ip into the first parameter */
+ movq RIP(%rsp), %rdi
+ subq $MCOUNT_INSN_SIZE, %rdi
+ /* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
+ movq 8(%rbp), %rsi
+#endif
+.endm
ENTRY(ftrace_caller)
+ /* Check if tracing was disabled (quick check) */
cmpl $0, function_trace_stop
jne ftrace_stub
- MCOUNT_SAVE_FRAME
-
- movq 0x38(%rsp), %rdi
- movq 8(%rbp), %rsi
- subq $MCOUNT_INSN_SIZE, %rdi
+ ftrace_caller_setup
+ /* regs go into 4th parameter (but make it NULL) */
+ movq $0, %rcx
GLOBAL(ftrace_call)
call ftrace_stub
MCOUNT_RESTORE_FRAME
+ftrace_return:
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
GLOBAL(ftrace_graph_call)
retq
END(ftrace_caller)
+ENTRY(ftrace_regs_caller)
+ /* Save the current flags before compare (in SS location)*/
+ pushfq
+
+ /* Check if tracing was disabled (quick check) */
+ cmpl $0, function_trace_stop
+ jne ftrace_restore_flags
+
+ /* skip=8 to skip flags saved in SS */
+ ftrace_caller_setup 8
+
+ /* Save the rest of pt_regs */
+ movq %r15, R15(%rsp)
+ movq %r14, R14(%rsp)
+ movq %r13, R13(%rsp)
+ movq %r12, R12(%rsp)
+ movq %r11, R11(%rsp)
+ movq %r10, R10(%rsp)
+ movq %rbp, RBP(%rsp)
+ movq %rbx, RBX(%rsp)
+ /* Copy saved flags */
+ movq SS(%rsp), %rcx
+ movq %rcx, EFLAGS(%rsp)
+ /* Kernel segments */
+ movq $__KERNEL_DS, %rcx
+ movq %rcx, SS(%rsp)
+ movq $__KERNEL_CS, %rcx
+ movq %rcx, CS(%rsp)
+ /* Stack - skipping return address */
+ leaq SS+16(%rsp), %rcx
+ movq %rcx, RSP(%rsp)
+
+ /* regs go into 4th parameter */
+ leaq (%rsp), %rcx
+
+GLOBAL(ftrace_regs_call)
+ call ftrace_stub
+
+ /* Copy flags back to SS, to restore them */
+ movq EFLAGS(%rsp), %rax
+ movq %rax, SS(%rsp)
+
+ /* Handlers can change the RIP */
+ movq RIP(%rsp), %rax
+ movq %rax, SS+8(%rsp)
+
+ /* restore the rest of pt_regs */
+ movq R15(%rsp), %r15
+ movq R14(%rsp), %r14
+ movq R13(%rsp), %r13
+ movq R12(%rsp), %r12
+ movq R10(%rsp), %r10
+ movq RBP(%rsp), %rbp
+ movq RBX(%rsp), %rbx
+
+ /* skip=8 to skip flags saved in SS */
+ MCOUNT_RESTORE_FRAME 8
+
+ /* Restore flags */
+ popfq
+
+ jmp ftrace_return
+ftrace_restore_flags:
+ popfq
+ jmp ftrace_stub
+
+END(ftrace_regs_caller)
+
+
#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
cmpl $0, function_trace_stop
jne ftrace_stub
trace:
MCOUNT_SAVE_FRAME
- movq 0x38(%rsp), %rdi
+ movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+ movq SS+16(%rsp), %rsi
+#else
movq 8(%rbp), %rsi
+#endif
subq $MCOUNT_INSN_SIZE, %rdi
call *ftrace_trace_function
MCOUNT_RESTORE_FRAME
jmp ftrace_stub
-END(mcount)
+END(function_hook)
#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
- cmpl $0, function_trace_stop
- jne ftrace_stub
-
MCOUNT_SAVE_FRAME
+#ifdef CC_USING_FENTRY
+ leaq SS+16(%rsp), %rdi
+ movq $0, %rdx /* No framepointers needed */
+#else
leaq 8(%rbp), %rdi
- movq 0x38(%rsp), %rsi
movq (%rbp), %rdx
+#endif
+ movq RIP(%rsp), %rsi
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
.macro SAVE_ARGS_IRQ
cld
/* start from rbp in pt_regs and jump over */
- movq_cfi rdi, RDI-RBP
- movq_cfi rsi, RSI-RBP
- movq_cfi rdx, RDX-RBP
- movq_cfi rcx, RCX-RBP
- movq_cfi rax, RAX-RBP
- movq_cfi r8, R8-RBP
- movq_cfi r9, R9-RBP
- movq_cfi r10, R10-RBP
- movq_cfi r11, R11-RBP
+ movq_cfi rdi, (RDI-RBP)
+ movq_cfi rsi, (RSI-RBP)
+ movq_cfi rdx, (RDX-RBP)
+ movq_cfi rcx, (RCX-RBP)
+ movq_cfi rax, (RAX-RBP)
+ movq_cfi r8, (R8-RBP)
+ movq_cfi r9, (R9-RBP)
+ movq_cfi r10, (R10-RBP)
+ movq_cfi r11, (R11-RBP)
/* Save rbp so that we can unwind from get_irq_regs() */
movq_cfi rbp, 0
.endm
ENTRY(save_rest)
- PARTIAL_FRAME 1 REST_SKIP+8
+ PARTIAL_FRAME 1 (REST_SKIP+8)
movq 5*8+16(%rsp), %r11 /* save return address */
movq_cfi rbx, RBX+16
movq_cfi rbp, RBP+16
LOCK ; btr $TIF_FORK,TI_flags(%r8)
- pushq_cfi kernel_eflags(%rip)
+ pushq_cfi $0x0002
popfq_cfi # reset kernel eflags
call schedule_tail # rdi: 'prev' task parameter
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz retint_restore_args
+ jz 1f
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
+ 1:
+ subq $REST_SKIP, %rsp # move the stack pointer back
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ movq %rbp, %rdi
+ call *%rbx
+ # exit
+ mov %eax, %edi
+ call do_exit
+ ud2 # padding for call trace
+
CFI_ENDPROC
END(ret_from_fork)
* System call entry. Up to 6 arguments in registers are supported.
*
* SYSCALL does not save anything on the stack and does not change the
- * stack pointer.
+ * stack pointer. However, it does mask the flags register for us, so
+ * CLD and CLAC are not needed.
*/
/*
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
jmp sysret_check
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
- call sys32_execve
+ call compat_sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
+ ASM_CLAC
XCPT_FRAME
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
pushq_cfi %rdi
- call schedule
+ SCHEDULE_USER
popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
*/
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
+ ASM_CLAC
INTR_FRAME
pushq_cfi $~(\num)
.Lcommon_\sym:
*/
.macro zeroentry sym do_sym
ENTRY(\sym)
+ ASM_CLAC
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
.macro paranoidzeroentry sym do_sym
ENTRY(\sym)
+ ASM_CLAC
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
.macro paranoidzeroentry_ist sym do_sym ist
ENTRY(\sym)
+ ASM_CLAC
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
.macro errorentry sym do_sym
ENTRY(\sym)
+ ASM_CLAC
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
/* error code is on the stack already */
.macro paranoiderrorentry sym do_sym
ENTRY(\sym)
+ ASM_CLAC
XCPT_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
subq $ORIG_RAX-R15, %rsp
jmp 2b
.previous
- ENTRY(kernel_thread_helper)
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- call *%rsi
- # exit
- mov %eax, %edi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
- END(kernel_thread_helper)
-
- /*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
- ENTRY(kernel_execve)
- CFI_STARTPROC
- FAKE_STACK_FRAME $0
- SAVE_ALL
- movq %rsp,%rcx
- call sys_execve
- movq %rax, RAX(%rsp)
- RESTORE_REST
- testq %rax,%rax
- je int_ret_from_sys_call
- RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
- END(kernel_execve)
+ ENTRY(ret_from_kernel_execve)
+ movq %rdi, %rsp
+ movl $0, RAX(%rsp)
+ // RESTORE_REST
+ movq 0*8(%rsp), %r15
+ movq 1*8(%rsp), %r14
+ movq 2*8(%rsp), %r13
+ movq 3*8(%rsp), %r12
+ movq 4*8(%rsp), %rbp
+ movq 5*8(%rsp), %rbx
+ addq $(6*8), %rsp
+ jmp int_ret_from_sys_call
+ END(ret_from_kernel_execve)
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
paranoid_schedule:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
- call schedule
+ SCHEDULE_USER
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
jmp paranoid_userspace
{
int ret;
- unlazy_fpu(src);
-
*dst = *src;
if (fpu_allocated(&src->thread.fpu)) {
memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu));
ret = fpu_alloc(&dst->thread.fpu);
if (ret)
return ret;
- fpu_copy(&dst->thread.fpu, &src->thread.fpu);
+ fpu_copy(dst, src);
}
return 0;
}
SLAB_PANIC | SLAB_NOTRACK, NULL);
}
-static inline void drop_fpu(struct task_struct *tsk)
-{
- /*
- * Forget coprocessor state..
- */
- tsk->fpu_counter = 0;
- clear_fpu(tsk);
- clear_used_math();
-}
-
/*
* Free current thread data structures etc..
*/
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
- drop_fpu(tsk);
+ drop_init_fpu(tsk);
+ /*
+ * Free the FPU state for non xsave platforms. They get reallocated
+ * lazily at the first use.
+ */
+ if (!use_eager_fpu())
+ free_thread_xstate(tsk);
}
static void hard_disable_TSC(void)
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
- /*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
- extern void kernel_thread_helper(void);
-
- /*
- * Create a kernel thread
- */
- int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
- {
- struct pt_regs regs;
-
- memset(®s, 0, sizeof(regs));
-
- regs.si = (unsigned long) fn;
- regs.di = (unsigned long) arg;
-
- #ifdef CONFIG_X86_32
- regs.ds = __USER_DS;
- regs.es = __USER_DS;
- regs.fs = __KERNEL_PERCPU;
- regs.gs = __KERNEL_STACK_CANARY;
- #else
- regs.ss = __KERNEL_DS;
- #endif
-
- regs.orig_ax = -1;
- regs.ip = (unsigned long) kernel_thread_helper;
- regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, ®s, 0, NULL, NULL);
- }
- EXPORT_SYMBOL(kernel_thread);
-
- /*
- * sys_execve() executes a new program.
- */
- long sys_execve(const char __user *name,
- const char __user *const __user *argv,
- const char __user *const __user *envp, struct pt_regs *regs)
- {
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, regs);
-
- #ifdef CONFIG_X86_32
- if (error == 0) {
- /* Make sure we don't return using sysenter.. */
- set_thread_flag(TIF_IRET);
- }
- #endif
-
- putname(filename);
- return error;
- }
-
/*
* Idle related variables and functions
*/
#include <asm/switch_to.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+ asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
/*
* Return saved PC of a blocked thread.
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
- struct pt_regs *childregs;
+ struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
int err;
- childregs = task_pt_regs(p);
+ p->thread.sp = (unsigned long) childregs;
+ p->thread.sp0 = (unsigned long) (childregs+1);
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ p->thread.ip = (unsigned long) ret_from_kernel_thread;
+ task_user_gs(p) = __KERNEL_STACK_CANARY;
+ childregs->ds = __USER_DS;
+ childregs->es = __USER_DS;
+ childregs->fs = __KERNEL_PERCPU;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ p->fpu_counter = 0;
+ p->thread.io_bitmap_ptr = NULL;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+ return 0;
+ }
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;
- p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
-
p->thread.ip = (unsigned long) ret_from_fork;
-
task_user_gs(p) = get_user_gs(regs);
p->fpu_counter = 0;
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
- /*
- * Free the old FP and other extended state
- */
- free_thread_xstate(current);
+ regs->flags = X86_EFLAGS_IF;
+ /*
+ * force it to the iret return path by making it look as if there was
+ * some work pending.
+ */
+ set_thread_flag(TIF_NOTIFY_RESUME);
}
EXPORT_SYMBOL_GPL(start_thread);
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
int err;
struct pt_regs *childregs;
struct task_struct *me = current;
- childregs = ((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(p))) - 1;
- *childregs = *regs;
-
- childregs->ax = 0;
- if (user_mode(regs))
- childregs->sp = sp;
- else
- childregs->sp = (unsigned long)childregs;
-
+ p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+ childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.usersp = me->thread.usersp;
-
set_tsk_thread_flag(p, TIF_FORK);
-
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->sp = (unsigned long)childregs;
+ childregs->ss = __KERNEL_DS;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ return 0;
+ }
+ *childregs = *regs;
+
+ childregs->ax = 0;
+ childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
- /*
- * Free the old FP and other extended state
- */
- free_thread_xstate(current);
}
void
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate);
- err |= restore_i387_xstate(buf);
get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
+ err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
+
return err;
}
void __user **fpstate)
{
/* Default to using normal stack */
+ unsigned long math_size = 0;
unsigned long sp = regs->sp;
+ unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
-#ifdef CONFIG_X86_64
/* redzone */
- sp -= 128;
-#endif /* CONFIG_X86_64 */
+ if (config_enabled(CONFIG_X86_64))
+ sp -= 128;
if (!onsigstack) {
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (current->sas_ss_size)
sp = current->sas_ss_sp + current->sas_ss_size;
- } else {
-#ifdef CONFIG_X86_32
- /* This is the legacy signal stack switching. */
- if ((regs->ss & 0xffff) != __USER_DS &&
- !(ka->sa.sa_flags & SA_RESTORER) &&
- ka->sa.sa_restorer)
+ } else if (config_enabled(CONFIG_X86_32) &&
+ (regs->ss & 0xffff) != __USER_DS &&
+ !(ka->sa.sa_flags & SA_RESTORER) &&
+ ka->sa.sa_restorer) {
+ /* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer;
-#endif /* CONFIG_X86_32 */
}
}
if (used_math()) {
- sp -= sig_xstate_size;
-#ifdef CONFIG_X86_64
- sp = round_down(sp, 64);
-#endif /* CONFIG_X86_64 */
+ sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32),
+ &buf_fx, &math_size);
*fpstate = (void __user *)sp;
}
if (onsigstack && !likely(on_sig_stack(sp)))
return (void __user *)-1L;
- /* save i387 state */
- if (used_math() && save_i387_xstate(*fpstate) < 0)
+ /* save i387 and extended state */
+ if (used_math() &&
+ save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0)
return (void __user *)-1L;
return (void __user *)sp;
put_user_ex(sig, &frame->sig);
put_user_ex(&frame->info, &frame->pinfo);
put_user_ex(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, info);
/* Create the ucontext. */
if (cpu_has_xsave)
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
*/
put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
} put_user_catch(err);
+
+ err |= copy_siginfo_to_user(&frame->info, info);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
return -EFAULT;
put_user_ex(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags);
put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
}
} put_user_catch(err);
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
if (err)
return -EFAULT;
}
#endif /* CONFIG_X86_32 */
+static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
+ siginfo_t *info, compat_sigset_t *set,
+ struct pt_regs *regs)
+{
+#ifdef CONFIG_X86_X32_ABI
+ struct rt_sigframe_x32 __user *frame;
+ void __user *restorer;
+ int err = 0;
+ void __user *fpstate = NULL;
+
+ frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
+
+ if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
+ return -EFAULT;
+
+ if (ka->sa.sa_flags & SA_SIGINFO) {
+ if (copy_siginfo_to_user32(&frame->info, info))
+ return -EFAULT;
+ }
+
+ put_user_try {
+ /* Create the ucontext. */
+ if (cpu_has_xsave)
+ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
+ else
+ put_user_ex(0, &frame->uc.uc_flags);
+ put_user_ex(0, &frame->uc.uc_link);
+ put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
+ put_user_ex(sas_ss_flags(regs->sp),
+ &frame->uc.uc_stack.ss_flags);
+ put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ put_user_ex(0, &frame->uc.uc__pad0);
+
+ if (ka->sa.sa_flags & SA_RESTORER) {
+ restorer = ka->sa.sa_restorer;
+ } else {
+ /* could use a vstub here */
+ restorer = NULL;
+ err |= -EFAULT;
+ }
+ put_user_ex(restorer, &frame->pretcode);
+ } put_user_catch(err);
+
+ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
+ regs, set->sig[0]);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+
+ if (err)
+ return -EFAULT;
+
+ /* Set up registers for signal handler */
+ regs->sp = (unsigned long) frame;
+ regs->ip = (unsigned long) ka->sa.sa_handler;
+
+ /* We use the x32 calling convention here... */
+ regs->di = sig;
+ regs->si = (unsigned long) &frame->info;
+ regs->dx = (unsigned long) &frame->uc;
+
+ loadsegment(ds, __USER_DS);
+ loadsegment(es, __USER_DS);
+
+ regs->cs = __USER_CS;
+ regs->ss = __USER_DS;
+#endif /* CONFIG_X86_X32_ABI */
+
+ return 0;
+}
+
#ifdef CONFIG_X86_32
/*
* Atomically swap in the new signal mask, and wait for a signal.
return sig;
}
-#ifdef CONFIG_X86_32
-
-#define is_ia32 1
-#define ia32_setup_frame __setup_frame
-#define ia32_setup_rt_frame __setup_rt_frame
-
-#else /* !CONFIG_X86_32 */
-
-#ifdef CONFIG_IA32_EMULATION
-#define is_ia32 test_thread_flag(TIF_IA32)
-#else /* !CONFIG_IA32_EMULATION */
-#define is_ia32 0
-#endif /* CONFIG_IA32_EMULATION */
-
-#ifdef CONFIG_X86_X32_ABI
-#define is_x32 test_thread_flag(TIF_X32)
-
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
- siginfo_t *info, compat_sigset_t *set,
- struct pt_regs *regs);
-#else /* !CONFIG_X86_X32_ABI */
-#define is_x32 0
-#endif /* CONFIG_X86_X32_ABI */
-
-int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs *regs);
-
-#endif /* CONFIG_X86_32 */
-
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
sigset_t *set = sigmask_to_save();
+ compat_sigset_t *cset = (compat_sigset_t *) set;
/* Set up the stack frame */
- if (is_ia32) {
+ if (is_ia32_frame()) {
if (ka->sa.sa_flags & SA_SIGINFO)
- return ia32_setup_rt_frame(usig, ka, info, set, regs);
+ return ia32_setup_rt_frame(usig, ka, info, cset, regs);
else
- return ia32_setup_frame(usig, ka, set, regs);
-#ifdef CONFIG_X86_X32_ABI
- } else if (is_x32) {
- return x32_setup_rt_frame(usig, ka, info,
- (compat_sigset_t *)set, regs);
-#endif
+ return ia32_setup_frame(usig, ka, cset, regs);
+ } else if (is_x32_frame()) {
+ return x32_setup_rt_frame(usig, ka, info, cset, regs);
} else {
return __setup_rt_frame(sig, ka, info, set, regs);
}
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
+ rcu_user_exit();
+
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
- #ifdef CONFIG_X86_32
- clear_thread_flag(TIF_IRET);
- #endif /* CONFIG_X86_32 */
-
+
+ rcu_user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
}
#ifdef CONFIG_X86_X32_ABI
-static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
- siginfo_t *info, compat_sigset_t *set,
- struct pt_regs *regs)
-{
- struct rt_sigframe_x32 __user *frame;
- void __user *restorer;
- int err = 0;
- void __user *fpstate = NULL;
-
- frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
-
- if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
- return -EFAULT;
-
- if (ka->sa.sa_flags & SA_SIGINFO) {
- if (copy_siginfo_to_user32(&frame->info, info))
- return -EFAULT;
- }
-
- put_user_try {
- /* Create the ucontext. */
- if (cpu_has_xsave)
- put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
- else
- put_user_ex(0, &frame->uc.uc_flags);
- put_user_ex(0, &frame->uc.uc_link);
- put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- put_user_ex(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- put_user_ex(0, &frame->uc.uc__pad0);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
- regs, set->sig[0]);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
- if (ka->sa.sa_flags & SA_RESTORER) {
- restorer = ka->sa.sa_restorer;
- } else {
- /* could use a vstub here */
- restorer = NULL;
- err |= -EFAULT;
- }
- put_user_ex(restorer, &frame->pretcode);
- } put_user_catch(err);
-
- if (err)
- return -EFAULT;
-
- /* Set up registers for signal handler */
- regs->sp = (unsigned long) frame;
- regs->ip = (unsigned long) ka->sa.sa_handler;
-
- /* We use the x32 calling convention here... */
- regs->di = sig;
- regs->si = (unsigned long) &frame->info;
- regs->dx = (unsigned long) &frame->uc;
-
- loadsegment(ds, __USER_DS);
- loadsegment(es, __USER_DS);
-
- regs->cs = __USER_CS;
- regs->ss = __USER_DS;
-
- return 0;
-}
-
asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe_x32 __user *frame;
config UML_X86
def_bool y
select GENERIC_FIND_FIRST_BIT
+ select GENERIC_KERNEL_THREAD
config 64BIT
bool "64-bit kernel" if SUBARCH = "x86"
config X86_32
def_bool !64BIT
select HAVE_AOUT
+ select ARCH_WANT_IPC_PARSE_VERSION
+ select MODULES_USE_ELF_REL
config X86_64
def_bool 64BIT
+ select MODULES_USE_ELF_RELA
config RWSEM_XCHGADD_ALGORITHM
def_bool X86_XADD && 64BIT
include include/asm-generic/Kbuild.asm
+generic-y += clkdev.h
+ generic-y += exec.h
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
- #include <asm/exec.h>
+#ifndef user_long_t
+#define user_long_t long
+#endif
+#ifndef user_siginfo_t
+#define user_siginfo_t siginfo_t
+#endif
+
static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
}
if (elf_interpreter) {
- unsigned long uninitialized_var(interp_map_addr);
+ unsigned long interp_map_addr = 0;
elf_entry = load_elf_interp(&loc->interp_elf_ex,
interpreter,
if (always_dump_vma(vma))
goto whole;
- if (vma->vm_flags & VM_NODUMP)
+ if (vma->vm_flags & VM_DONTDUMP)
return 0;
/* Hugetlb memory check */
}
/* Do not dump I/O mapped devices or special mappings */
- if (vma->vm_flags & (VM_IO | VM_RESERVED))
+ if (vma->vm_flags & VM_IO)
return 0;
/* By default, dump shared memory if mapped from an anonymous file. */
fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
+static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
+ siginfo_t *siginfo)
+{
+ mm_segment_t old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ copy_siginfo_to_user((user_siginfo_t __user *) csigdata, siginfo);
+ set_fs(old_fs);
+ fill_note(note, "CORE", NT_SIGINFO, sizeof(*csigdata), csigdata);
+}
+
+#define MAX_FILE_NOTE_SIZE (4*1024*1024)
+/*
+ * Format of NT_FILE note:
+ *
+ * long count -- how many files are mapped
+ * long page_size -- units for file_ofs
+ * array of [COUNT] elements of
+ * long start
+ * long end
+ * long file_ofs
+ * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
+ */
+static void fill_files_note(struct memelfnote *note)
+{
+ struct vm_area_struct *vma;
+ unsigned count, size, names_ofs, remaining, n;
+ user_long_t *data;
+ user_long_t *start_end_ofs;
+ char *name_base, *name_curpos;
+
+ /* *Estimated* file count and total data size needed */
+ count = current->mm->map_count;
+ size = count * 64;
+
+ names_ofs = (2 + 3 * count) * sizeof(data[0]);
+ alloc:
+ if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
+ goto err;
+ size = round_up(size, PAGE_SIZE);
+ data = vmalloc(size);
+ if (!data)
+ goto err;
+
+ start_end_ofs = data + 2;
+ name_base = name_curpos = ((char *)data) + names_ofs;
+ remaining = size - names_ofs;
+ count = 0;
+ for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+ struct file *file;
+ const char *filename;
+
+ file = vma->vm_file;
+ if (!file)
+ continue;
+ filename = d_path(&file->f_path, name_curpos, remaining);
+ if (IS_ERR(filename)) {
+ if (PTR_ERR(filename) == -ENAMETOOLONG) {
+ vfree(data);
+ size = size * 5 / 4;
+ goto alloc;
+ }
+ continue;
+ }
+
+ /* d_path() fills at the end, move name down */
+ /* n = strlen(filename) + 1: */
+ n = (name_curpos + remaining) - filename;
+ remaining = filename - name_curpos;
+ memmove(name_curpos, filename, n);
+ name_curpos += n;
+
+ *start_end_ofs++ = vma->vm_start;
+ *start_end_ofs++ = vma->vm_end;
+ *start_end_ofs++ = vma->vm_pgoff;
+ count++;
+ }
+
+ /* Now we know exact count of files, can store it */
+ data[0] = count;
+ data[1] = PAGE_SIZE;
+ /*
+ * Count usually is less than current->mm->map_count,
+ * we need to move filenames down.
+ */
+ n = current->mm->map_count - count;
+ if (n != 0) {
+ unsigned shift_bytes = n * 3 * sizeof(data[0]);
+ memmove(name_base - shift_bytes, name_base,
+ name_curpos - name_base);
+ name_curpos -= shift_bytes;
+ }
+
+ size = name_curpos - (char *)data;
+ fill_note(note, "CORE", NT_FILE, size, data);
+ err: ;
+}
+
#ifdef CORE_DUMP_USE_REGSET
#include <linux/regset.h>
struct elf_note_info {
struct elf_thread_core_info *thread;
struct memelfnote psinfo;
+ struct memelfnote signote;
struct memelfnote auxv;
+ struct memelfnote files;
+ user_siginfo_t csigdata;
size_t size;
int thread_notes;
};
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- long signr, struct pt_regs *regs)
+ siginfo_t *siginfo, struct pt_regs *regs)
{
struct task_struct *dump_task = current;
const struct user_regset_view *view = task_user_regset_view(dump_task);
* Now fill in each thread's information.
*/
for (t = info->thread; t != NULL; t = t->next)
- if (!fill_thread_core_info(t, view, signr, &info->size))
+ if (!fill_thread_core_info(t, view, siginfo->si_signo, &info->size))
return 0;
/*
fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
info->size += notesize(&info->psinfo);
+ fill_siginfo_note(&info->signote, &info->csigdata, siginfo);
+ info->size += notesize(&info->signote);
+
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
+ fill_files_note(&info->files);
+ info->size += notesize(&info->files);
+
return 1;
}
if (first && !writenote(&info->psinfo, file, foffset))
return 0;
+ if (first && !writenote(&info->signote, file, foffset))
+ return 0;
if (first && !writenote(&info->auxv, file, foffset))
return 0;
+ if (first && !writenote(&info->files, file, foffset))
+ return 0;
for (i = 1; i < info->thread_notes; ++i)
if (t->notes[i].data &&
kfree(t);
}
kfree(info->psinfo.data);
+ vfree(info->files.data);
}
#else
#ifdef ELF_CORE_COPY_XFPREGS
elf_fpxregset_t *xfpu;
#endif
+ user_siginfo_t csigdata;
int thread_status_size;
int numnote;
};
memset(info, 0, sizeof(*info));
INIT_LIST_HEAD(&info->thread_list);
- /* Allocate space for six ELF notes */
- info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
+ /* Allocate space for ELF notes */
+ info->notes = kmalloc(8 * sizeof(struct memelfnote), GFP_KERNEL);
if (!info->notes)
return 0;
info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
if (!info->psinfo)
- goto notes_free;
+ return 0;
info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
if (!info->prstatus)
- goto psinfo_free;
+ return 0;
info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
if (!info->fpu)
- goto prstatus_free;
+ return 0;
#ifdef ELF_CORE_COPY_XFPREGS
info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
if (!info->xfpu)
- goto fpu_free;
+ return 0;
#endif
return 1;
-#ifdef ELF_CORE_COPY_XFPREGS
- fpu_free:
- kfree(info->fpu);
-#endif
- prstatus_free:
- kfree(info->prstatus);
- psinfo_free:
- kfree(info->psinfo);
- notes_free:
- kfree(info->notes);
- return 0;
}
static int fill_note_info(struct elfhdr *elf, int phdrs,
struct elf_note_info *info,
- long signr, struct pt_regs *regs)
+ siginfo_t *siginfo, struct pt_regs *regs)
{
struct list_head *t;
if (!elf_note_info_init(info))
return 0;
- if (signr) {
+ if (siginfo->si_signo) {
struct core_thread *ct;
struct elf_thread_status *ets;
int sz;
ets = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(signr, ets);
+ sz = elf_dump_thread_status(siginfo->si_signo, ets);
info->thread_status_size += sz;
}
}
/* now collect the dump for the current */
memset(info->prstatus, 0, sizeof(*info->prstatus));
- fill_prstatus(info->prstatus, current, signr);
+ fill_prstatus(info->prstatus, current, siginfo->si_signo);
elf_core_copy_regs(&info->prstatus->pr_reg, regs);
/* Set up header */
fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
sizeof(*info->psinfo), info->psinfo);
- info->numnote = 2;
+ fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
+ fill_auxv_note(info->notes + 3, current->mm);
+ fill_files_note(info->notes + 4);
- fill_auxv_note(&info->notes[info->numnote++], current->mm);
+ info->numnote = 5;
/* Try to dump the FPU. */
info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
kfree(list_entry(tmp, struct elf_thread_status, list));
}
+ /* Free data allocated by fill_files_note(): */
+ vfree(info->notes[4].data);
+
kfree(info->prstatus);
kfree(info->psinfo);
kfree(info->notes);
* Collect all the non-memory information about the process for the
* notes. This also sets up the file header.
*/
- if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
+ if (!fill_note_info(elf, e_phnum, &info, cprm->siginfo, cprm->regs))
goto cleanup;
has_dumped = 1;
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/pgalloc.h>
- #include <asm/exec.h>
typedef char *elf_caddr_t;
int dump_ok;
/* Do not dump I/O mapped devices or special mappings */
- if (vma->vm_flags & (VM_IO | VM_RESERVED)) {
+ if (vma->vm_flags & VM_IO) {
kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
return 0;
}
goto cleanup;
#endif
- if (cprm->signr) {
+ if (cprm->siginfo->si_signo) {
struct core_thread *ct;
struct elf_thread_status *tmp;
int sz;
tmp = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(cprm->signr, tmp);
+ sz = elf_dump_thread_status(cprm->siginfo->si_signo, tmp);
thread_status_size += sz;
}
}
/* now collect the dump for the current */
- fill_prstatus(prstatus, current, cprm->signr);
+ fill_prstatus(prstatus, current, cprm->siginfo->si_signo);
elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
segs = current->mm->map_count;
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlb.h>
- #include <asm/exec.h>
#include <trace/events/task.h>
#include "internal.h"
+#include "coredump.h"
#include <trace/events/sched.h>
-int core_uses_pid;
-char core_pattern[CORENAME_MAX_SIZE] = "core";
-unsigned int core_pipe_limit;
int suid_dumpable = 0;
-struct core_name {
- char *corename;
- int used, size;
-};
-static atomic_t call_count = ATOMIC_INIT(1);
-
-/* The maximal length of core_pattern is also specified in sysctl.c */
-
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
union {
const char __user *const __user *native;
#ifdef CONFIG_COMPAT
- compat_uptr_t __user *compat;
+ const compat_uptr_t __user *compat;
#endif
} ptr;
};
* process cleanup to remove whatever mess we made.
*/
if (length != move_page_tables(vma, old_start,
- vma, new_start, length))
+ vma, new_start, length, false))
return -ENOMEM;
lru_add_drain();
sig->notify_count--;
while (sig->notify_count) {
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
spin_unlock_irq(lock);
schedule();
+ if (unlikely(__fatal_signal_pending(tsk)))
+ goto killed;
spin_lock_irq(lock);
}
spin_unlock_irq(lock);
write_lock_irq(&tasklist_lock);
if (likely(leader->exit_state))
break;
- __set_current_state(TASK_UNINTERRUPTIBLE);
+ __set_current_state(TASK_KILLABLE);
write_unlock_irq(&tasklist_lock);
schedule();
+ if (unlikely(__fatal_signal_pending(tsk)))
+ goto killed;
}
/*
BUG_ON(!thread_group_leader(tsk));
return 0;
-}
-
-/*
- * These functions flushes out all traces of the currently running executable
- * so that a new one can be started
- */
-static void flush_old_files(struct files_struct * files)
-{
- long j = -1;
- struct fdtable *fdt;
-
- spin_lock(&files->file_lock);
- for (;;) {
- unsigned long set, i;
- j++;
- i = j * BITS_PER_LONG;
- fdt = files_fdtable(files);
- if (i >= fdt->max_fds)
- break;
- set = fdt->close_on_exec[j];
- if (!set)
- continue;
- fdt->close_on_exec[j] = 0;
- spin_unlock(&files->file_lock);
- for ( ; set ; i++,set >>= 1) {
- if (set & 1) {
- sys_close(i);
- }
- }
- spin_lock(&files->file_lock);
-
- }
- spin_unlock(&files->file_lock);
+killed:
+ /* protects against exit_notify() and __exit_signal() */
+ read_lock(&tasklist_lock);
+ sig->group_exit_task = NULL;
+ sig->notify_count = 0;
+ read_unlock(&tasklist_lock);
+ return -EAGAIN;
}
char *get_task_comm(char *buf, struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(get_task_comm);
+/*
+ * These functions flushes out all traces of the currently running executable
+ * so that a new one can be started
+ */
+
void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
current->sas_ss_sp = current->sas_ss_size = 0;
if (uid_eq(current_euid(), current_uid()) && gid_eq(current_egid(), current_gid()))
- set_dumpable(current->mm, 1);
+ set_dumpable(current->mm, SUID_DUMPABLE_ENABLED);
else
set_dumpable(current->mm, suid_dumpable);
current->self_exec_id++;
flush_signal_handlers(current, 0);
- flush_old_files(current->files);
+ do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
}
#ifdef CONFIG_COMPAT
- int compat_do_execve(char *filename,
- compat_uptr_t __user *__argv,
- compat_uptr_t __user *__envp,
+ int compat_do_execve(const char *filename,
+ const compat_uptr_t __user *__argv,
+ const compat_uptr_t __user *__envp,
struct pt_regs *regs)
{
struct user_arg_ptr argv = {
EXPORT_SYMBOL(set_binfmt);
-static int expand_corename(struct core_name *cn)
-{
- char *old_corename = cn->corename;
-
- cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
- cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
-
- if (!cn->corename) {
- kfree(old_corename);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static int cn_printf(struct core_name *cn, const char *fmt, ...)
-{
- char *cur;
- int need;
- int ret;
- va_list arg;
-
- va_start(arg, fmt);
- need = vsnprintf(NULL, 0, fmt, arg);
- va_end(arg);
-
- if (likely(need < cn->size - cn->used - 1))
- goto out_printf;
-
- ret = expand_corename(cn);
- if (ret)
- goto expand_fail;
-
-out_printf:
- cur = cn->corename + cn->used;
- va_start(arg, fmt);
- vsnprintf(cur, need + 1, fmt, arg);
- va_end(arg);
- cn->used += need;
- return 0;
-
-expand_fail:
- return ret;
-}
-
-static void cn_escape(char *str)
-{
- for (; *str; str++)
- if (*str == '/')
- *str = '!';
-}
-
-static int cn_print_exe_file(struct core_name *cn)
-{
- struct file *exe_file;
- char *pathbuf, *path;
- int ret;
-
- exe_file = get_mm_exe_file(current->mm);
- if (!exe_file) {
- char *commstart = cn->corename + cn->used;
- ret = cn_printf(cn, "%s (path unknown)", current->comm);
- cn_escape(commstart);
- return ret;
- }
-
- pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
- if (!pathbuf) {
- ret = -ENOMEM;
- goto put_exe_file;
- }
-
- path = d_path(&exe_file->f_path, pathbuf, PATH_MAX);
- if (IS_ERR(path)) {
- ret = PTR_ERR(path);
- goto free_buf;
- }
-
- cn_escape(path);
-
- ret = cn_printf(cn, "%s", path);
-
-free_buf:
- kfree(pathbuf);
-put_exe_file:
- fput(exe_file);
- return ret;
-}
-
-/* format_corename will inspect the pattern parameter, and output a
- * name into corename, which must have space for at least
- * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
- */
-static int format_corename(struct core_name *cn, long signr)
-{
- const struct cred *cred = current_cred();
- const char *pat_ptr = core_pattern;
- int ispipe = (*pat_ptr == '|');
- int pid_in_pattern = 0;
- int err = 0;
-
- cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
- cn->corename = kmalloc(cn->size, GFP_KERNEL);
- cn->used = 0;
-
- if (!cn->corename)
- return -ENOMEM;
-
- /* Repeat as long as we have more pattern to process and more output
- space */
- while (*pat_ptr) {
- if (*pat_ptr != '%') {
- if (*pat_ptr == 0)
- goto out;
- err = cn_printf(cn, "%c", *pat_ptr++);
- } else {
- switch (*++pat_ptr) {
- /* single % at the end, drop that */
- case 0:
- goto out;
- /* Double percent, output one percent */
- case '%':
- err = cn_printf(cn, "%c", '%');
- break;
- /* pid */
- case 'p':
- pid_in_pattern = 1;
- err = cn_printf(cn, "%d",
- task_tgid_vnr(current));
- break;
- /* uid */
- case 'u':
- err = cn_printf(cn, "%d", cred->uid);
- break;
- /* gid */
- case 'g':
- err = cn_printf(cn, "%d", cred->gid);
- break;
- /* signal that caused the coredump */
- case 's':
- err = cn_printf(cn, "%ld", signr);
- break;
- /* UNIX time of coredump */
- case 't': {
- struct timeval tv;
- do_gettimeofday(&tv);
- err = cn_printf(cn, "%lu", tv.tv_sec);
- break;
- }
- /* hostname */
- case 'h': {
- char *namestart = cn->corename + cn->used;
- down_read(&uts_sem);
- err = cn_printf(cn, "%s",
- utsname()->nodename);
- up_read(&uts_sem);
- cn_escape(namestart);
- break;
- }
- /* executable */
- case 'e': {
- char *commstart = cn->corename + cn->used;
- err = cn_printf(cn, "%s", current->comm);
- cn_escape(commstart);
- break;
- }
- case 'E':
- err = cn_print_exe_file(cn);
- break;
- /* core limit size */
- case 'c':
- err = cn_printf(cn, "%lu",
- rlimit(RLIMIT_CORE));
- break;
- default:
- break;
- }
- ++pat_ptr;
- }
-
- if (err)
- return err;
- }
-
- /* Backward compatibility with core_uses_pid:
- *
- * If core_pattern does not include a %p (as is the default)
- * and core_uses_pid is set, then .%pid will be appended to
- * the filename. Do not do this for piped commands. */
- if (!ispipe && !pid_in_pattern && core_uses_pid) {
- err = cn_printf(cn, ".%d", task_tgid_vnr(current));
- if (err)
- return err;
- }
-out:
- return ispipe;
-}
-
-static int zap_process(struct task_struct *start, int exit_code)
-{
- struct task_struct *t;
- int nr = 0;
-
- start->signal->flags = SIGNAL_GROUP_EXIT;
- start->signal->group_exit_code = exit_code;
- start->signal->group_stop_count = 0;
-
- t = start;
- do {
- task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
- if (t != current && t->mm) {
- sigaddset(&t->pending.signal, SIGKILL);
- signal_wake_up(t, 1);
- nr++;
- }
- } while_each_thread(start, t);
-
- return nr;
-}
-
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
-{
- struct task_struct *g, *p;
- unsigned long flags;
- int nr = -EAGAIN;
-
- spin_lock_irq(&tsk->sighand->siglock);
- if (!signal_group_exit(tsk->signal)) {
- mm->core_state = core_state;
- nr = zap_process(tsk, exit_code);
- }
- spin_unlock_irq(&tsk->sighand->siglock);
- if (unlikely(nr < 0))
- return nr;
-
- if (atomic_read(&mm->mm_users) == nr + 1)
- goto done;
- /*
- * We should find and kill all tasks which use this mm, and we should
- * count them correctly into ->nr_threads. We don't take tasklist
- * lock, but this is safe wrt:
- *
- * fork:
- * None of sub-threads can fork after zap_process(leader). All
- * processes which were created before this point should be
- * visible to zap_threads() because copy_process() adds the new
- * process to the tail of init_task.tasks list, and lock/unlock
- * of ->siglock provides a memory barrier.
- *
- * do_exit:
- * The caller holds mm->mmap_sem. This means that the task which
- * uses this mm can't pass exit_mm(), so it can't exit or clear
- * its ->mm.
- *
- * de_thread:
- * It does list_replace_rcu(&leader->tasks, ¤t->tasks),
- * we must see either old or new leader, this does not matter.
- * However, it can change p->sighand, so lock_task_sighand(p)
- * must be used. Since p->mm != NULL and we hold ->mmap_sem
- * it can't fail.
- *
- * Note also that "g" can be the old leader with ->mm == NULL
- * and already unhashed and thus removed from ->thread_group.
- * This is OK, __unhash_process()->list_del_rcu() does not
- * clear the ->next pointer, we will find the new leader via
- * next_thread().
- */
- rcu_read_lock();
- for_each_process(g) {
- if (g == tsk->group_leader)
- continue;
- if (g->flags & PF_KTHREAD)
- continue;
- p = g;
- do {
- if (p->mm) {
- if (unlikely(p->mm == mm)) {
- lock_task_sighand(p, &flags);
- nr += zap_process(p, exit_code);
- unlock_task_sighand(p, &flags);
- }
- break;
- }
- } while_each_thread(g, p);
- }
- rcu_read_unlock();
-done:
- atomic_set(&core_state->nr_threads, nr);
- return nr;
-}
-
-static int coredump_wait(int exit_code, struct core_state *core_state)
-{
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int core_waiters = -EBUSY;
-
- init_completion(&core_state->startup);
- core_state->dumper.task = tsk;
- core_state->dumper.next = NULL;
-
- down_write(&mm->mmap_sem);
- if (!mm->core_state)
- core_waiters = zap_threads(tsk, mm, core_state, exit_code);
- up_write(&mm->mmap_sem);
-
- if (core_waiters > 0) {
- struct core_thread *ptr;
-
- wait_for_completion(&core_state->startup);
- /*
- * Wait for all the threads to become inactive, so that
- * all the thread context (extended register state, like
- * fpu etc) gets copied to the memory.
- */
- ptr = core_state->dumper.next;
- while (ptr != NULL) {
- wait_task_inactive(ptr->task, 0);
- ptr = ptr->next;
- }
- }
-
- return core_waiters;
-}
-
-static void coredump_finish(struct mm_struct *mm)
-{
- struct core_thread *curr, *next;
- struct task_struct *task;
-
- next = mm->core_state->dumper.next;
- while ((curr = next) != NULL) {
- next = curr->next;
- task = curr->task;
- /*
- * see exit_mm(), curr->task must not see
- * ->task == NULL before we read ->next.
- */
- smp_mb();
- curr->task = NULL;
- wake_up_process(task);
- }
-
- mm->core_state = NULL;
-}
-
/*
* set_dumpable converts traditional three-value dumpable to two flags and
* stores them into mm->flags. It modifies lower two bits of mm->flags, but
}
}
-static int __get_dumpable(unsigned long mm_flags)
+int __get_dumpable(unsigned long mm_flags)
{
int ret;
{
return __get_dumpable(mm->flags);
}
-static void wait_for_dump_helpers(struct file *file)
-{
- struct pipe_inode_info *pipe;
-
- pipe = file->f_path.dentry->d_inode->i_pipe;
-
- pipe_lock(pipe);
- pipe->readers++;
- pipe->writers--;
-
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
-
- pipe->readers--;
- pipe->writers++;
- pipe_unlock(pipe);
-
-}
-
-
-/*
- * umh_pipe_setup
- * helper function to customize the process used
- * to collect the core in userspace. Specifically
- * it sets up a pipe and installs it as fd 0 (stdin)
- * for the process. Returns 0 on success, or
- * PTR_ERR on failure.
- * Note that it also sets the core limit to 1. This
- * is a special value that we use to trap recursive
- * core dumps
- */
-static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
-{
- struct file *files[2];
- struct fdtable *fdt;
- struct coredump_params *cp = (struct coredump_params *)info->data;
- struct files_struct *cf = current->files;
- int err = create_pipe_files(files, 0);
- if (err)
- return err;
-
- cp->file = files[1];
-
- sys_close(0);
- fd_install(0, files[0]);
- spin_lock(&cf->file_lock);
- fdt = files_fdtable(cf);
- __set_open_fd(0, fdt);
- __clear_close_on_exec(0, fdt);
- spin_unlock(&cf->file_lock);
-
- /* and disallow core files too */
- current->signal->rlim[RLIMIT_CORE] = (struct rlimit){1, 1};
-
- return 0;
-}
-
-void do_coredump(long signr, int exit_code, struct pt_regs *regs)
-{
- struct core_state core_state;
- struct core_name cn;
- struct mm_struct *mm = current->mm;
- struct linux_binfmt * binfmt;
- const struct cred *old_cred;
- struct cred *cred;
- int retval = 0;
- int flag = 0;
- int ispipe;
- bool need_nonrelative = false;
- static atomic_t core_dump_count = ATOMIC_INIT(0);
- struct coredump_params cprm = {
- .signr = signr,
- .regs = regs,
- .limit = rlimit(RLIMIT_CORE),
- /*
- * We must use the same mm->flags while dumping core to avoid
- * inconsistency of bit flags, since this flag is not protected
- * by any locks.
- */
- .mm_flags = mm->flags,
- };
-
- audit_core_dumps(signr);
-
- binfmt = mm->binfmt;
- if (!binfmt || !binfmt->core_dump)
- goto fail;
- if (!__get_dumpable(cprm.mm_flags))
- goto fail;
-
- cred = prepare_creds();
- if (!cred)
- goto fail;
- /*
- * We cannot trust fsuid as being the "true" uid of the process
- * nor do we know its entire history. We only know it was tainted
- * so we dump it as root in mode 2, and only into a controlled
- * environment (pipe handler or fully qualified path).
- */
- if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
- /* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
- cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
- }
-
- retval = coredump_wait(exit_code, &core_state);
- if (retval < 0)
- goto fail_creds;
-
- old_cred = override_creds(cred);
-
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
- ispipe = format_corename(&cn, signr);
-
- if (ispipe) {
- int dump_count;
- char **helper_argv;
-
- if (ispipe < 0) {
- printk(KERN_WARNING "format_corename failed\n");
- printk(KERN_WARNING "Aborting core\n");
- goto fail_corename;
- }
-
- if (cprm.limit == 1) {
- /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
- *
- * Normally core limits are irrelevant to pipes, since
- * we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value, this is a
- * consistent way to catch recursive crashes.
- * We can still crash if the core_pattern binary sets
- * RLIM_CORE = !1, but it runs as root, and can do
- * lots of stupid things.
- *
- * Note that we use task_tgid_vnr here to grab the pid
- * of the process group leader. That way we get the
- * right pid if a thread in a multi-threaded
- * core_pattern process dies.
- */
- printk(KERN_WARNING
- "Process %d(%s) has RLIMIT_CORE set to 1\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Aborting core\n");
- goto fail_unlock;
- }
- cprm.limit = RLIM_INFINITY;
-
- dump_count = atomic_inc_return(&core_dump_count);
- if (core_pipe_limit && (core_pipe_limit < dump_count)) {
- printk(KERN_WARNING "Pid %d(%s) over core_pipe_limit\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_dropcount;
- }
-
- helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
- if (!helper_argv) {
- printk(KERN_WARNING "%s failed to allocate memory\n",
- __func__);
- goto fail_dropcount;
- }
-
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
- argv_free(helper_argv);
- if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
- cn.corename);
- goto close_fail;
- }
- } else {
- struct inode *inode;
-
- if (cprm.limit < binfmt->min_coredump)
- goto fail_unlock;
-
- if (need_nonrelative && cn.corename[0] != '/') {
- printk(KERN_WARNING "Pid %d(%s) can only dump core "\
- "to fully qualified path!\n",
- task_tgid_vnr(current), current->comm);
- printk(KERN_WARNING "Skipping core dump\n");
- goto fail_unlock;
- }
-
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
- 0600);
- if (IS_ERR(cprm.file))
- goto fail_unlock;
-
- inode = cprm.file->f_path.dentry->d_inode;
- if (inode->i_nlink > 1)
- goto close_fail;
- if (d_unhashed(cprm.file->f_path.dentry))
- goto close_fail;
- /*
- * AK: actually i see no reason to not allow this for named
- * pipes etc, but keep the previous behaviour for now.
- */
- if (!S_ISREG(inode->i_mode))
- goto close_fail;
- /*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
- */
- if (!uid_eq(inode->i_uid, current_fsuid()))
- goto close_fail;
- if (!cprm.file->f_op || !cprm.file->f_op->write)
- goto close_fail;
- if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
- goto close_fail;
- }
-
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
-
- if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(cprm.file);
-close_fail:
- if (cprm.file)
- filp_close(cprm.file, NULL);
-fail_dropcount:
- if (ispipe)
- atomic_dec(&core_dump_count);
-fail_unlock:
- kfree(cn.corename);
-fail_corename:
- coredump_finish(mm);
- revert_creds(old_cred);
-fail_creds:
- put_cred(cred);
-fail:
- return;
-}
-
-/*
- * Core dumping helper functions. These are the only things you should
- * do on a core-file: use only these functions to write out all the
- * necessary info.
- */
-int dump_write(struct file *file, const void *addr, int nr)
-{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
-}
-EXPORT_SYMBOL(dump_write);
-
-int dump_seek(struct file *file, loff_t off)
-{
- int ret = 1;
-
- if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
- return 0;
- } else {
- char *buf = (char *)get_zeroed_page(GFP_KERNEL);
-
- if (!buf)
- return 0;
- while (off > 0) {
- unsigned long n = off;
-
- if (n > PAGE_SIZE)
- n = PAGE_SIZE;
- if (!dump_write(file, buf, n)) {
- ret = 0;
- break;
- }
- off -= n;
- }
- free_page((unsigned long)buf);
- }
- return ret;
-}
-EXPORT_SYMBOL(dump_seek);
-
+
+ #ifdef __ARCH_WANT_SYS_EXECVE
+ SYSCALL_DEFINE3(execve,
+ const char __user *, filename,
+ const char __user *const __user *, argv,
+ const char __user *const __user *, envp)
+ {
+ const char *path = getname(filename);
+ int error = PTR_ERR(path);
+ if (!IS_ERR(path)) {
+ error = do_execve(path, argv, envp, current_pt_regs());
+ putname(path);
+ }
+ return error;
+ }
+ #ifdef CONFIG_COMPAT
+ asmlinkage long compat_sys_execve(const char __user * filename,
+ const compat_uptr_t __user * argv,
+ const compat_uptr_t __user * envp)
+ {
+ const char *path = getname(filename);
+ int error = PTR_ERR(path);
+ if (!IS_ERR(path)) {
+ error = compat_do_execve(path, argv, envp, current_pt_regs());
+ putname(path);
+ }
+ return error;
+ }
+ #endif
+ #endif
+
+ #ifdef __ARCH_WANT_KERNEL_EXECVE
+ int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
+ {
+ struct pt_regs *p = current_pt_regs();
+ int ret;
+
+ ret = do_execve(filename,
+ (const char __user *const __user *)argv,
+ (const char __user *const __user *)envp, p);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * We were successful. We won't be returning to our caller, but
+ * instead to user space by manipulating the kernel stack.
+ */
+ ret_from_kernel_execve(p);
+ }
+ #endif
#ifdef __KERNEL__
#include <linux/sched.h>
+ #include <linux/unistd.h>
+ #include <asm/exec.h>
#define CORENAME_MAX_SIZE 128
/* Function parameter for binfmt->coredump */
struct coredump_params {
- long signr;
+ siginfo_t *siginfo;
struct pt_regs *regs;
struct file *file;
unsigned long limit;
struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
-extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);
+ #ifdef __ARCH_WANT_KERNEL_EXECVE
+ extern void ret_from_kernel_execve(struct pt_regs *normal) __noreturn;
+ #endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_BINFMTS_H */
char f_fpack[6];
};
-typedef union compat_sigval {
- compat_int_t sival_int;
- compat_uptr_t sival_ptr;
-} compat_sigval_t;
-
#define COMPAT_SIGEV_PAD_SIZE ((SIGEV_MAX_SIZE/sizeof(int)) - 3)
typedef struct compat_sigevent {
const struct compat_iovec __user *vec,
unsigned long vlen, u32 pos_low, u32 pos_high);
- int compat_do_execve(char *filename, compat_uptr_t __user *argv,
- compat_uptr_t __user *envp, struct pt_regs *regs);
+ int compat_do_execve(const char *filename, const compat_uptr_t __user *argv,
+ const compat_uptr_t __user *envp, struct pt_regs *regs);
+ #ifdef __ARCH_WANT_SYS_EXECVE
+ asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
+ const compat_uptr_t __user *envp);
+ #endif
asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
unsigned long liovcnt, const struct compat_iovec __user *rvec,
unsigned long riovcnt, unsigned long flags);
+asmlinkage long compat_sys_sendfile(int out_fd, int in_fd,
+ compat_off_t __user *offset, compat_size_t count);
+
#else
#define is_compat_task() (0)
#define PTRACE_MODE_READ 0x01
#define PTRACE_MODE_ATTACH 0x02
#define PTRACE_MODE_NOAUDIT 0x04
-/* Returns 0 on success, -errno on denial. */
-extern int __ptrace_may_access(struct task_struct *task, unsigned int mode);
/* Returns true on success, false on denial. */
extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
#define arch_ptrace_stop(code, info) do { } while (0)
#endif
+ #ifndef current_pt_regs
+ #define current_pt_regs() task_pt_regs(current)
+ #endif
+
extern int task_current_syscall(struct task_struct *target, long *callno,
unsigned long args[6], unsigned int maxargs,
unsigned long *sp, unsigned long *pc);
extern int runqueue_is_locked(int cpu);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-extern void select_nohz_load_balancer(int stop_tick);
+extern void nohz_balance_enter_idle(int cpu);
extern void set_cpu_sd_state_idle(void);
extern int get_nohz_timer_target(void);
#else
-static inline void select_nohz_load_balancer(int stop_tick) { }
+static inline void nohz_balance_enter_idle(int cpu) { }
static inline void set_cpu_sd_state_idle(void) { }
#endif
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
+#define MMF_HAS_UPROBES 19 /* has uprobes */
+#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
+
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
struct sighand_struct {
struct rw_semaphore group_rwsem;
#endif
- int oom_adj; /* OOM kill score adjustment (bit shift) */
int oom_score_adj; /* OOM kill score adjustment */
int oom_score_adj_min; /* OOM kill score adjustment minimum value.
* Only settable by CAP_SYS_RESOURCE. */
* (notably. ptrace) */
};
-/* Context switch must be unlocked if interrupts are to be enabled */
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-# define __ARCH_WANT_UNLOCKED_CTXSW
-#endif
-
/*
* Bits in flags field of signal_struct.
*/
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */
#define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
+#define SD_NUMA 0x4000 /* cross-node balancing */
extern int __weak arch_sd_sibiling_asym_packing(void);
struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
- uid_t loginuid;
+ kuid_t loginuid;
unsigned int sessionid;
#endif
struct seccomp seccomp;
short il_next;
short pref_node_fork;
#endif
+#ifdef CONFIG_SCHED_NUMA
+ int node; /* task home node */
+ int node_curr, node_last; /* home node filter */
+ u64 node_stamp; /* migration stamp */
+ u64 numa_runtime_stamp;
+ u64 numa_walltime_stamp;
+ unsigned long numa_contrib;
+#endif /* CONFIG_SCHED_NUMA */
+
struct rcu_head rcu;
/*
* cache last used pipe for splice
*/
struct pipe_inode_info *splice_pipe;
+
+ struct page_frag task_frag;
+
#ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info *delays;
#endif
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
+static inline int tsk_home_node(struct task_struct *p)
+{
+#ifdef CONFIG_SCHED_NUMA
+ return p->node;
+#else
+ return -1;
+#endif
+}
+
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
#endif
+static inline void rcu_switch(struct task_struct *prev,
+ struct task_struct *next)
+{
+#ifdef CONFIG_RCU_USER_QS
+ rcu_user_hooks_switch(prev, next);
+#endif
+}
+
static inline void tsk_restore_flags(struct task_struct *task,
unsigned long orig_flags, unsigned long flags)
{
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_timer_migration;
extern unsigned int sysctl_sched_shares_window;
+extern unsigned int sysctl_sched_numa_task_period;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
loff_t *ppos);
-#endif
-#ifdef CONFIG_SCHED_DEBUG
+
static inline unsigned int get_sysctl_timer_migration(void)
{
return sysctl_timer_migration;
}
-#else
+#else /* CONFIG_SCHED_DEBUG */
static inline unsigned int get_sysctl_timer_migration(void)
{
return 1;
}
-#endif
+#endif /* CONFIG_SCHED_DEBUG */
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
const char __user * const __user *, struct pt_regs *);
extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
+ #ifdef CONFIG_GENERIC_KERNEL_THREAD
+ extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
+ #endif
extern void set_task_comm(struct task_struct *tsk, char *from);
extern char *get_task_comm(char *to, struct task_struct *tsk);
tsk->btrace_seq = 0;
#endif
tsk->splice_pipe = NULL;
+ tsk->task_frag.page = NULL;
account_kernel_stack(ti, 1);
down_write(&oldmm->mmap_sem);
flush_cache_dup_mm(oldmm);
+ uprobe_dup_mmap(oldmm, mm);
/*
* Not linked in yet - no deadlock potential:
*/
mapping->i_mmap_writable++;
flush_dcache_mmap_lock(mapping);
/* insert tmp into the share list, just after mpnt */
- vma_prio_tree_add(tmp, mpnt);
+ if (unlikely(tmp->vm_flags & VM_NONLINEAR))
+ vma_nonlinear_insert(tmp,
+ &mapping->i_mmap_nonlinear);
+ else
+ vma_interval_tree_insert_after(tmp, mpnt,
+ &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
mutex_unlock(&mapping->i_mmap_mutex);
}
if (retval)
goto out;
-
- if (file)
- uprobe_mmap(tmp);
}
/* a new mm has just been created */
arch_dup_mmap(oldmm, mm);
}
EXPORT_SYMBOL_GPL(mmput);
-/*
- * We added or removed a vma mapping the executable. The vmas are only mapped
- * during exec and are not mapped with the mmap system call.
- * Callers must hold down_write() on the mm's mmap_sem for these
- */
-void added_exe_file_vma(struct mm_struct *mm)
-{
- mm->num_exe_file_vmas++;
-}
-
-void removed_exe_file_vma(struct mm_struct *mm)
-{
- mm->num_exe_file_vmas--;
- if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
- fput(mm->exe_file);
- mm->exe_file = NULL;
- }
-
-}
-
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
if (new_exe_file)
if (mm->exe_file)
fput(mm->exe_file);
mm->exe_file = new_exe_file;
- mm->num_exe_file_vmas = 0;
}
struct file *get_mm_exe_file(struct mm_struct *mm)
{
struct file *exe_file;
- /* We need mmap_sem to protect against races with removal of
- * VM_EXECUTABLE vmas */
+ /* We need mmap_sem to protect against races with removal of exe_file */
down_read(&mm->mmap_sem);
exe_file = mm->exe_file;
if (exe_file)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
mm->pmd_huge_pte = NULL;
#endif
- uprobe_reset_state(mm);
-
if (!mm_init(mm, tsk))
goto fail_nomem;
init_rwsem(&sig->group_rwsem);
#endif
- sig->oom_adj = current->signal->oom_adj;
sig->oom_score_adj = current->signal->oom_score_adj;
sig->oom_score_adj_min = current->signal->oom_score_adj_min;
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
p->irq_events = 0;
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- p->hardirqs_enabled = 1;
-#else
p->hardirqs_enabled = 0;
-#endif
p->hardirq_enable_ip = 0;
p->hardirq_enable_event = 0;
p->hardirq_disable_ip = _THIS_IP_;
* requested, no event is reported; otherwise, report if the event
* for the type of forking is enabled.
*/
- if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
+ if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
if (clone_flags & CLONE_VFORK)
trace = PTRACE_EVENT_VFORK;
else if ((clone_flags & CSIGNAL) != SIGCHLD)
return nr;
}
+ #ifdef CONFIG_GENERIC_KERNEL_THREAD
+ /*
+ * Create a kernel thread.
+ */
+ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+ {
+ return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
+ (unsigned long)arg, NULL, NULL);
+ }
+ #endif
+
#ifndef ARCH_MIN_MMSTRUCT_ALIGN
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
#ifdef CONFIG_SMP
#ifndef tsk_is_polling
- #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+ #define tsk_is_polling(t) 0
#endif
void resched_task(struct task_struct *p)
dequeue_task(rq, p, flags);
}
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-
-/*
- * There are no locks covering percpu hardirq/softirq time.
- * They are only modified in account_system_vtime, on corresponding CPU
- * with interrupts disabled. So, writes are safe.
- * They are read and saved off onto struct rq in update_rq_clock().
- * This may result in other CPU reading this CPU's irq time and can
- * race with irq/account_system_vtime on this CPU. We would either get old
- * or new value with a side effect of accounting a slice of irq time to wrong
- * task when irq is in progress while we read rq->clock. That is a worthy
- * compromise in place of having locks on each irq in account_system_time.
- */
-static DEFINE_PER_CPU(u64, cpu_hardirq_time);
-static DEFINE_PER_CPU(u64, cpu_softirq_time);
-
-static DEFINE_PER_CPU(u64, irq_start_time);
-static int sched_clock_irqtime;
-
-void enable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 1;
-}
-
-void disable_sched_clock_irqtime(void)
-{
- sched_clock_irqtime = 0;
-}
-
-#ifndef CONFIG_64BIT
-static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
-
-static inline void irq_time_write_begin(void)
-{
- __this_cpu_inc(irq_time_seq.sequence);
- smp_wmb();
-}
-
-static inline void irq_time_write_end(void)
-{
- smp_wmb();
- __this_cpu_inc(irq_time_seq.sequence);
-}
-
-static inline u64 irq_time_read(int cpu)
-{
- u64 irq_time;
- unsigned seq;
-
- do {
- seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
- irq_time = per_cpu(cpu_softirq_time, cpu) +
- per_cpu(cpu_hardirq_time, cpu);
- } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
-
- return irq_time;
-}
-#else /* CONFIG_64BIT */
-static inline void irq_time_write_begin(void)
-{
-}
-
-static inline void irq_time_write_end(void)
-{
-}
-
-static inline u64 irq_time_read(int cpu)
-{
- return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
-}
-#endif /* CONFIG_64BIT */
-
-/*
- * Called before incrementing preempt_count on {soft,}irq_enter
- * and before decrementing preempt_count on {soft,}irq_exit.
- */
-void account_system_vtime(struct task_struct *curr)
-{
- unsigned long flags;
- s64 delta;
- int cpu;
-
- if (!sched_clock_irqtime)
- return;
-
- local_irq_save(flags);
-
- cpu = smp_processor_id();
- delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
- __this_cpu_add(irq_start_time, delta);
-
- irq_time_write_begin();
- /*
- * We do not account for softirq time from ksoftirqd here.
- * We want to continue accounting softirq time to ksoftirqd thread
- * in that case, so as not to confuse scheduler with a special task
- * that do not consume any time, but still wants to run.
- */
- if (hardirq_count())
- __this_cpu_add(cpu_hardirq_time, delta);
- else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
- __this_cpu_add(cpu_softirq_time, delta);
-
- irq_time_write_end();
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(account_system_vtime);
-
-#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
-
-#ifdef CONFIG_PARAVIRT
-static inline u64 steal_ticks(u64 steal)
-{
- if (unlikely(steal > NSEC_PER_SEC))
- return div_u64(steal, TICK_NSEC);
-
- return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
-}
-#endif
-
static void update_rq_clock_task(struct rq *rq, s64 delta)
{
/*
#endif
}
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-static int irqtime_account_hi_update(void)
-{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
- unsigned long flags;
- u64 latest_ns;
- int ret = 0;
-
- local_irq_save(flags);
- latest_ns = this_cpu_read(cpu_hardirq_time);
- if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
- ret = 1;
- local_irq_restore(flags);
- return ret;
-}
-
-static int irqtime_account_si_update(void)
-{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
- unsigned long flags;
- u64 latest_ns;
- int ret = 0;
-
- local_irq_save(flags);
- latest_ns = this_cpu_read(cpu_softirq_time);
- if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
- ret = 1;
- local_irq_restore(flags);
- return ret;
-}
-
-#else /* CONFIG_IRQ_TIME_ACCOUNTING */
-
-#define sched_clock_irqtime (0)
-
-#endif
-
void sched_set_stop_task(int cpu, struct task_struct *stop)
{
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
smp_send_reschedule(cpu);
}
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-static int ttwu_activate_remote(struct task_struct *p, int wake_flags)
-{
- struct rq *rq;
- int ret = 0;
-
- rq = __task_rq_lock(p);
- if (p->on_cpu) {
- ttwu_activate(rq, p, ENQUEUE_WAKEUP);
- ttwu_do_wakeup(rq, p, wake_flags);
- ret = 1;
- }
- __task_rq_unlock(rq);
-
- return ret;
-
-}
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
-
bool cpus_share_cache(int this_cpu, int that_cpu)
{
return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
* If the owning (remote) cpu is still in the middle of schedule() with
* this task as prev, wait until its done referencing the task.
*/
- while (p->on_cpu) {
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- /*
- * In case the architecture enables interrupts in
- * context_switch(), we cannot busy wait, since that
- * would lead to deadlocks when an interrupt hits and
- * tries to wake up @prev. So bail and do a complete
- * remote wakeup.
- */
- if (ttwu_activate_remote(p, wake_flags))
- goto stat;
-#else
+ while (p->on_cpu)
cpu_relax();
-#endif
- }
/*
* Pairs with the smp_wmb() in finish_lock_switch().
*/
#ifdef CONFIG_PREEMPT_NOTIFIERS
INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
+
+#ifdef CONFIG_SCHED_NUMA
+ if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+ p->mm->numa_big = 0;
+ p->mm->numa_next_scan = jiffies;
+ p->mm->numa_migrate_success = 0;
+ p->mm->numa_migrate_failed = 0;
+ }
+
+ p->node = -1;
+ p->node_curr = -1;
+ p->node_last = -1;
+ p->node_stamp = 0ULL;
+ p->numa_runtime_stamp = 0;
+ p->numa_walltime_stamp = local_clock();
+#endif /* CONFIG_SCHED_NUMA */
}
/*
* Manfred Spraul <manfred@colorfullife.com>
*/
prev_state = prev->state;
+ vtime_task_switch(prev);
finish_arch_switch(prev);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_disable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
perf_event_task_sched_in(prev, current);
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
- local_irq_enable();
-#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
finish_lock_switch(rq, prev);
finish_arch_post_lock_switch();
#endif
/* Here we just switch the register state and the stack. */
+ rcu_switch(prev, next);
switch_to(prev, next, prev);
barrier();
return ns;
}
-#ifdef CONFIG_CGROUP_CPUACCT
-struct cgroup_subsys cpuacct_subsys;
-struct cpuacct root_cpuacct;
-#endif
-
-static inline void task_group_account_field(struct task_struct *p, int index,
- u64 tmp)
-{
-#ifdef CONFIG_CGROUP_CPUACCT
- struct kernel_cpustat *kcpustat;
- struct cpuacct *ca;
-#endif
- /*
- * Since all updates are sure to touch the root cgroup, we
- * get ourselves ahead and touch it first. If the root cgroup
- * is the only cgroup, then nothing else should be necessary.
- *
- */
- __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;
-
-#ifdef CONFIG_CGROUP_CPUACCT
- if (unlikely(!cpuacct_subsys.active))
- return;
-
- rcu_read_lock();
- ca = task_ca(p);
- while (ca && (ca != &root_cpuacct)) {
- kcpustat = this_cpu_ptr(ca->cpustat);
- kcpustat->cpustat[index] += tmp;
- ca = parent_ca(ca);
- }
- rcu_read_unlock();
-#endif
-}
-
-
-/*
- * Account user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
- */
-void account_user_time(struct task_struct *p, cputime_t cputime,
- cputime_t cputime_scaled)
-{
- int index;
-
- /* Add user time to process. */
- p->utime += cputime;
- p->utimescaled += cputime_scaled;
- account_group_user_time(p, cputime);
-
- index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
-
- /* Add user time to cpustat. */
- task_group_account_field(p, index, (__force u64) cputime);
-
- /* Account for user time used */
- acct_update_integrals(p);
-}
-
-/*
- * Account guest cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in virtual machine since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
- */
-static void account_guest_time(struct task_struct *p, cputime_t cputime,
- cputime_t cputime_scaled)
-{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
-
- /* Add guest time to process. */
- p->utime += cputime;
- p->utimescaled += cputime_scaled;
- account_group_user_time(p, cputime);
- p->gtime += cputime;
-
- /* Add guest time to cpustat. */
- if (TASK_NICE(p) > 0) {
- cpustat[CPUTIME_NICE] += (__force u64) cputime;
- cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
- } else {
- cpustat[CPUTIME_USER] += (__force u64) cputime;
- cpustat[CPUTIME_GUEST] += (__force u64) cputime;
- }
-}
-
-/*
- * Account system cpu time to a process and desired cpustat field
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in kernel space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
- * @target_cputime64: pointer to cpustat field that has to be updated
- */
-static inline
-void __account_system_time(struct task_struct *p, cputime_t cputime,
- cputime_t cputime_scaled, int index)
-{
- /* Add system time to process. */
- p->stime += cputime;
- p->stimescaled += cputime_scaled;
- account_group_system_time(p, cputime);
-
- /* Add system time to cpustat. */
- task_group_account_field(p, index, (__force u64) cputime);
-
- /* Account for system time used */
- acct_update_integrals(p);
-}
-
-/*
- * Account system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
- * @cputime_scaled: cputime scaled by cpu frequency
- */
-void account_system_time(struct task_struct *p, int hardirq_offset,
- cputime_t cputime, cputime_t cputime_scaled)
-{
- int index;
-
- if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
- account_guest_time(p, cputime, cputime_scaled);
- return;
- }
-
- if (hardirq_count() - hardirq_offset)
- index = CPUTIME_IRQ;
- else if (in_serving_softirq())
- index = CPUTIME_SOFTIRQ;
- else
- index = CPUTIME_SYSTEM;
-
- __account_system_time(p, cputime, cputime_scaled, index);
-}
-
-/*
- * Account for involuntary wait time.
- * @cputime: the cpu time spent in involuntary wait
- */
-void account_steal_time(cputime_t cputime)
-{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
-
- cpustat[CPUTIME_STEAL] += (__force u64) cputime;
-}
-
-/*
- * Account for idle time.
- * @cputime: the cpu time spent in idle wait
- */
-void account_idle_time(cputime_t cputime)
-{
- u64 *cpustat = kcpustat_this_cpu->cpustat;
- struct rq *rq = this_rq();
-
- if (atomic_read(&rq->nr_iowait) > 0)
- cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
- else
- cpustat[CPUTIME_IDLE] += (__force u64) cputime;
-}
-
-static __always_inline bool steal_account_process_tick(void)
-{
-#ifdef CONFIG_PARAVIRT
- if (static_key_false(¶virt_steal_enabled)) {
- u64 steal, st = 0;
-
- steal = paravirt_steal_clock(smp_processor_id());
- steal -= this_rq()->prev_steal_time;
-
- st = steal_ticks(steal);
- this_rq()->prev_steal_time += st * TICK_NSEC;
-
- account_steal_time(st);
- return st;
- }
-#endif
- return false;
-}
-
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
-#ifdef CONFIG_IRQ_TIME_ACCOUNTING
-/*
- * Account a tick to a process and cpustat
- * @p: the process that the cpu time gets accounted to
- * @user_tick: is the tick from userspace
- * @rq: the pointer to rq
- *
- * Tick demultiplexing follows the order
- * - pending hardirq update
- * - pending softirq update
- * - user_time
- * - idle_time
- * - system time
- * - check for guest_time
- * - else account as system_time
- *
- * Check for hardirq is done both for system and user time as there is
- * no timer going off while we are on hardirq and hence we may never get an
- * opportunity to update it solely in system time.
- * p->stime and friends are only updated on system time and not on irq
- * softirq as those do not count in task exec_runtime any more.
- */
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
- struct rq *rq)
-{
- cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
- u64 *cpustat = kcpustat_this_cpu->cpustat;
-
- if (steal_account_process_tick())
- return;
-
- if (irqtime_account_hi_update()) {
- cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
- } else if (irqtime_account_si_update()) {
- cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
- } else if (this_cpu_ksoftirqd() == p) {
- /*
- * ksoftirqd time do not get accounted in cpu_softirq_time.
- * So, we have to handle it separately here.
- * Also, p->stime needs to be updated for ksoftirqd.
- */
- __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
- CPUTIME_SOFTIRQ);
- } else if (user_tick) {
- account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
- } else if (p == rq->idle) {
- account_idle_time(cputime_one_jiffy);
- } else if (p->flags & PF_VCPU) { /* System time or guest time */
- account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
- } else {
- __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
- CPUTIME_SYSTEM);
- }
-}
-
-static void irqtime_account_idle_ticks(int ticks)
-{
- int i;
- struct rq *rq = this_rq();
-
- for (i = 0; i < ticks; i++)
- irqtime_account_process_tick(current, 0, rq);
-}
-#else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
- struct rq *rq) {}
-#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
-
-/*
- * Account a single tick of cpu time.
- * @p: the process that the cpu time gets accounted to
- * @user_tick: indicates if the tick is a user or a system tick
- */
-void account_process_tick(struct task_struct *p, int user_tick)
-{
- cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
- struct rq *rq = this_rq();
-
- if (sched_clock_irqtime) {
- irqtime_account_process_tick(p, user_tick, rq);
- return;
- }
-
- if (steal_account_process_tick())
- return;
-
- if (user_tick)
- account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
- else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
- account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
- one_jiffy_scaled);
- else
- account_idle_time(cputime_one_jiffy);
-}
-
-/*
- * Account multiple ticks of steal time.
- * @p: the process from which the cpu time has been stolen
- * @ticks: number of stolen ticks
- */
-void account_steal_ticks(unsigned long ticks)
-{
- account_steal_time(jiffies_to_cputime(ticks));
-}
-
-/*
- * Account multiple ticks of idle time.
- * @ticks: number of stolen ticks
- */
-void account_idle_ticks(unsigned long ticks)
-{
-
- if (sched_clock_irqtime) {
- irqtime_account_idle_ticks(ticks);
- return;
- }
-
- account_idle_time(jiffies_to_cputime(ticks));
-}
-
-#endif
-
-/*
- * Use precise platform statistics if available:
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- *ut = p->utime;
- *st = p->stime;
-}
-
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- struct task_cputime cputime;
-
- thread_group_cputime(p, &cputime);
-
- *ut = cputime.utime;
- *st = cputime.stime;
-}
-#else
-
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
-#endif
-
-static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
-{
- u64 temp = (__force u64) rtime;
-
- temp *= (__force u64) utime;
-
- if (sizeof(cputime_t) == 4)
- temp = div_u64(temp, (__force u32) total);
- else
- temp = div64_u64(temp, (__force u64) total);
-
- return (__force cputime_t) temp;
-}
-
-void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- cputime_t rtime, utime = p->utime, total = utime + p->stime;
-
- /*
- * Use CFS's precise accounting:
- */
- rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
-
- if (total)
- utime = scale_utime(utime, rtime, total);
- else
- utime = rtime;
-
- /*
- * Compare with previous values, to keep monotonicity:
- */
- p->prev_utime = max(p->prev_utime, utime);
- p->prev_stime = max(p->prev_stime, rtime - p->prev_utime);
-
- *ut = p->prev_utime;
- *st = p->prev_stime;
-}
-
-/*
- * Must be called with siglock held.
- */
-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- struct signal_struct *sig = p->signal;
- struct task_cputime cputime;
- cputime_t rtime, utime, total;
-
- thread_group_cputime(p, &cputime);
-
- total = cputime.utime + cputime.stime;
- rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
-
- if (total)
- utime = scale_utime(cputime.utime, rtime, total);
- else
- utime = rtime;
-
- sig->prev_utime = max(sig->prev_utime, utime);
- sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime);
-
- *ut = sig->prev_utime;
- *st = sig->prev_stime;
-}
-#endif
-
/*
* This function gets called by the timer code, with HZ frequency.
* We call it with interrupts disabled.
/*
* __schedule() is the main scheduler function.
+ *
+ * The main means of driving the scheduler and thus entering this function are:
+ *
+ * 1. Explicit blocking: mutex, semaphore, waitqueue, etc.
+ *
+ * 2. TIF_NEED_RESCHED flag is checked on interrupt and userspace return
+ * paths. For example, see arch/x86/entry_64.S.
+ *
+ * To drive preemption between tasks, the scheduler sets the flag in timer
+ * interrupt handler scheduler_tick().
+ *
+ * 3. Wakeups don't really cause entry into schedule(). They add a
+ * task to the run-queue and that's it.
+ *
+ * Now, if the new task added to the run-queue preempts the current
+ * task, then the wakeup sets TIF_NEED_RESCHED and schedule() gets
+ * called on the nearest possible occasion:
+ *
+ * - If the kernel is preemptible (CONFIG_PREEMPT=y):
+ *
+ * - in syscall or exception context, at the next outmost
+ * preempt_enable(). (this might be as soon as the wake_up()'s
+ * spin_unlock()!)
+ *
+ * - in IRQ context, return from interrupt-handler to
+ * preemptible context
+ *
+ * - If the kernel is not preemptible (CONFIG_PREEMPT is not set)
+ * then at the next:
+ *
+ * - cond_resched() call
+ * - explicit schedule() call
+ * - return from syscall or exception to user-space
+ * - return from interrupt-handler to user-space
*/
static void __sched __schedule(void)
{
}
EXPORT_SYMBOL(schedule);
+#ifdef CONFIG_RCU_USER_QS
+asmlinkage void __sched schedule_user(void)
+{
+ /*
+ * If we come here after a random call to set_need_resched(),
+ * or we have been woken up remotely but the IPI has not yet arrived,
+ * we haven't yet exited the RCU idle mode. Do it here manually until
+ * we find a better solution.
+ */
+ rcu_user_exit();
+ schedule();
+ rcu_user_enter();
+}
+#endif
+
/**
* schedule_preempt_disabled - called with preemption disabled
*
/* Catch callers which need to be fixed */
BUG_ON(ti->preempt_count || !irqs_disabled());
+ rcu_user_exit();
do {
add_preempt_count(PREEMPT_ACTIVE);
local_irq_enable();
*/
if (preempt && rq != p_rq)
resched_task(p_rq->curr);
- } else {
- /*
- * We might have set it in task_yield_fair(), but are
- * not going to schedule(), so don't want to skip
- * the next update.
- */
- rq->skip_clock_update = 0;
}
out:
*tablep = NULL;
}
+static int min_load_idx = 0;
+static int max_load_idx = CPU_LOAD_IDX_MAX;
+
static void
set_table_entry(struct ctl_table *entry,
const char *procname, void *data, int maxlen,
- umode_t mode, proc_handler *proc_handler)
+ umode_t mode, proc_handler *proc_handler,
+ bool load_idx)
{
entry->procname = procname;
entry->data = data;
entry->maxlen = maxlen;
entry->mode = mode;
entry->proc_handler = proc_handler;
+
+ if (load_idx) {
+ entry->extra1 = &min_load_idx;
+ entry->extra2 = &max_load_idx;
+ }
}
static struct ctl_table *
return NULL;
set_table_entry(&table[0], "min_interval", &sd->min_interval,
- sizeof(long), 0644, proc_doulongvec_minmax);
+ sizeof(long), 0644, proc_doulongvec_minmax, false);
set_table_entry(&table[1], "max_interval", &sd->max_interval,
- sizeof(long), 0644, proc_doulongvec_minmax);
+ sizeof(long), 0644, proc_doulongvec_minmax, false);
set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, true);
set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, true);
set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, true);
set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, true);
set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, true);
set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, false);
set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, false);
set_table_entry(&table[9], "cache_nice_tries",
&sd->cache_nice_tries,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, false);
set_table_entry(&table[10], "flags", &sd->flags,
- sizeof(int), 0644, proc_dointvec_minmax);
+ sizeof(int), 0644, proc_dointvec_minmax, false);
set_table_entry(&table[11], "name", sd->name,
- CORENAME_MAX_SIZE, 0444, proc_dostring);
+ CORENAME_MAX_SIZE, 0444, proc_dostring, false);
/* &table[12] is terminator */
return table;
migrate_tasks(cpu);
BUG_ON(rq->nr_running != 1); /* the migration thread */
raw_spin_unlock_irqrestore(&rq->lock, flags);
+ break;
+ case CPU_DEAD:
calc_load_migrate(rq);
break;
#endif
DEFINE_PER_CPU(struct sched_domain *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_id);
-static void update_top_cache_domain(int cpu)
+DEFINE_PER_CPU(struct sched_domain *, sd_node);
+
+static void update_domain_cache(int cpu)
{
struct sched_domain *sd;
int id = cpu;
rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
per_cpu(sd_llc_id, cpu) = id;
+
+ for_each_domain(cpu, sd) {
+ if (cpumask_equal(sched_domain_span(sd),
+ cpumask_of_node(cpu_to_node(cpu))))
+ goto got_node;
+ }
+ sd = NULL;
+got_node:
+ rcu_assign_pointer(per_cpu(sd_node, cpu), sd);
}
/*
rcu_assign_pointer(rq->sd, sd);
destroy_sched_domains(tmp, cpu);
- update_top_cache_domain(cpu);
+ update_domain_cache(cpu);
}
/* cpus with isolated domains */
static struct sched_domain_topology_level *sched_domain_topology = default_topology;
+#ifdef CONFIG_SCHED_NUMA
+
+/*
+ * Requeues a task ensuring its on the right load-balance list so
+ * that it might get migrated to its new home.
+ *
+ * Since home-node is pure preference there's no hard migrate to force
+ * us anywhere, this also allows us to call this from atomic context if
+ * required.
+ */
+void sched_setnode(struct task_struct *p, int node)
+{
+ unsigned long flags;
+ int on_rq, running;
+ struct rq *rq;
+
+ rq = task_rq_lock(p, &flags);
+ on_rq = p->on_rq;
+ running = task_current(rq, p);
+
+ if (on_rq)
+ dequeue_task(rq, p, 0);
+ if (running)
+ p->sched_class->put_prev_task(rq, p);
+
+ p->node = p->node_curr = p->node_last = node;
+
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq)
+ enqueue_task(rq, p, 0);
+ task_rq_unlock(rq, p, &flags);
+}
+
+#endif /* CONFIG_SCHED_NUMA */
+
#ifdef CONFIG_NUMA
static int sched_domains_numa_levels;
| 0*SD_BALANCE_FORK
| 0*SD_BALANCE_WAKE
| 0*SD_WAKE_AFFINE
- | 0*SD_PREFER_LOCAL
| 0*SD_SHARE_CPUPOWER
| 0*SD_SHARE_PKG_RESOURCES
| 1*SD_SERIALIZE
| 0*SD_PREFER_SIBLING
+ | 1*SD_NUMA
| sd_local_flags(level)
,
.last_balance = jiffies,
* numbers.
*/
+ /*
+ * Here, we should temporarily reset sched_domains_numa_levels to 0.
+ * If it fails to allocate memory for array sched_domains_numa_masks[][],
+ * the array will contain less then 'level' members. This could be
+ * dangerous when we use it to iterate array sched_domains_numa_masks[][]
+ * in other functions.
+ *
+ * We reset it to 'level' at the end of this function.
+ */
+ sched_domains_numa_levels = 0;
+
sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
if (!sched_domains_numa_masks)
return;
}
sched_domain_topology = tl;
+
+ sched_domains_numa_levels = level;
+}
+
+static void sched_domains_numa_masks_set(int cpu)
+{
+ int i, j;
+ int node = cpu_to_node(cpu);
+
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ for (j = 0; j < nr_node_ids; j++) {
+ if (node_distance(j, node) <= sched_domains_numa_distance[i])
+ cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
+ }
+ }
+}
+
+static void sched_domains_numa_masks_clear(int cpu)
+{
+ int i, j;
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ for (j = 0; j < nr_node_ids; j++)
+ cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
+ }
+}
+
+/*
+ * Update sched_domains_numa_masks[level][node] array when new cpus
+ * are onlined.
+ */
+static int sched_domains_numa_masks_update(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ sched_domains_numa_masks_set(cpu);
+ break;
+
+ case CPU_DEAD:
+ sched_domains_numa_masks_clear(cpu);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
}
#else
static inline void sched_init_numa(void)
{
}
+
+static int sched_domains_numa_masks_update(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA */
static int __sdt_alloc(const struct cpumask *cpu_map)
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
+ hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
rq->avg_idle = 2*sysctl_sched_migration_cost;
INIT_LIST_HEAD(&rq->cfs_tasks);
+#ifdef CONFIG_SCHED_NUMA
+ INIT_LIST_HEAD(&rq->offnode_tasks);
+ rq->offnode_running = 0;
+ rq->offnode_weight = 0;
+#endif
rq_attach_root(rq, &def_root_domain);
#ifdef CONFIG_NO_HZ
* (balbir@in.ibm.com).
*/
+struct cpuacct root_cpuacct;
+
/* create a new cpu accounting group */
static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp)
{