depends on TRACING_SUPPORT
select TRACING
select RING_BUFFER
+ select RING_BUFFER_ALLOW_SWAP
help
OProfile is a profiling system capable of profiling the
whole system, include the kernel, kernel modules, libraries,
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+config HAVE_HW_BREAKPOINT
+ bool
+
+
source "kernel/gcov/Kconfig"
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
- select HAVE_PERF_COUNTERS if (!M386 && !M486)
+ select HAVE_PERF_EVENTS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
+ select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
config OUTPUT_FORMAT
config HAVE_LATENCYTOP_SUPPORT
def_bool y
- config FAST_CMPXCHG_LOCAL
- bool
- default y
-
config MMU
def_bool y
config HAVE_SETUP_PER_CPU_AREA
def_bool y
- config HAVE_DYNAMIC_PER_CPU_AREA
+ config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ def_bool y
+
+ config NEED_PER_CPU_PAGE_FIRST_CHUNK
def_bool y
config HAVE_CPUMASK_OF_CPU_MAP
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
def_bool y
+ config HAVE_INTEL_TXT
+ def_bool y
+ depends on EXPERIMENTAL && DMAR && ACPI
+
# Use the generic interrupt handling code in kernel/irq/:
config GENERIC_HARDIRQS
bool
SGI 320/540 (Visual Workstation)
Summit/EXA (IBM x440)
Unisys ES7000 IA32 series
+ Moorestown MID devices
If you have one of these systems, or if you want to build a
generic distribution kernel, say Y here - otherwise say N.
If unsure, choose "PC-compatible" instead.
+ config X86_MRST
+ bool "Moorestown MID platform"
+ depends on X86_32
+ depends on X86_EXTENDED_PLATFORM
+ ---help---
+ Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
+ Internet Device(MID) platform. Moorestown consists of two chips:
+ Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
+ Unlike standard x86 PCs, Moorestown does not have many legacy devices
+ nor standard legacy replacement devices/features. e.g. Moorestown does
+ not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+
config X86_RDC321X
bool "RDC R-321x SoC"
depends on X86_32
of Flat Logical. You will need a new lynxer.elf file to flash your
firmware with - send email to <Martin.Bligh@us.ibm.com>.
+ config X86_SUPPORTS_MEMORY_FAILURE
+ bool
+ # MCE code calls memory_failure():
+ depends on X86_MCE
+ # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+ depends on !X86_NUMAQ
+ # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+ depends on X86_64 || !SPARSEMEM
+ select ARCH_SUPPORTS_MEMORY_FAILURE
+ default y
+
config X86_VISWS
bool "SGI 320/540 (Visual Workstation)"
depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
source "arch/x86/xen/Kconfig"
config VMI
- bool "VMI Guest support"
+ bool "VMI Guest support (DEPRECATED)"
select PARAVIRT
depends on X86_32
---help---
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
+ As of September 2009, VMware has started a phased retirement
+ of this feature from VMware's products. Please see
+ feature-removal-schedule.txt for details. If you are
+ planning to enable this option, please note that you cannot
+ live migrate a VMI enabled VM to a future VMware product,
+ which doesn't support VMI. So if you expect your kernel to
+ seamlessly migrate to newer VMware products, keep this
+ disabled.
+
config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
increased on these systems.
config X86_MCE
- bool "Machine Check Exception"
+ bool "Machine Check / overheating reporting"
---help---
- Machine Check Exception support allows the processor to notify the
- kernel if it detects a problem (e.g. overheating, component failure).
+ Machine Check support allows the processor to notify the
+ kernel if it detects a problem (e.g. overheating, data corruption).
The action the kernel takes depends on the severity of the problem,
- ranging from a warning message on the console, to halting the machine.
- Your processor must be a Pentium or newer to support this - check the
- flags in /proc/cpuinfo for mce. Note that some older Pentium systems
- have a design flaw which leads to false MCE events - hence MCE is
- disabled on all P5 processors, unless explicitly enabled with "mce"
- as a boot argument. Similarly, if MCE is built in and creates a
- problem on some new non-standard machine, you can boot with "nomce"
- to disable it. MCE support simply ignores non-MCE processors like
- the 386 and 486, so nearly everyone can say Y here.
-
- config X86_OLD_MCE
- depends on X86_32 && X86_MCE
- bool "Use legacy machine check code (will go away)"
- default n
- select X86_ANCIENT_MCE
- ---help---
- Use the old i386 machine check code. This is merely intended for
- testing in a transition period. Try this if you run into any machine
- check related software problems, but report the problem to
- linux-kernel. When in doubt say no.
-
- config X86_NEW_MCE
- depends on X86_MCE
- bool
- default y if (!X86_OLD_MCE && X86_32) || X86_64
+ ranging from warning messages to halting the machine.
config X86_MCE_INTEL
def_bool y
prompt "Intel MCE features"
- depends on X86_NEW_MCE && X86_LOCAL_APIC
+ depends on X86_MCE && X86_LOCAL_APIC
---help---
Additional support for intel specific MCE features such as
the thermal monitor.
config X86_MCE_AMD
def_bool y
prompt "AMD MCE features"
- depends on X86_NEW_MCE && X86_LOCAL_APIC
+ depends on X86_MCE && X86_LOCAL_APIC
---help---
Additional support for AMD specific MCE features such as
the DRAM Error Threshold.
config X86_ANCIENT_MCE
def_bool n
- depends on X86_32
+ depends on X86_32 && X86_MCE
prompt "Support for old Pentium 5 / WinChip machine checks"
---help---
Include support for machine check handling on old Pentium 5 or WinChip
default y
config X86_MCE_INJECT
- depends on X86_NEW_MCE
+ depends on X86_MCE
tristate "Machine check injector support"
---help---
Provide support for injecting machine checks for testing purposes.
If you don't know what a machine check is and you don't do kernel
QA it is safe to say n.
- config X86_MCE_NONFATAL
- tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
- depends on X86_OLD_MCE
- ---help---
- Enabling this feature starts a timer that triggers every 5 seconds which
- will look at the machine check registers to see if anything happened.
- Non-fatal problems automatically get corrected (but still logged).
- Disable this if you don't want to see these messages.
- Seeing the messages this option prints out may be indicative of dying
- or out-of-spec (ie, overclocked) hardware.
- This option only does something on certain CPUs.
- (AMD Athlon/Duron and Intel Pentium 4)
-
- config X86_MCE_P4THERMAL
- bool "check for P4 thermal throttling interrupt."
- depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
- ---help---
- Enabling this feature will cause a message to be printed when the P4
- enters thermal throttling.
-
config X86_THERMAL_VECTOR
def_bool y
- depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+ depends on X86_MCE_INTEL
config VM86
bool "Enable VM86 support" if EMBEDDED
def_bool y
depends on NUMA && X86_32
+ config ARCH_PROC_KCORE_TEXT
+ def_bool y
+ depends on X86_64 && PROC_KCORE
+
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
If unsure, say Y.
+ config ARCH_USES_PG_UNCACHED
+ def_bool y
+ depends on X86_PAT
+
config EFI
bool "EFI runtime service support"
depends on ACPI
source "drivers/acpi/Kconfig"
+ source "drivers/sfi/Kconfig"
+
config X86_APM_BOOT
bool
default y
config PCI_MMCONFIG
def_bool y
- depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
+ depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
config PCI_OLPC
def_bool y
config DMAR_BROKEN_GFX_WA
def_bool n
prompt "Workaround broken graphics drivers (going away soon)"
- depends on DMAR
+ depends on DMAR && BROKEN
---help---
Current Graphics drivers tend to use physical address
for DMA and avoid using DMA APIs. Setting this config
#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
+ #include <linux/math64.h>
#include <linux/init.h>
+#define HBP_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
#endif
unsigned long gs;
/* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
+ unsigned long debugreg[HBP_NUM];
unsigned long debugreg6;
unsigned long debugreg7;
+ /* Hardware breakpoint info */
+ struct hw_breakpoint *hbp[HBP_NUM];
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+ extern int amd_get_nb_id(int cpu);
+
+ struct aperfmperf {
+ u64 aperf, mperf;
+ };
+
+ static inline void get_aperfmperf(struct aperfmperf *am)
+ {
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
+
+ rdmsrl(MSR_IA32_APERF, am->aperf);
+ rdmsrl(MSR_IA32_MPERF, am->mperf);
+ }
+
+ #define APERFMPERF_SHIFT 10
+
+ static inline
+ unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
+ struct aperfmperf *new)
+ {
+ u64 aperf = new->aperf - old->aperf;
+ u64 mperf = new->mperf - old->mperf;
+ unsigned long ratio = aperf;
+
+ mperf >>= APERFMPERF_SHIFT;
+ if (mperf)
+ ratio = div64_u64(aperf, mperf);
+
+ return ratio;
+ }
+
#endif /* _ASM_X86_PROCESSOR_H */
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
- obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
- obj-y += setup.o i8259.o irqinit.o
+ obj-y += time.o ioport.o ldt.o dumpstack.o
+ obj-y += setup.o x86_init.o i8259.o irqinit.o
obj-$(CONFIG_X86_VISWS) += visws_quirks.o
obj-$(CONFIG_X86_32) += probe_roms_32.o
obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o
+ obj-$(CONFIG_INTEL_TXT) += tboot.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
+ obj-$(CONFIG_SFI) += sfi.o
obj-y += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
scx200-y += scx200_32.o
obj-$(CONFIG_OLPC) += olpc.o
+ obj-$(CONFIG_X86_MRST) += mrst.o
microcode-y := microcode_core.o
microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
- #include <trace/power.h>
+ #include <trace/events/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
+#include <asm/hw_breakpoint.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
struct kmem_cache *task_xstate_cachep;
- DEFINE_TRACE(power_start);
- DEFINE_TRACE(power_end);
-
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
clear_tsk_thread_flag(tsk, TIF_DEBUG);
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
+ if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
+ flush_thread_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
void default_idle(void)
{
if (hlt_use_halt()) {
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end(&it);
} else {
local_irq_enable();
/* loop is done by the caller */
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+ trace_power_start(POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)¤t_thread_info()->flags);
if (!need_resched())
__mwait(ax, cx);
}
- trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
- struct power_trace it;
if (!need_resched()) {
- trace_power_start(&it, POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1);
if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)¤t_thread_info()->flags);
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_power_end(&it);
} else
local_irq_enable();
}
*/
static void poll_idle(void)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, 0);
+ trace_power_start(POWER_CSTATE, 0);
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(&it);
+ trace_power_end(0);
}
/*
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle) {
- alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
- cpumask_clear(c1e_mask);
- }
+ if (pm_idle == c1e_idle)
+ zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
static int __init idle_setup(char *str)
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
#include "tls.h"
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
- return TASK_SIZE - 3;
-}
-
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- return IA32_PAGE_OFFSET - 3;
-#endif
- return TASK_SIZE_MAX - 7;
-}
-
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
return set_flags(child, value);
#ifdef CONFIG_X86_64
- /*
- * Orig_ax is really just a flag with small positive and
- * negative values, so make sure to always sign-extend it
- * from 32 bits so that it works correctly regardless of
- * whether we come from a 32-bit environment or not.
- */
- case offsetof(struct user_regs_struct, orig_ax):
- value = (long) (s32) value;
- break;
-
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_OF(child))
return -EIO;
}
/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7. Return the "enabled" status.
*/
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
+ unsigned *type)
{
- switch (n) {
- case 0: return child->thread.debugreg0;
- case 1: return child->thread.debugreg1;
- case 2: return child->thread.debugreg2;
- case 3: return child->thread.debugreg3;
- case 6: return child->thread.debugreg6;
- case 7: return child->thread.debugreg7;
- }
- return 0;
+ int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+ *len = (bp_info & 0xc) | 0x40;
+ *type = (bp_info & 0x3) | 0x80;
+ return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
-static int ptrace_set_debugreg(struct task_struct *child,
- int n, unsigned long data)
+static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{
+ struct thread_struct *thread = &(current->thread);
int i;
- if (unlikely(n == 4 || n == 5))
- return -EIO;
+ /*
+ * Store in the virtual DR6 register the fact that the breakpoint
+ * was hit so the thread's debugger will see it.
+ */
+ for (i = 0; i < hbp_kernel_pos; i++)
+ /*
+ * We will check bp->info.address against the address stored in
+ * thread's hbp structure and not debugreg[i]. This is to ensure
+ * that the corresponding bit for 'i' in DR7 register is enabled
+ */
+ if (bp->info.address == thread->hbp[i]->info.address)
+ break;
- if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
- return -EIO;
+ thread->debugreg6 |= (DR_TRAP0 << i);
+}
- switch (n) {
- case 0: child->thread.debugreg0 = data; break;
- case 1: child->thread.debugreg1 = data; break;
- case 2: child->thread.debugreg2 = data; break;
- case 3: child->thread.debugreg3 = data; break;
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long old_dr7 = thread->debugreg7;
+ int i, orig_ret = 0, rc = 0;
+ int enabled, second_pass = 0;
+ unsigned len, type;
+ struct hw_breakpoint *bp;
+
+ data &= ~DR_CONTROL_RESERVED;
+restore:
+ /*
+ * Loop through all the hardware breakpoints, making the
+ * appropriate changes to each.
+ */
+ for (i = 0; i < HBP_NUM; i++) {
+ enabled = decode_dr7(data, i, &len, &type);
+ bp = thread->hbp[i];
+
+ if (!enabled) {
+ if (bp) {
+ /* Don't unregister the breakpoints right-away,
+ * unless all register_user_hw_breakpoint()
+ * requests have succeeded. This prevents
+ * any window of opportunity for debug
+ * register grabbing by other users.
+ */
+ if (!second_pass)
+ continue;
+ unregister_user_hw_breakpoint(tsk, bp);
+ kfree(bp);
+ }
+ continue;
+ }
+ if (!bp) {
+ rc = -ENOMEM;
+ bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
+ if (bp) {
+ bp->info.address = thread->debugreg[i];
+ bp->triggered = ptrace_triggered;
+ bp->info.len = len;
+ bp->info.type = type;
+ rc = register_user_hw_breakpoint(tsk, bp);
+ if (rc)
+ kfree(bp);
+ }
+ } else
+ rc = modify_user_hw_breakpoint(tsk, bp);
+ if (rc)
+ break;
+ }
+ /*
+ * Make a second pass to free the remaining unused breakpoints
+ * or to restore the original breakpoints if an error occurred.
+ */
+ if (!second_pass) {
+ second_pass = 1;
+ if (rc < 0) {
+ orig_ret = rc;
+ data = old_dr7;
+ }
+ goto restore;
+ }
+ return ((orig_ret < 0) ? orig_ret : rc);
+}
- case 6:
- if ((data & ~0xffffffffUL) != 0)
- return -EIO;
- child->thread.debugreg6 = data;
- break;
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long val = 0;
+
+ if (n < HBP_NUM)
+ val = thread->debugreg[n];
+ else if (n == 6)
+ val = thread->debugreg6;
+ else if (n == 7)
+ val = thread->debugreg7;
+ return val;
+}
- case 7:
- /*
- * Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System Programming)
- */
-#ifdef CONFIG_X86_32
-#define DR7_MASK 0x5f54
-#else
-#define DR7_MASK 0x5554
-#endif
- data &= ~DR_CONTROL_RESERVED;
- for (i = 0; i < 4; i++)
- if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
+/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc = 0;
+
+ /* There are no DR4 or DR5 registers */
+ if (n == 4 || n == 5)
+ return -EIO;
+
+ if (n == 6) {
+ tsk->thread.debugreg6 = val;
+ goto ret_path;
}
+ if (n < HBP_NUM) {
+ if (thread->hbp[n]) {
+ if (arch_check_va_in_userspace(val,
+ thread->hbp[n]->info.len) == 0) {
+ rc = -EIO;
+ goto ret_path;
+ }
+ thread->hbp[n]->info.address = val;
+ }
+ thread->debugreg[n] = val;
+ }
+ /* All that's left is DR7 */
+ if (n == 7)
+ rc = ptrace_write_dr7(tsk, val);
- return 0;
+ret_path:
+ return rc;
}
/*
case offsetof(struct user32, regs.orig_eax):
/*
- * Sign-extend the value so that orig_eax = -1
- * causes (long)orig_ax < 0 tests to fire correctly.
+ * A 32-bit debugger setting orig_eax means to restore
+ * the state of the task restarting a 32-bit syscall.
+ * Make sure we interpret the -ERESTART* codes correctly
+ * in case the task is not actually still sitting at the
+ * exit from a 32-bit syscall with TS_COMPAT still set.
*/
- regs->orig_ax = (long) (s32) value;
+ regs->orig_ax = value;
+ if (syscall_get_nr(child, regs) >= 0)
+ task_thread_info(child)->status |= TS_COMPAT;
break;
case offsetof(struct user32, regs.eflags):
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /*
- * Re-enable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
- #ifdef CONFIG_X86_NEW_MCE
+ #ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_process();
#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/nmi.h>
+ #include <linux/tboot.h>
#include <asm/acpi.h>
#include <asm/desc.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
+#include <asm/debugreg.h>
#include <linux/mc146818rtc.h>
#include <asm/smpboot_hooks.h>
/* enable local interrupts */
local_irq_enable();
- setup_secondary_clock();
+ x86_cpuinit.setup_percpu_clockev();
wmb();
+ load_debug_registers();
cpu_idle();
}
#endif
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
- alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
- cpumask_clear(per_cpu(cpu_core_map, i));
- cpumask_clear(per_cpu(cpu_sibling_map, i));
- cpumask_clear(cpu_data(i).llc_shared_map);
+ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
}
set_cpu_sibling_map(0);
printk(KERN_INFO "CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
- setup_boot_clock();
+ x86_init.timers.setup_percpu_clockev();
if (is_uv_system())
uv_system_init();
+
+ set_mtrr_aps_delayed_init();
out:
preempt_enable();
}
+
+ void arch_enable_nonboot_cpus_begin(void)
+ {
+ set_mtrr_aps_delayed_init();
+ }
+
+ void arch_enable_nonboot_cpus_end(void)
+ {
+ mtrr_aps_init();
+ }
+
/*
* Early setup to make printk work.
*/
setup_ioapic_dest();
#endif
check_nmi_watchdog();
+ mtrr_aps_init();
}
static int __initdata setup_possible_cpus = -1;
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs();
+ hw_breakpoint_disable();
}
int native_cpu_disable(void)
void native_play_dead(void)
{
play_dead_common();
+ tboot_shutdown(TB_SHUTDOWN_WFS);
wbinvd_halt();
}
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
- #include <linux/utsname.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
+ #include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
- #include <asm/traps.h>
asmlinkage int system_call(void);
/*
* The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
*/
- gate_desc idt_table[NR_VECTORS]
- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+ gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
#endif
DECLARE_BITMAP(used_vectors, NR_VECTORS);
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned long condition;
+ unsigned long dr6;
int si_code;
- get_debugreg(condition, 6);
+ get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
- if (condition & DR_STEP && kmemcheck_trap(regs))
+ if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
+ /* DR6 may or may not be cleared by the CPU */
+ set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
+ /* Store the virtualized DR6 value */
+ tsk->thread.debugreg6 = dr6;
+
+ if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, 1);
+ return;
}
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto debug_vm86;
-#endif
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg6 = condition;
-
/*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
+ * Single-stepping through system calls: ignore any exceptions in
+ * kernel space, but re-enable TF when returning to user mode.
+ *
+ * We already checked v86 mode above, so we can check for kernel mode
+ * by just checking the CPL of CS.
*/
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ tsk->thread.debugreg6 &= ~DR_STEP;
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
}
-
- si_code = get_si_code(condition);
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code, si_code);
-
- /*
- * Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
- */
-clear_dr7:
- set_debugreg(0, 7);
+ si_code = get_si_code(tsk->thread.debugreg6);
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
- return;
-#ifdef CONFIG_X86_32
-debug_vm86:
- /* reenable preemption: handle_vm86_trap() might sleep */
- dec_preempt_count();
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- conditional_cli(regs);
- return;
-#endif
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- preempt_conditional_cli(regs);
return;
}
*/
cpu_init();
- #ifdef CONFIG_X86_32
- x86_quirk_trap_init();
- #endif
+ x86_init.irqs.trap_init();
}
if (cpuid->nent < 1)
goto out;
+ if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+ cpuid->nent = KVM_MAX_CPUID_ENTRIES;
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
kvm_x86_ops->run(vcpu, kvm_run);
if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
+ set_debugreg(current->thread.debugreg[0], 0);
+ set_debugreg(current->thread.debugreg[1], 1);
+ set_debugreg(current->thread.debugreg[2], 2);
+ set_debugreg(current->thread.debugreg[3], 3);
set_debugreg(current->thread.debugreg6, 6);
set_debugreg(current->thread.debugreg7, 7);
}
#include <asm/mce.h>
#include <asm/xcr.h>
#include <asm/suspend.h>
+#include <asm/debugreg.h>
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
ctxt->cr4 = read_cr4();
ctxt->cr8 = read_cr8();
#endif
+ hw_breakpoint_disable();
}
/* Needed by apm.c */
/*
* Now maybe reload the debug registers
*/
- if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(current->thread.debugreg7, 7);
-#else
- /* CONFIG_X86_64 */
- loaddebug(¤t->thread, 0);
- loaddebug(¤t->thread, 1);
- loaddebug(¤t->thread, 2);
- loaddebug(¤t->thread, 3);
- /* no 4 and 5 */
- loaddebug(¤t->thread, 6);
- loaddebug(¤t->thread, 7);
-#endif
- }
-
+ load_debug_registers();
}
/**
fix_processor_context();
do_fpu_end();
- mtrr_ap_init();
-
- #ifdef CONFIG_X86_OLD_MCE
- mcheck_init(&boot_cpu_data);
- #endif
+ mtrr_bp_restore();
}
/* Needed by apm.c */
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
- obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
- obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
- obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
- obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
config HAVE_FTRACE_NMI_ENTER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_TRACER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_GRAPH_TRACER
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FUNCTION_GRAPH_FP_TEST
bool
config HAVE_FUNCTION_TRACE_MCOUNT_TEST
bool
help
- This gets selected when the arch tests the function_trace_stop
- variable at the mcount call site. Otherwise, this variable
- is tested by the called function.
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_DYNAMIC_FTRACE
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_FTRACE_MCOUNT_RECORD
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config HAVE_HW_BRANCH_TRACER
bool
config HAVE_SYSCALL_TRACEPOINTS
bool
+ help
+ See Documentation/trace/ftrace-implementation.txt
config TRACER_MAX_TRACE
bool
# This allows those options to appear when no other tracer is selected. But the
# options do not appear when something else selects it. We need the two options
# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
- # hidding of the automatic options options.
+ # hidding of the automatic options.
config TRACING
bool
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol i.e. /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, all read
+ and write operations on kernel variables can be monitored for
+ accesses.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
functioning properly. It will do tests on all the configured
tracers of ftrace.
+ config EVENT_TRACE_TEST_SYSCALLS
+ bool "Run selftest on syscall events"
+ depends on FTRACE_STARTUP_TEST
+ help
+ This option will also enable testing every syscall event.
+ It only enables the event and disables it and runs various loads
+ with the event enabled. This adds a bit more time for kernel boot
+ up since it runs this on every system call defined.
+
+ TBD - enable a way to actually call the syscalls as we test their
+ events
+
config MMIOTRACE
bool "Memory mapped IO tracing"
depends on HAVE_MMIOTRACE_SUPPORT && PCI
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
- obj-$(CONFIG_POWER_TRACER) += trace_power.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
+ obj-$(CONFIG_EVENT_TRACING) += power-traces.o
libftrace-y := ftrace.o
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
#include <linux/mmiotrace.h>
+ #include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
- #include <trace/power.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
+#ifdef CONFIG_KSYM_TRACER
+#include <asm/hw_breakpoint.h>
+#endif
+
enum trace_type {
__TRACE_FIRST_TYPE = 0,
TRACE_HW_BRANCHES,
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
- TRACE_POWER,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
- /*
- * Function trace entry - function address and parent function addres:
- */
- struct ftrace_entry {
- struct trace_entry ent;
- unsigned long ip;
- unsigned long parent_ip;
- };
-
- /* Function call entry */
- struct ftrace_graph_ent_entry {
- struct trace_entry ent;
- struct ftrace_graph_ent graph_ent;
+ enum kmemtrace_type_id {
+ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
+ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
+ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
};
- /* Function return entry */
- struct ftrace_graph_ret_entry {
- struct trace_entry ent;
- struct ftrace_graph_ret ret;
- };
extern struct tracer boot_tracer;
- /*
- * Context switch trace entry - which task (and prio) we switched from/to:
- */
- struct ctx_switch_entry {
- struct trace_entry ent;
- unsigned int prev_pid;
- unsigned char prev_prio;
- unsigned char prev_state;
- unsigned int next_pid;
- unsigned char next_prio;
- unsigned char next_state;
- unsigned int next_cpu;
- };
-
- /*
- * Special (free-form) trace entry:
- */
- struct special_entry {
- struct trace_entry ent;
- unsigned long arg1;
- unsigned long arg2;
- unsigned long arg3;
- };
-
- /*
- * Stack-trace entry:
- */
-
- #define FTRACE_STACK_ENTRIES 8
-
- struct stack_entry {
- struct trace_entry ent;
- unsigned long caller[FTRACE_STACK_ENTRIES];
- };
-
- struct userstack_entry {
- struct trace_entry ent;
- unsigned long caller[FTRACE_STACK_ENTRIES];
- };
-
- /*
- * trace_printk entry:
- */
- struct bprint_entry {
- struct trace_entry ent;
- unsigned long ip;
- const char *fmt;
- u32 buf[];
- };
-
- struct print_entry {
- struct trace_entry ent;
- unsigned long ip;
- char buf[];
- };
-
- #define TRACE_OLD_SIZE 88
-
- struct trace_field_cont {
- unsigned char type;
- /* Temporary till we get rid of this completely */
- char buf[TRACE_OLD_SIZE - 1];
- };
+ #undef __field
+ #define __field(type, item) type item;
- struct trace_mmiotrace_rw {
- struct trace_entry ent;
- struct mmiotrace_rw rw;
- };
+ #undef __field_struct
+ #define __field_struct(type, item) __field(type, item)
- struct trace_mmiotrace_map {
- struct trace_entry ent;
- struct mmiotrace_map map;
- };
+ #undef __field_desc
+ #define __field_desc(type, container, item)
- struct trace_boot_call {
- struct trace_entry ent;
- struct boot_trace_call boot_call;
- };
+ #undef __array
+ #define __array(type, item, size) type item[size];
- struct trace_boot_ret {
- struct trace_entry ent;
- struct boot_trace_ret boot_ret;
- };
+ #undef __array_desc
+ #define __array_desc(type, container, item, size)
- #define TRACE_FUNC_SIZE 30
- #define TRACE_FILE_SIZE 20
- struct trace_branch {
- struct trace_entry ent;
- unsigned line;
- char func[TRACE_FUNC_SIZE+1];
- char file[TRACE_FILE_SIZE+1];
- char correct;
- };
+ #undef __dynamic_array
+ #define __dynamic_array(type, item) type item[];
- struct hw_branch_entry {
- struct trace_entry ent;
- u64 from;
- u64 to;
- };
+ #undef F_STRUCT
+ #define F_STRUCT(args...) args
- struct trace_power {
- struct trace_entry ent;
- struct power_trace state_data;
- };
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
+ struct struct_name { \
+ struct trace_entry ent; \
+ tstruct \
+ }
- enum kmemtrace_type_id {
- KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
- KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
- KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
- };
+ #undef TP_ARGS
+ #define TP_ARGS(args...) args
- struct kmemtrace_alloc_entry {
- struct trace_entry ent;
- enum kmemtrace_type_id type_id;
- unsigned long call_site;
- const void *ptr;
- size_t bytes_req;
- size_t bytes_alloc;
- gfp_t gfp_flags;
- int node;
- };
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
- struct kmemtrace_free_entry {
- struct trace_entry ent;
- enum kmemtrace_type_id type_id;
- unsigned long call_site;
- const void *ptr;
- };
+ #include "trace_entries.h"
+ /*
+ * syscalls are special, and need special handling, this is why
+ * they are not included in trace_entries.h
+ */
struct syscall_trace_enter {
struct trace_entry ent;
int nr;
unsigned long ret;
};
- #define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
- extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
-
- struct ksym_trace_entry {
- struct trace_entry ent;
- unsigned long ip;
- unsigned char type;
- char ksym_name[KSYM_NAME_LEN];
- char cmd[TASK_COMM_LEN];
- };
-
/*
* trace_flag_type is an enumeration that holds different
* states when a trace occurs. These are:
* IRQS_OFF - interrupts were disabled
* IRQS_NOSUPPORT - arch does not support irqs_disabled_flags
- * NEED_RESCED - reschedule is requested
+ * NEED_RESCHED - reschedule is requested
* HARDIRQ - inside an interrupt handler
* SOFTIRQ - inside a softirq handler
*/
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
- IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
+ IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
struct tracer *next;
int print_max;
struct tracer_flags *flags;
- struct tracer_stat *stats;
};
void tracing_start_sched_switch_record(void);
int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
+ int is_tracing_stopped(void);
+
++#define KSYM_SELFTEST_ENTRY "ksym_selftest_dummy"
++extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
#ifdef CONFIG_TRACER_MAX_TRACE
extern cycle_t ftrace_now(int cpu);
- #ifdef CONFIG_CONTEXT_SWITCH_TRACER
- typedef void
- (*tracer_switch_func_t)(void *private,
- void *__rq,
- struct task_struct *prev,
- struct task_struct *next);
-
- struct tracer_switch_ops {
- tracer_switch_func_t func;
- void *private;
- struct tracer_switch_ops *next;
- };
- #endif /* CONFIG_CONTEXT_SWITCH_TRACER */
-
extern void trace_find_cmdline(int pid, char comm[]);
#ifdef CONFIG_DYNAMIC_FTRACE
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
return 0;
}
#else
- static inline int ftrace_trace_addr(unsigned long addr)
- {
- return 1;
- }
static inline int ftrace_graph_addr(unsigned long addr)
{
return 1;
}
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
- extern struct pid *ftrace_pid_trace;
+ extern struct list_head ftrace_pids;
#ifdef CONFIG_FUNCTION_TRACER
static inline int ftrace_trace_task(struct task_struct *task)
{
- if (!ftrace_pid_trace)
+ if (list_empty(&ftrace_pids))
return 1;
return test_tsk_trace_trace(task);
}
#endif
+ /*
+ * struct trace_parser - servers for reading the user input separated by spaces
+ * @cont: set if the input is not complete - no final space char was found
+ * @buffer: holds the parsed user input
+ * @idx: user input lenght
+ * @size: buffer size
+ */
+ struct trace_parser {
+ bool cont;
+ char *buffer;
+ unsigned idx;
+ unsigned size;
+ };
+
+ static inline bool trace_parser_loaded(struct trace_parser *parser)
+ {
+ return (parser->idx != 0);
+ }
+
+ static inline bool trace_parser_cont(struct trace_parser *parser)
+ {
+ return parser->cont;
+ }
+
+ static inline void trace_parser_clear(struct trace_parser *parser)
+ {
+ parser->cont = false;
+ parser->idx = 0;
+ }
+
+ extern int trace_parser_get_init(struct trace_parser *parser, int size);
+ extern void trace_parser_put(struct trace_parser *parser);
+ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
+ size_t cnt, loff_t *ppos);
+
/*
* trace_iterator_flags is an enumeration that defines bit
* positions into trace_flags that controls the output.
int n_preds;
struct filter_pred **preds;
char *filter_string;
- bool no_reset;
};
struct event_subsystem {
};
struct filter_pred;
+ struct regex;
typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
int val1, int val2);
- struct filter_pred {
- filter_pred_fn_t fn;
- u64 val;
- char str_val[MAX_FILTER_STR_VAL];
- int str_len;
- char *field_name;
- int offset;
- int not;
- int op;
- int pop_n;
+ typedef int (*regex_match_func)(char *str, struct regex *r, int len);
+
+ enum regex_type {
+ MATCH_FULL = 0,
+ MATCH_FRONT_ONLY,
+ MATCH_MIDDLE_ONLY,
+ MATCH_END_ONLY,
+ };
+
+ struct regex {
+ char pattern[MAX_FILTER_STR_VAL];
+ int len;
+ int field_len;
+ regex_match_func match;
};
+ struct filter_pred {
+ filter_pred_fn_t fn;
+ u64 val;
+ struct regex regex;
+ char *field_name;
+ int offset;
+ int not;
+ int op;
+ int pop_n;
+ };
+
+ extern enum regex_type
+ filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct ftrace_event_call *call,
struct trace_seq *s);
extern int apply_event_filter(struct ftrace_event_call *call,
struct ring_buffer *buffer,
struct ring_buffer_event *event)
{
- if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+ if (unlikely(call->filter_active) &&
+ !filter_match_preds(call->filter, rec)) {
ring_buffer_discard_commit(buffer, event);
return 1;
}
return 0;
}
- #define DEFINE_COMPARISON_PRED(type) \
- static int filter_pred_##type(struct filter_pred *pred, void *event, \
- int val1, int val2) \
- { \
- type *addr = (type *)(event + pred->offset); \
- type val = (type)pred->val; \
- int match = 0; \
- \
- switch (pred->op) { \
- case OP_LT: \
- match = (*addr < val); \
- break; \
- case OP_LE: \
- match = (*addr <= val); \
- break; \
- case OP_GT: \
- match = (*addr > val); \
- break; \
- case OP_GE: \
- match = (*addr >= val); \
- break; \
- default: \
- break; \
- } \
- \
- return match; \
- }
-
- #define DEFINE_EQUALITY_PRED(size) \
- static int filter_pred_##size(struct filter_pred *pred, void *event, \
- int val1, int val2) \
- { \
- u##size *addr = (u##size *)(event + pred->offset); \
- u##size val = (u##size)pred->val; \
- int match; \
- \
- match = (val == *addr) ^ pred->not; \
- \
- return match; \
- }
-
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
extern const char *__start___trace_bprintk_fmt[];
extern const char *__stop___trace_bprintk_fmt[];
- #undef TRACE_EVENT_FORMAT
- #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \
+ #undef FTRACE_ENTRY
+ #define FTRACE_ENTRY(call, struct_name, id, tstruct, print) \
extern struct ftrace_event_call event_##call;
- #undef TRACE_EVENT_FORMAT_NOFILTER
- #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
- #include "trace_event_types.h"
+ #undef FTRACE_ENTRY_DUP
+ #define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print) \
+ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
+ #include "trace_entries.h"
#endif /* _LINUX_KERNEL_TRACE_H */
--- /dev/null
+ /*
+ * This file defines the trace event structures that go into the ring
+ * buffer directly. They are created via macros so that changes for them
+ * appear in the format file. Using macros will automate this process.
+ *
+ * The macro used to create a ftrace data structure is:
+ *
+ * FTRACE_ENTRY( name, struct_name, id, structure, print )
+ *
+ * @name: the name used the event name, as well as the name of
+ * the directory that holds the format file.
+ *
+ * @struct_name: the name of the structure that is created.
+ *
+ * @id: The event identifier that is used to detect what event
+ * this is from the ring buffer.
+ *
+ * @structure: the structure layout
+ *
+ * - __field( type, item )
+ * This is equivalent to declaring
+ * type item;
+ * in the structure.
+ * - __array( type, item, size )
+ * This is equivalent to declaring
+ * type item[size];
+ * in the structure.
+ *
+ * * for structures within structures, the format of the internal
+ * structure is layed out. This allows the internal structure
+ * to be deciphered for the format file. Although these macros
+ * may become out of sync with the internal structure, they
+ * will create a compile error if it happens. Since the
+ * internel structures are just tracing helpers, this is not
+ * an issue.
+ *
+ * When an internal structure is used, it should use:
+ *
+ * __field_struct( type, item )
+ *
+ * instead of __field. This will prevent it from being shown in
+ * the output file. The fields in the structure should use.
+ *
+ * __field_desc( type, container, item )
+ * __array_desc( type, container, item, len )
+ *
+ * type, item and len are the same as __field and __array, but
+ * container is added. This is the name of the item in
+ * __field_struct that this is describing.
+ *
+ *
+ * @print: the print format shown to users in the format file.
+ */
+
+ /*
+ * Function trace entry - function address and parent function addres:
+ */
+ FTRACE_ENTRY(function, ftrace_entry,
+
+ TRACE_FN,
+
+ F_STRUCT(
+ __field( unsigned long, ip )
+ __field( unsigned long, parent_ip )
+ ),
+
+ F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
+ );
+
+ /* Function call entry */
+ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
+
+ TRACE_GRAPH_ENT,
+
+ F_STRUCT(
+ __field_struct( struct ftrace_graph_ent, graph_ent )
+ __field_desc( unsigned long, graph_ent, func )
+ __field_desc( int, graph_ent, depth )
+ ),
+
+ F_printk("--> %lx (%d)", __entry->func, __entry->depth)
+ );
+
+ /* Function return entry */
+ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
+
+ TRACE_GRAPH_RET,
+
+ F_STRUCT(
+ __field_struct( struct ftrace_graph_ret, ret )
+ __field_desc( unsigned long, ret, func )
+ __field_desc( unsigned long long, ret, calltime)
+ __field_desc( unsigned long long, ret, rettime )
+ __field_desc( unsigned long, ret, overrun )
+ __field_desc( int, ret, depth )
+ ),
+
+ F_printk("<-- %lx (%d) (start: %llx end: %llx) over: %d",
+ __entry->func, __entry->depth,
+ __entry->calltime, __entry->rettime,
+ __entry->depth)
+ );
+
+ /*
+ * Context switch trace entry - which task (and prio) we switched from/to:
+ *
+ * This is used for both wakeup and context switches. We only want
+ * to create one structure, but we need two outputs for it.
+ */
+ #define FTRACE_CTX_FIELDS \
+ __field( unsigned int, prev_pid ) \
+ __field( unsigned char, prev_prio ) \
+ __field( unsigned char, prev_state ) \
+ __field( unsigned int, next_pid ) \
+ __field( unsigned char, next_prio ) \
+ __field( unsigned char, next_state ) \
+ __field( unsigned int, next_cpu )
+
+ FTRACE_ENTRY(context_switch, ctx_switch_entry,
+
+ TRACE_CTX,
+
+ F_STRUCT(
+ FTRACE_CTX_FIELDS
+ ),
+
+ F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
+ __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
+ __entry->next_pid, __entry->next_prio, __entry->next_state,
+ __entry->next_cpu
+ )
+ );
+
+ /*
+ * FTRACE_ENTRY_DUP only creates the format file, it will not
+ * create another structure.
+ */
+ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
+
+ TRACE_WAKE,
+
+ F_STRUCT(
+ FTRACE_CTX_FIELDS
+ ),
+
+ F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
+ __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
+ __entry->next_pid, __entry->next_prio, __entry->next_state,
+ __entry->next_cpu
+ )
+ );
+
+ /*
+ * Special (free-form) trace entry:
+ */
+ FTRACE_ENTRY(special, special_entry,
+
+ TRACE_SPECIAL,
+
+ F_STRUCT(
+ __field( unsigned long, arg1 )
+ __field( unsigned long, arg2 )
+ __field( unsigned long, arg3 )
+ ),
+
+ F_printk("(%08lx) (%08lx) (%08lx)",
+ __entry->arg1, __entry->arg2, __entry->arg3)
+ );
+
+ /*
+ * Stack-trace entry:
+ */
+
+ #define FTRACE_STACK_ENTRIES 8
+
+ FTRACE_ENTRY(kernel_stack, stack_entry,
+
+ TRACE_STACK,
+
+ F_STRUCT(
+ __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
+ ),
+
+ F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
+ "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
+ __entry->caller[0], __entry->caller[1], __entry->caller[2],
+ __entry->caller[3], __entry->caller[4], __entry->caller[5],
+ __entry->caller[6], __entry->caller[7])
+ );
+
+ FTRACE_ENTRY(user_stack, userstack_entry,
+
+ TRACE_USER_STACK,
+
+ F_STRUCT(
+ __field( unsigned int, tgid )
+ __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
+ ),
+
+ F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
+ "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
+ __entry->caller[0], __entry->caller[1], __entry->caller[2],
+ __entry->caller[3], __entry->caller[4], __entry->caller[5],
+ __entry->caller[6], __entry->caller[7])
+ );
+
+ /*
+ * trace_printk entry:
+ */
+ FTRACE_ENTRY(bprint, bprint_entry,
+
+ TRACE_BPRINT,
+
+ F_STRUCT(
+ __field( unsigned long, ip )
+ __field( const char *, fmt )
+ __dynamic_array( u32, buf )
+ ),
+
+ F_printk("%08lx fmt:%p",
+ __entry->ip, __entry->fmt)
+ );
+
+ FTRACE_ENTRY(print, print_entry,
+
+ TRACE_PRINT,
+
+ F_STRUCT(
+ __field( unsigned long, ip )
+ __dynamic_array( char, buf )
+ ),
+
+ F_printk("%08lx %s",
+ __entry->ip, __entry->buf)
+ );
+
+ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
+
+ TRACE_MMIO_RW,
+
+ F_STRUCT(
+ __field_struct( struct mmiotrace_rw, rw )
+ __field_desc( resource_size_t, rw, phys )
+ __field_desc( unsigned long, rw, value )
+ __field_desc( unsigned long, rw, pc )
+ __field_desc( int, rw, map_id )
+ __field_desc( unsigned char, rw, opcode )
+ __field_desc( unsigned char, rw, width )
+ ),
+
+ F_printk("%lx %lx %lx %d %x %x",
+ (unsigned long)__entry->phys, __entry->value, __entry->pc,
+ __entry->map_id, __entry->opcode, __entry->width)
+ );
+
+ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
+
+ TRACE_MMIO_MAP,
+
+ F_STRUCT(
+ __field_struct( struct mmiotrace_map, map )
+ __field_desc( resource_size_t, map, phys )
+ __field_desc( unsigned long, map, virt )
+ __field_desc( unsigned long, map, len )
+ __field_desc( int, map, map_id )
+ __field_desc( unsigned char, map, opcode )
+ ),
+
+ F_printk("%lx %lx %lx %d %x",
+ (unsigned long)__entry->phys, __entry->virt, __entry->len,
+ __entry->map_id, __entry->opcode)
+ );
+
+ FTRACE_ENTRY(boot_call, trace_boot_call,
+
+ TRACE_BOOT_CALL,
+
+ F_STRUCT(
+ __field_struct( struct boot_trace_call, boot_call )
+ __field_desc( pid_t, boot_call, caller )
+ __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
+ ),
+
+ F_printk("%d %s", __entry->caller, __entry->func)
+ );
+
+ FTRACE_ENTRY(boot_ret, trace_boot_ret,
+
+ TRACE_BOOT_RET,
+
+ F_STRUCT(
+ __field_struct( struct boot_trace_ret, boot_ret )
+ __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
+ __field_desc( int, boot_ret, result )
+ __field_desc( unsigned long, boot_ret, duration )
+ ),
+
+ F_printk("%s %d %lx",
+ __entry->func, __entry->result, __entry->duration)
+ );
+
+ #define TRACE_FUNC_SIZE 30
+ #define TRACE_FILE_SIZE 20
+
+ FTRACE_ENTRY(branch, trace_branch,
+
+ TRACE_BRANCH,
+
+ F_STRUCT(
+ __field( unsigned int, line )
+ __array( char, func, TRACE_FUNC_SIZE+1 )
+ __array( char, file, TRACE_FILE_SIZE+1 )
+ __field( char, correct )
+ ),
+
+ F_printk("%u:%s:%s (%u)",
+ __entry->line,
+ __entry->func, __entry->file, __entry->correct)
+ );
+
+ FTRACE_ENTRY(hw_branch, hw_branch_entry,
+
+ TRACE_HW_BRANCHES,
+
+ F_STRUCT(
+ __field( u64, from )
+ __field( u64, to )
+ ),
+
+ F_printk("from: %llx to: %llx", __entry->from, __entry->to)
+ );
+
+ FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
+
+ TRACE_KMEM_ALLOC,
+
+ F_STRUCT(
+ __field( enum kmemtrace_type_id, type_id )
+ __field( unsigned long, call_site )
+ __field( const void *, ptr )
+ __field( size_t, bytes_req )
+ __field( size_t, bytes_alloc )
+ __field( gfp_t, gfp_flags )
+ __field( int, node )
+ ),
+
+ F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
+ " flags:%x node:%d",
+ __entry->type_id, __entry->call_site, __entry->ptr,
+ __entry->bytes_req, __entry->bytes_alloc,
+ __entry->gfp_flags, __entry->node)
+ );
+
+ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
+
+ TRACE_KMEM_FREE,
+
+ F_STRUCT(
+ __field( enum kmemtrace_type_id, type_id )
+ __field( unsigned long, call_site )
+ __field( const void *, ptr )
+ ),
+
+ F_printk("type:%u call_site:%lx ptr:%p",
+ __entry->type_id, __entry->call_site, __entry->ptr)
+ );
++
++FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
++
++ TRACE_KSYM,
++
++ F_STRUCT(
++ __field( unsigned long, ip )
++ __field( unsigned char, type )
++ __array( char , ksym_name, KSYM_NAME_LEN )
++ __array( char , cmd, TASK_COMM_LEN )
++ ),
++
++ F_printk("ip: %pF type: %d ksym_name: %s cmd: %s",
++ (void *)__entry->ip, (unsigned int)__entry->type,
++ __entry->ksym_name, __entry->cmd)
++);
if SAMPLES
- config SAMPLE_MARKERS
- tristate "Build markers examples -- loadable modules only"
- depends on MARKERS && m
- help
- This build markers example modules.
-
config SAMPLE_TRACEPOINTS
tristate "Build tracepoints examples -- loadable modules only"
depends on TRACEPOINTS && m
default m
depends on SAMPLE_KPROBES && KRETPROBES
+config SAMPLE_HW_BREAKPOINT
+ tristate "Build kernel hardware breakpoint examples -- loadable module only"
+ depends on HAVE_HW_BREAKPOINT && m
+ help
+ This builds kernel hardware breakpoint example modules.
+
endif # SAMPLES
# Makefile for Linux samples code
- obj-$(CONFIG_SAMPLES) += markers/ kobject/ kprobes/ tracepoints/ \
- trace_events/ hw_breakpoint/
-obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/
++obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
++ hw_breakpoint/