behaviour to be specified. Bit 0 enables warnings,
bit 1 enables fixups, and bit 2 sends a segfault.
+ align_va_addr= [X86-64]
+ Align virtual addresses by clearing slice [14:12] when
+ allocating a VMA at process creation time. This option
+ can give up to a 3% performance improvement on AMD F15h
+ machines (where it is enabled by default) for
+ CPU-intensive benchmarks; results in microbenchmarks
+ vary widely depending on workload and compiler.
+
+ 32: only for 32-bit processes
+ 64: only for 64-bit processes
+ on: enable for both 32- and 64-bit processes
+ off: disable for both 32- and 64-bit processes
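+
+ Example: to enable the alignment for 64-bit
+ processes only, boot with:
+
+ align_va_addr=64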
+
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
noresidual [PPC] Don't use residual data on PReP machines.
+ nordrand [X86] Disable the direct use of the RDRAND
+ instruction even if it is supported by the
+ processor. RDRAND is still available to user
+ space applications.
+
noresume [SWSUSP] Disables resume and restores original swap
space.
def_bool y
depends on X86_PAT
+config ARCH_RANDOM
+ def_bool y
+ prompt "x86 architectural random number generator" if EXPERT
+ ---help---
+ Enable the x86 architectural RDRAND instruction
+ (Intel Bull Mountain technology) to generate random numbers.
+ If supported, this is a high bandwidth, cryptographically
+ secure hardware random number generator.
+
config EFI
bool "EFI runtime service support"
depends on ACPI
--- /dev/null
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef ASM_X86_ARCHRANDOM_H
+#define ASM_X86_ARCHRANDOM_H
+
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/nops.h>
+
+#define RDRAND_RETRY_LOOPS 10
+
+#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
+#ifdef CONFIG_X86_64
+# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
+#else
+# define RDRAND_LONG RDRAND_INT
+#endif
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#define GET_RANDOM(name, type, rdrand, nop) \
+static inline int name(type *v) \
+{ \
+ int ok; \
+ alternative_io("movl $0, %0\n\t" \
+ nop, \
+ "\n1: " rdrand "\n\t" \
+ "jc 2f\n\t" \
+ "decl %0\n\t" \
+ "jnz 1b\n\t" \
+ "2:", \
+ X86_FEATURE_RDRAND, \
+ ASM_OUTPUT2("=r" (ok), "=a" (*v)), \
+ "0" (RDRAND_RETRY_LOOPS)); \
+ return ok; \
+}
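+
+/*
+ * For reference, a rough sketch of what the macro generates for
+ * arch_get_random_long() once alternatives have run and RDRAND is
+ * present (pseudo-C; the real body is the asm above):
+ *
+ * static inline int arch_get_random_long(unsigned long *v)
+ * {
+ * int ok = RDRAND_RETRY_LOOPS;
+ * do {
+ * if (rdrand(v)) // CF set on success
+ * return ok; // non-zero: success
+ * } while (--ok);
+ * return 0; // retries exhausted
+ * }
+ *
+ * When RDRAND is absent, the alternative patches in "movl $0, ok"
+ * plus a nop, so the function returns 0 immediately.
+ */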
+
+#ifdef CONFIG_X86_64
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
+
+#else
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* CONFIG_ARCH_RANDOM */
+
+extern void x86_init_rdrand(struct cpuinfo_x86 *c);
+
+#endif /* ASM_X86_ARCHRANDOM_H */
#endif
+#define xadd(ptr, inc) \
+ do { \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ asm volatile (LOCK_PREFIX "xaddb %b0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 2: \
+ asm volatile (LOCK_PREFIX "xaddw %w0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 4: \
+ asm volatile (LOCK_PREFIX "xaddl %0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ } \
+ } while(0)
+
#define cmpxchg8b(ptr, o1, o2, n1, n2) \
({ \
char __ret; \
cmpxchg_local((ptr), (o), (n)); \
})
+#define xadd(ptr, inc) \
+ do { \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ asm volatile (LOCK_PREFIX "xaddb %b0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 2: \
+ asm volatile (LOCK_PREFIX "xaddw %w0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 4: \
+ asm volatile (LOCK_PREFIX "xaddl %0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 8: \
+ asm volatile (LOCK_PREFIX "xaddq %q0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ } \
+ } while(0)
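+
+/*
+ * Usage sketch: xadd() is an atomic fetch-and-add; 'inc' comes back
+ * holding the old value of '*ptr'. The ticket-lock claim path later
+ * in this series relies on exactly that:
+ *
+ * struct __raw_tickets t = { .tail = 1 };
+ * xadd(&lock->tickets, t); // t now holds the previous head/tail
+ */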
+
#define cmpxchg16b(ptr, o1, o2, n1, n2) \
({ \
char __ret; \
desc->base2 = (info->base_addr & 0xff000000) >> 24;
/*
- * Don't allow setting of the lm bit. It is useless anyway
- * because 64bit system calls require __USER_CS:
+ * Don't allow setting of the lm bit. It would confuse
+ * user_64bit_mode and would get overridden by sysret anyway.
*/
desc->l = 0;
}
/*
* ELF register definitions..
*/
+#include <linux/thread_info.h>
#include <asm/ptrace.h>
#include <asm/user.h>
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+#ifdef CONFIG_X86_32
+ return 1;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ return 1;
+#endif
+ return 0;
+}
+
+/* The first two values are special, do not change. See align_addr() */
+enum align_flags {
+ ALIGN_VA_32 = BIT(0),
+ ALIGN_VA_64 = BIT(1),
+ ALIGN_VDSO = BIT(2),
+ ALIGN_TOPDOWN = BIT(3),
+};
+
+struct va_alignment {
+ int flags;
+ unsigned long mask;
+} ____cacheline_aligned;
+
+extern struct va_alignment va_align;
+extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
#endif /* _ASM_X86_ELF_H */
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
+#include <asm/pgtable_types.h>
struct page;
struct thread_struct;
struct pv_info {
unsigned int kernel_rpl;
int shared_kernel_pmd;
+
+#ifdef CONFIG_X86_64
+ u16 extra_user_64bit_cs; /* __USER_CS if none */
+#endif
+
int paravirt_enabled;
const char *name;
};
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
-extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
#ifdef __KERNEL__
#include <linux/init.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt_types.h>
+#endif
struct cpuinfo_x86;
struct task_struct;
#endif
}
+#ifdef CONFIG_X86_64
+static inline bool user_64bit_mode(struct pt_regs *regs)
+{
+#ifndef CONFIG_PARAVIRT
+ /*
+ * On non-paravirt systems, this is the only long mode CPL 3
+ * selector. We do not allow long mode selectors in the LDT.
+ */
+ return regs->cs == __USER_CS;
+#else
+ /* Headers are too twisted for this to go in paravirt.h. */
+ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+#endif
+}
+#endif
+
/*
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
* when it traps. The previous stack will be directly underneath the saved
* On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
* (PPro errata 66, 92)
*/
-# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
+static __always_inline void __ticket_unlock_release(struct arch_spinlock *lock)
+{
+ if (sizeof(lock->tickets.head) == sizeof(u8))
+ asm volatile(LOCK_PREFIX "incb %0"
+ : "+m" (lock->tickets.head) : : "memory");
+ else
+ asm volatile(LOCK_PREFIX "incw %0"
+ : "+m" (lock->tickets.head) : : "memory");
+
+}
#else
-# define UNLOCK_LOCK_PREFIX
+static __always_inline void __ticket_unlock_release(struct arch_spinlock *lock)
+{
+ lock->tickets.head++;
+}
#endif
/*
* save some instructions and make the code more elegant. There really isn't
* much between them in performance though, especially as locks are out of line.
*/
-#if (NR_CPUS < 256)
-#define TICKET_SHIFT 8
-
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline struct __raw_tickets __ticket_spin_claim(struct arch_spinlock *lock)
{
- short inc = 0x0100;
-
- asm volatile (
- LOCK_PREFIX "xaddw %w0, %1\n"
- "1:\t"
- "cmpb %h0, %b0\n\t"
- "je 2f\n\t"
- "rep ; nop\n\t"
- "movb %1, %b0\n\t"
- /* don't need lfence here, because loads are in-order */
- "jmp 1b\n"
- "2:"
- : "+Q" (inc), "+m" (lock->slock)
- :
- : "memory", "cc");
-}
+ register struct __raw_tickets tickets = { .tail = 1 };
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
- int tmp, new;
-
- asm volatile("movzwl %2, %0\n\t"
- "cmpb %h0,%b0\n\t"
- "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
- "jne 1f\n\t"
- LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
- "1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
- :
- : "memory", "cc");
-
- return tmp;
-}
+ xadd(&lock->tickets, tickets);
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
-{
- asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
- : "+m" (lock->slock)
- :
- : "memory", "cc");
+ return tickets;
}
-#else
-#define TICKET_SHIFT 16
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
{
- int inc = 0x00010000;
- int tmp;
-
- asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
- "movzwl %w0, %2\n\t"
- "shrl $16, %0\n\t"
- "1:\t"
- "cmpl %0, %2\n\t"
- "je 2f\n\t"
- "rep ; nop\n\t"
- "movzwl %1, %2\n\t"
- /* don't need lfence here, because loads are in-order */
- "jmp 1b\n"
- "2:"
- : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
- :
- : "memory", "cc");
+ register struct __raw_tickets inc;
+
+ inc = __ticket_spin_claim(lock);
+
+ for (;;) {
+ if (inc.head == inc.tail)
+ goto out;
+ cpu_relax();
+ inc.head = ACCESS_ONCE(lock->tickets.head);
+ }
+out: barrier(); /* make sure nothing creeps before the lock is taken */
}
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
- int tmp;
- int new;
-
- asm volatile("movl %2,%0\n\t"
- "movl %0,%1\n\t"
- "roll $16, %0\n\t"
- "cmpl %0,%1\n\t"
- "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
- "jne 1f\n\t"
- LOCK_PREFIX "cmpxchgl %1,%2\n\t"
- "1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
- :
- : "memory", "cc");
-
- return tmp;
+ arch_spinlock_t old, new;
+
+ old.tickets = ACCESS_ONCE(lock->tickets);
+ if (old.tickets.head != old.tickets.tail)
+ return 0;
+
+ new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
+
+ /* cmpxchg is a full barrier, so nothing can move before it */
+ return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
- asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
- : "+m" (lock->slock)
- :
- : "memory", "cc");
+ barrier(); /* prevent reordering out of locked region */
+ __ticket_unlock_release(lock);
+ barrier(); /* prevent reordering into locked region */
}
-#endif
static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
- int tmp = ACCESS_ONCE(lock->slock);
+ struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+ return !!(tmp.tail ^ tmp.head);
}
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
- int tmp = ACCESS_ONCE(lock->slock);
+ struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+ return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
}
#ifndef CONFIG_PARAVIRT_SPINLOCKS
# error "please don't include this file directly"
#endif
+#include <linux/types.h>
+
+#if (CONFIG_NR_CPUS < 256)
+typedef u8 __ticket_t;
+typedef u16 __ticketpair_t;
+#else
+typedef u16 __ticket_t;
+typedef u32 __ticketpair_t;
+#endif
+
+#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
+#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
+
typedef struct arch_spinlock {
- unsigned int slock;
+ union {
+ __ticketpair_t head_tail;
+ struct __raw_tickets {
+ __ticket_t head, tail;
+ } tickets;
+ };
} arch_spinlock_t;
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
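+
+/*
+ * Layout example for the NR_CPUS < 256 case (u8 tickets, u16 pair,
+ * little endian): head is the low byte, tail the high byte, so
+ *
+ * head_tail == 0x0201 => head = 1, tail = 2 (held, no waiters)
+ * head_tail + (1 << TICKET_SHIFT) => tail bumped to 3, head untouched
+ *
+ * the latter being the "one more waiter" value the trylock above builds.
+ */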
#include <asm/rwlock.h>
void (*banner)(void);
};
-/**
- * struct x86_init_mapping - platform specific initial kernel pagetable setup
- * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
- *
- * For more details on the purpose of this hook, look in
- * init_memory_mapping and the commit that added it.
- */
-struct x86_init_mapping {
- void (*pagetable_reserve)(u64 start, u64 end);
-};
-
/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
- struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
- mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o sched.o mshyperv.o
+obj-y += rdrand.o
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
#include <linux/init.h>
#include <linux/bitops.h>
+#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/io.h>
#endif
}
+static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
+
+ if (c->x86 > 0x10 ||
+ (c->x86 == 0x10 && c->x86_model >= 0x2)) {
+ u64 val;
+
+ rdmsrl(MSR_K7_HWCR, val);
+ if (!(val & BIT(24)))
+ printk(KERN_WARNING FW_BUG "TSC doesn't count "
+ "with P0 frequency!\n");
+ }
+ }
+
+ if (c->x86 == 0x15) {
+ unsigned long upperbit;
+ u32 cpuid, assoc;
+
+ cpuid = cpuid_edx(0x80000005);
+ assoc = (cpuid >> 16) & 0xff;
+ upperbit = ((cpuid >> 24) << 10) / assoc;
+
+ va_align.mask = (upperbit - 1) & PAGE_MASK;
+ va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+ }
+}
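+
+/*
+ * Worked example, assuming the common F15h configuration of a 64K,
+ * 2-way L1I: CPUID 0x80000005 EDX reports the size in KB in bits
+ * 31-24 and the associativity in bits 23-16, so
+ *
+ * upperbit = (64 << 10) / 2 = 32K (the way size)
+ * va_align.mask = (32K - 1) & PAGE_MASK = 0x7000
+ *
+ * i.e. bits [14:12] of the virtual address, matching the
+ * align_va_addr documentation above.
+ */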
+
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
early_init_amd_mc(c);
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
}
#endif
-
- /* We need to do the following only once */
- if (c != &boot_cpu_data)
- return;
-
- if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
-
- if (c->x86 > 0x10 ||
- (c->x86 == 0x10 && c->x86_model >= 0x2)) {
- u64 val;
-
- rdmsrl(MSR_K7_HWCR, val);
- if (!(val & BIT(24)))
- printk(KERN_WARNING FW_BUG "TSC doesn't count "
- "with P0 frequency!\n");
- }
- }
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
.c_size_cache = amd_size_cache,
#endif
.c_early_init = early_init_amd,
+ .c_bsp_init = bsp_init_amd,
.c_init = init_amd,
.c_x86_vendor = X86_VENDOR_AMD,
};
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
+#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
filter_cpuid_features(c, false);
setup_smep(c);
+
+ if (this_cpu->c_bsp_init)
+ this_cpu->c_bsp_init(c);
}
void __init early_cpu_init(void)
#endif
init_hypervisor(c);
+ x86_init_rdrand(c);
/*
* Clear/Set all flags overriden by options, need do it
struct cpu_model_info c_models[4];
void (*c_early_init)(struct cpuinfo_x86 *);
+ void (*c_bsp_init)(struct cpuinfo_x86 *);
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
break;
case 42: /* SandyBridge */
+ case 45: /* SandyBridge, "Romley-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
--- /dev/null
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/archrandom.h>
+#include <asm/sections.h>
+
+static int __init x86_rdrand_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+ return 1;
+}
+__setup("nordrand", x86_rdrand_setup);
+
+/* We can't use arch_get_random_long() here since alternatives haven't run */
+static inline int rdrand_long(unsigned long *v)
+{
+ int ok;
+ asm volatile("1: " RDRAND_LONG "\n\t"
+ "jc 2f\n\t"
+ "decl %0\n\t"
+ "jnz 1b\n\t"
+ "2:"
+ : "=r" (ok), "=a" (*v)
+ : "0" (RDRAND_RETRY_LOOPS));
+ return ok;
+}
+
+/*
+ * Force a reseed cycle; we are architecturally guaranteed a reseed
+ * after no more than 512 128-bit chunks of random data. This also
+ * acts as a test of the CPU capability.
+ */
+#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
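+
+/*
+ * Arithmetic check: 512 chunks * 128 bits = 8K bytes of output, but
+ * dividing the *bit* count by sizeof(unsigned long) makes the loop
+ * pull 64K bytes (8192 longs on 64-bit), overshooting the 512-chunk
+ * reseed guarantee by 8x -- harmless, merely extra-conservative.
+ */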
+
+void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_RANDOM
+ unsigned long tmp;
+ int i, count, ok;
+
+ if (!cpu_has(c, X86_FEATURE_RDRAND))
+ return; /* Nothing to do */
+
+ for (count = i = 0; i < RESEED_LOOP; i++) {
+ ok = rdrand_long(&tmp);
+ if (ok)
+ count++;
+ }
+
+ if (count != RESEED_LOOP)
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+#endif
+}
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = __USER_CS,
+#endif
};
struct pv_init_ops pv_init_ops = {
#ifdef CONFIG_X86_64
case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
+ if (!user_64bit_mode(regs))
/* 32-bit mode: register increment */
return 0;
/* 64-bit mode: REX prefix */
#include <asm/ia32.h>
#include <asm/syscalls.h>
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ *
+ * @flags denotes the allocation direction - bottomup or topdown -
+ * or vDSO; see call sites below.
+ */
+unsigned long align_addr(unsigned long addr, struct file *filp,
+ enum align_flags flags)
+{
+ unsigned long tmp_addr;
+
+ /* handle 32- and 64-bit case with a single conditional */
+ if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+ return addr;
+
+ if (!(current->flags & PF_RANDOMIZE))
+ return addr;
+
+ if (!((flags & ALIGN_VDSO) || filp))
+ return addr;
+
+ tmp_addr = addr;
+
+ /*
+ * We need an address that is <= the original one
+ * only when allocating in the topdown direction.
+ */
+ if (!(flags & ALIGN_TOPDOWN))
+ tmp_addr += va_align.mask;
+
+ tmp_addr &= ~va_align.mask;
+
+ return tmp_addr;
+}
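+
+/*
+ * Example with the F15h mask 0x7000: a bottomup request for
+ * addr = 0x2000 returns (0x2000 + 0x7000) & ~0x7000 = 0x8000, while a
+ * topdown request simply rounds down to 0. Either way, bits [14:12]
+ * of the result are clear.
+ */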
+
+static int __init control_va_addr_alignment(char *str)
+{
+ /* guard against enabling this on other CPU families */
+ if (va_align.flags < 0)
+ return 1;
+
+ if (*str == 0)
+ return 1;
+
+ if (*str == '=')
+ str++;
+
+ if (!strcmp(str, "32"))
+ va_align.flags = ALIGN_VA_32;
+ else if (!strcmp(str, "64"))
+ va_align.flags = ALIGN_VA_64;
+ else if (!strcmp(str, "off"))
+ va_align.flags = 0;
+ else if (!strcmp(str, "on"))
+ va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+ else
+ return 0;
+
+ return 1;
+}
+__setup("align_va_addr", control_va_addr_alignment);
+
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, off)
start_addr = addr;
full_search:
+
+ addr = align_addr(addr, filp, 0);
+
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (end - len < addr) {
mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
+ addr = align_addr(addr, filp, 0);
}
}
/* make sure it can fit in the remaining address space */
if (addr > len) {
- vma = find_vma(mm, addr-len);
- if (!vma || addr <= vma->vm_start)
+ unsigned long tmp_addr = align_addr(addr - len, filp,
+ ALIGN_TOPDOWN);
+
+ vma = find_vma(mm, tmp_addr);
+ if (!vma || tmp_addr + len <= vma->vm_start)
/* remember the address as a hint for next time */
- return mm->free_area_cache = addr-len;
+ return mm->free_area_cache = tmp_addr;
}
if (mm->mmap_base < len)
addr = mm->mmap_base-len;
do {
+ addr = align_addr(addr, filp, ALIGN_TOPDOWN);
+
/*
* Lookup failure means no vma is above this address,
* else if new region fits below vma->vm_start,
#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ __vvar_page = .;
+
+ .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+ /* work around gold bug 13023 */
+ __vvar_beginning_hack = .;
+
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) \
+ . = __vvar_beginning_hack + offset; \
+ *(.vvar_ ## name)
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+
+ } :data
+
+ . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
+
#define VSYSCALL_ADDR (-10*1024*1024)
#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
- . = ALIGN(4096);
__vsyscall_0 = .;
. = VSYSCALL_ADDR;
.vsyscall : AT(VLOAD(.vsyscall)) {
+ /* work around gold bug 13023 */
+ __vsyscall_beginning_hack = .;
*(.vsyscall_0)
- . = 1024;
+ . = __vsyscall_beginning_hack + 1024;
*(.vsyscall_1)
- . = 2048;
+ . = __vsyscall_beginning_hack + 2048;
*(.vsyscall_2)
- . = 4096; /* Pad the whole page. */
+ . = __vsyscall_beginning_hack + 4096; /* Pad the whole page. */
} :user =0xcc
. = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
#undef VVIRT_OFFSET
#undef VVIRT
- __vvar_page = .;
-
- .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
-
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
- . = offset; \
- *(.vvar_ ## name)
-#define __VVAR_KERNEL_LDS
-#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
-#undef EMIT_VVAR
-
- } :data
-
- . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
-
#endif /* CONFIG_X86_64 */
/* Init code and data - will be freed after init */
#include <asm/vgtod.h>
#include <asm/traps.h>
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
local_irq_enable();
- /*
- * Real 64-bit user mode code has cs == __USER_CS. Anything else
- * is bogus.
- */
- if (regs->cs != __USER_CS) {
+ if (!user_64bit_mode(regs)) {
/*
* If we trapped from kernel mode, we might as well OOPS now
* instead of returning to some random address and OOPSing
* and int 0xcc is two bytes long.
*/
vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+
+ trace_emulate_vsyscall(vsyscall_nr);
+
if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
"illegal int 0xcc (exploit attempt?)");
--- /dev/null
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+ TP_PROTO(int nr),
+
+ TP_ARGS(nr),
+
+ TP_STRUCT__entry(__field(int, nr)),
+
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+
+ TP_printk("nr = %d", __entry->nr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>
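+
+/*
+ * Once built in, the new tracepoint shows up under tracefs and can be
+ * enabled the usual way, e.g. (debugfs mount point may vary):
+ *
+ * echo 1 > /sys/kernel/debug/tracing/events/vsyscall/emulate_vsyscall/enable
+ * cat /sys/kernel/debug/tracing/trace
+ */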
.banner = default_banner,
},
- .mapping = {
- .pagetable_reserve = native_pagetable_reserve,
- },
-
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
* but for now it's good enough to assume that long
* mode only uses well known segments or kernel.
*/
- return (!user_mode(regs)) || (regs->cs == __USER_CS);
+ return (!user_mode(regs) || user_64bit_mode(regs));
#endif
case 0x60:
/* 0x64 thru 0x67 are valid prefixes in all modes. */
#endif
;
-static void __init find_early_table_space(unsigned long end, int use_pse,
- int use_gbpages)
+static unsigned long __init find_early_fixmap_space(void)
{
- unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+ unsigned long size = 0;
+#ifdef CONFIG_X86_32
+ int kmap_begin_pmd_idx, kmap_end_pmd_idx;
+ int fixmap_begin_pmd_idx, fixmap_end_pmd_idx;
+ int btmap_begin_pmd_idx;
+
+ fixmap_begin_pmd_idx =
+ __fix_to_virt(__end_of_fixed_addresses - 1) >> PMD_SHIFT;
+ /*
+ * fixmap_end_pmd_idx is the end of the fixmap minus the PMD that
+ * has been defined in the data section by head_32.S (see
+ * initial_pg_fixmap).
+ * Note: This is similar to what early_ioremap_page_table_range_init
+ * does except that the "end" has PMD_SIZE expunged as per previous
+ * comment.
+ */
+ fixmap_end_pmd_idx = (FIXADDR_TOP - 1) >> PMD_SHIFT;
+ btmap_begin_pmd_idx = __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT;
+ kmap_begin_pmd_idx = __fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
+ kmap_end_pmd_idx = __fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
+
+ size = fixmap_end_pmd_idx - fixmap_begin_pmd_idx;
+ /*
+ * early_ioremap_init has already allocated a PMD at
+ * btmap_begin_pmd_idx
+ */
+ if (btmap_begin_pmd_idx < fixmap_end_pmd_idx)
+ size--;
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * see page_table_kmap_check: if the kmap spans multiple PMDs, make
+ * sure the pte pages are allocated contiguously. It might need up
+ * to two additional pte pages to replace the page declared by
+ * head_32.S and the one allocated by early_ioremap_init, if they
+ * are even partially used for the kmap.
+ */
+ if (kmap_begin_pmd_idx != kmap_end_pmd_idx) {
+ if (kmap_end_pmd_idx == fixmap_end_pmd_idx)
+ size++;
+ if (btmap_begin_pmd_idx >= kmap_begin_pmd_idx &&
+ btmap_begin_pmd_idx <= kmap_end_pmd_idx)
+ size++;
+ }
+#endif
+#endif
+ return (size * PMD_SIZE + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+static void __init find_early_table_space(unsigned long start,
+ unsigned long end, int use_pse, int use_gbpages)
+{
+ unsigned long pmds = 0, ptes = 0, tables = 0, good_end = end,
+ pud_mapped = 0, pmd_mapped = 0, size = end - start;
phys_addr_t base;
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ pud_mapped = DIV_ROUND_UP(PFN_PHYS(max_pfn_mapped),
+ (PUD_SIZE * PTRS_PER_PUD));
+ pud_mapped *= (PUD_SIZE * PTRS_PER_PUD);
+ pmd_mapped = DIV_ROUND_UP(PFN_PHYS(max_pfn_mapped),
+ (PMD_SIZE * PTRS_PER_PMD));
+ pmd_mapped *= (PMD_SIZE * PTRS_PER_PMD);
+
+ /*
+ * On x86_64 do not limit the size we need to cover with 4KB pages
+ * depending on the initial allocation because head_64.S always uses
+ * 2MB pages.
+ */
+#ifdef CONFIG_X86_32
+ if (start < PFN_PHYS(max_pfn_mapped)) {
+ if (PFN_PHYS(max_pfn_mapped) < end)
+ size -= PFN_PHYS(max_pfn_mapped) - start;
+ else
+ size = 0;
+ }
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+ if (end > pud_mapped) {
+ unsigned long puds;
+ if (start < pud_mapped)
+ puds = (end - pud_mapped + PUD_SIZE - 1) >> PUD_SHIFT;
+ else
+ puds = (end - start + PUD_SIZE - 1) >> PUD_SHIFT;
+ tables += roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ }
+#endif
if (use_gbpages) {
unsigned long extra;
extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
+#ifndef __PAGETABLE_PMD_FOLDED
+ else if (end > pmd_mapped) {
+ if (start < pmd_mapped)
+ pmds = (end - pmd_mapped + PMD_SIZE - 1) >> PMD_SHIFT;
+ else
+ pmds = (end - start + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
+#endif
tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
unsigned long extra;
extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
-#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
-#endif
ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
} else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ ptes = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ ptes += find_early_fixmap_space();
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
-#ifdef CONFIG_X86_32
- /* for fixmap */
- tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
+ if (!tables)
+ return;
+#ifdef CONFIG_X86_32
good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
- base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
+ base = memblock_find_in_range(0x00, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
-}
-void __init native_pagetable_reserve(u64 start, u64 end)
-{
- memblock_x86_reserve_range(start, end, "PGTABLE");
+ if (pgt_buf_top > pgt_buf_start)
+ memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
+ pgt_buf_top << PAGE_SHIFT, "PGTABLE");
}
struct map_range {
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages);
+ find_early_table_space(start, end, use_pse, use_gbpages);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
__flush_tlb_all();
- /*
- * Reserve the kernel pagetable pages we used (pgt_buf_start -
- * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
- * so that they can be reused for other purposes.
- *
- * On native it just means calling memblock_x86_reserve_range, on Xen it
- * also means marking RW the pagetable pages that we allocated before
- * but that haven't been used.
- *
- * In fact on xen we mark RO the whole range pgt_buf_start -
- * pgt_buf_top, because we have to make sure that when
- * init_memory_mapping reaches the pagetable pages area, it maps
- * RO all the pagetable pages, including the ones that are beyond
- * pgt_buf_end at that time.
- */
- if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
- PFN_PHYS(pgt_buf_end));
+ if (pgt_buf_end != pgt_buf_top)
+ printk(KERN_DEBUG "initial kernel pagetable allocation wasted %lx"
+ " pages\n", pgt_buf_top - pgt_buf_end);
if (!after_bootmem)
early_memtest(start, end);
#include <linux/sched.h>
#include <asm/elf.h>
+struct va_alignment __read_mostly va_align = {
+ .flags = -1,
+};
+
static unsigned int stack_maxrandom_size(void)
{
unsigned int max = 0;
return max;
}
-
/*
* Top of mmap area (just below the process stack).
*
#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
#define MAX_GAP (TASK_SIZE/6*5)
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static int mmap_is_ia32(void)
-{
-#ifdef CONFIG_X86_32
- return 1;
-#endif
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
- return 1;
-#endif
- return 0;
-}
-
static int mmap_is_legacy(void)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
if (inbuf && inlen) {
/* write data to EC */
for (i = 0; i < inlen; i++) {
+ pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
+ outb(inbuf[i], 0x68);
if (wait_on_ibf(0x6c, 0)) {
printk(KERN_ERR "olpc-ec: timeout waiting for"
" EC accept data!\n");
goto err;
}
- pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
- outb(inbuf[i], 0x68);
}
}
if (outbuf && outlen) {
vdso_start:
.incbin "arch/x86/vdso/vdso.so"
vdso_end:
+ .align PAGE_SIZE /* extra data here leaks to userspace. */
.previous
addr = start + (offset << PAGE_SHIFT);
if (addr >= end)
addr = end;
+
+ /*
+ * page-align it here so that get_unmapped_area doesn't
+ * align it wrongfully again to the next page. addr can come in 4K
+ * unaligned here as a result of stack start randomization.
+ */
+ addr = PAGE_ALIGN(addr);
+ addr = align_addr(addr, NULL, ALIGN_VDSO);
+
return addr;
}
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
.name = "Xen",
};
{
}
-static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-{
- /* reserve the range used */
- native_pagetable_reserve(start, end);
-
- /* set as RW the rest */
- printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
- PFN_PHYS(pgt_buf_top));
- while (end < PFN_PHYS(pgt_buf_top)) {
- make_lowmem_page_readwrite(__va(end));
- end += PAGE_SIZE;
- }
-}
-
static void xen_post_allocator_init(void);
static void __init xen_pagetable_setup_done(pgd_t *base)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ case VVAR_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+ idx == VVAR_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
void __init xen_init_mmu_ops(void)
{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
*/
void get_random_bytes(void *buf, int nbytes)
{
- extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
+ char *p = buf;
+
+ while (nbytes) {
+ unsigned long v;
+ int chunk = min(nbytes, (int)sizeof(unsigned long));
+
+ if (!arch_get_random_long(&v))
+ break;
+
+ memcpy(p, &v, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+
+ extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);
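/*
 * Example of the fallback split above: a 20-byte request on 64-bit
 * pulls 8 + 8 + 4 bytes via arch_get_random_long(); should the
 * hardware RNG fail after the first chunk, p has advanced 8 bytes and
 * the remaining 12 come from the nonblocking pool instead.
 */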
DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
unsigned int get_random_int(void)
{
- __u32 *hash = get_cpu_var(get_random_int_hash);
unsigned int ret;
+ __u32 *hash;
+
+ if (arch_get_random_int(&ret))
+ return ret;
+
+ hash = get_cpu_var(get_random_int_hash);
hash[0] += current->pid + jiffies + get_cycles();
md5_transform(hash, random_int_secret);
static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
{
/*
- * We unconditionally cancel the timer here, because otherwise
+ * We always cancel the timer here first, because otherwise
* we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
* when we manage to start the timer before the callback
* returns HRTIMER_RESTART.
int err = 0;
unsigned long flags;
- if (freq <= 0 || freq > 5000)
+ if (freq <= 0 || freq > RTC_MAX_FREQ)
return -EINVAL;
retry:
spin_lock_irqsave(&rtc->irq_task_lock, flags);
#include <linux/errno.h>
#include <linux/topology.h>
#include <linux/wait.h>
+#include <linux/module.h>
#include <asm/irq.h>
#include <asm/ptrace.h>
return d->msi_desc;
}
-int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
+int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner);
+
+static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt,
+ int node)
+{
+ return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE);
+}
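+
+/*
+ * Since irq_alloc_descs() is a static inline, THIS_MODULE expands in
+ * the caller, so e.g.
+ *
+ * irq = irq_alloc_descs(-1, 0, 4, numa_node_id());
+ *
+ * in a driver records that driver as owner of the descriptors; the
+ * owner is then pinned via try_module_get() while handlers are
+ * installed (see the __setup_irq() change below).
+ */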
+
void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
#endif
+ struct module *owner;
const char *name;
} ____cacheline_internodealigned_in_smp;
state->s3 = __seed(i, 15);
}
+#ifdef CONFIG_ARCH_RANDOM
+# include <asm/archrandom.h>
+#else
+static inline int arch_get_random_long(unsigned long *v)
+{
+ return 0;
+}
+static inline int arch_get_random_int(unsigned int *v)
+{
+ return 0;
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_RANDOM_H */
#define RTC_AF 0x20 /* Alarm interrupt */
#define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */
+
+#define RTC_MAX_FREQ 8192
+
#ifdef __KERNEL__
#include <linux/types.h>
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o range.o jump_label.o
+ async.o range.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
for (i = gc->irq_base; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
if (flags & IRQ_GC_INIT_NESTED_LOCK)
raw_spin_unlock(&gc_lock);
for (; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
/* Remove handler first. That will mask the irq line */
static inline int desc_node(struct irq_desc *desc) { return 0; }
#endif
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+ struct module *owner)
{
int cpu;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
desc->name = NULL;
+ desc->owner = owner;
for_each_possible_cpu(cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
desc_smp_init(desc, node);
static inline void free_masks(struct irq_desc *desc) { }
#endif
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
gfp_t gfp = GFP_KERNEL;
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- desc_set_defaults(irq, desc, node);
+ desc_set_defaults(irq, desc, node, owner);
return desc;
kfree(desc);
}
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
struct irq_desc *desc;
int i;
for (i = 0; i < cnt; i++) {
- desc = alloc_desc(start + i, node);
+ desc = alloc_desc(start + i, node, owner);
if (!desc)
goto err;
mutex_lock(&sparse_irq_lock);
nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node);
+ desc = alloc_desc(i, node, NULL);
set_bit(i, allocated_irqs);
irq_insert_desc(i, desc);
}
alloc_masks(&desc[i], GFP_KERNEL, node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- desc_set_defaults(i, &desc[i], node);
+ desc_set_defaults(i, &desc[i], node, NULL);
}
return arch_early_irq_init();
}
dynamic_irq_cleanup(irq);
}
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ struct irq_desc *desc = irq_to_desc(start + i);
+
+ desc->owner = owner;
+ }
return start;
}
* Returns the first irq number or error code
*/
int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner)
{
int start, ret;
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
- return alloc_descs(start, cnt, node);
+ return alloc_descs(start, cnt, node, owner);
err:
mutex_unlock(&sparse_irq_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
/**
* irq_reserve_irqs - mark irqs allocated
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
- desc_set_defaults(irq, desc, desc_node(desc));
+ desc_set_defaults(irq, desc, desc_node(desc), NULL);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
if (desc->irq_data.chip == &no_irq_chip)
return -ENOSYS;
+ if (!try_module_get(desc->owner))
+ return -ENODEV;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
*/
nested = irq_settings_is_nested_thread(desc);
if (nested) {
- if (!new->thread_fn)
- return -EINVAL;
+ if (!new->thread_fn) {
+ ret = -EINVAL;
+ goto out_mput;
+ }
/*
* Replace the primary handler which was provided from
* the driver for non nested interrupt handling by the
t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
new->name);
- if (IS_ERR(t))
- return PTR_ERR(t);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto out_mput;
+ }
/*
* We keep the reference to the task struct even if
* the thread dies to avoid that the interrupt code
kthread_stop(t);
put_task_struct(t);
}
+out_mput:
+ module_put(desc->owner);
return ret;
}
put_task_struct(action->thread);
}
+ module_put(desc->owner);
return action;
}
if (!thread_fn)
return -EINVAL;
handler = irq_default_primary_handler;
+ irqflags |= IRQF_ONESHOT;
}
action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
if (!class)
class = look_up_lock_class(lock, 0);
- if (DEBUG_LOCKS_WARN_ON(!class))
+ /*
+ * If look_up_lock_class() failed to find a class, we're trying
+ * to test if we hold a lock that has never yet been acquired.
+ * Clearly if the lock hasn't been acquired _ever_, we're not
+ * holding it either, so report failure.
+ */
+ if (!class)
return 0;
if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
power:power_frequency
This is for userspace compatibility
and will vanish after 5 kernel iterations,
- namely 2.6.41.
+ namely 3.1.
config CONTEXT_SWITCH_TRACER
bool
$(OUTPUT)python/perf.so: $(PYRF_OBJS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
- --quiet build_ext \
- --build-lib='$(OUTPUT)python' \
- --build-temp='$(OUTPUT)python/temp'
+ --quiet build_ext; \
+ mkdir -p $(OUTPUT)python && \
+ cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
#
# No Perl scripts right now:
#
PYTHON_WORD := $(call shell-wordify,$(PYTHON))
- python-clean := $(PYTHON_WORD) util/setup.py clean \
- --build-lib='$(OUTPUT)python' \
- --build-temp='$(OUTPUT)python/temp'
+ # python extension build directories
+ PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+ PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+ python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
ifdef NO_LIBPYTHON
$(call disable-python)
$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+install-python_ext:
+ $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
install-doc:
$(MAKE) -C Documentation install
### Cleaning rules
clean:
- $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
+ $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
$(RM) $(ALL_PROGRAMS) perf
$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
$(MAKE) -C Documentation/ clean
"-f",
"-m", "1024",
"-c", "1",
- "-e", "lock:lock_acquire:r",
- "-e", "lock:lock_acquired:r",
- "-e", "lock:lock_contended:r",
- "-e", "lock:lock_release:r",
+ "-e", "lock:lock_acquire",
+ "-e", "lock:lock_acquired",
+ "-e", "lock:lock_contended",
+ "-e", "lock:lock_release",
};
static int __cmd_record(int argc, const char **argv)
#include <sched.h>
#include <sys/mman.h>
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
enum write_mode_t {
WRITE_FORCE,
WRITE_APPEND
static int __cmd_record(int argc, const char **argv)
{
- int i;
struct stat st;
int flags;
int err;
for (;;) {
int hits = samples;
- int thread;
mmap_read_all();
waking++;
}
- if (done) {
- for (i = 0; i < evsel_list->cpus->nr; i++) {
- struct perf_evsel *pos;
-
- list_for_each_entry(pos, &evsel_list->entries, node) {
- for (thread = 0;
- thread < evsel_list->threads->nr;
- thread++)
- ioctl(FD(pos, i, thread),
- PERF_EVENT_IOC_DISABLE);
- }
- }
- }
+ if (done)
+ perf_evlist__disable(evsel_list);
}
if (quiet || signr == SIGUSR1)
{
if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
- fprintf(stderr, "selected --sort parent, but no"
- " callchain data. Did you call"
- " perf record without -g?\n");
+ ui__warning("Selected --sort parent, but no "
+ "callchain data. Did you call "
+ "'perf record' without -g?\n");
return -EINVAL;
}
if (symbol_conf.use_callchain) {
- fprintf(stderr, "selected -g but no callchain data."
- " Did you call perf record without"
- " -g?\n");
+ ui__warning("Selected -g but no callchain data. Did "
+ "you call 'perf record' without -g?\n");
return -1;
}
} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
- fprintf(stderr, "Can't register callchain"
- " params\n");
+ ui__warning("Can't register callchain "
+ "params.\n");
return -EINVAL;
}
}
.ordered_samples = true,
};
-static int read_events(void)
+static void read_events(bool destroy, struct perf_session **psession)
{
int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
0, false, &event_ops);
if (session == NULL)
- return -ENOMEM;
+ die("No Memory");
if (perf_session__has_traces(session, "record -R")) {
err = perf_session__process_events(session, &event_ops);
+ if (err)
+ die("Failed to process events, error %d", err);
+
nr_events = session->hists.stats.nr_events[0];
nr_lost_events = session->hists.stats.total_lost;
nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
}
- perf_session__delete(session);
- return err;
+ if (destroy)
+ perf_session__delete(session);
+
+ if (psession)
+ *psession = session;
}
static void print_bad_events(void)
static void __cmd_lat(void)
{
struct rb_node *next;
+ struct perf_session *session;
setup_pager();
- read_events();
+ read_events(false, &session);
sort_lat();
printf("\n ---------------------------------------------------------------------------------------------------------------\n");
print_bad_events();
printf("\n");
+ perf_session__delete(session);
}
static struct trace_sched_handler map_ops = {
max_cpu = sysconf(_SC_NPROCESSORS_CONF);
setup_pager();
- read_events();
+ read_events(true, NULL);
print_bad_events();
}
test_calibrations();
- read_events();
+ read_events(true, NULL);
printf("nr_run_events: %ld\n", nr_run_events);
printf("nr_sleep_events: %ld\n", nr_sleep_events);
static const char * const sched_usage[] = {
- "perf sched [<options>] {record|latency|map|replay|trace}",
+ "perf sched [<options>] {record|latency|map|replay|script}",
NULL
};
int perf_config(config_fn_t fn, void *data)
{
int ret = 0, found = 0;
- char *repo_config = NULL;
const char *home = NULL;
/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
home = getenv("HOME");
if (perf_config_global() && home) {
char *user_config = strdup(mkpath("%s/.perfconfig", home));
- if (!access(user_config, R_OK)) {
- ret += perf_config_from_file(fn, user_config, data);
- found += 1;
+ struct stat st;
+
+ if (user_config == NULL) {
+ warning("Not enough memory to process %s/.perfconfig, "
+ "ignoring it.", home);
+ goto out;
}
- free(user_config);
- }
- repo_config = perf_pathdup("config");
- if (!access(repo_config, R_OK)) {
- ret += perf_config_from_file(fn, repo_config, data);
+ if (stat(user_config, &st) < 0)
+ goto out_free;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ warning("File %s not owned by current user or root, "
+ "ignoring it.", user_config);
+ goto out_free;
+ }
+
+ if (!st.st_size)
+ goto out_free;
+
+ ret += perf_config_from_file(fn, user_config, data);
found += 1;
+out_free:
+ free(user_config);
}
- free(repo_config);
+out:
if (found == 0)
return -1;
return ret;
return 0;
}
+void perf_evlist__disable(struct perf_evlist *evlist)
+{
+ int cpu, thread;
+ struct perf_evsel *pos;
+
+ for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+ list_for_each_entry(pos, &evlist->entries, node) {
+ for (thread = 0; thread < evlist->threads->nr; thread++)
+ ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE);
+ }
+ }
+}
+
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);
+void perf_evlist__disable(struct perf_evlist *evlist);
+
static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
struct cpu_map *cpus,
struct thread_map *threads)
const char *name, bool is_kallsyms)
{
const size_t size = PATH_MAX;
- char *realname, *filename = malloc(size),
- *linkname = malloc(size), *targetname;
+ char *realname, *filename = zalloc(size),
+ *linkname = zalloc(size), *targetname;
int len, err = -1;
if (is_kallsyms) {
int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
{
const size_t size = PATH_MAX;
- char *filename = malloc(size),
- *linkname = malloc(size);
+ char *filename = zalloc(size),
+ *linkname = zalloc(size);
int err = -1;
if (filename == NULL || linkname == NULL)
ret = -ENOMEM;
goto error;
}
- tev->point.module = strdup(module);
- if (tev->point.module == NULL) {
- ret = -ENOMEM;
- goto error;
+
+ if (module) {
+ tev->point.module = strdup(module);
+ if (tev->point.module == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
}
+
tev->point.offset = pev->point.offset;
tev->point.retprobe = pev->point.retprobe;
tev->nargs = pev->nargs;
.tp_repr = (reprfunc)pyrf_throttle_event__repr,
};
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+ sample_members
+ member_def(lost_event, id, T_ULONGLONG, "event id"),
+ member_def(lost_event, lost, T_ULONGLONG, "number of lost events"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: lost, id: %#" PRIx64 ", "
+ "lost: %#" PRIx64 " }",
+ pevent->event.lost.id, pevent->event.lost.lost) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = PyString_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.lost_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_lost_event__doc,
+ .tp_members = pyrf_lost_event__members,
+ .tp_repr = (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+ sample_members
+ member_def(read_event, pid, T_UINT, "event pid"),
+ member_def(read_event, tid, T_UINT, "event tid"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+ return PyString_FromFormat("{ type: read, pid: %u, tid: %u }",
+ pevent->event.read.pid,
+ pevent->event.read.tid);
+ /*
+ * FIXME: return the array of read values,
+ * making this method useful ;-)
+ */
+}
+
+static PyTypeObject pyrf_read_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.read_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_read_event__doc,
+ .tp_members = pyrf_read_event__members,
+ .tp_repr = (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: sample }") < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = PyString_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.sample_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_sample_event__doc,
+ .tp_members = pyrf_sample_event__members,
+ .tp_repr = (reprfunc)pyrf_sample_event__repr,
+};
+
static int pyrf_event__setup_types(void)
{
int err;
pyrf_mmap_event__type.tp_new =
pyrf_task_event__type.tp_new =
pyrf_comm_event__type.tp_new =
+ pyrf_lost_event__type.tp_new =
+ pyrf_read_event__type.tp_new =
+ pyrf_sample_event__type.tp_new =
pyrf_throttle_event__type.tp_new = PyType_GenericNew;
err = PyType_Ready(&pyrf_mmap_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_lost_event__type);
if (err < 0)
goto out;
err = PyType_Ready(&pyrf_task_event__type);
err = PyType_Ready(&pyrf_throttle_event__type);
if (err < 0)
goto out;
+ err = PyType_Ready(&pyrf_read_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_sample_event__type);
+ if (err < 0)
+ goto out;
out:
return err;
}
static PyTypeObject *pyrf_event__type[] = {
[PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
- [PERF_RECORD_LOST] = &pyrf_mmap_event__type,
+ [PERF_RECORD_LOST] = &pyrf_lost_event__type,
[PERF_RECORD_COMM] = &pyrf_comm_event__type,
[PERF_RECORD_EXIT] = &pyrf_task_event__type,
[PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
[PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
[PERF_RECORD_FORK] = &pyrf_task_event__type,
- [PERF_RECORD_READ] = &pyrf_mmap_event__type,
- [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type,
+ [PERF_RECORD_READ] = &pyrf_read_event__type,
+ [PERF_RECORD_SAMPLE] = &pyrf_sample_event__type,
};
static PyObject *pyrf_event__new(union perf_event *event)
from distutils.core import setup, Extension
from os import getenv
+from distutils.command.build_ext import build_ext as _build_ext
+from distutils.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+ def finalize_options(self):
+ _build_ext.finalize_options(self)
+ self.build_lib = build_lib
+ self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+ def finalize_options(self):
+ _install_lib.finalize_options(self)
+ self.build_dir = build_lib
+
+
cflags = ['-fno-strict-aliasing', '-Wno-write-strings']
cflags += getenv('CFLAGS', '').split()
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+
perf = Extension('perf',
sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
author_email='acme@redhat.com',
license='GPLv2',
url='http://perf.wiki.kernel.org',
- ext_modules=[perf])
+ ext_modules=[perf],
+ cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
dso->adjust_symbols = 0;
if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
+ struct stat st;
+
+ if (stat(dso->name, &st) < 0)
+ return -1;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ pr_warning("File %s not owned by current user or root, "
+ "ignoring it.\n", dso->name);
+ return -1;
+ }
+
ret = dso__load_perf_map(dso, map, filter);
dso->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT :
SYMTAB__NOT_FOUND;