behaviour to be specified. Bit 0 enables warnings,
bit 1 enables fixups, and bit 2 sends a segfault.
+ align_va_addr= [X86-64]
+ Align virtual addresses by clearing slice [14:12] when
+ allocating a VMA at process creation time. This option
+ can give up to a 3% performance improvement on AMD F15h
+ machines (where it is enabled by default) for
+ CPU-intensive benchmarks; results in microbenchmarks
+ vary widely depending on workload and compiler.
+
+ 32: only for 32-bit processes
+ 64: only for 64-bit processes
+ on: enable for both 32- and 64-bit processes
+ off: disable for both 32- and 64-bit processes
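+
+ Example: to enable the alignment for 64-bit
+ processes only, boot with:
+
+ align_va_addr=64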
+
amd_iommu= [HW,X86-64]
Pass parameters to the AMD IOMMU driver in the system.
Possible values are:
noresidual [PPC] Don't use residual data on PReP machines.
+ nordrand [X86] Disable the direct use of the RDRAND
+ instruction even if it is supported by the
+ processor. RDRAND is still available to user
+ space applications.
+
noresume [SWSUSP] Disables resume and restores original swap
space.
def_bool y
depends on X86_PAT
+config ARCH_RANDOM
+ def_bool y
+ prompt "x86 architectural random number generator" if EXPERT
+ ---help---
+ Enable the x86 architectural RDRAND instruction
+ (Intel Bull Mountain technology) to generate random numbers.
+ If supported, this is a high bandwidth, cryptographically
+ secure hardware random number generator.
+
config EFI
bool "EFI runtime service support"
depends on ACPI
--- /dev/null
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#ifndef ASM_X86_ARCHRANDOM_H
+#define ASM_X86_ARCHRANDOM_H
+
+#include <asm/processor.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/nops.h>
+
+#define RDRAND_RETRY_LOOPS 10
+
+#define RDRAND_INT ".byte 0x0f,0xc7,0xf0"
+#ifdef CONFIG_X86_64
+# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0"
+#else
+# define RDRAND_LONG RDRAND_INT
+#endif
+
+#ifdef CONFIG_ARCH_RANDOM
+
+#define GET_RANDOM(name, type, rdrand, nop) \
+static inline int name(type *v) \
+{ \
+ int ok; \
+ alternative_io("movl $0, %0\n\t" \
+ nop, \
+ "\n1: " rdrand "\n\t" \
+ "jc 2f\n\t" \
+ "decl %0\n\t" \
+ "jnz 1b\n\t" \
+ "2:", \
+ X86_FEATURE_RDRAND, \
+ ASM_OUTPUT2("=r" (ok), "=a" (*v)), \
+ "0" (RDRAND_RETRY_LOOPS)); \
+ return ok; \
+}
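+
+/*
+ * For reference, a rough sketch of what the macro generates for
+ * arch_get_random_long() once alternatives have run and RDRAND is
+ * present (pseudo-C; the real body is the asm above):
+ *
+ * static inline int arch_get_random_long(unsigned long *v)
+ * {
+ * int ok = RDRAND_RETRY_LOOPS;
+ * do {
+ * if (rdrand(v)) // CF set on success
+ * return ok; // non-zero: success
+ * } while (--ok);
+ * return 0; // retries exhausted
+ * }
+ *
+ * When RDRAND is absent, the alternative patches in "movl $0, ok"
+ * plus a nop, so the function returns 0 immediately.
+ */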
+
+#ifdef CONFIG_X86_64
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4);
+
+#else
+
+GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3);
+GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* CONFIG_ARCH_RANDOM */
+
+extern void x86_init_rdrand(struct cpuinfo_x86 *c);
+
+#endif /* ASM_X86_ARCHRANDOM_H */
#endif
+#define xadd(ptr, inc) \
+ do { \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ asm volatile (LOCK_PREFIX "xaddb %b0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 2: \
+ asm volatile (LOCK_PREFIX "xaddw %w0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 4: \
+ asm volatile (LOCK_PREFIX "xaddl %0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ } \
+ } while(0)
+
#define cmpxchg8b(ptr, o1, o2, n1, n2) \
({ \
char __ret; \
cmpxchg_local((ptr), (o), (n)); \
})
+#define xadd(ptr, inc) \
+ do { \
+ switch (sizeof(*(ptr))) { \
+ case 1: \
+ asm volatile (LOCK_PREFIX "xaddb %b0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 2: \
+ asm volatile (LOCK_PREFIX "xaddw %w0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 4: \
+ asm volatile (LOCK_PREFIX "xaddl %0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ case 8: \
+ asm volatile (LOCK_PREFIX "xaddq %q0, %1\n" \
+ : "+r" (inc), "+m" (*(ptr)) \
+ : : "memory", "cc"); \
+ break; \
+ } \
+ } while(0)
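+
+/*
+ * Usage sketch: xadd() is an atomic fetch-and-add; 'inc' comes back
+ * holding the old value of '*ptr'. The ticket-lock claim path later
+ * in this series relies on exactly that:
+ *
+ * struct __raw_tickets t = { .tail = 1 };
+ * xadd(&lock->tickets, t); // t now holds the previous head/tail
+ */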
+
#define cmpxchg16b(ptr, o1, o2, n1, n2) \
({ \
char __ret; \
desc->base2 = (info->base_addr & 0xff000000) >> 24;
/*
- * Don't allow setting of the lm bit. It is useless anyway
- * because 64bit system calls require __USER_CS:
+ * Don't allow setting of the lm bit. It would confuse
+ * user_64bit_mode and would get overridden by sysret anyway.
*/
desc->l = 0;
}
/*
* ELF register definitions..
*/
+#include <linux/thread_info.h>
#include <asm/ptrace.h>
#include <asm/user.h>
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+#ifdef CONFIG_X86_32
+ return 1;
+#endif
+#ifdef CONFIG_IA32_EMULATION
+ if (test_thread_flag(TIF_IA32))
+ return 1;
+#endif
+ return 0;
+}
+
+/* The first two values are special, do not change. See align_addr() */
+enum align_flags {
+ ALIGN_VA_32 = BIT(0),
+ ALIGN_VA_64 = BIT(1),
+ ALIGN_VDSO = BIT(2),
+ ALIGN_TOPDOWN = BIT(3),
+};
+
+struct va_alignment {
+ int flags;
+ unsigned long mask;
+} ____cacheline_aligned;
+
+extern struct va_alignment va_align;
+extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
#endif /* _ASM_X86_ELF_H */
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
+#include <asm/pgtable_types.h>
struct page;
struct thread_struct;
struct pv_info {
unsigned int kernel_rpl;
int shared_kernel_pmd;
+
+#ifdef CONFIG_X86_64
+ u16 extra_user_64bit_cs; /* __USER_CS if none */
+#endif
+
int paravirt_enabled;
const char *name;
};
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
-extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
#ifdef __KERNEL__
#include <linux/init.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt_types.h>
+#endif
struct cpuinfo_x86;
struct task_struct;
#endif
}
+#ifdef CONFIG_X86_64
+static inline bool user_64bit_mode(struct pt_regs *regs)
+{
+#ifndef CONFIG_PARAVIRT
+ /*
+ * On non-paravirt systems, this is the only long mode CPL 3
+ * selector. We do not allow long mode selectors in the LDT.
+ */
+ return regs->cs == __USER_CS;
+#else
+ /* Headers are too twisted for this to go in paravirt.h. */
+ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+#endif
+}
+#endif
+
/*
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
* when it traps. The previous stack will be directly underneath the saved
* On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
* (PPro errata 66, 92)
*/
-# define UNLOCK_LOCK_PREFIX LOCK_PREFIX
+static __always_inline void __ticket_unlock_release(struct arch_spinlock *lock)
+{
+ if (sizeof(lock->tickets.head) == sizeof(u8))
+ asm volatile(LOCK_PREFIX "incb %0"
+ : "+m" (lock->tickets.head) : : "memory");
+ else
+ asm volatile(LOCK_PREFIX "incw %0"
+ : "+m" (lock->tickets.head) : : "memory");
+
+}
#else
-# define UNLOCK_LOCK_PREFIX
+static __always_inline void __ticket_unlock_release(struct arch_spinlock *lock)
+{
+ lock->tickets.head++;
+}
#endif
/*
* save some instructions and make the code more elegant. There really isn't
* much between them in performance though, especially as locks are out of line.
*/
-#if (NR_CPUS < 256)
-#define TICKET_SHIFT 8
-
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline struct __raw_tickets __ticket_spin_claim(struct arch_spinlock *lock)
{
- short inc = 0x0100;
-
- asm volatile (
- LOCK_PREFIX "xaddw %w0, %1\n"
- "1:\t"
- "cmpb %h0, %b0\n\t"
- "je 2f\n\t"
- "rep ; nop\n\t"
- "movb %1, %b0\n\t"
- /* don't need lfence here, because loads are in-order */
- "jmp 1b\n"
- "2:"
- : "+Q" (inc), "+m" (lock->slock)
- :
- : "memory", "cc");
-}
+ register struct __raw_tickets tickets = { .tail = 1 };
-static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
-{
- int tmp, new;
-
- asm volatile("movzwl %2, %0\n\t"
- "cmpb %h0,%b0\n\t"
- "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
- "jne 1f\n\t"
- LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
- "1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
- :
- : "memory", "cc");
-
- return tmp;
-}
+ xadd(&lock->tickets, tickets);
-static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
-{
- asm volatile(UNLOCK_LOCK_PREFIX "incb %0"
- : "+m" (lock->slock)
- :
- : "memory", "cc");
+ return tickets;
}
-#else
-#define TICKET_SHIFT 16
-static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
{
- int inc = 0x00010000;
- int tmp;
-
- asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
- "movzwl %w0, %2\n\t"
- "shrl $16, %0\n\t"
- "1:\t"
- "cmpl %0, %2\n\t"
- "je 2f\n\t"
- "rep ; nop\n\t"
- "movzwl %1, %2\n\t"
- /* don't need lfence here, because loads are in-order */
- "jmp 1b\n"
- "2:"
- : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
- :
- : "memory", "cc");
+ register struct __raw_tickets inc;
+
+ inc = __ticket_spin_claim(lock);
+
+ for (;;) {
+ if (inc.head == inc.tail)
+ goto out;
+ cpu_relax();
+ inc.head = ACCESS_ONCE(lock->tickets.head);
+ }
+out: barrier(); /* make sure nothing creeps before the lock is taken */
}
static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
- int tmp;
- int new;
-
- asm volatile("movl %2,%0\n\t"
- "movl %0,%1\n\t"
- "roll $16, %0\n\t"
- "cmpl %0,%1\n\t"
- "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
- "jne 1f\n\t"
- LOCK_PREFIX "cmpxchgl %1,%2\n\t"
- "1:"
- "sete %b1\n\t"
- "movzbl %b1,%0\n\t"
- : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
- :
- : "memory", "cc");
-
- return tmp;
+ arch_spinlock_t old, new;
+
+ old.tickets = ACCESS_ONCE(lock->tickets);
+ if (old.tickets.head != old.tickets.tail)
+ return 0;
+
+ new.head_tail = old.head_tail + (1 << TICKET_SHIFT);
+
+ /* cmpxchg is a full barrier, so nothing can move before it */
+ return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
}
static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
- asm volatile(UNLOCK_LOCK_PREFIX "incw %0"
- : "+m" (lock->slock)
- :
- : "memory", "cc");
+ barrier(); /* prevent reordering out of locked region */
+ __ticket_unlock_release(lock);
+ barrier(); /* prevent reordering into locked region */
}
-#endif
static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
- int tmp = ACCESS_ONCE(lock->slock);
+ struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+ return !!(tmp.tail ^ tmp.head);
}
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
- int tmp = ACCESS_ONCE(lock->slock);
+ struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
- return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+ return ((tmp.tail - tmp.head) & TICKET_MASK) > 1;
}
#ifndef CONFIG_PARAVIRT_SPINLOCKS
# error "please don't include this file directly"
#endif
+#include <linux/types.h>
+
+#if (CONFIG_NR_CPUS < 256)
+typedef u8 __ticket_t;
+typedef u16 __ticketpair_t;
+#else
+typedef u16 __ticket_t;
+typedef u32 __ticketpair_t;
+#endif
+
+#define TICKET_SHIFT (sizeof(__ticket_t) * 8)
+#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
+
typedef struct arch_spinlock {
- unsigned int slock;
+ union {
+ __ticketpair_t head_tail;
+ struct __raw_tickets {
+ __ticket_t head, tail;
+ } tickets;
+ };
} arch_spinlock_t;
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
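+
+/*
+ * Layout example for the NR_CPUS < 256 case (u8 tickets, u16 pair,
+ * little endian): head is the low byte, tail the high byte, so
+ *
+ * head_tail == 0x0201 => head = 1, tail = 2 (held, no waiters)
+ * head_tail + (1 << TICKET_SHIFT) => tail bumped to 3, head untouched
+ *
+ * the latter being the "one more waiter" value the trylock above builds.
+ */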
#include <asm/rwlock.h>
void (*banner)(void);
};
-/**
- * struct x86_init_mapping - platform specific initial kernel pagetable setup
- * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
- *
- * For more details on the purpose of this hook, look in
- * init_memory_mapping and the commit that added it.
- */
-struct x86_init_mapping {
- void (*pagetable_reserve)(u64 start, u64 end);
-};
-
/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
- struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
- mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y += proc.o capflags.o powerflags.o common.o
obj-y += vmware.o hypervisor.o sched.o mshyperv.o
+obj-y += rdrand.o
obj-$(CONFIG_X86_32) += bugs.o
obj-$(CONFIG_X86_64) += bugs_64.o
#include <linux/init.h>
#include <linux/bitops.h>
+#include <linux/elf.h>
#include <linux/mm.h>
#include <linux/io.h>
#endif
}
+static void __cpuinit bsp_init_amd(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
+
+ if (c->x86 > 0x10 ||
+ (c->x86 == 0x10 && c->x86_model >= 0x2)) {
+ u64 val;
+
+ rdmsrl(MSR_K7_HWCR, val);
+ if (!(val & BIT(24)))
+ printk(KERN_WARNING FW_BUG "TSC doesn't count "
+ "with P0 frequency!\n");
+ }
+ }
+
+ if (c->x86 == 0x15) {
+ unsigned long upperbit;
+ u32 cpuid, assoc;
+
+ cpuid = cpuid_edx(0x80000005);
+ assoc = (cpuid >> 16) & 0xff;
+ upperbit = ((cpuid >> 24) << 10) / assoc;
+
+ va_align.mask = (upperbit - 1) & PAGE_MASK;
+ va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+ }
+}
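+
+/*
+ * Worked example, assuming the common F15h configuration of a 64K,
+ * 2-way L1I: CPUID 0x80000005 EDX reports the size in KB in bits
+ * 31-24 and the associativity in bits 23-16, so
+ *
+ * upperbit = (64 << 10) / 2 = 32K (the way size)
+ * va_align.mask = (32K - 1) & PAGE_MASK = 0x7000
+ *
+ * i.e. bits [14:12] of the virtual address, matching the
+ * align_va_addr documentation above.
+ */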
+
static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
{
early_init_amd_mc(c);
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
}
#endif
-
- /* We need to do the following only once */
- if (c != &boot_cpu_data)
- return;
-
- if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
-
- if (c->x86 > 0x10 ||
- (c->x86 == 0x10 && c->x86_model >= 0x2)) {
- u64 val;
-
- rdmsrl(MSR_K7_HWCR, val);
- if (!(val & BIT(24)))
- printk(KERN_WARNING FW_BUG "TSC doesn't count "
- "with P0 frequency!\n");
- }
- }
}
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
.c_size_cache = amd_size_cache,
#endif
.c_early_init = early_init_amd,
+ .c_bsp_init = bsp_init_amd,
.c_init = init_amd,
.c_x86_vendor = X86_VENDOR_AMD,
};
#include <asm/stackprotector.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
+#include <asm/archrandom.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
filter_cpuid_features(c, false);
setup_smep(c);
+
+ if (this_cpu->c_bsp_init)
+ this_cpu->c_bsp_init(c);
}
void __init early_cpu_init(void)
#endif
init_hypervisor(c);
+ x86_init_rdrand(c);
/*
* Clear/Set all flags overriden by options, need do it
struct cpu_model_info c_models[4];
void (*c_early_init)(struct cpuinfo_x86 *);
+ void (*c_bsp_init)(struct cpuinfo_x86 *);
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int);
break;
case 42: /* SandyBridge */
+ case 45: /* SandyBridge, "Romley-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
--- /dev/null
+/*
+ * This file is part of the Linux kernel.
+ *
+ * Copyright (c) 2011, Intel Corporation
+ * Authors: Fenghua Yu <fenghua.yu@intel.com>,
+ * H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <asm/processor.h>
+#include <asm/archrandom.h>
+#include <asm/sections.h>
+
+static int __init x86_rdrand_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_RDRAND);
+ return 1;
+}
+__setup("nordrand", x86_rdrand_setup);
+
+/* We can't use arch_get_random_long() here since alternatives haven't run */
+static inline int rdrand_long(unsigned long *v)
+{
+ int ok;
+ asm volatile("1: " RDRAND_LONG "\n\t"
+ "jc 2f\n\t"
+ "decl %0\n\t"
+ "jnz 1b\n\t"
+ "2:"
+ : "=r" (ok), "=a" (*v)
+ : "0" (RDRAND_RETRY_LOOPS));
+ return ok;
+}
+
+/*
+ * Force a reseed cycle; we are architecturally guaranteed a reseed
+ * after no more than 512 128-bit chunks of random data. This also
+ * acts as a test of the CPU capability.
+ */
+#define RESEED_LOOP ((512*128)/sizeof(unsigned long))
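+
+/*
+ * Arithmetic check: 512 chunks * 128 bits = 8K bytes of output, but
+ * dividing the *bit* count by sizeof(unsigned long) makes the loop
+ * pull 64K bytes (8192 longs on 64-bit), overshooting the 512-chunk
+ * reseed guarantee by 8x -- harmless, merely extra-conservative.
+ */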
+
+void __cpuinit x86_init_rdrand(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_ARCH_RANDOM
+ unsigned long tmp;
+ int i, count, ok;
+
+ if (!cpu_has(c, X86_FEATURE_RDRAND))
+ return; /* Nothing to do */
+
+ for (count = i = 0; i < RESEED_LOOP; i++) {
+ ok = rdrand_long(&tmp);
+ if (ok)
+ count++;
+ }
+
+ if (count != RESEED_LOOP)
+ clear_cpu_cap(c, X86_FEATURE_RDRAND);
+#endif
+}
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = __USER_CS,
+#endif
};
struct pv_init_ops pv_init_ops = {
#ifdef CONFIG_X86_64
case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
+ if (!user_64bit_mode(regs))
/* 32-bit mode: register increment */
return 0;
/* 64-bit mode: REX prefix */
#include <asm/ia32.h>
#include <asm/syscalls.h>
+/*
+ * Align a virtual address to avoid aliasing in the I$ on AMD F15h.
+ *
+ * @flags denotes the allocation direction - bottomup or topdown -
+ * or vDSO; see call sites below.
+ */
+unsigned long align_addr(unsigned long addr, struct file *filp,
+ enum align_flags flags)
+{
+ unsigned long tmp_addr;
+
+ /* handle 32- and 64-bit case with a single conditional */
+ if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
+ return addr;
+
+ if (!(current->flags & PF_RANDOMIZE))
+ return addr;
+
+ if (!((flags & ALIGN_VDSO) || filp))
+ return addr;
+
+ tmp_addr = addr;
+
+ /*
+ * We need an address that is <= the original one
+ * only when allocating in the topdown direction.
+ */
+ if (!(flags & ALIGN_TOPDOWN))
+ tmp_addr += va_align.mask;
+
+ tmp_addr &= ~va_align.mask;
+
+ return tmp_addr;
+}
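+
+/*
+ * Example with the F15h mask 0x7000: a bottomup request for
+ * addr = 0x2000 returns (0x2000 + 0x7000) & ~0x7000 = 0x8000, while a
+ * topdown request simply rounds down to 0. Either way, bits [14:12]
+ * of the result are clear.
+ */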
+
+static int __init control_va_addr_alignment(char *str)
+{
+ /* guard against enabling this on other CPU families */
+ if (va_align.flags < 0)
+ return 1;
+
+ if (*str == 0)
+ return 1;
+
+ if (*str == '=')
+ str++;
+
+ if (!strcmp(str, "32"))
+ va_align.flags = ALIGN_VA_32;
+ else if (!strcmp(str, "64"))
+ va_align.flags = ALIGN_VA_64;
+ else if (!strcmp(str, "off"))
+ va_align.flags = 0;
+ else if (!strcmp(str, "on"))
+ va_align.flags = ALIGN_VA_32 | ALIGN_VA_64;
+ else
+ return 0;
+
+ return 1;
+}
+__setup("align_va_addr", control_va_addr_alignment);
+
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, off)
start_addr = addr;
full_search:
+
+ addr = align_addr(addr, filp, 0);
+
for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
if (end - len < addr) {
mm->cached_hole_size = vma->vm_start - addr;
addr = vma->vm_end;
+ addr = align_addr(addr, filp, 0);
}
}
/* make sure it can fit in the remaining address space */
if (addr > len) {
- vma = find_vma(mm, addr-len);
- if (!vma || addr <= vma->vm_start)
+ unsigned long tmp_addr = align_addr(addr - len, filp,
+ ALIGN_TOPDOWN);
+
+ vma = find_vma(mm, tmp_addr);
+ if (!vma || tmp_addr + len <= vma->vm_start)
/* remember the address as a hint for next time */
- return mm->free_area_cache = addr-len;
+ return mm->free_area_cache = tmp_addr;
}
if (mm->mmap_base < len)
addr = mm->mmap_base-len;
do {
+ addr = align_addr(addr, filp, ALIGN_TOPDOWN);
+
/*
* Lookup failure means no vma is above this address,
* else if new region fits below vma->vm_start,
#ifdef CONFIG_X86_64
+ . = ALIGN(PAGE_SIZE);
+ __vvar_page = .;
+
+ .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+ /* work around gold bug 13023 */
+ __vvar_beginning_hack = .;
+
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) \
+ . = __vvar_beginning_hack + offset; \
+ *(.vvar_ ## name)
+#define __VVAR_KERNEL_LDS
+#include <asm/vvar.h>
+#undef __VVAR_KERNEL_LDS
+#undef EMIT_VVAR
+
+ } :data
+
+ . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
+
#define VSYSCALL_ADDR (-10*1024*1024)
#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
- . = ALIGN(4096);
__vsyscall_0 = .;
. = VSYSCALL_ADDR;
.vsyscall : AT(VLOAD(.vsyscall)) {
+ /* work around gold bug 13023 */
+ __vsyscall_beginning_hack = .;
*(.vsyscall_0)
- . = 1024;
+ . = __vsyscall_beginning_hack + 1024;
*(.vsyscall_1)
- . = 2048;
+ . = __vsyscall_beginning_hack + 2048;
*(.vsyscall_2)
- . = 4096; /* Pad the whole page. */
+ . = __vsyscall_beginning_hack + 4096; /* Pad the whole page. */
} :user =0xcc
. = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
#undef VVIRT_OFFSET
#undef VVIRT
- __vvar_page = .;
-
- .vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
-
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
- . = offset; \
- *(.vvar_ ## name)
-#define __VVAR_KERNEL_LDS
-#include <asm/vvar.h>
-#undef __VVAR_KERNEL_LDS
-#undef EMIT_VVAR
-
- } :data
-
- . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE);
-
#endif /* CONFIG_X86_64 */
/* Init code and data - will be freed after init */
#include <asm/vgtod.h>
#include <asm/traps.h>
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
local_irq_enable();
- /*
- * Real 64-bit user mode code has cs == __USER_CS. Anything else
- * is bogus.
- */
- if (regs->cs != __USER_CS) {
+ if (!user_64bit_mode(regs)) {
/*
* If we trapped from kernel mode, we might as well OOPS now
* instead of returning to some random address and OOPSing
* and int 0xcc is two bytes long.
*/
vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+
+ trace_emulate_vsyscall(vsyscall_nr);
+
if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
"illegal int 0xcc (exploit attempt?)");
--- /dev/null
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+ TP_PROTO(int nr),
+
+ TP_ARGS(nr),
+
+ TP_STRUCT__entry(__field(int, nr)),
+
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+
+ TP_printk("nr = %d", __entry->nr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>
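+
+/*
+ * Once built in, the new tracepoint shows up under tracefs and can be
+ * enabled the usual way, e.g. (debugfs mount point may vary):
+ *
+ * echo 1 > /sys/kernel/debug/tracing/events/vsyscall/emulate_vsyscall/enable
+ * cat /sys/kernel/debug/tracing/trace
+ */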
.banner = default_banner,
},
- .mapping = {
- .pagetable_reserve = native_pagetable_reserve,
- },
-
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
* but for now it's good enough to assume that long
* mode only uses well known segments or kernel.
*/
- return (!user_mode(regs)) || (regs->cs == __USER_CS);
+ return (!user_mode(regs) || user_64bit_mode(regs));
#endif
case 0x60:
/* 0x64 thru 0x67 are valid prefixes in all modes. */
#endif
;
-static void __init find_early_table_space(unsigned long end, int use_pse,
- int use_gbpages)
+static unsigned long __init find_early_fixmap_space(void)
{
- unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+ unsigned long size = 0;
+#ifdef CONFIG_X86_32
+ int kmap_begin_pmd_idx, kmap_end_pmd_idx;
+ int fixmap_begin_pmd_idx, fixmap_end_pmd_idx;
+ int btmap_begin_pmd_idx;
+
+ fixmap_begin_pmd_idx =
+ __fix_to_virt(__end_of_fixed_addresses - 1) >> PMD_SHIFT;
+ /*
+ * fixmap_end_pmd_idx is the end of the fixmap minus the PMD that
+ * has been defined in the data section by head_32.S (see
+ * initial_pg_fixmap).
+ * Note: This is similar to what early_ioremap_page_table_range_init
+ * does except that the "end" has PMD_SIZE expunged as per previous
+ * comment.
+ */
+ fixmap_end_pmd_idx = (FIXADDR_TOP - 1) >> PMD_SHIFT;
+ btmap_begin_pmd_idx = __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT;
+ kmap_begin_pmd_idx = __fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
+ kmap_end_pmd_idx = __fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
+
+ size = fixmap_end_pmd_idx - fixmap_begin_pmd_idx;
+ /*
+ * early_ioremap_init has already allocated a PMD at
+ * btmap_begin_pmd_idx
+ */
+ if (btmap_begin_pmd_idx < fixmap_end_pmd_idx)
+ size--;
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * see page_table_kmap_check: if the kmap spans multiple PMDs, make
+ * sure the pte pages are allocated contiguously. It might need up
+ * to two additional pte pages to replace the page declared by
+ * head_32.S and the one allocated by early_ioremap_init, if they
+ * are even partially used for the kmap.
+ */
+ if (kmap_begin_pmd_idx != kmap_end_pmd_idx) {
+ if (kmap_end_pmd_idx == fixmap_end_pmd_idx)
+ size++;
+ if (btmap_begin_pmd_idx >= kmap_begin_pmd_idx &&
+ btmap_begin_pmd_idx <= kmap_end_pmd_idx)
+ size++;
+ }
+#endif
+#endif
+ return (size * PMD_SIZE + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+static void __init find_early_table_space(unsigned long start,
+ unsigned long end, int use_pse, int use_gbpages)
+{
+ unsigned long pmds = 0, ptes = 0, tables = 0, good_end = end,
+ pud_mapped = 0, pmd_mapped = 0, size = end - start;
phys_addr_t base;
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ pud_mapped = DIV_ROUND_UP(PFN_PHYS(max_pfn_mapped),
+ (PUD_SIZE * PTRS_PER_PUD));
+ pud_mapped *= (PUD_SIZE * PTRS_PER_PUD);
+ pmd_mapped = DIV_ROUND_UP(PFN_PHYS(max_pfn_mapped),
+ (PMD_SIZE * PTRS_PER_PMD));
+ pmd_mapped *= (PMD_SIZE * PTRS_PER_PMD);
+
+ /*
+ * On x86_64 do not limit the size we need to cover with 4KB pages
+ * depending on the initial allocation because head_64.S always uses
+ * 2MB pages.
+ */
+#ifdef CONFIG_X86_32
+ if (start < PFN_PHYS(max_pfn_mapped)) {
+ if (PFN_PHYS(max_pfn_mapped) < end)
+ size -= PFN_PHYS(max_pfn_mapped) - start;
+ else
+ size = 0;
+ }
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+ if (end > pud_mapped) {
+ unsigned long puds;
+ if (start < pud_mapped)
+ puds = (end - pud_mapped + PUD_SIZE - 1) >> PUD_SHIFT;
+ else
+ puds = (end - start + PUD_SIZE - 1) >> PUD_SHIFT;
+ tables += roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ }
+#endif
if (use_gbpages) {
unsigned long extra;
extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
+#ifndef __PAGETABLE_PMD_FOLDED
+ else if (end > pmd_mapped) {
+ if (start < pmd_mapped)
+ pmds = (end - pmd_mapped + PMD_SIZE - 1) >> PMD_SHIFT;
+ else
+ pmds = (end - start + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
+#endif
tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
unsigned long extra;
extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
-#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
-#endif
ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
} else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ ptes = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ ptes += find_early_fixmap_space();
tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
-#ifdef CONFIG_X86_32
- /* for fixmap */
- tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
+ if (!tables)
+ return;
+#ifdef CONFIG_X86_32
good_end = max_pfn_mapped << PAGE_SHIFT;
#endif
- base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
+ base = memblock_find_in_range(0x00, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");
printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
-}
-void __init native_pagetable_reserve(u64 start, u64 end)
-{
- memblock_x86_reserve_range(start, end, "PGTABLE");
+ if (pgt_buf_top > pgt_buf_start)
+ memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
+ pgt_buf_top << PAGE_SHIFT, "PGTABLE");
}
struct map_range {
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages);
+ find_early_table_space(start, end, use_pse, use_gbpages);
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
__flush_tlb_all();
- /*
- * Reserve the kernel pagetable pages we used (pgt_buf_start -
- * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
- * so that they can be reused for other purposes.
- *
- * On native it just means calling memblock_x86_reserve_range, on Xen it
- * also means marking RW the pagetable pages that we allocated before
- * but that haven't been used.
- *
- * In fact on xen we mark RO the whole range pgt_buf_start -
- * pgt_buf_top, because we have to make sure that when
- * init_memory_mapping reaches the pagetable pages area, it maps
- * RO all the pagetable pages, including the ones that are beyond
- * pgt_buf_end at that time.
- */
- if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
- PFN_PHYS(pgt_buf_end));
+ if (pgt_buf_end != pgt_buf_top)
+ printk(KERN_DEBUG "initial kernel pagetable allocation wasted %lx"
+ " pages\n", pgt_buf_top - pgt_buf_end);
if (!after_bootmem)
early_memtest(start, end);
#include <linux/sched.h>
#include <asm/elf.h>
+struct va_alignment __read_mostly va_align = {
+ .flags = -1,
+};
+
static unsigned int stack_maxrandom_size(void)
{
unsigned int max = 0;
return max;
}
-
/*
* Top of mmap area (just below the process stack).
*
#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
#define MAX_GAP (TASK_SIZE/6*5)
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static int mmap_is_ia32(void)
-{
-#ifdef CONFIG_X86_32
- return 1;
-#endif
-#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
- return 1;
-#endif
- return 0;
-}
-
static int mmap_is_legacy(void)
{
if (current->personality & ADDR_COMPAT_LAYOUT)
if (inbuf && inlen) {
/* write data to EC */
for (i = 0; i < inlen; i++) {
+ pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
+ outb(inbuf[i], 0x68);
if (wait_on_ibf(0x6c, 0)) {
printk(KERN_ERR "olpc-ec: timeout waiting for"
" EC accept data!\n");
goto err;
}
- pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
- outb(inbuf[i], 0x68);
}
}
if (outbuf && outlen) {
vdso_start:
.incbin "arch/x86/vdso/vdso.so"
vdso_end:
+ .align PAGE_SIZE /* extra data here leaks to userspace. */
.previous
addr = start + (offset << PAGE_SHIFT);
if (addr >= end)
addr = end;
+
+ /*
+ * page-align it here so that get_unmapped_area doesn't
+ * align it wrongfully again to the next page. addr can come in 4K
+ * unaligned here as a result of stack start randomization.
+ */
+ addr = PAGE_ALIGN(addr);
+ addr = align_addr(addr, NULL, ALIGN_VDSO);
+
return addr;
}
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
.name = "Xen",
};
{
}
-static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-{
- /* reserve the range used */
- native_pagetable_reserve(start, end);
-
- /* set as RW the rest */
- printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
- PFN_PHYS(pgt_buf_top));
- while (end < PFN_PHYS(pgt_buf_top)) {
- make_lowmem_page_readwrite(__va(end));
- end += PAGE_SIZE;
- }
-}
-
static void xen_post_allocator_init(void);
static void __init xen_pagetable_setup_done(pgd_t *base)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ case VVAR_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+ idx == VVAR_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
void __init xen_init_mmu_ops(void)
{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
*/
void get_random_bytes(void *buf, int nbytes)
{
- extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
+ char *p = buf;
+
+ while (nbytes) {
+ unsigned long v;
+ int chunk = min(nbytes, (int)sizeof(unsigned long));
+
+ if (!arch_get_random_long(&v))
+ break;
+
+ memcpy(p, &v, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+
+ extract_entropy(&nonblocking_pool, p, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);
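/*
 * Example of the fallback split above: a 20-byte request on 64-bit
 * pulls 8 + 8 + 4 bytes via arch_get_random_long(); should the
 * hardware RNG fail after the first chunk, p has advanced 8 bytes and
 * the remaining 12 come from the nonblocking pool instead.
 */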
DEFINE_PER_CPU(__u32 [MD5_DIGEST_WORDS], get_random_int_hash);
unsigned int get_random_int(void)
{
- __u32 *hash = get_cpu_var(get_random_int_hash);
unsigned int ret;
+ __u32 *hash;
+
+ if (arch_get_random_int(&ret))
+ return ret;
+
+ hash = get_cpu_var(get_random_int_hash);
hash[0] += current->pid + jiffies + get_cycles();
md5_transform(hash, random_int_secret);
static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled)
{
/*
- * We unconditionally cancel the timer here, because otherwise
+ * We always cancel the timer here first, because otherwise
* we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK);
* when we manage to start the timer before the callback
* returns HRTIMER_RESTART.
int err = 0;
unsigned long flags;
- if (freq <= 0 || freq > 5000)
+ if (freq <= 0 || freq > RTC_MAX_FREQ)
return -EINVAL;
retry:
spin_lock_irqsave(&rtc->irq_task_lock, flags);
#include <linux/errno.h>
#include <linux/topology.h>
#include <linux/wait.h>
+#include <linux/module.h>
#include <asm/irq.h>
#include <asm/ptrace.h>
return d->msi_desc;
}
-int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node);
+int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner);
+
+static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt,
+ int node)
+{
+ return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE);
+}
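+
+/*
+ * Since irq_alloc_descs() is a static inline, THIS_MODULE expands in
+ * the caller, so e.g.
+ *
+ * irq = irq_alloc_descs(-1, 0, 4, numa_node_id());
+ *
+ * in a driver records that driver as owner of the descriptors; the
+ * owner is then pinned via try_module_get() while handlers are
+ * installed (see the __setup_irq() change below).
+ */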
+
void irq_free_descs(unsigned int irq, unsigned int cnt);
int irq_reserve_irqs(unsigned int from, unsigned int cnt);
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *dir;
#endif
+ struct module *owner;
const char *name;
} ____cacheline_internodealigned_in_smp;
state->s3 = __seed(i, 15);
}
+#ifdef CONFIG_ARCH_RANDOM
+# include <asm/archrandom.h>
+#else
+static inline int arch_get_random_long(unsigned long *v)
+{
+ return 0;
+}
+static inline int arch_get_random_int(unsigned int *v)
+{
+ return 0;
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_RANDOM_H */
#define RTC_AF 0x20 /* Alarm interrupt */
#define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */
+
+#define RTC_MAX_FREQ 8192
+
#ifdef __KERNEL__
#include <linux/types.h>
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
- async.o range.o jump_label.o
+ async.o range.o
obj-y += groups.o
ifdef CONFIG_FUNCTION_TRACER
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask);
for (i = gc->irq_base; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
if (flags & IRQ_GC_INIT_NESTED_LOCK)
raw_spin_unlock(&gc_lock);
for (; msk; msk >>= 1, i++) {
- if (!msk & 0x01)
+ if (!(msk & 0x01))
continue;
/* Remove handler first. That will mask the irq line */
static inline int desc_node(struct irq_desc *desc) { return 0; }
#endif
-static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
+static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
+ struct module *owner)
{
int cpu;
desc->irq_count = 0;
desc->irqs_unhandled = 0;
desc->name = NULL;
+ desc->owner = owner;
for_each_possible_cpu(cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
desc_smp_init(desc, node);
static inline void free_masks(struct irq_desc *desc) { }
#endif
-static struct irq_desc *alloc_desc(int irq, int node)
+static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
{
struct irq_desc *desc;
gfp_t gfp = GFP_KERNEL;
raw_spin_lock_init(&desc->lock);
lockdep_set_class(&desc->lock, &irq_desc_lock_class);
- desc_set_defaults(irq, desc, node);
+ desc_set_defaults(irq, desc, node, owner);
return desc;
kfree(desc);
}
-static int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
struct irq_desc *desc;
int i;
for (i = 0; i < cnt; i++) {
- desc = alloc_desc(start + i, node);
+ desc = alloc_desc(start + i, node, owner);
if (!desc)
goto err;
mutex_lock(&sparse_irq_lock);
nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
- desc = alloc_desc(i, node);
+ desc = alloc_desc(i, node, NULL);
set_bit(i, allocated_irqs);
irq_insert_desc(i, desc);
}
alloc_masks(&desc[i], GFP_KERNEL, node);
raw_spin_lock_init(&desc[i].lock);
lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
- desc_set_defaults(i, &desc[i], node);
+ desc_set_defaults(i, &desc[i], node, NULL);
}
return arch_early_irq_init();
}
dynamic_irq_cleanup(irq);
}
-static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
+static inline int alloc_descs(unsigned int start, unsigned int cnt, int node,
+ struct module *owner)
{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ struct irq_desc *desc = irq_to_desc(start + i);
+
+ desc->owner = owner;
+ }
return start;
}
* Returns the first irq number or error code
*/
int __ref
-irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node)
+__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node,
+ struct module *owner)
{
int start, ret;
bitmap_set(allocated_irqs, start, cnt);
mutex_unlock(&sparse_irq_lock);
- return alloc_descs(start, cnt, node);
+ return alloc_descs(start, cnt, node, owner);
err:
mutex_unlock(&sparse_irq_lock);
return ret;
}
-EXPORT_SYMBOL_GPL(irq_alloc_descs);
+EXPORT_SYMBOL_GPL(__irq_alloc_descs);
/**
* irq_reserve_irqs - mark irqs allocated
unsigned long flags;
raw_spin_lock_irqsave(&desc->lock, flags);
- desc_set_defaults(irq, desc, desc_node(desc));
+ desc_set_defaults(irq, desc, desc_node(desc), NULL);
raw_spin_unlock_irqrestore(&desc->lock, flags);
}
if (desc->irq_data.chip == &no_irq_chip)
return -ENOSYS;
+ if (!try_module_get(desc->owner))
+ return -ENODEV;
/*
* Some drivers like serial.c use request_irq() heavily,
* so we have to be careful not to interfere with a
*/
nested = irq_settings_is_nested_thread(desc);
if (nested) {
- if (!new->thread_fn)
- return -EINVAL;
+ if (!new->thread_fn) {
+ ret = -EINVAL;
+ goto out_mput;
+ }
/*
* Replace the primary handler which was provided from
* the driver for non nested interrupt handling by the
t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
new->name);
- if (IS_ERR(t))
- return PTR_ERR(t);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto out_mput;
+ }
/*
* We keep the reference to the task struct even if
* the thread dies to avoid that the interrupt code
kthread_stop(t);
put_task_struct(t);
}
+out_mput:
+ module_put(desc->owner);
return ret;
}
put_task_struct(action->thread);
}
+ module_put(desc->owner);
return action;
}
if (!thread_fn)
return -EINVAL;
handler = irq_default_primary_handler;
+ irqflags |= IRQF_ONESHOT;
}
action = kzalloc(sizeof(struct irqaction), GFP_KERNEL);
if (!class)
class = look_up_lock_class(lock, 0);
- if (DEBUG_LOCKS_WARN_ON(!class))
+ /*
+ * If look_up_lock_class() failed to find a class, we're trying
+ * to test if we hold a lock that has never yet been acquired.
+ * Clearly if the lock hasn't been acquired _ever_, we're not
+ * holding it either, so report failure.
+ */
+ if (!class)
return 0;
if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock))
power:power_frequency
This is for userspace compatibility
and will vanish after 5 kernel iterations,
- namely 2.6.41.
+ namely 3.1.
config CONTEXT_SWITCH_TRACER
bool
$(OUTPUT)python/perf.so: $(PYRF_OBJS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
- --quiet build_ext \
- --build-lib='$(OUTPUT)python' \
- --build-temp='$(OUTPUT)python/temp'
+ --quiet build_ext; \
+ mkdir -p $(OUTPUT)python && \
+ cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
#
# No Perl scripts right now:
#
PYTHON_WORD := $(call shell-wordify,$(PYTHON))
- python-clean := $(PYTHON_WORD) util/setup.py clean \
- --build-lib='$(OUTPUT)python' \
- --build-temp='$(OUTPUT)python/temp'
+ # python extension build directories
+ PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+ PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+ PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+ python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
ifdef NO_LIBPYTHON
$(call disable-python)
$(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
$(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+install-python_ext:
+ $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
install-doc:
$(MAKE) -C Documentation install
### Cleaning rules
clean:
- $(RM) $(OUTPUT){*.o,*/*.o,*/*/*.o,*/*/*/*.o,$(LIB_FILE),perf-archive}
+ $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
$(RM) $(ALL_PROGRAMS) perf
$(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
$(MAKE) -C Documentation/ clean
"-f",
"-m", "1024",
"-c", "1",
- "-e", "lock:lock_acquire:r",
- "-e", "lock:lock_acquired:r",
- "-e", "lock:lock_contended:r",
- "-e", "lock:lock_release:r",
+ "-e", "lock:lock_acquire",
+ "-e", "lock:lock_acquired",
+ "-e", "lock:lock_contended",
+ "-e", "lock:lock_release",
};
static int __cmd_record(int argc, const char **argv)
#include <sched.h>
#include <sys/mman.h>
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
enum write_mode_t {
WRITE_FORCE,
WRITE_APPEND
static int __cmd_record(int argc, const char **argv)
{
- int i;
struct stat st;
int flags;
int err;
for (;;) {
int hits = samples;
- int thread;
mmap_read_all();
waking++;
}
- if (done) {
- for (i = 0; i < evsel_list->cpus->nr; i++) {
- struct perf_evsel *pos;
-
- list_for_each_entry(pos, &evsel_list->entries, node) {
- for (thread = 0;
- thread < evsel_list->threads->nr;
- thread++)
- ioctl(FD(pos, i, thread),
- PERF_EVENT_IOC_DISABLE);
- }
- }
- }
+ if (done)
+ perf_evlist__disable(evsel_list);
}
if (quiet || signr == SIGUSR1)
{
if (!(self->sample_type & PERF_SAMPLE_CALLCHAIN)) {
if (sort__has_parent) {
- fprintf(stderr, "selected --sort parent, but no"
- " callchain data. Did you call"
- " perf record without -g?\n");
+ ui__warning("Selected --sort parent, but no "
+ "callchain data. Did you call "
+ "'perf record' without -g?\n");
return -EINVAL;
}
if (symbol_conf.use_callchain) {
- fprintf(stderr, "selected -g but no callchain data."
- " Did you call perf record without"
- " -g?\n");
+ ui__warning("Selected -g but no callchain data. Did "
+ "you call 'perf record' without -g?\n");
return -1;
}
} else if (!dont_use_callchains && callchain_param.mode != CHAIN_NONE &&
!symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
- fprintf(stderr, "Can't register callchain"
- " params\n");
+ ui__warning("Can't register callchain "
+ "params.\n");
return -EINVAL;
}
}
.ordered_samples = true,
};
-static int read_events(void)
+static void read_events(bool destroy, struct perf_session **psession)
{
int err = -EINVAL;
struct perf_session *session = perf_session__new(input_name, O_RDONLY,
0, false, &event_ops);
if (session == NULL)
- return -ENOMEM;
+ die("No Memory");
if (perf_session__has_traces(session, "record -R")) {
err = perf_session__process_events(session, &event_ops);
+ if (err)
+ die("Failed to process events, error %d", err);
+
nr_events = session->hists.stats.nr_events[0];
nr_lost_events = session->hists.stats.total_lost;
nr_lost_chunks = session->hists.stats.nr_events[PERF_RECORD_LOST];
}
- perf_session__delete(session);
- return err;
+ if (destroy)
+ perf_session__delete(session);
+
+ if (psession)
+ *psession = session;
}
static void print_bad_events(void)
static void __cmd_lat(void)
{
struct rb_node *next;
+ struct perf_session *session;
setup_pager();
- read_events();
+ read_events(false, &session);
sort_lat();
printf("\n ---------------------------------------------------------------------------------------------------------------\n");
print_bad_events();
printf("\n");
+ perf_session__delete(session);
}
static struct trace_sched_handler map_ops = {
max_cpu = sysconf(_SC_NPROCESSORS_CONF);
setup_pager();
- read_events();
+ read_events(true, NULL);
print_bad_events();
}
test_calibrations();
- read_events();
+ read_events(true, NULL);
printf("nr_run_events: %ld\n", nr_run_events);
printf("nr_sleep_events: %ld\n", nr_sleep_events);
static const char * const sched_usage[] = {
- "perf sched [<options>] {record|latency|map|replay|trace}",
+ "perf sched [<options>] {record|latency|map|replay|script}",
NULL
};
int perf_config(config_fn_t fn, void *data)
{
int ret = 0, found = 0;
- char *repo_config = NULL;
const char *home = NULL;
/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
home = getenv("HOME");
if (perf_config_global() && home) {
char *user_config = strdup(mkpath("%s/.perfconfig", home));
- if (!access(user_config, R_OK)) {
- ret += perf_config_from_file(fn, user_config, data);
- found += 1;
+ struct stat st;
+
+ if (user_config == NULL) {
+ warning("Not enough memory to process %s/.perfconfig, "
+ "ignoring it.", home);
+ goto out;
}
- free(user_config);
- }
- repo_config = perf_pathdup("config");
- if (!access(repo_config, R_OK)) {
- ret += perf_config_from_file(fn, repo_config, data);
+ if (stat(user_config, &st) < 0)
+ goto out_free;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ warning("File %s not owned by current user or root, "
+ "ignoring it.", user_config);
+ goto out_free;
+ }
+
+ if (!st.st_size)
+ goto out_free;
+
+ ret += perf_config_from_file(fn, user_config, data);
found += 1;
+out_free:
+ free(user_config);
}
- free(repo_config);
+out:
if (found == 0)
return -1;
return ret;
return 0;
}
+void perf_evlist__disable(struct perf_evlist *evlist)
+{
+ int cpu, thread;
+ struct perf_evsel *pos;
+
+ for (cpu = 0; cpu < evlist->cpus->nr; cpu++) {
+ list_for_each_entry(pos, &evlist->entries, node) {
+ for (thread = 0; thread < evlist->threads->nr; thread++)
+ ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE);
+ }
+ }
+}
+
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
{
int nfds = evlist->cpus->nr * evlist->threads->nr * evlist->nr_entries;
int perf_evlist__mmap(struct perf_evlist *evlist, int pages, bool overwrite);
void perf_evlist__munmap(struct perf_evlist *evlist);
+void perf_evlist__disable(struct perf_evlist *evlist);
+
static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
struct cpu_map *cpus,
struct thread_map *threads)
const char *name, bool is_kallsyms)
{
const size_t size = PATH_MAX;
- char *realname, *filename = malloc(size),
- *linkname = malloc(size), *targetname;
+ char *realname, *filename = zalloc(size),
+ *linkname = zalloc(size), *targetname;
int len, err = -1;
if (is_kallsyms) {
int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir)
{
const size_t size = PATH_MAX;
- char *filename = malloc(size),
- *linkname = malloc(size);
+ char *filename = zalloc(size),
+ *linkname = zalloc(size);
int err = -1;
if (filename == NULL || linkname == NULL)
ret = -ENOMEM;
goto error;
}
- tev->point.module = strdup(module);
- if (tev->point.module == NULL) {
- ret = -ENOMEM;
- goto error;
+
+ if (module) {
+ tev->point.module = strdup(module);
+ if (tev->point.module == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
}
+
tev->point.offset = pev->point.offset;
tev->point.retprobe = pev->point.retprobe;
tev->nargs = pev->nargs;
.tp_repr = (reprfunc)pyrf_throttle_event__repr,
};
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+ sample_members
+ member_def(lost_event, id, T_ULONGLONG, "event id"),
+ member_def(lost_event, lost, T_ULONGLONG, "number of lost events"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: lost, id: %#" PRIx64 ", "
+ "lost: %#" PRIx64 " }",
+ pevent->event.lost.id, pevent->event.lost.lost) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = PyString_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.lost_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_lost_event__doc,
+ .tp_members = pyrf_lost_event__members,
+ .tp_repr = (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+ sample_members
+ member_def(read_event, pid, T_UINT, "event pid"),
+ member_def(read_event, tid, T_UINT, "event tid"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+ return PyString_FromFormat("{ type: read, pid: %u, tid: %u }",
+ pevent->event.read.pid,
+ pevent->event.read.tid);
+ /*
+ * FIXME: return the array of read values,
+ * making this method useful ;-)
+ */
+}
+
+static PyTypeObject pyrf_read_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.read_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_read_event__doc,
+ .tp_members = pyrf_read_event__members,
+ .tp_repr = (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: sample }") < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = PyString_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.sample_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_sample_event__doc,
+ .tp_members = pyrf_sample_event__members,
+ .tp_repr = (reprfunc)pyrf_sample_event__repr,
+};
+
static int pyrf_event__setup_types(void)
{
int err;
pyrf_mmap_event__type.tp_new =
pyrf_task_event__type.tp_new =
pyrf_comm_event__type.tp_new =
+ pyrf_lost_event__type.tp_new =
+ pyrf_read_event__type.tp_new =
+ pyrf_sample_event__type.tp_new =
pyrf_throttle_event__type.tp_new = PyType_GenericNew;
err = PyType_Ready(&pyrf_mmap_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_lost_event__type);
if (err < 0)
goto out;
err = PyType_Ready(&pyrf_task_event__type);
err = PyType_Ready(&pyrf_throttle_event__type);
if (err < 0)
goto out;
+ err = PyType_Ready(&pyrf_read_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_sample_event__type);
+ if (err < 0)
+ goto out;
out:
return err;
}
static PyTypeObject *pyrf_event__type[] = {
[PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
- [PERF_RECORD_LOST] = &pyrf_mmap_event__type,
+ [PERF_RECORD_LOST] = &pyrf_lost_event__type,
[PERF_RECORD_COMM] = &pyrf_comm_event__type,
[PERF_RECORD_EXIT] = &pyrf_task_event__type,
[PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
[PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
[PERF_RECORD_FORK] = &pyrf_task_event__type,
- [PERF_RECORD_READ] = &pyrf_mmap_event__type,
- [PERF_RECORD_SAMPLE] = &pyrf_mmap_event__type,
+ [PERF_RECORD_READ] = &pyrf_read_event__type,
+ [PERF_RECORD_SAMPLE] = &pyrf_sample_event__type,
};
static PyObject *pyrf_event__new(union perf_event *event)
from distutils.core import setup, Extension
from os import getenv
+from distutils.command.build_ext import build_ext as _build_ext
+from distutils.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+ def finalize_options(self):
+ _build_ext.finalize_options(self)
+ self.build_lib = build_lib
+ self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+ def finalize_options(self):
+ _install_lib.finalize_options(self)
+ self.build_dir = build_lib
+
+
cflags = ['-fno-strict-aliasing', '-Wno-write-strings']
cflags += getenv('CFLAGS', '').split()
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+
perf = Extension('perf',
sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
author_email='acme@redhat.com',
license='GPLv2',
url='http://perf.wiki.kernel.org',
- ext_modules=[perf])
+ ext_modules=[perf],
+ cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
dso->adjust_symbols = 0;
if (strncmp(dso->name, "/tmp/perf-", 10) == 0) {
+ struct stat st;
+
+ if (stat(dso->name, &st) < 0)
+ return -1;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ pr_warning("File %s not owned by current user or root, "
+ "ignoring it.\n", dso->name);
+ return -1;
+ }
+
ret = dso__load_perf_map(dso, map, filter);
dso->symtab_type = ret > 0 ? SYMTAB__JAVA_JIT :
SYMTAB__NOT_FOUND;