From: Dave Hansen Date: Fri, 12 Feb 2016 21:02:36 +0000 (-0800) Subject: x86/mm/pkeys: Allow kernel to modify user pkey rights register X-Git-Tag: next-20160301~40^2~13^2~2 X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=8459429693395ca9e8d18101300b120ad9171795;p=karo-tx-linux.git x86/mm/pkeys: Allow kernel to modify user pkey rights register The Protection Key Rights for User memory (PKRU) is a 32-bit user-accessible register. It contains two bits for each protection key: one to write-disable (WD) access to memory covered by the key and another to access-disable (AD). Userspace can read/write the register with the RDPKRU and WRPKRU instructions. But, the register is saved and restored with the XSAVE family of instructions, which means we have to treat it like a floating point register. The kernel needs to write to the register if it wants to implement execute-only memory or if it implements a system call to change PKRU. To do this, we need to create a 'pkru_state' buffer, read the old contents in to it, modify it, and then tell the FPU code that there is modified data in there so it can (possibly) move the buffer back in to the registers. This uses the fpu__xfeature_set_state() function that we defined in the previous patch. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210236.0BE13217@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 3cbfae80abb2..1ff49ec29ece 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -921,16 +921,17 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) #define PKRU_AD_BIT 0x1 #define PKRU_WD_BIT 0x2 +#define PKRU_BITS_PER_PKEY 2 static inline bool __pkru_allows_read(u32 pkru, u16 pkey) { - int pkru_pkey_bits = pkey * 2; + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); } static inline bool __pkru_allows_write(u32 pkru, u16 pkey) { - int pkru_pkey_bits = pkey * 2; + int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY; /* * Access-disable disables writes too so we need to check * both bits here. diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 04243c23380c..5061aec2ed5e 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -3,4 +3,7 @@ #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) +extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val); + #endif /*_ASM_X86_PKEYS_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 30d144f01eb9..50813c35e9d9 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include @@ -855,3 +856,76 @@ out: */ fpu__current_fpstate_write_end(); } + +#define NR_VALID_PKRU_BITS (CONFIG_NR_PROTECTION_KEYS * 2) +#define PKRU_VALID_MASK (NR_VALID_PKRU_BITS - 1) + +/* + * This will go out and modify the XSAVE buffer so that PKRU is + * set to a particular state for access to 'pkey'. + * + * PKRU state does affect kernel access to user memory. We do + * not modfiy PKRU *itself* here, only the XSAVE state that will + * be restored in to PKRU when we return back to userspace. + */ +int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, + unsigned long init_val) +{ + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct pkru_state *old_pkru_state; + struct pkru_state new_pkru_state; + int pkey_shift = (pkey * PKRU_BITS_PER_PKEY); + u32 new_pkru_bits = 0; + + if (!validate_pkey(pkey)) + return -EINVAL; + /* + * This check implies XSAVE support. OSPKE only gets + * set if we enable XSAVE and we enable PKU in XCR0. + */ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return -EINVAL; + + /* Set the bits we need in PKRU */ + if (init_val & PKEY_DISABLE_ACCESS) + new_pkru_bits |= PKRU_AD_BIT; + if (init_val & PKEY_DISABLE_WRITE) + new_pkru_bits |= PKRU_WD_BIT; + + /* Shift the bits in to the correct place in PKRU for pkey. */ + new_pkru_bits <<= pkey_shift; + + /* Locate old copy of the state in the xsave buffer */ + old_pkru_state = get_xsave_addr(xsave, XFEATURE_MASK_PKRU); + + /* + * When state is not in the buffer, it is in the init + * state, set it manually. Otherwise, copy out the old + * state. + */ + if (!old_pkru_state) + new_pkru_state.pkru = 0; + else + new_pkru_state.pkru = old_pkru_state->pkru; + + /* mask off any old bits in place */ + new_pkru_state.pkru &= ~((PKRU_AD_BIT|PKRU_WD_BIT) << pkey_shift); + /* Set the newly-requested bits */ + new_pkru_state.pkru |= new_pkru_bits; + + /* + * We could theoretically live without zeroing pkru.pad. + * The current XSAVE feature state definition says that + * only bytes 0->3 are used. But we do not want to + * chance leaking kernel stack out to userspace in case a + * memcpy() of the whole xsave buffer was done. + * + * They're in the same cacheline anyway. + */ + new_pkru_state.pad = 0; + + fpu__xfeature_set_state(XFEATURE_MASK_PKRU, &new_pkru_state, + sizeof(new_pkru_state)); + + return 0; +} diff --git a/include/linux/pkeys.h b/include/linux/pkeys.h index 55e465f93a28..fc325b367bd0 100644 --- a/include/linux/pkeys.h +++ b/include/linux/pkeys.h @@ -4,6 +4,11 @@ #include #include +#define PKEY_DISABLE_ACCESS 0x1 +#define PKEY_DISABLE_WRITE 0x2 +#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ + PKEY_DISABLE_WRITE) + #ifdef CONFIG_ARCH_HAS_PKEYS #include #else /* ! CONFIG_ARCH_HAS_PKEYS */