config AUDIT_ARCH
def_bool y
-config S390_EXEC_PROTECT
- def_bool y
- prompt "Data execute protection"
- help
- This option allows to enable a buffer overflow protection for user
- space programs and it also selects the addressing mode option above.
- The kernel parameter noexec=on will enable this feature and also
- switch the addressing modes, default is disabled. Enabling this (via
- kernel parameter) on machines earlier than IBM System z9 this will
- reduce system performance.
-
comment "Code generation options"
choice
} while (0)
#endif /* __s390x__ */
-/*
- * An executable for which elf_read_implies_exec() returns TRUE will
- * have the READ_IMPLIES_EXEC personality flag set automatically.
- */
-#define elf_read_implies_exec(ex, executable_stack) \
-({ \
- if (current->mm->context.noexec && \
- executable_stack != EXSTACK_DISABLE_X) \
- disable_noexec(current->mm, current); \
- current->mm->context.noexec == 0; \
-})
-
#define STACK_RND_MASK 0x7ffUL
#define ARCH_DLINFO \
{
pmd_t *pmdp = (pmd_t *) ptep;
- if (!MACHINE_HAS_IDTE) {
- __pmd_csp(pmdp);
- if (mm->context.noexec) {
- pmdp = get_shadow_table(pmdp);
- __pmd_csp(pmdp);
- }
- return;
- }
-
- __pmd_idte(address, pmdp);
- if (mm->context.noexec) {
- pmdp = get_shadow_table(pmdp);
+ if (MACHINE_HAS_IDTE)
__pmd_idte(address, pmdp);
- }
- return;
+ else
+ __pmd_csp(pmdp);
}
#define huge_ptep_set_access_flags(__vma, __addr, __ptep, __entry, __dirty) \
/* Address space pointer. */
__u32 kernel_asce; /* 0x02ac */
__u32 user_asce; /* 0x02b0 */
- __u32 user_exec_asce; /* 0x02b4 */
+ __u8 pad_0x02b4[0x02b8-0x02b4]; /* 0x02b4 */
/* SMP info area */
__u32 cpu_nr; /* 0x02b8 */
/* Address space pointer. */
__u64 kernel_asce; /* 0x0310 */
__u64 user_asce; /* 0x0318 */
- __u64 user_exec_asce; /* 0x0320 */
+ __u8 pad_0x0320[0x0328-0x0320]; /* 0x0320 */
/* SMP info area */
__u32 cpu_nr; /* 0x0328 */
atomic_t attach_count;
unsigned int flush_mm;
spinlock_t list_lock;
- struct list_head crst_list;
struct list_head pgtable_list;
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
- int noexec;
int has_pgste; /* The mmu context has extended page tables */
int alloc_pgste; /* cloned contexts will have extended page tables */
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
.context.list_lock = __SPIN_LOCK_UNLOCKED(name.context.list_lock), \
- .context.crst_list = LIST_HEAD_INIT(name.context.crst_list), \
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list),
#endif
* and if has_pgste is set, it will create extended page
* tables.
*/
- mm->context.noexec = 0;
mm->context.has_pgste = 1;
mm->context.alloc_pgste = 1;
} else {
- mm->context.noexec = (user_mode == SECONDARY_SPACE_MODE);
mm->context.has_pgste = 0;
mm->context.alloc_pgste = 0;
}
S390_lowcore.user_asce = mm->context.asce_bits | __pa(pgd);
if (user_mode != HOME_SPACE_MODE) {
/* Load primary space page table origin. */
- pgd = mm->context.noexec ? get_shadow_table(pgd) : pgd;
- S390_lowcore.user_exec_asce = mm->context.asce_bits | __pa(pgd);
asm volatile(LCTL_OPCODE" 1,1,%0\n"
- : : "m" (S390_lowcore.user_exec_asce) );
+ : : "m" (S390_lowcore.user_asce) );
} else
/* Load home space page table origin. */
asm volatile(LCTL_OPCODE" 13,13,%0"
#define check_pgt_cache() do {} while (0)
-unsigned long *crst_table_alloc(struct mm_struct *, int);
+unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
void crst_table_free_rcu(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
void page_table_free_rcu(struct mm_struct *, unsigned long *);
-void disable_noexec(struct mm_struct *, struct task_struct *);
static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
{
static inline void crst_table_init(unsigned long *crst, unsigned long entry)
{
clear_table(crst, entry, sizeof(unsigned long)*2048);
- crst = get_shadow_table(crst);
- if (crst)
- clear_table(crst, entry, sizeof(unsigned long)*2048);
}
#ifndef __s390x__
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
{
- unsigned long *table = crst_table_alloc(mm, mm->context.noexec);
+ unsigned long *table = crst_table_alloc(mm);
if (table)
crst_table_init(table, _REGION3_ENTRY_EMPTY);
return (pud_t *) table;
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr)
{
- unsigned long *table = crst_table_alloc(mm, mm->context.noexec);
+ unsigned long *table = crst_table_alloc(mm);
if (table)
crst_table_init(table, _SEGMENT_ENTRY_EMPTY);
return (pmd_t *) table;
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
pgd_populate_kernel(mm, pgd, pud);
- if (mm->context.noexec) {
- pgd = get_shadow_table(pgd);
- pud = get_shadow_table(pud);
- pgd_populate_kernel(mm, pgd, pud);
- }
}
static inline void pud_populate_kernel(struct mm_struct *mm,
static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
{
pud_populate_kernel(mm, pud, pmd);
- if (mm->context.noexec) {
- pud = get_shadow_table(pud);
- pmd = get_shadow_table(pmd);
- pud_populate_kernel(mm, pud, pmd);
- }
}
#endif /* __s390x__ */
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
spin_lock_init(&mm->context.list_lock);
- INIT_LIST_HEAD(&mm->context.crst_list);
INIT_LIST_HEAD(&mm->context.pgtable_list);
- return (pgd_t *)
- crst_table_alloc(mm, user_mode == SECONDARY_SPACE_MODE);
+ return (pgd_t *) crst_table_alloc(mm);
}
#define pgd_free(mm, pgd) crst_table_free(mm, (unsigned long *) pgd)
pmd_t *pmd, pgtable_t pte)
{
pmd_populate_kernel(mm, pmd, pte);
- if (mm->context.noexec) {
- pmd = get_shadow_table(pmd);
- pmd_populate_kernel(mm, pmd, pte + PTRS_PER_PTE);
- }
}
#define pmd_pgtable(pmd) \
#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */
#define _PAGE_TYPE_RO 0x200
#define _PAGE_TYPE_RW 0x000
-#define _PAGE_TYPE_EX_RO 0x202
-#define _PAGE_TYPE_EX_RW 0x002
/*
* Only four types for huge pages, using the invalid bit and protection bit
* _PAGE_TYPE_FILE 11?1 -> 11?1
* _PAGE_TYPE_RO 0100 -> 1100
* _PAGE_TYPE_RW 0000 -> 1000
- * _PAGE_TYPE_EX_RO 0110 -> 1110
- * _PAGE_TYPE_EX_RW 0010 -> 1010
*
* pte_none is true for bits combinations 1000, 1010, 1100, 1110
* pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001
#define PAGE_NONE __pgprot(_PAGE_TYPE_NONE)
#define PAGE_RO __pgprot(_PAGE_TYPE_RO)
#define PAGE_RW __pgprot(_PAGE_TYPE_RW)
-#define PAGE_EX_RO __pgprot(_PAGE_TYPE_EX_RO)
-#define PAGE_EX_RW __pgprot(_PAGE_TYPE_EX_RW)
#define PAGE_KERNEL PAGE_RW
#define PAGE_COPY PAGE_RO
/*
- * Dependent on the EXEC_PROTECT option s390 can do execute protection.
- * Write permission always implies read permission. In theory with a
- * primary/secondary page table execute only can be implemented but
- * it would cost an additional bit in the pte to distinguish all the
- * different pte types. To avoid that execute permission currently
- * implies read permission as well.
+ * On s390 the page table entry has an invalid bit and a read-only bit.
+ * Read permission implies execute permission and write permission
+ * implies read permission.
*/
/*xwr*/
#define __P000 PAGE_NONE
#define __P001 PAGE_RO
#define __P010 PAGE_RO
#define __P011 PAGE_RO
-#define __P100 PAGE_EX_RO
-#define __P101 PAGE_EX_RO
-#define __P110 PAGE_EX_RO
-#define __P111 PAGE_EX_RO
+#define __P100 PAGE_RO
+#define __P101 PAGE_RO
+#define __P110 PAGE_RO
+#define __P111 PAGE_RO
#define __S000 PAGE_NONE
#define __S001 PAGE_RO
#define __S010 PAGE_RW
#define __S011 PAGE_RW
-#define __S100 PAGE_EX_RO
-#define __S101 PAGE_EX_RO
-#define __S110 PAGE_EX_RW
-#define __S111 PAGE_EX_RW
-
-#ifndef __s390x__
-# define PxD_SHADOW_SHIFT 1
-#else /* __s390x__ */
-# define PxD_SHADOW_SHIFT 2
-#endif /* __s390x__ */
-
-static inline void *get_shadow_table(void *table)
-{
- unsigned long addr, offset;
- struct page *page;
-
- addr = (unsigned long) table;
- offset = addr & ((PAGE_SIZE << PxD_SHADOW_SHIFT) - 1);
- page = virt_to_page((void *)(addr ^ offset));
- return (void *)(addr_t)(page->index ? (page->index | offset) : 0UL);
-}
+#define __S100 PAGE_RO
+#define __S101 PAGE_RO
+#define __S110 PAGE_RW
+#define __S111 PAGE_RW
/*
* Certain architectures need to do special things when PTEs
pte_t *ptep, pte_t entry)
{
*ptep = entry;
- if (mm->context.noexec) {
- if (!(pte_val(entry) & _PAGE_INVALID) &&
- (pte_val(entry) & _PAGE_SWX))
- pte_val(entry) |= _PAGE_RO;
- else
- pte_val(entry) = _PAGE_TYPE_EMPTY;
- ptep[PTRS_PER_PTE] = entry;
- }
}
/*
static inline void pgd_clear(pgd_t * pgd)
{
- pgd_t *shadow = get_shadow_table(pgd);
-
pgd_clear_kernel(pgd);
- if (shadow)
- pgd_clear_kernel(shadow);
}
static inline void pud_clear_kernel(pud_t *pud)
static inline void pud_clear(pud_t *pud)
{
- pud_t *shadow = get_shadow_table(pud);
-
pud_clear_kernel(pud);
- if (shadow)
- pud_clear_kernel(shadow);
}
-
#endif /* __s390x__ */
static inline void pmd_clear_kernel(pmd_t * pmdp)
static inline void pmd_clear(pmd_t *pmd)
{
- pmd_t *shadow = get_shadow_table(pmd);
-
pmd_clear_kernel(pmd);
- if (shadow)
- pmd_clear_kernel(shadow);
}
static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
pte_val(*ptep) = _PAGE_TYPE_EMPTY;
- if (mm->context.noexec)
- pte_val(ptep[PTRS_PER_PTE]) = _PAGE_TYPE_EMPTY;
}
/*
}
__ptep_ipte(address, ptep);
pte_val(*ptep) = _PAGE_TYPE_EMPTY;
- if (mm->context.noexec) {
- __ptep_ipte(address, ptep + PTRS_PER_PTE);
- pte_val(*(ptep + PTRS_PER_PTE)) = _PAGE_TYPE_EMPTY;
- }
}
/*
* on all cpus instead of doing a local flush if the mm
* only ran on the local cpu.
*/
- if (MACHINE_HAS_IDTE) {
- if (mm->context.noexec)
- __tlb_flush_idte((unsigned long)
- get_shadow_table(mm->pgd) |
- mm->context.asce_bits);
+ if (MACHINE_HAS_IDTE)
__tlb_flush_idte((unsigned long) mm->pgd |
mm->context.asce_bits);
- return;
- }
- __tlb_flush_full(mm);
+ else
+ __tlb_flush_full(mm);
}
static inline void __tlb_flush_mm_cond(struct mm_struct * mm)
DEFINE(__LC_KERNEL_STACK, offsetof(struct _lowcore, kernel_stack));
DEFINE(__LC_ASYNC_STACK, offsetof(struct _lowcore, async_stack));
DEFINE(__LC_PANIC_STACK, offsetof(struct _lowcore, panic_stack));
- DEFINE(__LC_KERNEL_ASCE, offsetof(struct _lowcore, kernel_asce));
- DEFINE(__LC_USER_ASCE, offsetof(struct _lowcore, user_asce));
- DEFINE(__LC_USER_EXEC_ASCE, offsetof(struct _lowcore, user_exec_asce));
DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock));
DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock));
DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags));
*/
static int __init early_parse_switch_amode(char *p)
{
- if (user_mode != SECONDARY_SPACE_MODE)
- user_mode = PRIMARY_SPACE_MODE;
+ user_mode = PRIMARY_SPACE_MODE;
return 0;
}
early_param("switch_amode", early_parse_switch_amode);
{
if (p && strcmp(p, "primary") == 0)
user_mode = PRIMARY_SPACE_MODE;
-#ifdef CONFIG_S390_EXEC_PROTECT
- else if (p && strcmp(p, "secondary") == 0)
- user_mode = SECONDARY_SPACE_MODE;
-#endif
else if (!p || strcmp(p, "home") == 0)
user_mode = HOME_SPACE_MODE;
else
}
early_param("user_mode", early_parse_user_mode);
-#ifdef CONFIG_S390_EXEC_PROTECT
-/*
- * Enable execute protection?
- */
-static int __init early_parse_noexec(char *p)
-{
- if (!strncmp(p, "off", 3))
- return 0;
- user_mode = SECONDARY_SPACE_MODE;
- return 0;
-}
-early_param("noexec", early_parse_noexec);
-#endif /* CONFIG_S390_EXEC_PROTECT */
-
static void setup_addressing_mode(void)
{
- if (user_mode == SECONDARY_SPACE_MODE) {
- if (set_amode_and_uaccess(PSW_ASC_SECONDARY,
- PSW32_ASC_SECONDARY))
- pr_info("Execute protection active, "
- "mvcos available\n");
- else
- pr_info("Execute protection active, "
- "mvcos not available\n");
- } else if (user_mode == PRIMARY_SPACE_MODE) {
+ if (user_mode == PRIMARY_SPACE_MODE) {
if (set_amode_and_uaccess(PSW_ASC_PRIMARY, PSW32_ASC_PRIMARY))
pr_info("Address spaces switched, "
"mvcos available\n");
force_sig_info(SIGBUS, &si, tsk);
}
-#ifdef CONFIG_S390_EXEC_PROTECT
-static noinline int signal_return(struct pt_regs *regs, long int_code,
- unsigned long trans_exc_code)
-{
- u16 instruction;
- int rc;
-
- rc = __get_user(instruction, (u16 __user *) regs->psw.addr);
-
- if (!rc && instruction == 0x0a77) {
- clear_tsk_thread_flag(current, TIF_PER_TRAP);
- if (is_compat_task())
- sys32_sigreturn();
- else
- sys_sigreturn();
- } else if (!rc && instruction == 0x0aad) {
- clear_tsk_thread_flag(current, TIF_PER_TRAP);
- if (is_compat_task())
- sys32_rt_sigreturn();
- else
- sys_rt_sigreturn();
- } else
- do_sigsegv(regs, int_code, SEGV_MAPERR, trans_exc_code);
- return 0;
-}
-#endif /* CONFIG_S390_EXEC_PROTECT */
-
static noinline void do_fault_error(struct pt_regs *regs, long int_code,
unsigned long trans_exc_code, int fault)
{
switch (fault) {
case VM_FAULT_BADACCESS:
-#ifdef CONFIG_S390_EXEC_PROTECT
- if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
- (trans_exc_code & 3) == 0) {
- signal_return(regs, int_code, trans_exc_code);
- break;
- }
-#endif /* CONFIG_S390_EXEC_PROTECT */
case VM_FAULT_BADMAP:
/* Bad memory access. Check if it is kernel or user space. */
if (regs->psw.mask & PSW_MASK_PSTATE) {
int access, fault;
access = VM_READ | VM_EXEC | VM_WRITE;
-#ifdef CONFIG_S390_EXEC_PROTECT
- if ((regs->psw.mask & PSW_MASK_ASC) == PSW_ASC_SECONDARY &&
- (trans_exc_code & 3) == 0)
- access = VM_EXEC;
-#endif
fault = do_exception(regs, access, trans_exc_code);
if (unlikely(fault))
do_fault_error(regs, pgm_int_code & 255, trans_exc_code, fault);
pte_t *pteptr, pte_t pteval)
{
pmd_t *pmdp = (pmd_t *) pteptr;
- pte_t shadow_pteval = pteval;
unsigned long mask;
if (!MACHINE_HAS_HPAGE) {
mask = pte_val(pteval) &
(_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO);
pte_val(pteval) = (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
- if (mm->context.noexec) {
- pteptr += PTRS_PER_PTE;
- pte_val(shadow_pteval) =
- (_SEGMENT_ENTRY + __pa(pteptr)) | mask;
- }
}
pmd_val(*pmdp) = pte_val(pteval);
- if (mm->context.noexec) {
- pmdp = get_shadow_table(pmdp);
- pmd_val(*pmdp) = pte_val(shadow_pteval);
- }
}
int arch_prepare_hugepage(struct page *page)
static DEFINE_PER_CPU(struct rcu_table_freelist *, rcu_table_freelist);
static void __page_table_free(struct mm_struct *mm, unsigned long *table);
-static void __crst_table_free(struct mm_struct *mm, unsigned long *table);
static struct rcu_table_freelist *rcu_table_freelist_get(struct mm_struct *mm)
{
while (batch->pgt_index > 0)
__page_table_free(batch->mm, batch->table[--batch->pgt_index]);
while (batch->crst_index < RCU_FREELIST_SIZE)
- __crst_table_free(batch->mm, batch->table[batch->crst_index++]);
+ crst_table_free(batch->mm, batch->table[batch->crst_index++]);
free_page((unsigned long) batch);
}
}
early_param("vmalloc", parse_vmalloc);
-unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
+unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
if (!page)
return NULL;
- page->index = 0;
- if (noexec) {
- struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
- if (!shadow) {
- __free_pages(page, ALLOC_ORDER);
- return NULL;
- }
- page->index = page_to_phys(shadow);
- }
- spin_lock_bh(&mm->context.list_lock);
- list_add(&page->lru, &mm->context.crst_list);
- spin_unlock_bh(&mm->context.list_lock);
return (unsigned long *) page_to_phys(page);
}
-static void __crst_table_free(struct mm_struct *mm, unsigned long *table)
-{
- unsigned long *shadow = get_shadow_table(table);
-
- if (shadow)
- free_pages((unsigned long) shadow, ALLOC_ORDER);
- free_pages((unsigned long) table, ALLOC_ORDER);
-}
-
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- struct page *page = virt_to_page(table);
-
- spin_lock_bh(&mm->context.list_lock);
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
- __crst_table_free(mm, table);
+ free_pages((unsigned long) table, ALLOC_ORDER);
}
void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table)
{
struct rcu_table_freelist *batch;
- struct page *page = virt_to_page(table);
- spin_lock_bh(&mm->context.list_lock);
- list_del(&page->lru);
- spin_unlock_bh(&mm->context.list_lock);
if (atomic_read(&mm->mm_users) < 2 &&
cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
- __crst_table_free(mm, table);
+ crst_table_free(mm, table);
return;
}
batch = rcu_table_freelist_get(mm);
if (!batch) {
smp_call_function(smp_sync, NULL, 1);
- __crst_table_free(mm, table);
+ crst_table_free(mm, table);
return;
}
batch->table[--batch->crst_index] = table;
BUG_ON(limit > (1UL << 53));
repeat:
- table = crst_table_alloc(mm, mm->context.noexec);
+ table = crst_table_alloc(mm);
if (!table)
return -ENOMEM;
spin_lock_bh(&mm->page_table_lock);
unsigned long *table;
unsigned long bits;
- bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
+ bits = (mm->context.has_pgste) ? 3UL : 1UL;
spin_lock_bh(&mm->context.list_lock);
page = NULL;
if (!list_empty(&mm->context.pgtable_list)) {
struct page *page;
unsigned long bits;
- bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
+ bits = (mm->context.has_pgste) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
page_table_free(mm, table);
return;
}
- bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
+ bits = (mm->context.has_pgste) ? 3UL : 1UL;
bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
rcu_table_freelist_finish();
}
-void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
-{
- struct page *page;
-
- spin_lock_bh(&mm->context.list_lock);
- /* Free shadow region and segment tables. */
- list_for_each_entry(page, &mm->context.crst_list, lru)
- if (page->index) {
- free_pages((unsigned long) page->index, ALLOC_ORDER);
- page->index = 0;
- }
- /* "Free" second halves of page tables. */
- list_for_each_entry(page, &mm->context.pgtable_list, lru)
- page->flags &= ~SECOND_HALVES;
- spin_unlock_bh(&mm->context.list_lock);
- mm->context.noexec = 0;
- update_mm(mm, tsk);
-}
-
/*
* switch on pgstes for its userspace process (for kvm)
*/