2 * Kernel Probes (KProbes)
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 * Copyright IBM Corp. 2002, 2006
20 * s390 port, used ppc64 as template. Mike Grundy <grundym@us.ibm.com>
23 #include <linux/kprobes.h>
24 #include <linux/ptrace.h>
25 #include <linux/preempt.h>
26 #include <linux/stop_machine.h>
27 #include <linux/kdebug.h>
28 #include <linux/uaccess.h>
29 #include <asm/cacheflush.h>
30 #include <asm/sections.h>
31 #include <linux/module.h>
32 #include <linux/slab.h>
33 #include <linux/hardirq.h>
35 DEFINE_PER_CPU(struct kprobe *, current_kprobe);
36 DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
38 struct kretprobe_blackpoint kretprobe_blacklist[] = { };
40 DEFINE_INSN_CACHE_OPS(dmainsn);
42 static void *alloc_dmainsn_page(void)
44 return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
/*
 * Page release callback for the dmainsn slot cache; the counterpart of
 * alloc_dmainsn_page().
 *
 * @page: page previously handed out by alloc_dmainsn_page().
 */
static void free_dmainsn_page(void *page)
{
	unsigned long addr = (unsigned long)page;

	free_page(addr);
}
52 struct kprobe_insn_cache kprobe_dmainsn_slots = {
53 .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
54 .alloc = alloc_dmainsn_page,
55 .free = free_dmainsn_page,
56 .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
57 .insn_size = MAX_INSN_SIZE,
60 static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
62 switch (insn[0] >> 8) {
63 case 0x0c: /* bassm */
67 case 0xac: /* stnsm */
68 case 0xad: /* stosm */
73 case 0xb25a: /* bsa */
74 case 0xb240: /* bakr */
75 case 0xb258: /* bsg */
78 case 0xb98d: /* epsw */
84 static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
86 /* default fixup method */
87 int fixup = FIXUP_PSW_NORMAL;
89 switch (insn[0] >> 8) {
92 fixup = FIXUP_RETURN_REGISTER;
93 /* if r2 = 0, no branch will be taken */
94 if ((insn[0] & 0x0f) == 0)
95 fixup |= FIXUP_BRANCH_NOT_TAKEN;
99 fixup = FIXUP_BRANCH_NOT_TAKEN;
103 fixup = FIXUP_RETURN_REGISTER;
108 case 0x87: /* bxle */
109 fixup = FIXUP_BRANCH_NOT_TAKEN;
111 case 0x82: /* lpsw */
112 fixup = FIXUP_NOT_REQUIRED;
114 case 0xb2: /* lpswe */
115 if ((insn[0] & 0xff) == 0xb2)
116 fixup = FIXUP_NOT_REQUIRED;
118 case 0xa7: /* bras */
119 if ((insn[0] & 0x0f) == 0x05)
120 fixup |= FIXUP_RETURN_REGISTER;
123 if ((insn[0] & 0x0f) == 0x05) /* brasl */
124 fixup |= FIXUP_RETURN_REGISTER;
127 switch (insn[2] & 0xff) {
128 case 0x44: /* bxhg */
129 case 0x45: /* bxleg */
130 fixup = FIXUP_BRANCH_NOT_TAKEN;
134 case 0xe3: /* bctg */
135 if ((insn[2] & 0xff) == 0x46)
136 fixup = FIXUP_BRANCH_NOT_TAKEN;
139 switch (insn[2] & 0xff) {
140 case 0xe5: /* clgrb */
141 case 0xe6: /* cgrb */
143 case 0xf7: /* clrb */
144 case 0xfc: /* cgib */
145 case 0xfd: /* cglib */
147 case 0xff: /* clib */
148 fixup = FIXUP_BRANCH_NOT_TAKEN;
156 static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
158 /* Check if we have a RIL-b or RIL-c format instruction which
159 * we need to modify in order to avoid instruction emulation. */
160 switch (insn[0] >> 8) {
162 if ((insn[0] & 0x0f) == 0x00) /* larl */
166 switch (insn[0] & 0x0f) {
167 case 0x02: /* llhrl */
168 case 0x04: /* lghrl */
169 case 0x05: /* lhrl */
170 case 0x06: /* llghrl */
171 case 0x07: /* sthrl */
172 case 0x08: /* lgrl */
173 case 0x0b: /* stgrl */
174 case 0x0c: /* lgfrl */
176 case 0x0e: /* llgfrl */
177 case 0x0f: /* strl */
182 switch (insn[0] & 0x0f) {
183 case 0x00: /* exrl */
184 case 0x02: /* pfdrl */
185 case 0x04: /* cghrl */
186 case 0x05: /* chrl */
187 case 0x06: /* clghrl */
188 case 0x07: /* clhrl */
189 case 0x08: /* cgrl */
190 case 0x0a: /* clgrl */
191 case 0x0c: /* cgfrl */
193 case 0x0e: /* clgfrl */
194 case 0x0f: /* clrl */
202 static void __kprobes copy_instruction(struct kprobe *p)
207 memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
208 if (!is_insn_relative_long(p->ainsn.insn))
211 * For pc-relative instructions in RIL-b or RIL-c format patch the
212 * RI2 displacement field. We have already made sure that the insn
213 * slot for the patched instruction is within the same 2GB area
214 * as the original instruction (either kernel image or module area).
215 * Therefore the new displacement will always fit.
217 disp = *(s32 *)&p->ainsn.insn[1];
218 addr = (u64)(unsigned long)p->addr;
219 new_addr = (u64)(unsigned long)p->ainsn.insn;
220 new_disp = ((addr + (disp * 2)) - new_addr) / 2;
221 *(s32 *)&p->ainsn.insn[1] = new_disp;
224 static inline int is_kernel_addr(void *addr)
226 return addr < (void *)_end;
229 static inline int is_module_addr(void *addr)
232 BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
233 if (addr < (void *)MODULES_VADDR)
235 if (addr > (void *)MODULES_END)
241 static int __kprobes s390_get_insn_slot(struct kprobe *p)
244 * Get an insn slot that is within the same 2GB area like the original
245 * instruction. That way instructions with a 32bit signed displacement
246 * field can be patched and executed within the insn slot.
248 p->ainsn.insn = NULL;
249 if (is_kernel_addr(p->addr))
250 p->ainsn.insn = get_dmainsn_slot();
251 if (is_module_addr(p->addr))
252 p->ainsn.insn = get_insn_slot();
253 return p->ainsn.insn ? 0 : -ENOMEM;
256 static void __kprobes s390_free_insn_slot(struct kprobe *p)
260 if (is_kernel_addr(p->addr))
261 free_dmainsn_slot(p->ainsn.insn, 0);
263 free_insn_slot(p->ainsn.insn, 0);
264 p->ainsn.insn = NULL;
267 int __kprobes arch_prepare_kprobe(struct kprobe *p)
269 if ((unsigned long) p->addr & 0x01)
271 /* Make sure the probe isn't going on a difficult instruction */
272 if (is_prohibited_opcode(p->addr))
274 if (s390_get_insn_slot(p))
276 p->opcode = *p->addr;
281 struct ins_replace_args {
282 kprobe_opcode_t *ptr;
283 kprobe_opcode_t opcode;
286 static int __kprobes swap_instruction(void *aref)
288 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
289 unsigned long status = kcb->kprobe_status;
290 struct ins_replace_args *args = aref;
292 kcb->kprobe_status = KPROBE_SWAP_INST;
293 probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode));
294 kcb->kprobe_status = status;
298 void __kprobes arch_arm_kprobe(struct kprobe *p)
300 struct ins_replace_args args;
303 args.opcode = BREAKPOINT_INSTRUCTION;
304 stop_machine(swap_instruction, &args, NULL);
307 void __kprobes arch_disarm_kprobe(struct kprobe *p)
309 struct ins_replace_args args;
312 args.opcode = p->opcode;
313 stop_machine(swap_instruction, &args, NULL);
316 void __kprobes arch_remove_kprobe(struct kprobe *p)
318 s390_free_insn_slot(p);
321 static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
322 struct pt_regs *regs,
325 struct per_regs per_kprobe;
327 /* Set up the PER control registers %cr9-%cr11 */
328 per_kprobe.control = PER_EVENT_IFETCH;
329 per_kprobe.start = ip;
332 /* Save control regs and psw mask */
333 __ctl_store(kcb->kprobe_saved_ctl, 9, 11);
334 kcb->kprobe_saved_imask = regs->psw.mask &
335 (PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
337 /* Set PER control regs, turns on single step for the given address */
338 __ctl_load(per_kprobe, 9, 11);
339 regs->psw.mask |= PSW_MASK_PER;
340 regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
341 regs->psw.addr = ip | PSW_ADDR_AMODE;
344 static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
345 struct pt_regs *regs,
348 /* Restore control regs and psw mask, set new psw address */
349 __ctl_load(kcb->kprobe_saved_ctl, 9, 11);
350 regs->psw.mask &= ~PSW_MASK_PER;
351 regs->psw.mask |= kcb->kprobe_saved_imask;
352 regs->psw.addr = ip | PSW_ADDR_AMODE;
356 * Activate a kprobe by storing its pointer to current_kprobe. The
357 * previous kprobe is stored in kcb->prev_kprobe. A stack of up to
358 * two kprobes can be active, see KPROBE_REENTER.
360 static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
362 kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe);
363 kcb->prev_kprobe.status = kcb->kprobe_status;
364 __get_cpu_var(current_kprobe) = p;
368 * Deactivate a kprobe by backing up to the previous state. If the
369 * current state is KPROBE_REENTER prev_kprobe.kp will be non-NULL,
370 * for any other state prev_kprobe.kp will be NULL.
372 static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb)
374 __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
375 kcb->kprobe_status = kcb->prev_kprobe.status;
378 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
379 struct pt_regs *regs)
381 ri->ret_addr = (kprobe_opcode_t *) regs->gprs[14];
383 /* Replace the return addr with trampoline addr */
384 regs->gprs[14] = (unsigned long) &kretprobe_trampoline;
387 static void __kprobes kprobe_reenter_check(struct kprobe_ctlblk *kcb,
390 switch (kcb->kprobe_status) {
391 case KPROBE_HIT_SSDONE:
392 case KPROBE_HIT_ACTIVE:
393 kprobes_inc_nmissed_count(p);
399 * A kprobe on the code path to single step an instruction
400 * is a BUG. The code path resides in the .kprobes.text
401 * section and is executed with interrupts disabled.
403 printk(KERN_EMERG "Invalid kprobe detected at %p.\n", p->addr);
409 static int __kprobes kprobe_handler(struct pt_regs *regs)
411 struct kprobe_ctlblk *kcb;
415 * We want to disable preemption for the entire duration of kprobe
416 * processing. That includes the calls to the pre/post handlers
417 * and single stepping the kprobe instruction.
420 kcb = get_kprobe_ctlblk();
421 p = get_kprobe((void *)((regs->psw.addr & PSW_ADDR_INSN) - 2));
424 if (kprobe_running()) {
426 * We have hit a kprobe while another is still
427 * active. This can happen in the pre and post
428 * handler. Single step the instruction of the
429 * new probe but do not call any handler function
430 * of this secondary kprobe.
431 * push_kprobe and pop_kprobe saves and restores
432 * the currently active kprobe.
434 kprobe_reenter_check(kcb, p);
436 kcb->kprobe_status = KPROBE_REENTER;
439 * If we have no pre-handler or it returned 0, we
440 * continue with single stepping. If we have a
441 * pre-handler and it returned non-zero, it prepped
442 * for calling the break_handler below on re-entry
443 * for jprobe processing, so get out doing nothing
447 kcb->kprobe_status = KPROBE_HIT_ACTIVE;
448 if (p->pre_handler && p->pre_handler(p, regs))
450 kcb->kprobe_status = KPROBE_HIT_SS;
452 enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
454 } else if (kprobe_running()) {
455 p = __get_cpu_var(current_kprobe);
456 if (p->break_handler && p->break_handler(p, regs)) {
458 * Continuation after the jprobe completed and
459 * caused the jprobe_return trap. The jprobe
460 * break_handler "returns" to the original
461 * function that still has the kprobe breakpoint
462 * installed. We continue with single stepping.
464 kcb->kprobe_status = KPROBE_HIT_SS;
465 enable_singlestep(kcb, regs,
466 (unsigned long) p->ainsn.insn);
469 * No kprobe at this address and the current kprobe
470 * has no break handler (no jprobe!). The kernel just
471 * exploded, let the standard trap handler pick up the
475 * No kprobe at this address and no active kprobe. The trap has
476 * not been caused by a kprobe breakpoint. The race of breakpoint
477 * vs. kprobe remove does not exist on s390 because we use
478 * stop_machine to arm/disarm the breakpoints.
480 preempt_enable_no_resched();
485 * Function return probe trampoline:
486 * - init_kprobes() establishes a probepoint here
487 * - When the probed function returns, this probe
488 * causes the handlers to fire
490 static void __used kretprobe_trampoline_holder(void)
492 asm volatile(".global kretprobe_trampoline\n"
493 "kretprobe_trampoline: bcr 0,0\n");
497 * Called when the probe at kretprobe trampoline is hit
499 static int __kprobes trampoline_probe_handler(struct kprobe *p,
500 struct pt_regs *regs)
502 struct kretprobe_instance *ri;
503 struct hlist_head *head, empty_rp;
504 struct hlist_node *tmp;
505 unsigned long flags, orig_ret_address;
506 unsigned long trampoline_address;
507 kprobe_opcode_t *correct_ret_addr;
509 INIT_HLIST_HEAD(&empty_rp);
510 kretprobe_hash_lock(current, &head, &flags);
513 * It is possible to have multiple instances associated with a given
514 * task either because multiple functions in the call path
515 * have a return probe installed on them, and/or more than one
516 * return probe was registered for a target function.
518 * We can handle this because:
519 * - instances are always inserted at the head of the list
520 * - when multiple return probes are registered for the same
521 * function, the first instance's ret_addr will point to the
522 * real return address, and all the rest will point to
523 * kretprobe_trampoline
526 orig_ret_address = 0;
527 correct_ret_addr = NULL;
528 trampoline_address = (unsigned long) &kretprobe_trampoline;
529 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
530 if (ri->task != current)
531 /* another task is sharing our hash bucket */
534 orig_ret_address = (unsigned long) ri->ret_addr;
536 if (orig_ret_address != trampoline_address)
538 * This is the real return address. Any other
539 * instances associated with this task are for
540 * other calls deeper on the call stack
545 kretprobe_assert(ri, orig_ret_address, trampoline_address);
547 correct_ret_addr = ri->ret_addr;
548 hlist_for_each_entry_safe(ri, tmp, head, hlist) {
549 if (ri->task != current)
550 /* another task is sharing our hash bucket */
553 orig_ret_address = (unsigned long) ri->ret_addr;
555 if (ri->rp && ri->rp->handler) {
556 ri->ret_addr = correct_ret_addr;
557 ri->rp->handler(ri, regs);
560 recycle_rp_inst(ri, &empty_rp);
562 if (orig_ret_address != trampoline_address)
564 * This is the real return address. Any other
565 * instances associated with this task are for
566 * other calls deeper on the call stack
571 regs->psw.addr = orig_ret_address | PSW_ADDR_AMODE;
573 pop_kprobe(get_kprobe_ctlblk());
574 kretprobe_hash_unlock(current, &flags);
575 preempt_enable_no_resched();
577 hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
578 hlist_del(&ri->hlist);
582 * By returning a non-zero value, we are telling
583 * kprobe_handler() that we don't want the post_handler
584 * to run (and have re-enabled preemption)
590 * Called after single-stepping. p->addr is the address of the
591 * instruction whose first byte has been replaced by the "breakpoint"
592 * instruction. To avoid the SMP problems that can occur when we
593 * temporarily put back the original opcode to single-step, we
594 * single-stepped a copy of the instruction. The address of this
595 * copy is p->ainsn.insn.
597 static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
599 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
600 unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
601 int fixup = get_fixup_type(p->ainsn.insn);
603 if (fixup & FIXUP_PSW_NORMAL)
604 ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
606 if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
607 int ilen = ((p->ainsn.insn[0] >> 14) + 3) & -2;
608 if (ip - (unsigned long) p->ainsn.insn == ilen)
609 ip = (unsigned long) p->addr + ilen;
612 if (fixup & FIXUP_RETURN_REGISTER) {
613 int reg = (p->ainsn.insn[0] & 0xf0) >> 4;
614 regs->gprs[reg] += (unsigned long) p->addr -
615 (unsigned long) p->ainsn.insn;
618 disable_singlestep(kcb, regs, ip);
621 static int __kprobes post_kprobe_handler(struct pt_regs *regs)
623 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
624 struct kprobe *p = kprobe_running();
629 if (kcb->kprobe_status != KPROBE_REENTER && p->post_handler) {
630 kcb->kprobe_status = KPROBE_HIT_SSDONE;
631 p->post_handler(p, regs, 0);
634 resume_execution(p, regs);
636 preempt_enable_no_resched();
639 * if somebody else is singlestepping across a probe point, psw mask
640 * will have PER set, in which case, continue the remaining processing
641 * of do_single_step, as if this is not a probe hit.
643 if (regs->psw.mask & PSW_MASK_PER)
649 static int __kprobes kprobe_trap_handler(struct pt_regs *regs, int trapnr)
651 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
652 struct kprobe *p = kprobe_running();
653 const struct exception_table_entry *entry;
655 switch(kcb->kprobe_status) {
656 case KPROBE_SWAP_INST:
657 /* We are here because the instruction replacement failed */
662 * We are here because the instruction being single
663 * stepped caused a page fault. We reset the current
664 * kprobe and the nip points back to the probe address
665 * and allow the page fault handler to continue as a
668 disable_singlestep(kcb, regs, (unsigned long) p->addr);
670 preempt_enable_no_resched();
672 case KPROBE_HIT_ACTIVE:
673 case KPROBE_HIT_SSDONE:
675 * We increment the nmissed count for accounting,
676 * we can also use npre/npostfault count for accounting
677 * these specific fault cases.
679 kprobes_inc_nmissed_count(p);
682 * We come here because instructions in the pre/post
683 * handler caused the page_fault, this could happen
684 * if handler tries to access user space by
685 * copy_from_user(), get_user() etc. Let the
686 * user-specified handler try to fix it first.
688 if (p->fault_handler && p->fault_handler(p, regs, trapnr))
692 * In case the user-specified fault handler returned
693 * zero, try to fix up.
695 entry = search_exception_tables(regs->psw.addr & PSW_ADDR_INSN);
697 regs->psw.addr = extable_fixup(entry) | PSW_ADDR_AMODE;
702 * fixup_exception() could not handle it,
703 * Let do_page_fault() fix it.
712 int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
716 if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
718 ret = kprobe_trap_handler(regs, trapnr);
719 if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
720 local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
725 * Wrapper routine for handling exceptions.
727 int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
728 unsigned long val, void *data)
730 struct die_args *args = (struct die_args *) data;
731 struct pt_regs *regs = args->regs;
732 int ret = NOTIFY_DONE;
734 if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
739 if (kprobe_handler(regs))
743 if (post_kprobe_handler(regs))
747 if (!preemptible() && kprobe_running() &&
748 kprobe_trap_handler(regs, args->trapnr))
755 if (regs->psw.mask & (PSW_MASK_IO | PSW_MASK_EXT))
756 local_irq_restore(regs->psw.mask & ~PSW_MASK_PER);
761 int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
763 struct jprobe *jp = container_of(p, struct jprobe, kp);
764 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
767 memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
769 /* setup return addr to the jprobe handler routine */
770 regs->psw.addr = (unsigned long) jp->entry | PSW_ADDR_AMODE;
771 regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
773 /* r15 is the stack pointer */
774 stack = (unsigned long) regs->gprs[15];
776 memcpy(kcb->jprobes_stack, (void *) stack, MIN_STACK_SIZE(stack));
780 void __kprobes jprobe_return(void)
782 asm volatile(".word 0x0002");
785 static void __used __kprobes jprobe_return_end(void)
787 asm volatile("bcr 0,0");
790 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
792 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
795 stack = (unsigned long) kcb->jprobe_saved_regs.gprs[15];
797 /* Put the regs back */
798 memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
799 /* put the stack back */
800 memcpy((void *) stack, kcb->jprobes_stack, MIN_STACK_SIZE(stack));
801 preempt_enable_no_resched();
805 static struct kprobe trampoline = {
806 .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
807 .pre_handler = trampoline_probe_handler
810 int __init arch_init_kprobes(void)
812 return register_kprobe(&trampoline);
815 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
817 return p->addr == (kprobe_opcode_t *) &kretprobe_trampoline;