1 #include <trace/syscall.h>
2 #include <trace/events/syscalls.h>
3 #include <linux/slab.h>
4 #include <linux/kernel.h>
5 #include <linux/ftrace.h>
6 #include <linux/perf_event.h>
7 #include <asm/syscall.h>
9 #include "trace_output.h"
/* Serializes tracepoint (un)registration and updates to the bitmaps below. */
12 static DEFINE_MUTEX(syscall_trace_lock);
/* Number of events currently using the sys_enter / sys_exit tracepoints. */
13 static int sys_refcount_enter;
14 static int sys_refcount_exit;
/* Per-syscall enable flags for ftrace recording, indexed by syscall number. */
15 static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
16 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
/* Forward declarations: .reg callbacks for the enter/exit event classes. */
18 static int syscall_enter_register(struct ftrace_event_call *event,
20 static int syscall_exit_register(struct ftrace_event_call *event,
/* Forward declarations: per-class field-definition callbacks. */
23 static int syscall_enter_define_fields(struct ftrace_event_call *call);
24 static int syscall_exit_define_fields(struct ftrace_event_call *call);
/* Return the field list for a syscall-enter event; enter events have
 * per-syscall argument fields, so the list lives in the metadata. */
26 static struct list_head *
27 syscall_get_enter_fields(struct ftrace_event_call *call)
29 struct syscall_metadata *entry = call->data;
31 return &entry->enter_fields;
/* Output (formatting) callbacks hooked into the trace event framework. */
34 struct trace_event_functions enter_syscall_print_funcs = {
35 .trace = print_syscall_enter,
38 struct trace_event_functions exit_syscall_print_funcs = {
39 .trace = print_syscall_exit,
/* Event class shared by every syscall-enter trace event. */
42 struct ftrace_event_class event_class_syscall_enter = {
44 .reg = syscall_enter_register,
45 .define_fields = syscall_enter_define_fields,
/* Enter events carry per-syscall fields, fetched via get_fields. */
46 .get_fields = syscall_get_enter_fields,
47 .raw_init = init_syscall_trace,
/* Event class shared by every syscall-exit trace event; all exit
 * events share one static field list (initialized inline here). */
50 struct ftrace_event_class event_class_syscall_exit = {
52 .reg = syscall_exit_register,
53 .define_fields = syscall_exit_define_fields,
54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
55 .raw_init = init_syscall_trace,
/* Linker-provided bounds of the compile-time syscall metadata section. */
58 extern unsigned long __start_syscalls_metadata[];
59 extern unsigned long __stop_syscalls_metadata[];
/* Runtime table: syscall nr -> metadata, built in init_ftrace_syscalls(). */
61 static struct syscall_metadata **syscalls_metadata;
/* Find the metadata entry for a syscall handler address by resolving
 * the address to its kallsyms name and comparing it against each
 * compiled-in metadata name. */
63 static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
65 struct syscall_metadata *start;
66 struct syscall_metadata *stop;
67 char str[KSYM_SYMBOL_LEN];
70 start = (struct syscall_metadata *)__start_syscalls_metadata;
71 stop = (struct syscall_metadata *)__stop_syscalls_metadata;
/* Resolve the handler address to its symbol name into str. */
72 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
74 for ( ; start < stop; start++) {
76 * Only compare after the "sys" prefix. Archs that use
77 * syscall wrappers may have syscalls symbols aliases prefixed
78 * with "SyS" instead of "sys", leading to an unwanted
/* +3 skips the 3-byte "sys"/"SyS" prefix on both names. */
81 if (start->name && !strcmp(start->name + 3, str + 3))
/* Map a syscall number to its metadata; NULL when nr is out of range
 * or the lookup table was never allocated. */
87 static struct syscall_metadata *syscall_nr_to_meta(int nr)
89 if (!syscalls_metadata || nr >= NR_syscalls || nr < 0)
92 return syscalls_metadata[nr];
/* trace_seq formatter for a syscall-enter event: prints
 * "name(arg: value, ...)" followed by a newline. */
96 print_syscall_enter(struct trace_iterator *iter, int flags,
97 struct trace_event *event)
99 struct trace_seq *s = &iter->seq;
100 struct trace_entry *ent = iter->ent;
101 struct syscall_trace_enter *trace;
102 struct syscall_metadata *entry;
105 trace = (typeof(trace))ent;
107 entry = syscall_nr_to_meta(syscall)
/* Sanity check: metadata's registered event type must match this entry. */;
112 if (entry->enter_event->event.type != ent->type) {
117 ret = trace_seq_printf(s, "%s(", entry->name);
119 return TRACE_TYPE_PARTIAL_LINE;
121 for (i = 0; i < entry->nb_args; i++) {
122 /* parameter types */
/* Argument types are only printed in verbose mode. */
123 if (trace_flags & TRACE_ITER_VERBOSE) {
124 ret = trace_seq_printf(s, "%s ", entry->types[i]);
126 return TRACE_TYPE_PARTIAL_LINE;
128 /* parameter values */
129 ret = trace_seq_printf(s, "%s: %lx%s", entry->args[i],
131 i == entry->nb_args - 1 ? "" : ", ");
133 return TRACE_TYPE_PARTIAL_LINE;
136 ret = trace_seq_putc(s, ')');
138 return TRACE_TYPE_PARTIAL_LINE;
141 ret = trace_seq_putc(s, '\n');
143 return TRACE_TYPE_PARTIAL_LINE;
145 return TRACE_TYPE_HANDLED;
/* trace_seq formatter for a syscall-exit event: prints
 * "name -> 0x<ret>" followed by a newline. */
149 print_syscall_exit(struct trace_iterator *iter, int flags,
150 struct trace_event *event)
152 struct trace_seq *s = &iter->seq;
153 struct trace_entry *ent = iter->ent;
154 struct syscall_trace_exit *trace;
156 struct syscall_metadata *entry;
159 trace = (typeof(trace))ent;
161 entry = syscall_nr_to_meta(syscall);
/* No metadata for this nr: emit a bare newline and report handled. */
164 trace_seq_printf(s, "\n");
165 return TRACE_TYPE_HANDLED;
/* Sanity check: metadata's registered event type must match this entry. */
168 if (entry->exit_event->event.type != ent->type) {
170 return TRACE_TYPE_UNHANDLED;
173 ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name,
176 return TRACE_TYPE_PARTIAL_LINE;
178 return TRACE_TYPE_HANDLED;
/* Never defined anywhere: referencing it forces a link-time failure,
 * turning a field-size mismatch in SYSCALL_FIELD() into a build error. */
181 extern char *__bad_type_size(void);
/* Expand to the (type-name, field-name, offset, size, signedness)
 * argument list expected by trace_define_field(), after verifying
 * that sizeof(type) matches the real struct field's size. */
183 #define SYSCALL_FIELD(type, name) \
184 sizeof(type) != sizeof(trace.name) ? \
185 __bad_type_size() : \
186 #type, #name, offsetof(typeof(trace), name), \
187 sizeof(trace.name), is_signed_type(type)
/* Build an enter event's print_fmt ("arg: 0x%08lx, ...", REC->arg, ...)
 * into @buf. With len == 0 only the required length is computed, so the
 * caller can size the allocation first. Returns the format length. */
190 int __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len)
195 /* When len=0, we just calculate the needed length */
196 #define LEN_OR_ZERO (len ? len - pos : 0)
198 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
199 for (i = 0; i < entry->nb_args; i++) {
200 pos += snprintf(buf + pos, LEN_OR_ZERO, "%s: 0x%%0%zulx%s",
201 entry->args[i], sizeof(unsigned long),
202 i == entry->nb_args - 1 ? "" : ", ");
204 pos += snprintf(buf + pos, LEN_OR_ZERO, "\"");
/* Second half: the REC-> argument list, one entry per syscall arg. */
206 for (i = 0; i < entry->nb_args; i++) {
207 pos += snprintf(buf + pos, LEN_OR_ZERO,
208 ", ((unsigned long)(REC->%s))", entry->args[i]);
213 /* return the length of print_fmt */
/* Attach a print_fmt to @call. Exit events share one static format
 * string; enter events get a kmalloc'd per-syscall format built by
 * __set_enter_print_fmt() in two passes (measure, then fill). */
217 static int set_syscall_print_fmt(struct ftrace_event_call *call)
221 struct syscall_metadata *entry = call->data;
/* Not the enter event => this is the exit event: use the static format. */
223 if (entry->enter_event != call) {
224 call->print_fmt = "\"0x%lx\", REC->ret";
228 /* First: called with 0 length to calculate the needed length */
229 len = __set_enter_print_fmt(entry, NULL, 0);
231 print_fmt = kmalloc(len + 1, GFP_KERNEL);
235 /* Second: actually write the @print_fmt */
236 __set_enter_print_fmt(entry, print_fmt, len + 1);
237 call->print_fmt = print_fmt;
/* Free print_fmt only for the enter event — the exit event's format
 * is a static string literal that must not be kfree'd. */
242 static void free_syscall_print_fmt(struct ftrace_event_call *call)
244 struct syscall_metadata *entry = call->data;
246 if (entry->enter_event == call)
247 kfree(call->print_fmt);
/* Define the filterable fields of an enter event: the common 'nr'
 * field plus one unsigned-long field per syscall argument. */
250 static int syscall_enter_define_fields(struct ftrace_event_call *call)
252 struct syscall_trace_enter trace;
253 struct syscall_metadata *meta = call->data;
256 int offset = offsetof(typeof(trace), args);
258 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
262 for (i = 0; i < meta->nb_args; i++) {
263 ret = trace_define_field(call, meta->types[i],
264 meta->args[i], offset,
265 sizeof(unsigned long), 0,
/* Arguments are recorded as a packed array of longs. */
267 offset += sizeof(unsigned long);
/* Define the filterable fields of an exit event: 'nr' and 'ret'. */
273 static int syscall_exit_define_fields(struct ftrace_event_call *call)
275 struct syscall_trace_exit trace;
278 ret = trace_define_field(call, SYSCALL_FIELD(int, nr), FILTER_OTHER);
282 ret = trace_define_field(call, SYSCALL_FIELD(long, ret),
/* sys_enter tracepoint probe: record syscall nr + raw arguments into
 * the ring buffer for syscalls enabled in enabled_enter_syscalls. */
288 void ftrace_syscall_enter(void *ignore, struct pt_regs *regs, long id)
290 struct syscall_trace_enter *entry;
291 struct syscall_metadata *sys_data;
292 struct ring_buffer_event *event;
293 struct ring_buffer *buffer;
297 syscall_nr = syscall_get_nr(current, regs);
/* NOTE(review): no syscall_nr < 0 check is visible before test_bit()
 * — confirm a -1 (non-)syscall cannot reach this probe. */
300 if (!test_bit(syscall_nr, enabled_enter_syscalls))
303 sys_data = syscall_nr_to_meta(syscall_nr);
/* Entries are variable-sized: header plus one long per argument. */
307 size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;
309 event = trace_current_buffer_lock_reserve(&buffer,
310 sys_data->enter_event->event.type, size, 0, 0);
314 entry = ring_buffer_event_data(event);
315 entry->nr = syscall_nr;
316 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args);
/* Commit unless the event filter discards this record. */
318 if (!filter_current_check_discard(buffer, sys_data->enter_event,
320 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
/* sys_exit tracepoint probe: record syscall nr + return value into
 * the ring buffer for syscalls enabled in enabled_exit_syscalls. */
323 void ftrace_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
325 struct syscall_trace_exit *entry;
326 struct syscall_metadata *sys_data;
327 struct ring_buffer_event *event;
328 struct ring_buffer *buffer;
331 syscall_nr = syscall_get_nr(current, regs);
/* NOTE(review): no syscall_nr < 0 check is visible before test_bit()
 * — confirm a -1 (non-)syscall cannot reach this probe. */
334 if (!test_bit(syscall_nr, enabled_exit_syscalls))
337 sys_data = syscall_nr_to_meta(syscall_nr);
341 event = trace_current_buffer_lock_reserve(&buffer,
342 sys_data->exit_event->event.type, sizeof(*entry), 0, 0);
346 entry = ring_buffer_event_data(event);
347 entry->nr = syscall_nr;
348 entry->ret = syscall_get_return_value(current, regs);
/* Commit unless the event filter discards this record. */
350 if (!filter_current_check_discard(buffer, sys_data->exit_event,
352 trace_current_buffer_unlock_commit(buffer, event, 0, 0);
/* Enable ftrace recording of syscall entry for @call's syscall.
 * Registers the shared sys_enter probe on first use; the mutex
 * protects the refcount and bitmap. */
355 int reg_event_syscall_enter(struct ftrace_event_call *call)
360 num = ((struct syscall_metadata *)call->data)->syscall_nr;
361 if (num < 0 || num >= NR_syscalls)
363 mutex_lock(&syscall_trace_lock);
364 if (!sys_refcount_enter)
365 ret = register_trace_sys_enter(ftrace_syscall_enter, NULL);
367 set_bit(num, enabled_enter_syscalls);
368 sys_refcount_enter++;
370 mutex_unlock(&syscall_trace_lock);
/* Disable ftrace recording of syscall entry for @call's syscall;
 * unregisters the shared probe when the last user goes away. */
374 void unreg_event_syscall_enter(struct ftrace_event_call *call)
378 num = ((struct syscall_metadata *)call->data)->syscall_nr;
379 if (num < 0 || num >= NR_syscalls)
381 mutex_lock(&syscall_trace_lock);
382 sys_refcount_enter--;
383 clear_bit(num, enabled_enter_syscalls);
384 if (!sys_refcount_enter)
385 unregister_trace_sys_enter(ftrace_syscall_enter, NULL);
386 mutex_unlock(&syscall_trace_lock);
/* Enable ftrace recording of syscall exit for @call's syscall.
 * Registers the shared sys_exit probe on first use. */
389 int reg_event_syscall_exit(struct ftrace_event_call *call)
394 num = ((struct syscall_metadata *)call->data)->syscall_nr;
395 if (num < 0 || num >= NR_syscalls)
397 mutex_lock(&syscall_trace_lock);
398 if (!sys_refcount_exit)
399 ret = register_trace_sys_exit(ftrace_syscall_exit, NULL);
401 set_bit(num, enabled_exit_syscalls);
404 mutex_unlock(&syscall_trace_lock);
/* Disable ftrace recording of syscall exit for @call's syscall;
 * unregisters the shared probe when the last user goes away. */
408 void unreg_event_syscall_exit(struct ftrace_event_call *call)
412 num = ((struct syscall_metadata *)call->data)->syscall_nr;
413 if (num < 0 || num >= NR_syscalls)
415 mutex_lock(&syscall_trace_lock);
417 clear_bit(num, enabled_exit_syscalls);
418 if (!sys_refcount_exit)
419 unregister_trace_sys_exit(ftrace_syscall_exit, NULL);
420 mutex_unlock(&syscall_trace_lock);
/* .raw_init callback: build the event's print_fmt, then register the
 * event with the trace core; frees print_fmt again on failure. */
423 int init_syscall_trace(struct ftrace_event_call *call)
427 if (set_syscall_print_fmt(call) < 0)
430 id = trace_event_raw_init(call);
433 free_syscall_print_fmt(call);
/* Map a syscall number to its handler address via sys_call_table
 * (generic version; architectures may provide their own). */
440 unsigned long __init arch_syscall_addr(int nr)
442 return (unsigned long)sys_call_table[nr];
/* Boot-time setup: allocate the nr -> metadata lookup table and fill
 * it by resolving each syscall table entry to its metadata. */
445 int __init init_ftrace_syscalls(void)
447 struct syscall_metadata *meta;
451 syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) *
452 NR_syscalls, GFP_KERNEL);
453 if (!syscalls_metadata) {
458 for (i = 0; i < NR_syscalls; i++) {
459 addr = arch_syscall_addr(i);
460 meta = find_syscall_meta(addr);
/* Stash the number so each event can find its own slot later. */
464 meta->syscall_nr = i;
465 syscalls_metadata[i] = meta;
470 core_initcall(init_ftrace_syscalls);
472 #ifdef CONFIG_PERF_EVENTS
/* Per-syscall enable flags and probe refcounts for the perf paths,
 * kept separate from the ftrace flags above. */
474 static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
475 static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
476 static int sys_perf_refcount_enter;
477 static int sys_perf_refcount_exit;
/* perf sys_enter probe: copy syscall nr + arguments into a perf trace
 * buffer and submit it to this CPU's listeners. */
479 static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
481 struct syscall_metadata *sys_data;
482 struct syscall_trace_enter *rec;
483 struct hlist_head *head;
488 syscall_nr = syscall_get_nr(current, regs);
489 if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
492 sys_data = syscall_nr_to_meta(syscall_nr);
496 /* get the size after alignment with the u32 buffer size field */
497 size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
498 size = ALIGN(size + sizeof(u32), sizeof(u64));
/* Warn once and bail rather than overflow the perf buffer. */
501 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
502 "perf buffer not large enough"))
505 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
506 sys_data->enter_event->event.type, regs, &rctx);
510 rec->nr = syscall_nr;
511 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
512 (unsigned long *)&rec->args);
514 head = this_cpu_ptr(sys_data->enter_event->perf_events);
515 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
/* Enable perf tracing of syscall entry for @call's syscall.
 * Registers the shared sys_enter probe on first use. */
518 int perf_sysenter_enable(struct ftrace_event_call *call)
523 num = ((struct syscall_metadata *)call->data)->syscall_nr;
525 mutex_lock(&syscall_trace_lock);
526 if (!sys_perf_refcount_enter)
527 ret = register_trace_sys_enter(perf_syscall_enter, NULL);
/* Fix: the adjacent literals previously concatenated without a space,
 * printing "...activatesyscall entry trace point". */
529 pr_info("event trace: Could not activate "
530 "syscall entry trace point");
532 set_bit(num, enabled_perf_enter_syscalls);
533 sys_perf_refcount_enter++;
535 mutex_unlock(&syscall_trace_lock);
/* Disable perf tracing of syscall entry for @call's syscall;
 * unregisters the shared probe when the last user goes away. */
539 void perf_sysenter_disable(struct ftrace_event_call *call)
543 num = ((struct syscall_metadata *)call->data)->syscall_nr;
545 mutex_lock(&syscall_trace_lock);
546 sys_perf_refcount_enter--;
547 clear_bit(num, enabled_perf_enter_syscalls);
548 if (!sys_perf_refcount_enter)
549 unregister_trace_sys_enter(perf_syscall_enter, NULL);
550 mutex_unlock(&syscall_trace_lock);
/* perf sys_exit probe: copy syscall nr + return value into a perf
 * trace buffer and submit it to this CPU's listeners. */
553 static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
555 struct syscall_metadata *sys_data;
556 struct syscall_trace_exit *rec;
557 struct hlist_head *head;
562 syscall_nr = syscall_get_nr(current, regs);
563 if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
566 sys_data = syscall_nr_to_meta(syscall_nr);
570 /* We can probably do that at build time */
571 size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
575 * Impossible, but be paranoid with the future
576 * How to put this check outside runtime?
578 if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
579 "exit event has grown above perf buffer size"))
582 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
583 sys_data->exit_event->event.type, regs, &rctx);
587 rec->nr = syscall_nr;
588 rec->ret = syscall_get_return_value(current, regs);
590 head = this_cpu_ptr(sys_data->exit_event->perf_events);
591 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
/* Enable perf tracing of syscall exit for @call's syscall.
 * Registers the shared sys_exit probe on first use. */
594 int perf_sysexit_enable(struct ftrace_event_call *call)
599 num = ((struct syscall_metadata *)call->data)->syscall_nr;
601 mutex_lock(&syscall_trace_lock);
602 if (!sys_perf_refcount_exit)
603 ret = register_trace_sys_exit(perf_syscall_exit, NULL);
/* Fix: the adjacent literals previously concatenated without a space,
 * printing "...activatesyscall exit trace point". */
605 pr_info("event trace: Could not activate "
606 "syscall exit trace point");
608 set_bit(num, enabled_perf_exit_syscalls);
609 sys_perf_refcount_exit++;
611 mutex_unlock(&syscall_trace_lock);
/* Disable perf tracing of syscall exit for @call's syscall;
 * unregisters the shared probe when the last user goes away. */
615 void perf_sysexit_disable(struct ftrace_event_call *call)
619 num = ((struct syscall_metadata *)call->data)->syscall_nr;
621 mutex_lock(&syscall_trace_lock);
622 sys_perf_refcount_exit--;
623 clear_bit(num, enabled_perf_exit_syscalls);
624 if (!sys_perf_refcount_exit)
625 unregister_trace_sys_exit(perf_syscall_exit, NULL);
626 mutex_unlock(&syscall_trace_lock);
629 #endif /* CONFIG_PERF_EVENTS */
/* Event-class .reg callback for enter events: dispatch registration /
 * unregistration requests to the ftrace and perf paths. */
631 static int syscall_enter_register(struct ftrace_event_call *event,
635 case TRACE_REG_REGISTER:
636 return reg_event_syscall_enter(event);
637 case TRACE_REG_UNREGISTER:
638 unreg_event_syscall_enter(event);
641 #ifdef CONFIG_PERF_EVENTS
642 case TRACE_REG_PERF_REGISTER:
643 return perf_sysenter_enable(event);
644 case TRACE_REG_PERF_UNREGISTER:
645 perf_sysenter_disable(event);
652 static int syscall_exit_register(struct ftrace_event_call *event,
656 case TRACE_REG_REGISTER:
657 return reg_event_syscall_exit(event);
658 case TRACE_REG_UNREGISTER:
659 unreg_event_syscall_exit(event);
662 #ifdef CONFIG_PERF_EVENTS
663 case TRACE_REG_PERF_REGISTER:
664 return perf_sysexit_enable(event);
665 case TRACE_REG_PERF_UNREGISTER:
666 perf_sysexit_disable(event);