4 * Builtin 'trace' command:
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
16 * Released under the GPL v2. (and only v2, not any later version)
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace/beauty/beauty.h"
35 #include "trace-event.h"
36 #include "util/parse-events.h"
37 #include "util/bpf-loader.h"
38 #include "callchain.h"
39 #include "syscalltbl.h"
40 #include "rb_resort.h"
43 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
46 #include <linux/err.h>
47 #include <linux/filter.h>
48 #include <linux/audit.h>
49 #include <linux/kernel.h>
50 #include <linux/random.h>
51 #include <linux/stringify.h>
52 #include <linux/time64.h>
54 #include "sane_ctype.h"
/*
 * Fallback for libcs that predate O_CLOEXEC.
 * NOTE(review): the enclosing #ifndef guard is elided from this excerpt.
 */
57 # define O_CLOEXEC 02000000
/*
 * Members of the per-invocation 'struct trace' state (the struct opener and
 * several intervening fields are elided from this excerpt of the listing).
 */
61 struct perf_tool tool;
62 struct syscalltbl *sctbl; /* syscall id <-> name translation table */
65 struct syscall *table; /* per-syscall-id formatting info, grown on demand */
67 struct perf_evsel *sys_enter, /* tracepoint evsels; see perf_evsel__syscall_newtp() */
71 struct record_opts opts;
72 struct perf_evlist *evlist;
74 struct thread *current; /* presumably the thread of the last event -- TODO confirm */
77 unsigned long nr_events;
78 struct strlist *ev_qualifier; /* event qualifier names; negated via not_ev_qualifier */
87 double duration_filter; /* in ms, compared against in trace__filter_duration() */
93 unsigned int max_stack;
94 unsigned int min_stack;
95 bool not_ev_qualifier;
99 bool multiple_threads; /* when set, entry head printing prepends comm/tid */
103 bool show_tool_stats;
105 bool kernel_syscallchains;
/*
 * Accessor callbacks of 'struct tp_field': read an integer or a pointer out
 * of a tracepoint sample's raw_data at the field's recorded offset.
 */
115 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
116 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
/*
 * Generate tp_field__u{8,16,32,64}(): memcpy the value out of raw_data
 * (memcpy avoids unaligned access). Body lines are partly elided here.
 */
120 #define TP_UINT_FIELD(bits) \
121 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
124 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
/*
 * Byte-swapping variants for samples recorded with the opposite endianness.
 */
133 #define TP_UINT_FIELD__SWAPPED(bits) \
134 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
137 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
138 return bswap_##bits(value);\
141 TP_UINT_FIELD__SWAPPED(16);
142 TP_UINT_FIELD__SWAPPED(32);
143 TP_UINT_FIELD__SWAPPED(64);
/*
 * Bind the right integer accessor for this format_field, by size and by
 * whether the event needs byte swapping. Switch cases are partly elided.
 */
145 static int tp_field__init_uint(struct tp_field *field,
146 struct format_field *format_field,
149 field->offset = format_field->offset;
151 switch (format_field->size) {
153 field->integer = tp_field__u8; /* 8-bit: no swapped variant needed */
156 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
159 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
162 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
/* Pointer accessor: the field data lives inline in raw_data. */
171 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
173 return sample->raw_data + field->offset;
176 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
178 field->offset = format_field->offset;
179 field->pointer = tp_field__ptr;
/* Fields of 'struct syscall_tp' (struct opener elided from this excerpt). */
186 struct tp_field args, ret;
/*
 * Look up a named field in the evsel's tracepoint format and bind an
 * integer accessor for it; fails when the field does not exist.
 */
190 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
191 struct tp_field *field,
194 struct format_field *format_field = perf_evsel__field(evsel, name);
196 if (format_field == NULL)
199 return tp_field__init_uint(field, format_field, evsel->needs_swap);
/* Convenience: init the syscall_tp member with the same name as the field. */
202 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
203 ({ struct syscall_tp *sc = evsel->priv;\
204 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
/* As above, but binding a pointer accessor (no swapping involved). */
206 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
207 struct tp_field *field,
210 struct format_field *format_field = perf_evsel__field(evsel, name);
212 if (format_field == NULL)
215 return tp_field__init_ptr(field, format_field);
218 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
219 ({ struct syscall_tp *sc = evsel->priv;\
220 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
/* Delete an evsel together with its private syscall_tp (free elided here). */
222 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
225 perf_evsel__delete(evsel);
/*
 * Allocate the per-evsel syscall_tp, bind the 'id' field accessor and
 * install the sample handler. Error paths are partly elided.
 */
228 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
230 evsel->priv = malloc(sizeof(struct syscall_tp));
231 if (evsel->priv != NULL) {
232 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
235 evsel->handler = handler;
/*
 * Create the sys_enter/sys_exit tracepoint evsel, preferring the
 * raw_syscalls:* events and falling back to syscalls:* on older kernels.
 */
246 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
248 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
250 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
252 evsel = perf_evsel__newtp("syscalls", direction);
257 if (perf_evsel__init_syscall_tp(evsel, handler))
263 perf_evsel__delete_priv(evsel);
/* Read the named field from a sample via the bound accessor. */
267 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
268 ({ struct syscall_tp *fields = evsel->priv; \
269 fields->name.integer(&fields->name, sample); })
271 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
272 ({ struct syscall_tp *fields = evsel->priv; \
273 fields->name.pointer(&fields->name, sample); })
/*
 * 'struct strarray': maps small integer values to human-readable names,
 * optionally biased by 'offset' for tables that do not start at 0.
 * (struct opener and the offset/nr_entries members are elided here.)
 */
278 const char **entries;
281 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
282 .nr_entries = ARRAY_SIZE(array), \
286 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
288 .nr_entries = ARRAY_SIZE(array), \
/*
 * Print the name for arg->val from the strarray in arg->parm; values
 * outside the table are printed raw using 'intfmt'.
 */
292 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
294 struct syscall_arg *arg)
296 struct strarray *sa = arg->parm;
297 int idx = arg->val - sa->offset;
299 if (idx < 0 || idx >= sa->nr_entries)
300 return scnprintf(bf, size, intfmt, arg->val);
302 return scnprintf(bf, size, "%s", sa->entries[idx]);
/* Decimal fallback variant, the common case. */
305 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
306 struct syscall_arg *arg)
308 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
311 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
313 #if defined(__i386__) || defined(__x86_64__)
315 * FIXME: Make this available to all arches as soon as the ioctl beautifier
316 * gets rewritten to support all arches.
/* Hex fallback variant, used for the x86 ioctl cmd table below. */
318 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
319 struct syscall_arg *arg)
321 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
324 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
325 #endif /* defined(__i386__) || defined(__x86_64__) */
/* Forward declaration: fd beautifier is defined after the fd-path helpers. */
327 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
328 struct syscall_arg *arg);
330 #define SCA_FD syscall_arg__scnprintf_fd
/* Fallback for libcs without AT_FDCWD (value per fcntl.h). */
333 #define AT_FDCWD -100
/*
 * *at() dirfd beautifier: AT_FDCWD prints as "CWD", anything else is a
 * regular fd. (The AT_FDCWD comparison line is elided from this excerpt.)
 */
336 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
337 struct syscall_arg *arg)
342 return scnprintf(bf, size, "CWD");
344 return syscall_arg__scnprintf_fd(bf, size, arg);
347 #define SCA_FDAT syscall_arg__scnprintf_fd_at
/* close(fd) beautifier: also invalidates the cached fd path (defined later). */
349 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
350 struct syscall_arg *arg);
352 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
/* Generic hex beautifier for addresses and opaque values. */
354 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
355 struct syscall_arg *arg)
357 return scnprintf(bf, size, "%#lx", arg->val);
360 #define SCA_HEX syscall_arg__scnprintf_hex
362 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
363 struct syscall_arg *arg)
365 return scnprintf(bf, size, "%d", arg->val);
368 #define SCA_INT syscall_arg__scnprintf_int
/*
 * Name tables for enum-like syscall arguments, consumed via SCA_STRARRAY.
 * Entry order mirrors the corresponding kernel UAPI constant values.
 */
370 static const char *bpf_cmd[] = {
371 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
372 "MAP_GET_NEXT_KEY", "PROG_LOAD",
374 static DEFINE_STRARRAY(bpf_cmd);
/* offset 1: EPOLL_CTL_ADD is 1, there is no op 0 -- see epoll_ctl(2). */
376 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
377 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
379 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
380 static DEFINE_STRARRAY(itimers);
382 static const char *keyctl_options[] = {
383 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
384 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
385 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
386 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
387 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
389 static DEFINE_STRARRAY(keyctl_options);
391 static const char *whences[] = { "SET", "CUR", "END",
399 static DEFINE_STRARRAY(whences);
401 static const char *fcntl_cmds[] = {
402 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
403 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
404 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
407 static DEFINE_STRARRAY(fcntl_cmds);
409 static const char *rlimit_resources[] = {
410 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
411 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
414 static DEFINE_STRARRAY(rlimit_resources);
416 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
417 static DEFINE_STRARRAY(sighow);
419 static const char *clockid[] = {
420 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
421 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
422 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
424 static DEFINE_STRARRAY(clockid);
426 static const char *socket_families[] = {
427 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
428 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
429 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
430 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
431 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
432 "ALG", "NFC", "VSOCK",
434 static DEFINE_STRARRAY(socket_families);
/*
 * access(2) mode beautifier: F_OK alone prints "F", otherwise R/W/X letters
 * are accumulated. (The P_MODE macro wrapper and its users are elided.)
 */
436 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
437 struct syscall_arg *arg)
442 if (mode == F_OK) /* 0 */
443 return scnprintf(bf, size, "F");
445 if (mode & n##_OK) { \
446 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
/* Any bits left over after the known ones are printed raw in hex. */
456 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
461 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
/* Forward declaration: defined after the vfs_getname plumbing below. */
463 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
464 struct syscall_arg *arg);
466 #define SCA_FILENAME syscall_arg__scnprintf_filename
/*
 * pipe2(2) flags beautifier: named O_* bits joined with '|', remaining
 * unknown bits appended in hex. (The P_FLAG macro wrapper is elided.)
 */
468 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
469 struct syscall_arg *arg)
471 int printed = 0, flags = arg->val;
474 if (flags & O_##n) { \
475 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
484 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
489 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
491 #if defined(__i386__) || defined(__x86_64__)
493 * FIXME: Make this available to all arches.
/* First terminal ioctl cmd; doubles as the strarray offset below. */
495 #define TCGETS 0x5401
/*
 * Terminal ioctl request names, indexed by (cmd - TCGETS); designated
 * initializers ([0x27], [0x50], [0x60]) skip over gaps in the cmd space.
 */
497 static const char *tioctls[] = {
498 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
499 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
500 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
501 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
502 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
503 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
504 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
505 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
506 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
507 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
508 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
509 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
510 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
511 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
512 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
515 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
516 #endif /* defined(__i386__) || defined(__x86_64__) */
/* Fallbacks for headers that predate getrandom(2) -- see getrandom(2). */
518 #ifndef GRND_NONBLOCK
519 #define GRND_NONBLOCK 0x0001
522 #define GRND_RANDOM 0x0002
/*
 * getrandom(2) flags beautifier: prints known GRND_* bits by name, clears
 * them, and dumps any residue in hex. (The P_FLAG wrapper is elided.)
 */
525 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
526 struct syscall_arg *arg)
528 int printed = 0, flags = arg->val;
531 if (flags & GRND_##n) { \
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
533 flags &= ~GRND_##n; \
541 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
546 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/*
 * Shorthand to wire arg index 'arg' to a strarray beautifier in the
 * syscall_fmts table entries below.
 */
548 #define STRARRAY(arg, name, array) \
549 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
550 .arg_parm = { [arg] = &strarray__##array, }
552 #include "trace/beauty/eventfd.c"
553 #include "trace/beauty/flock.c"
554 #include "trace/beauty/futex_op.c"
555 #include "trace/beauty/mmap.c"
556 #include "trace/beauty/mode_t.c"
557 #include "trace/beauty/msg_flags.c"
558 #include "trace/beauty/open_flags.c"
559 #include "trace/beauty/perf_event_open.c"
560 #include "trace/beauty/pid.c"
561 #include "trace/beauty/sched_policy.c"
562 #include "trace/beauty/seccomp.c"
563 #include "trace/beauty/signum.c"
564 #include "trace/beauty/socket_type.c"
565 #include "trace/beauty/waitid_options.c"
/*
 * Per-syscall formatting table. Each entry may supply: an alias for arch
 * naming differences, errmsg/errpid/hexret return-value treatment, and up
 * to 6 per-argument beautifier callbacks (indexed by argument position)
 * with optional parm payloads. MUST be kept sorted by .name: the lookup
 * below uses bsearch(). Several struct members are elided in this excerpt.
 */
567 static struct syscall_fmt {
570 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
577 { .name = "access", .errmsg = true,
578 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
579 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
580 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
581 { .name = "brk", .hexret = true,
582 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
583 { .name = "chdir", .errmsg = true, },
584 { .name = "chmod", .errmsg = true, },
585 { .name = "chroot", .errmsg = true, },
586 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
587 { .name = "clone", .errpid = true, },
588 { .name = "close", .errmsg = true,
589 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
590 { .name = "connect", .errmsg = true, },
591 { .name = "creat", .errmsg = true, },
592 { .name = "dup", .errmsg = true, },
593 { .name = "dup2", .errmsg = true, },
594 { .name = "dup3", .errmsg = true, },
595 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
596 { .name = "eventfd2", .errmsg = true,
597 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
598 { .name = "faccessat", .errmsg = true, },
599 { .name = "fadvise64", .errmsg = true, },
600 { .name = "fallocate", .errmsg = true, },
601 { .name = "fchdir", .errmsg = true, },
602 { .name = "fchmod", .errmsg = true, },
603 { .name = "fchmodat", .errmsg = true,
604 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
605 { .name = "fchown", .errmsg = true, },
606 { .name = "fchownat", .errmsg = true,
607 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
608 { .name = "fcntl", .errmsg = true,
609 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
610 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
611 { .name = "fdatasync", .errmsg = true, },
612 { .name = "flock", .errmsg = true,
613 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
614 { .name = "fsetxattr", .errmsg = true, },
615 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
616 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
617 { .name = "fstatfs", .errmsg = true, },
618 { .name = "fsync", .errmsg = true, },
619 { .name = "ftruncate", .errmsg = true, },
620 { .name = "futex", .errmsg = true,
621 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
622 { .name = "futimesat", .errmsg = true,
623 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
624 { .name = "getdents", .errmsg = true, },
625 { .name = "getdents64", .errmsg = true, },
626 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
627 { .name = "getpid", .errpid = true, },
628 { .name = "getpgid", .errpid = true, },
629 { .name = "getppid", .errpid = true, },
630 { .name = "getrandom", .errmsg = true,
631 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
632 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
633 { .name = "getxattr", .errmsg = true, },
634 { .name = "inotify_add_watch", .errmsg = true, },
/* ioctl: cmd names are only beautified on x86 (see tioctls above). */
635 { .name = "ioctl", .errmsg = true,
637 #if defined(__i386__) || defined(__x86_64__)
639 * FIXME: Make this available to all arches.
641 [1] = SCA_STRHEXARRAY, /* cmd */
642 [2] = SCA_HEX, /* arg */ },
643 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
645 [2] = SCA_HEX, /* arg */ }, },
647 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
648 { .name = "kill", .errmsg = true,
649 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
650 { .name = "lchown", .errmsg = true, },
651 { .name = "lgetxattr", .errmsg = true, },
652 { .name = "linkat", .errmsg = true,
653 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
654 { .name = "listxattr", .errmsg = true, },
655 { .name = "llistxattr", .errmsg = true, },
656 { .name = "lremovexattr", .errmsg = true, },
657 { .name = "lseek", .errmsg = true,
658 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
659 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
660 { .name = "lsetxattr", .errmsg = true, },
661 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
662 { .name = "lsxattr", .errmsg = true, },
663 { .name = "madvise", .errmsg = true,
664 .arg_scnprintf = { [0] = SCA_HEX, /* start */
665 [2] = SCA_MADV_BHV, /* behavior */ }, },
666 { .name = "mkdir", .errmsg = true, },
667 { .name = "mkdirat", .errmsg = true,
668 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
669 { .name = "mknod", .errmsg = true, },
670 { .name = "mknodat", .errmsg = true,
671 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
672 { .name = "mlock", .errmsg = true,
673 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
674 { .name = "mlockall", .errmsg = true,
675 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
676 { .name = "mmap", .hexret = true,
677 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
678 [2] = SCA_MMAP_PROT, /* prot */
679 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
680 { .name = "mprotect", .errmsg = true,
681 .arg_scnprintf = { [0] = SCA_HEX, /* start */
682 [2] = SCA_MMAP_PROT, /* prot */ }, },
683 { .name = "mq_unlink", .errmsg = true,
684 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
685 { .name = "mremap", .hexret = true,
686 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
687 [3] = SCA_MREMAP_FLAGS, /* flags */
688 [4] = SCA_HEX, /* new_addr */ }, },
689 { .name = "munlock", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
691 { .name = "munmap", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
693 { .name = "name_to_handle_at", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
695 { .name = "newfstatat", .errmsg = true,
696 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
697 { .name = "open", .errmsg = true,
698 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
699 { .name = "open_by_handle_at", .errmsg = true,
700 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
701 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
702 { .name = "openat", .errmsg = true,
703 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
704 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
705 { .name = "perf_event_open", .errmsg = true,
706 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
707 [3] = SCA_FD, /* group_fd */
708 [4] = SCA_PERF_FLAGS, /* flags */ }, },
709 { .name = "pipe2", .errmsg = true,
710 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
711 { .name = "poll", .errmsg = true, .timeout = true, },
712 { .name = "ppoll", .errmsg = true, .timeout = true, },
713 { .name = "pread", .errmsg = true, .alias = "pread64", },
714 { .name = "preadv", .errmsg = true, .alias = "pread", },
715 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
716 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
717 { .name = "pwritev", .errmsg = true, },
718 { .name = "read", .errmsg = true, },
719 { .name = "readlink", .errmsg = true, },
720 { .name = "readlinkat", .errmsg = true,
721 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
722 { .name = "readv", .errmsg = true, },
723 { .name = "recvfrom", .errmsg = true,
724 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
725 { .name = "recvmmsg", .errmsg = true,
726 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
727 { .name = "recvmsg", .errmsg = true,
728 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
729 { .name = "removexattr", .errmsg = true, },
730 { .name = "renameat", .errmsg = true,
731 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
732 { .name = "rmdir", .errmsg = true, },
733 { .name = "rt_sigaction", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
735 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
736 { .name = "rt_sigqueueinfo", .errmsg = true,
737 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
738 { .name = "rt_tgsigqueueinfo", .errmsg = true,
739 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
740 { .name = "sched_getattr", .errmsg = true, },
741 { .name = "sched_setattr", .errmsg = true, },
742 { .name = "sched_setscheduler", .errmsg = true,
743 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
744 { .name = "seccomp", .errmsg = true,
745 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
746 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
747 { .name = "select", .errmsg = true, .timeout = true, },
748 { .name = "sendmmsg", .errmsg = true,
749 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
750 { .name = "sendmsg", .errmsg = true,
751 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
752 { .name = "sendto", .errmsg = true,
753 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
754 { .name = "set_tid_address", .errpid = true, },
755 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
756 { .name = "setpgid", .errmsg = true, },
757 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
758 { .name = "setxattr", .errmsg = true, },
759 { .name = "shutdown", .errmsg = true, },
760 { .name = "socket", .errmsg = true,
761 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
762 [1] = SCA_SK_TYPE, /* type */ },
763 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
764 { .name = "socketpair", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
766 [1] = SCA_SK_TYPE, /* type */ },
767 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
768 { .name = "stat", .errmsg = true, .alias = "newstat", },
769 { .name = "statfs", .errmsg = true, },
770 { .name = "statx", .errmsg = true,
771 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
772 [2] = SCA_STATX_FLAGS, /* flags */
773 [3] = SCA_STATX_MASK, /* mask */ }, },
774 { .name = "swapoff", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
776 { .name = "swapon", .errmsg = true,
777 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
778 { .name = "symlinkat", .errmsg = true,
779 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
780 { .name = "tgkill", .errmsg = true,
781 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
782 { .name = "tkill", .errmsg = true,
783 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
784 { .name = "truncate", .errmsg = true, },
785 { .name = "uname", .errmsg = true, .alias = "newuname", },
786 { .name = "unlinkat", .errmsg = true,
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "utime", .errmsg = true, },
789 { .name = "utimensat", .errmsg = true,
790 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
791 { .name = "utimes", .errmsg = true, },
792 { .name = "vmsplice", .errmsg = true, },
793 { .name = "wait4", .errpid = true,
794 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
795 { .name = "waitid", .errpid = true,
796 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
797 { .name = "write", .errmsg = true, },
798 { .name = "writev", .errmsg = true, },
/* bsearch comparator: key is the syscall name string. */
801 static int syscall_fmt__cmp(const void *name, const void *fmtp)
803 const struct syscall_fmt *fmt = fmtp;
804 return strcmp(name, fmt->name);
/* Binary search in syscall_fmts[]; relies on the table being name-sorted. */
807 static struct syscall_fmt *syscall_fmt__find(const char *name)
809 const int nmemb = ARRAY_SIZE(syscall_fmts);
810 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
/*
 * Members of 'struct syscall': resolved tracepoint format, its argument
 * field list, the optional fmt entry and the bound per-arg printers.
 * (The struct opener and other members are elided from this excerpt.)
 */
814 struct event_format *tp_format;
816 struct format_field *args;
819 struct syscall_fmt *fmt;
820 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
825 * We need to have this 'calculated' boolean because in some cases we really
826 * don't know what is the duration of a syscall, for instance, when we start
827 * a session and some threads are waiting for a syscall to finish, say 'poll',
828 * in which case all we can do is to print "( ? ) for duration and for the
/*
 * Print "(<duration> ms): " with color by magnitude: red >= 1ms,
 * yellow >= 0.01ms, normal otherwise; "( ? )" when not calculated.
 */
831 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
833 double duration = (double)t / NSEC_PER_MSEC;
834 size_t printed = fprintf(fp, "(");
837 printed += fprintf(fp, " ? ");
838 else if (duration >= 1.0)
839 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
840 else if (duration >= 0.01)
841 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
843 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
844 return printed + fprintf(fp, "): ");
848 * filename.ptr: The filename char pointer that will be vfs_getname'd
849 * filename.entry_str_pos: Where to insert the string translated from
850 * filename.ptr by the vfs_getname tracepoint/kprobe.
/* Per-thread tracing state, hung off thread->priv (members partly elided). */
852 struct thread_trace {
855 unsigned long nr_events;
856 unsigned long pfmaj, pfmin; /* major/minor page fault counts */
861 short int entry_str_pos;
863 unsigned int namelen;
871 struct intlist *syscall_stats;
/* Allocate and init a thread_trace; paths.max == -1 means "no fd cached". */
874 static struct thread_trace *thread_trace__new(void)
876 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
879 ttrace->paths.max = -1;
881 ttrace->syscall_stats = intlist__new(NULL);
/*
 * Get (lazily creating) the thread's thread_trace; on allocation failure
 * a red warning is printed and NULL is returned (failure path elided).
 */
886 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
888 struct thread_trace *ttrace;
893 if (thread__priv(thread) == NULL)
894 thread__set_priv(thread, thread_trace__new())
896 if (thread__priv(thread) == NULL)
899 ttrace = thread__priv(thread);
904 color_fprintf(fp, PERF_COLOR_RED,
905 "WARNING: not enough memory, dropping samples!\n");
/* Page-fault event selection bits. */
909 #define TRACE_PFMAJ (1 << 0)
910 #define TRACE_PFMIN (1 << 1)
912 static const size_t trace__entry_str_size = 2048;
/*
 * Cache 'pathname' for 'fd' in the thread's paths table, growing the
 * table via realloc and zeroing the newly exposed slots so stale fds
 * read back as NULL. Returns 0 on success, -1 on allocation failure.
 */
914 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
916 struct thread_trace *ttrace = thread__priv(thread);
918 if (fd > ttrace->paths.max) {
919 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
924 if (ttrace->paths.max != -1) {
925 memset(npath + ttrace->paths.max + 1, 0,
926 (fd - ttrace->paths.max) * sizeof(char *));
928 memset(npath, 0, (fd + 1) * sizeof(char *)); /* first allocation: zero it all */
931 ttrace->paths.table = npath;
932 ttrace->paths.max = fd;
935 ttrace->paths.table[fd] = strdup(pathname);
937 return ttrace->paths.table[fd] != NULL ? 0 : -1;
/*
 * Resolve fd -> path by reading the /proc/<pid>/fd/<fd> symlink (or the
 * per-task variant for non-leader threads), then cache it.
 */
940 static int thread__read_fd_path(struct thread *thread, int fd)
942 char linkname[PATH_MAX], pathname[PATH_MAX];
946 if (thread->pid_ == thread->tid) {
947 scnprintf(linkname, sizeof(linkname),
948 "/proc/%d/fd/%d", thread->pid_, fd);
950 scnprintf(linkname, sizeof(linkname),
951 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
/* lstat first: bail if the link target would not fit in pathname */
954 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
957 ret = readlink(linkname, pathname, sizeof(pathname));
959 if (ret < 0 || ret > st.st_size)
/* readlink() does not NUL-terminate */
962 pathname[ret] = '\0';
963 return trace__set_fd_pathname(thread, fd, pathname);
/*
 * Return the cached path for 'fd', resolving it from /proc on a miss
 * (and counting the lookup in trace->stats). NULL paths are elided.
 */
966 static const char *thread__fd_path(struct thread *thread, int fd,
969 struct thread_trace *ttrace = thread__priv(thread);
977 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
980 ++trace->stats.proc_getname;
981 if (thread__read_fd_path(thread, fd))
985 return ttrace->paths.table[fd];
/* fd beautifier: "N" or "N</path/to/file>" when the path is known. */
988 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
989 struct syscall_arg *arg)
992 size_t printed = scnprintf(bf, size, "%d", fd);
993 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
996 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
/*
 * close(fd) beautifier: prints like SCA_FD, then drops the cached path
 * since the fd is going away.
 */
1001 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1002 struct syscall_arg *arg)
1005 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1006 struct thread_trace *ttrace = thread__priv(arg->thread);
1008 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1009 zfree(&ttrace->paths.table[fd]);
/*
 * Remember where in the entry string the filename pointed to by 'ptr'
 * should be spliced once the vfs_getname event delivers it.
 */
1014 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1017 struct thread_trace *ttrace = thread__priv(thread);
1019 ttrace->filename.ptr = ptr;
1020 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
/*
 * Filename beautifier: without vfs_getname support just print the raw
 * pointer; otherwise record the splice position for later.
 */
1023 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1024 struct syscall_arg *arg)
1026 unsigned long ptr = arg->val;
1028 if (!arg->trace->vfs_getname)
1029 return scnprintf(bf, size, "%#x", ptr);
1031 thread__set_filename_pos(arg->thread, bf, ptr);
/* true when 't' (ns) is below the --duration threshold (ms). */
1035 static bool trace__filter_duration(struct trace *trace, double t)
1037 return t < (trace->duration_filter * NSEC_PER_MSEC);
/* Print the timestamp in ms relative to the session's base_time. */
1040 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1042 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1044 return fprintf(fp, "%10.3f ", ts);
1048 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1049 * using ttrace->entry_time for a thread that receives a sys_exit without
1050 * first having received a sys_enter ("poll" issued before tracing session
1051 * starts, lost sys_enter exit due to ring buffer overflow).
1053 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1056 return __trace__fprintf_tstamp(trace, tstamp, fp);
1058 return fprintf(fp, " ? ");
/* Main-loop termination flags, set from the signal handler. */
1061 static bool done = false;
1062 static bool interrupted = false;
/* SIGINT marks 'interrupted' so the exit path can distinguish ^C. */
1064 static void sig_handler(int sig)
1067 interrupted = sig == SIGINT;
/*
 * Print the common line prefix: timestamp, duration, and -- when tracing
 * more than one thread -- the comm (if enabled) and tid.
 */
1070 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1071 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1073 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1074 printed += fprintf_duration(duration, duration_calculated, fp);
1076 if (trace->multiple_threads) {
1077 if (trace->show_comm)
1078 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1079 printed += fprintf(fp, "%d ", thread->tid);
/*
 * Side-band event dispatch: report LOST events loudly in red, forward
 * everything to the machine layer. (default case partly elided.)
 */
1085 static int trace__process_event(struct trace *trace, struct machine *machine,
1086 union perf_event *event, struct perf_sample *sample)
1090 switch (event->header.type) {
1091 case PERF_RECORD_LOST:
1092 color_fprintf(trace->output, PERF_COLOR_RED,
1093 "LOST %" PRIu64 " events!\n", event->lost.lost);
1094 ret = machine__process_lost_event(machine, event, sample);
1097 ret = machine__process_event(machine, event, sample);
/* perf_tool callback adapter: recover 'struct trace' from the tool member. */
1104 static int trace__tool_process(struct perf_tool *tool,
1105 union perf_event *event,
1106 struct perf_sample *sample,
1107 struct machine *machine)
1109 struct trace *trace = container_of(tool, struct trace, tool);
1110 return trace__process_event(trace, machine, event, sample);
/*
 * Kernel address resolver wrapper: warn exactly once when kptr_restrict
 * prevents resolving kernel symbols, then delegate to the machine resolver.
 */
1113 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1115 struct machine *machine = vmachine;
1117 if (machine->kptr_restrict_warned)
1120 if (symbol_conf.kptr_restrict) {
1121 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1122 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1123 "Kernel samples will not be resolved.\n");
1124 machine->kptr_restrict_warned = true;
1128 return machine__resolve_kernel_addr(vmachine, addrp, modp);
/*
 * Initialize symbol handling for live tracing: set up the symbol library,
 * create the host machine, register the kptr_restrict-aware resolver and
 * synthesize existing threads so samples can be attributed.
 */
1131 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1133 int err = symbol__init(NULL);
1138 trace->host = machine__new_host();
1139 if (trace->host == NULL)
1142 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1145 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1146 evlist->threads, trace__tool_process, false,
1147 trace->opts.proc_map_timeout);
/*
 * Choose a pretty-printer for each argument of a syscall, walking its
 * tracepoint format fields: explicit per-syscall formatters win, then
 * heuristics by field type/name (filenames, pointers, pid_t, umode_t,
 * and integer fields whose name ends in "fd").
 */
1154 static int syscall__set_arg_fmts(struct syscall *sc)
1156 struct format_field *field;
1159 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1160 if (sc->arg_scnprintf == NULL)
1164 sc->arg_parm = sc->fmt->arg_parm;
1166 for (field = sc->args; field; field = field->next) {
/* A hand-written formatter for this syscall/arg takes precedence. */
1167 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1168 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1169 else if (strcmp(field->type, "const char *") == 0 &&
1170 (strcmp(field->name, "filename") == 0 ||
1171 strcmp(field->name, "path") == 0 ||
1172 strcmp(field->name, "pathname") == 0))
1173 sc->arg_scnprintf[idx] = SCA_FILENAME;
1174 else if (field->flags & FIELD_IS_POINTER)
1175 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1176 else if (strcmp(field->type, "pid_t") == 0)
1177 sc->arg_scnprintf[idx] = SCA_PID;
1178 else if (strcmp(field->type, "umode_t") == 0)
1179 sc->arg_scnprintf[idx] = SCA_MODE_T;
/* Heuristic: int-ish fields named "*fd" are file descriptors. */
1180 else if ((strcmp(field->type, "int") == 0 ||
1181 strcmp(field->type, "unsigned int") == 0 ||
1182 strcmp(field->type, "long") == 0) &&
1183 (len = strlen(field->name)) >= 2 &&
1184 strcmp(field->name + len - 2, "fd") == 0) {
1186 * /sys/kernel/tracing/events/syscalls/sys_enter*
1187 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1192 sc->arg_scnprintf[idx] = SCA_FD;
/*
 * Lazily populate trace->syscalls.table[id]: grow the table if needed,
 * look up the syscall's name and format descriptor, read its tracepoint
 * format (trying the per-arch alias on failure) and set up the argument
 * pretty-printers. Returns 0 on success, negative on error.
 */
1200 static int trace__read_syscall_info(struct trace *trace, int id)
1204 const char *name = syscalltbl__name(trace->sctbl, id);
/* Grow the table to hold ids 0..id, zeroing the newly added slots. */
1209 if (id > trace->syscalls.max) {
1210 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1212 if (nsyscalls == NULL)
1215 if (trace->syscalls.max != -1) {
1216 memset(nsyscalls + trace->syscalls.max + 1, 0,
1217 (id - trace->syscalls.max) * sizeof(*sc));
1219 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1222 trace->syscalls.table = nsyscalls;
1223 trace->syscalls.max = id;
1226 sc = trace->syscalls.table + id;
1229 sc->fmt = syscall_fmt__find(sc->name);
1231 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1232 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
/* Some syscalls are traced under an alias name, e.g. arch variants. */
1234 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1235 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1236 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1239 if (IS_ERR(sc->tp_format))
1242 sc->args = sc->tp_format->format.fields;
1243 sc->nr_args = sc->tp_format->format.nr_fields;
1245 * We need to check and discard the first variable '__syscall_nr'
1246 * or 'nr', which holds the syscall number and is redundant here.
1247 * Note that older kernels do not have this field at all.
1249 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1250 sc->args = sc->args->next;
/* exit/exit_group never return, so there is no sys_exit to pair with. */
1254 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1256 return syscall__set_arg_fmts(sc);
/*
 * Translate the user-supplied syscall-name qualifier list (-e/--expr)
 * into syscall ids, reporting every invalid name before failing so the
 * user sees all mistakes at once.
 */
1259 static int trace__validate_ev_qualifier(struct trace *trace)
1262 struct str_node *pos;
1264 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1265 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1266 sizeof(trace->ev_qualifier_ids.entries[0]));
1268 if (trace->ev_qualifier_ids.entries == NULL) {
1269 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1277 strlist__for_each_entry(pos, trace->ev_qualifier) {
1278 const char *sc = pos->s;
1279 int id = syscalltbl__id(trace->sctbl, sc);
1283 fputs("Error:\tInvalid syscall ", trace->output);
1286 fputs(", ", trace->output);
1289 fputs(sc, trace->output);
1292 trace->ev_qualifier_ids.entries[i++] = id;
/* On any invalid name: hint at discovery commands and discard the ids. */
1296 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1297 "\nHint:\tand: 'man syscalls'\n", trace->output);
1298 zfree(&trace->ev_qualifier_ids.entries);
1299 trace->ev_qualifier_ids.nr = 0;
1306 * args is to be interpreted as a series of longs but we need to handle
1307 * 8-byte unaligned accesses. args points to raw_data within the event
1308 * and raw_data is guaranteed to be 8-byte unaligned because it is
1309 * preceded by raw_size which is a u32. So we need to copy args to a temp
1310 * variable to read it. Most notably this avoids extended load instructions
1311 * on unaligned addresses
/*
 * Format a syscall's arguments into 'bf': when the tracepoint format was
 * read, print "name: value" per field using the per-arg formatters set up
 * by syscall__set_arg_fmts(); otherwise fall back to raw "argN: value".
 * Returns the number of characters written.
 */
1314 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1315 unsigned char *args, struct trace *trace,
1316 struct thread *thread)
1322 if (sc->args != NULL) {
1323 struct format_field *field;
1325 struct syscall_arg arg = {
1332 for (field = sc->args; field;
1333 field = field->next, ++arg.idx, bit <<= 1) {
1337 /* special care for unaligned accesses */
1338 p = args + sizeof(unsigned long) * arg.idx;
1339 memcpy(&val, p, sizeof(val));
1342 * Suppress this argument if its value is zero and
1343 * we don't have a string associated in an
1347 !(sc->arg_scnprintf &&
1348 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1349 sc->arg_parm[arg.idx]))
1352 printed += scnprintf(bf + printed, size - printed,
1353 "%s%s: ", printed ? ", " : "", field->name);
1354 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1357 arg.parm = sc->arg_parm[arg.idx];
1358 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1359 size - printed, &arg);
1361 printed += scnprintf(bf + printed, size - printed,
1365 } else if (IS_ERR(sc->tp_format)) {
1367 * If we managed to read the tracepoint /format file, then we
1368 * may end up not having any args, like with gettid(), so only
1369 * print the raw args when we didn't manage to read it.
1374 /* special care for unaligned accesses */
1375 p = args + sizeof(unsigned long) * i;
1376 memcpy(&val, p, sizeof(val));
1377 printed += scnprintf(bf + printed, size - printed,
1379 printed ? ", " : "", i, val);
/* Per-evsel callback type stored in evsel->handler and invoked per sample. */
1387 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1388 union perf_event *event,
1389 struct perf_sample *sample);
/*
 * Return the struct syscall for 'id', reading its info on first use.
 * Returns NULL (after printing a diagnostic) for invalid ids or when the
 * syscall's tracepoint information could not be read.
 */
1391 static struct syscall *trace__syscall_info(struct trace *trace,
1392 struct perf_evsel *evsel, int id)
1398 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1399 * before that, leaving at a higher verbosity level till that is
1400 * explained. Reproduced with plain ftrace with:
1402 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1403 * grep "NR -1 " /t/trace_pipe
1405 * After generating some load on the machine.
1409 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1410 id, perf_evsel__name(evsel), ++n);
/* Lazily populate the table slot the first time this id is seen. */
1415 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1416 trace__read_syscall_info(trace, id))
1419 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1422 return &trace->syscalls.table[id];
1426 fprintf(trace->output, "Problems reading syscall %d", id);
1427 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1428 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1429 fputs(" information\n", trace->output);
/*
 * Accumulate per-thread, per-syscall duration statistics (for --summary):
 * find or create the stats node for this syscall id and feed it the time
 * elapsed since the matching sys_enter.
 */
1434 static void thread__update_stats(struct thread_trace *ttrace,
1435 int id, struct perf_sample *sample)
1437 struct int_node *inode;
1438 struct stats *stats;
1441 inode = intlist__findnew(ttrace->syscall_stats, id);
1445 stats = inode->priv;
1446 if (stats == NULL) {
1447 stats = malloc(sizeof(struct stats));
1451 inode->priv = stats;
/* Guard against missing entry timestamps and clock going backwards. */
1454 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1455 duration = sample->time - ttrace->entry_time;
1457 update_stats(stats, duration);
/*
 * When a new event arrives while the current thread still has a pending
 * (unfinished) syscall entry, flush that entry with a "...)" marker so
 * the output stays readable; its exit will later print as "continued".
 */
1460 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1462 struct thread_trace *ttrace;
1466 if (trace->current == NULL)
1469 ttrace = thread__priv(trace->current);
1471 if (!ttrace->entry_pending)
1474 duration = sample->time - ttrace->entry_time;
1476 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1477 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1478 ttrace->entry_pending = false;
/*
 * raw_syscalls:sys_enter handler: format "name(args" into the thread's
 * entry_str. It is printed immediately for filtered/summary modes or for
 * syscalls that never return; otherwise it stays pending until sys_exit
 * (or until interrupted by another thread's event).
 */
1483 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1484 union perf_event *event __maybe_unused,
1485 struct perf_sample *sample)
1490 struct thread *thread;
1491 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1492 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1493 struct thread_trace *ttrace;
1498 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1499 ttrace = thread__trace(thread, trace->output);
1503 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
/* Allocate the per-thread entry string buffer on first use. */
1505 if (ttrace->entry_str == NULL) {
1506 ttrace->entry_str = malloc(trace__entry_str_size);
1507 if (!ttrace->entry_str)
1511 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1512 trace__printf_interrupted_entry(trace, sample);
1514 ttrace->entry_time = sample->time;
1515 msg = ttrace->entry_str;
1516 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1518 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1519 args, trace, thread);
1522 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1523 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1524 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1527 ttrace->entry_pending = true;
1528 /* See trace__vfs_getname & trace__sys_exit */
1529 ttrace->filename.pending_open = false;
/* Track the most recent thread; keeps a reference across events. */
1532 if (trace->current != thread) {
1533 thread__put(trace->current);
1534 trace->current = thread__get(thread);
1538 thread__put(thread);
/*
 * Resolve the sample's callchain into 'cursor', bounded by --max-stack,
 * after resolving the sample's thread/map/symbol location.
 */
1542 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1543 struct perf_sample *sample,
1544 struct callchain_cursor *cursor)
1546 struct addr_location al;
1548 if (machine__resolve(trace->host, &al, sample) < 0 ||
1549 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
/*
 * Print the previously-resolved callchain (in callchain_cursor) to the
 * trace output, symbolized, with a fixed 38-column left pad.
 */
1555 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1557 /* TODO: user-configurable print_opts */
1558 const unsigned int print_opts = EVSEL__PRINT_SYM |
1560 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1562 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
/*
 * raw_syscalls:sys_exit handler: compute the syscall duration, update
 * per-thread stats, resolve the fd pathname for a successful open, apply
 * the duration filter, then print the completed line — either finishing a
 * pending entry or marking it "continued" — with per-format return-value
 * decoding (errno names, timeouts, hex returns, child pid comm).
 */
1565 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1566 union perf_event *event __maybe_unused,
1567 struct perf_sample *sample)
1571 bool duration_calculated = false;
1572 struct thread *thread;
1573 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1574 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1575 struct thread_trace *ttrace;
1580 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1581 ttrace = thread__trace(thread, trace->output);
1586 thread__update_stats(ttrace, id, sample);
1588 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
/* A successful open() with a getname pending: remember fd -> pathname. */
1590 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1591 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1592 ttrace->filename.pending_open = false;
1593 ++trace->stats.vfs_getname;
1596 if (ttrace->entry_time) {
1597 duration = sample->time - ttrace->entry_time;
1598 if (trace__filter_duration(trace, duration))
1600 duration_calculated = true;
1601 } else if (trace->duration_filter)
1604 if (sample->callchain) {
1605 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1606 if (callchain_ret == 0) {
/* Skip callchains shallower than --min-stack. */
1607 if (callchain_cursor.nr < trace->min_stack)
1613 if (trace->summary_only)
1616 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1618 if (ttrace->entry_pending) {
1619 fprintf(trace->output, "%-70s", ttrace->entry_str);
/* Entry was flushed earlier by trace__printf_interrupted_entry(). */
1621 fprintf(trace->output, " ... [");
1622 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1623 fprintf(trace->output, "]: %s()", sc->name);
1626 if (sc->fmt == NULL) {
1628 fprintf(trace->output, ") = %ld", ret);
1629 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1630 char bf[STRERR_BUFSIZE];
1631 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1632 *e = audit_errno_to_name(-ret);
1634 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1635 } else if (ret == 0 && sc->fmt->timeout)
1636 fprintf(trace->output, ") = 0 Timeout");
1637 else if (sc->fmt->hexret)
1638 fprintf(trace->output, ") = %#lx", ret);
/* errpid: the return value is a pid, show that thread's comm if known. */
1639 else if (sc->fmt->errpid) {
1640 struct thread *child = machine__find_thread(trace->host, ret, ret);
1642 if (child != NULL) {
1643 fprintf(trace->output, ") = %ld", ret);
1644 if (child->comm_set)
1645 fprintf(trace->output, " (%s)", thread__comm_str(child));
1651 fputc('\n', trace->output);
1653 if (callchain_ret > 0)
1654 trace__fprintf_callchain(trace, sample);
1655 else if (callchain_ret < 0)
1656 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1658 ttrace->entry_pending = false;
1661 thread__put(thread);
/*
 * probe:vfs_getname handler: capture the pathname resolved inside the
 * kernel for the in-flight open, store it for fd->path bookkeeping, and
 * splice it into the pending entry_str in place of the raw pointer that
 * was printed at sys_enter time.
 */
1665 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1666 union perf_event *event __maybe_unused,
1667 struct perf_sample *sample)
1669 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1670 struct thread_trace *ttrace;
1671 size_t filename_len, entry_str_len, to_move;
1672 ssize_t remaining_space;
1674 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1679 ttrace = thread__priv(thread);
1683 filename_len = strlen(filename);
1684 if (filename_len == 0)
/* Grow the per-thread filename buffer if this path is longer. */
1687 if (ttrace->filename.namelen < filename_len) {
1688 char *f = realloc(ttrace->filename.name, filename_len + 1);
1693 ttrace->filename.namelen = filename_len;
1694 ttrace->filename.name = f;
1697 strcpy(ttrace->filename.name, filename);
1698 ttrace->filename.pending_open = true;
/* No pointer-arg placeholder recorded in entry_str: nothing to splice. */
1700 if (!ttrace->filename.ptr)
1703 entry_str_len = strlen(ttrace->entry_str);
1704 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1705 if (remaining_space <= 0)
/* Keep the tail of the path if it doesn't fit — the basename matters. */
1708 if (filename_len > (size_t)remaining_space) {
1709 filename += filename_len - remaining_space;
1710 filename_len = remaining_space;
1713 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1714 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1715 memmove(pos + filename_len, pos, to_move);
1716 memcpy(pos, filename, filename_len);
1718 ttrace->filename.ptr = 0;
1719 ttrace->filename.entry_str_pos = 0;
1721 thread__put(thread);
/*
 * sched:sched_stat_runtime handler: accumulate on-CPU runtime (in ms)
 * per thread and globally, for the summary. On a thread lookup failure
 * it dumps the raw tracepoint payload instead.
 */
1726 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1727 union perf_event *event __maybe_unused,
1728 struct perf_sample *sample)
1730 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1731 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1732 struct thread *thread = machine__findnew_thread(trace->host,
1735 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1740 ttrace->runtime_ms += runtime_ms;
1741 trace->runtime_ms += runtime_ms;
1743 thread__put(thread);
/* Fallback path: print the raw event fields. */
1747 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1749 perf_evsel__strval(evsel, sample, "comm"),
1750 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1752 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * print_binary() callback for BPF output events: emit printable bytes
 * as-is and everything else as '.', ignoring layout callbacks.
 */
1756 static void bpf_output__printer(enum binary_printer_ops op,
1757 unsigned int val, void *extra)
1759 FILE *output = extra;
1760 unsigned char ch = (unsigned char)val;
1763 case BINARY_PRINT_CHAR_DATA:
1764 fprintf(output, "%c", isprint(ch) ? ch : '.');
/* All layout/padding ops are intentionally no-ops here. */
1766 case BINARY_PRINT_DATA_BEGIN:
1767 case BINARY_PRINT_LINE_BEGIN:
1768 case BINARY_PRINT_ADDR:
1769 case BINARY_PRINT_NUM_DATA:
1770 case BINARY_PRINT_NUM_PAD:
1771 case BINARY_PRINT_SEP:
1772 case BINARY_PRINT_CHAR_PAD:
1773 case BINARY_PRINT_LINE_END:
1774 case BINARY_PRINT_DATA_END:
/* Dump a BPF output event's raw payload as quasi-ASCII, 8 bytes per line. */
1780 static void bpf_output__fprintf(struct trace *trace,
1781 struct perf_sample *sample)
1783 print_binary(sample->raw_data, sample->raw_size, 8,
1784 bpf_output__printer, trace->output);
/*
 * Generic handler for --event tracepoints and BPF output events: flush
 * any pending syscall entry, print a timestamped "evsel_name:" line with
 * the event's decoded (or raw BPF) payload, then the callchain if any.
 */
1787 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1788 union perf_event *event __maybe_unused,
1789 struct perf_sample *sample)
1791 int callchain_ret = 0;
1793 if (sample->callchain) {
1794 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1795 if (callchain_ret == 0) {
1796 if (callchain_cursor.nr < trace->min_stack)
1802 trace__printf_interrupted_entry(trace, sample);
1803 trace__fprintf_tstamp(trace, sample->time, trace->output);
/* Placeholder duration column when mixed with syscall output. */
1805 if (trace->trace_syscalls)
1806 fprintf(trace->output, "( ): ");
1808 fprintf(trace->output, "%s:", evsel->name);
1810 if (perf_evsel__is_bpf_output(evsel)) {
1811 bpf_output__fprintf(trace, sample);
1812 } else if (evsel->tp_format) {
1813 event_format__fprintf(evsel->tp_format, sample->cpu,
1814 sample->raw_data, sample->raw_size,
1818 fprintf(trace->output, ")\n");
1820 if (callchain_ret > 0)
1821 trace__fprintf_callchain(trace, sample);
1822 else if (callchain_ret < 0)
1823 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
/*
 * Print a resolved address location for pagefault output: optionally the
 * DSO name, then symbol+offset when resolved, falling back to the raw
 * address (resolved al->addr, else the sample address).
 */
1828 static void print_location(FILE *f, struct perf_sample *sample,
1829 struct addr_location *al,
1830 bool print_dso, bool print_sym)
1833 if ((verbose > 0 || print_dso) && al->map)
1834 fprintf(f, "%s@", al->map->dso->long_name);
1836 if ((verbose > 0 || print_sym) && al->sym)
1837 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1838 al->addr - al->sym->start);
1840 fprintf(f, "0x%" PRIx64, al->addr);
1842 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page fault (major/minor) software-event handler: count the fault per
 * thread, then print "majfault [ip_location] => fault_addr_location"
 * with 'd'/'m' map type and privilege level, plus the callchain if any.
 */
1845 static int trace__pgfault(struct trace *trace,
1846 struct perf_evsel *evsel,
1847 union perf_event *event __maybe_unused,
1848 struct perf_sample *sample)
1850 struct thread *thread;
1851 struct addr_location al;
1852 char map_type = 'd';
1853 struct thread_trace *ttrace;
1855 int callchain_ret = 0;
1857 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1859 if (sample->callchain) {
1860 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1861 if (callchain_ret == 0) {
1862 if (callchain_cursor.nr < trace->min_stack)
1868 ttrace = thread__trace(thread, trace->output);
/* Distinguish major vs minor faults by the software event config. */
1872 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1877 if (trace->summary_only)
/* Resolve the faulting instruction pointer first... */
1880 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1883 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1885 fprintf(trace->output, "%sfault [",
1886 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1889 print_location(trace->output, sample, &al, false, true);
1891 fprintf(trace->output, "] => ");
/* ...then the faulted data address, trying variable then function maps. */
1893 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
1897 thread__find_addr_location(thread, sample->cpumode,
1898 MAP__FUNCTION, sample->addr, &al);
1906 print_location(trace->output, sample, &al, true, false);
1908 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1910 if (callchain_ret > 0)
1911 trace__fprintf_callchain(trace, sample);
1912 else if (callchain_ret < 0)
1913 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1917 thread__put(thread);
/*
 * Record the first usable sample timestamp as the base for relative
 * timestamps, unless --full-time was requested.
 */
1921 static void trace__set_base_time(struct trace *trace,
1922 struct perf_evsel *evsel,
1923 struct perf_sample *sample)
1926 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1927 * and don't use sample->time unconditionally, we may end up having
1928 * some other event in the future without PERF_SAMPLE_TIME for good
1929 * reason, i.e. we may not be interested in its timestamps, just in
1930 * it taking place, picking some piece of information when it
1931 * appears in our event stream (vfs_getname comes to mind).
1933 if (trace->base_time == 0 && !trace->full_time &&
1934 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1935 trace->base_time = sample->time;
/*
 * perf_tool sample callback used in replay (perf.data) mode: skip
 * filtered threads, establish the base timestamp, and dispatch to the
 * evsel's registered tracepoint handler.
 */
1938 static int trace__process_sample(struct perf_tool *tool,
1939 union perf_event *event,
1940 struct perf_sample *sample,
1941 struct perf_evsel *evsel,
1942 struct machine *machine __maybe_unused)
1944 struct trace *trace = container_of(tool, struct trace, tool);
1945 struct thread *thread;
1948 tracepoint_handler handler = evsel->handler;
1950 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1951 if (thread && thread__is_filtered(thread))
1954 trace__set_base_time(trace, evsel, sample);
1958 handler(trace, evsel, event, sample);
1961 thread__put(thread);
/*
 * 'perf trace record': build an argv for 'perf record' with the syscall
 * raw tracepoints (falling back to the older syscalls:* names) and the
 * requested pagefault events, append the user's arguments, and exec
 * cmd_record().
 */
1965 static int trace__record(struct trace *trace, int argc, const char **argv)
1967 unsigned int rec_argc, i, j;
1968 const char **rec_argv;
1969 const char * const record_args[] = {
1976 const char * const sc_args[] = { "-e", };
1977 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1978 const char * const majpf_args[] = { "-e", "major-faults" };
1979 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1980 const char * const minpf_args[] = { "-e", "minor-faults" };
1981 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1983 /* +1 is for the event string below */
1984 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1985 majpf_args_nr + minpf_args_nr + argc;
1986 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1988 if (rec_argv == NULL)
1992 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1993 rec_argv[j++] = record_args[i];
1995 if (trace->trace_syscalls) {
1996 for (i = 0; i < sc_args_nr; i++)
1997 rec_argv[j++] = sc_args[i];
1999 /* event string may be different for older kernels - e.g., RHEL6 */
2000 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2001 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2002 else if (is_valid_tracepoint("syscalls:sys_enter"))
2003 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2005 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2010 if (trace->trace_pgfaults & TRACE_PFMAJ)
2011 for (i = 0; i < majpf_args_nr; i++)
2012 rec_argv[j++] = majpf_args[i];
2014 if (trace->trace_pgfaults & TRACE_PFMIN)
2015 for (i = 0; i < minpf_args_nr; i++)
2016 rec_argv[j++] = minpf_args[i];
2018 for (i = 0; i < (unsigned int)argc; i++)
2019 rec_argv[j++] = argv[i];
2021 return cmd_record(j, rec_argv);
2024 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname tracepoint (set up beforehand via
 * 'perf probe') so open() pathnames can be captured; returns whether it
 * was available with the expected "pathname" field.
 */
2026 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2028 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2033 if (perf_evsel__field(evsel, "pathname") == NULL) {
2034 perf_evsel__delete(evsel);
2038 evsel->handler = trace__vfs_getname;
2039 perf_evlist__add(evlist, evsel);
/*
 * Create a software page-fault evsel (config selects major vs minor),
 * sampling every fault, with trace__pgfault as its handler.
 */
2043 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2045 struct perf_evsel *evsel;
2046 struct perf_event_attr attr = {
2047 .type = PERF_TYPE_SOFTWARE,
2051 attr.config = config;
/* Sample every single fault, not a frequency-based subset. */
2052 attr.sample_period = 1;
2054 event_attr_init(&attr);
2056 evsel = perf_evsel__new(&attr);
2058 evsel->handler = trace__pgfault;
/*
 * Live-mode event dispatcher: route non-sample records to
 * trace__process_event(), map samples back to their evsel by id, guard
 * against tracepoint samples with no payload, then call the evsel's
 * handler.
 */
2063 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2065 const u32 type = event->header.type;
2066 struct perf_evsel *evsel;
2068 if (type != PERF_RECORD_SAMPLE) {
2069 trace__process_event(trace, trace->host, event, sample);
2073 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2074 if (evsel == NULL) {
2075 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2079 trace__set_base_time(trace, evsel, sample);
2081 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2082 sample->raw_data == NULL) {
2083 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2084 perf_evsel__name(evsel), sample->tid,
2085 sample->cpu, sample->raw_size);
2087 tracepoint_handler handler = evsel->handler;
2088 handler(trace, evsel, event, sample);
/*
 * Create and add the raw_syscalls:sys_enter/sys_exit tracepoint evsels,
 * wire their field accessors ('args' pointer, 'ret' uint), and cache them
 * in trace->syscalls.events. On failure, unwinds whatever was created.
 */
2092 static int trace__add_syscall_newtp(struct trace *trace)
2095 struct perf_evlist *evlist = trace->evlist;
2096 struct perf_evsel *sys_enter, *sys_exit;
2098 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2099 if (sys_enter == NULL)
2102 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2103 goto out_delete_sys_enter;
2105 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2106 if (sys_exit == NULL)
2107 goto out_delete_sys_enter;
2109 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2110 goto out_delete_sys_exit;
2112 perf_evlist__add(evlist, sys_enter);
2113 perf_evlist__add(evlist, sys_exit);
2115 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2117 * We're interested only in the user space callchain
2118 * leading to the syscall, allow overriding that for
2119 * debugging reasons using --kernel_syscall_callchains
2121 sys_exit->attr.exclude_callchain_kernel = 1;
2124 trace->syscalls.events.sys_enter = sys_enter;
2125 trace->syscalls.events.sys_exit = sys_exit;
/* goto-based unwind: free in reverse order of creation. */
2131 out_delete_sys_exit:
2132 perf_evsel__delete_priv(sys_exit);
2133 out_delete_sys_enter:
2134 perf_evsel__delete_priv(sys_enter);
/*
 * Turn the validated syscall-id qualifier list into an "id in (...)"
 * (or "id not in (...)" when negated) tracepoint filter and append it
 * to both the sys_enter and sys_exit evsels.
 */
2138 static int trace__set_ev_qualifier_filter(struct trace *trace)
2141 struct perf_evsel *sys_exit;
2142 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2143 trace->ev_qualifier_ids.nr,
2144 trace->ev_qualifier_ids.entries);
2149 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2151 sys_exit = trace->syscalls.events.sys_exit;
2152 err = perf_evsel__append_tp_filter(sys_exit, filter);
/*
 * Live tracing main loop: assemble the evlist (syscall tracepoints,
 * vfs_getname, pagefaults, sched_stat_runtime), configure callchains and
 * filters, open/mmap the events, optionally fork and start the workload,
 * then consume ring-buffer events until done/drained. Errors funnel into
 * labeled handlers that print a specific diagnostic before cleanup.
 */
2163 static int trace__run(struct trace *trace, int argc, const char **argv)
2165 struct perf_evlist *evlist = trace->evlist;
2166 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2168 unsigned long before;
2169 const bool forks = argc > 0;
2170 bool draining = false;
2174 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2175 goto out_error_raw_syscalls;
2177 if (trace->trace_syscalls)
2178 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2180 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2181 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2182 if (pgfault_maj == NULL)
2184 perf_evlist__add(evlist, pgfault_maj);
2187 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2188 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2189 if (pgfault_min == NULL)
2191 perf_evlist__add(evlist, pgfault_min);
2195 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2196 trace__sched_stat_runtime)
2197 goto out_error_sched_stat_runtime;
2199 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2201 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2202 goto out_delete_evlist;
2205 err = trace__symbols_init(trace, evlist);
2207 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2208 goto out_delete_evlist;
2211 perf_evlist__config(evlist, &trace->opts, NULL);
2213 if (callchain_param.enabled) {
2214 bool use_identifier = false;
2216 if (trace->syscalls.events.sys_exit) {
2217 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2218 &trace->opts, &callchain_param);
2219 use_identifier = true;
2223 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2224 use_identifier = true;
2228 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2229 use_identifier = true;
2232 if (use_identifier) {
2234 * Now we have evsels with different sample_ids, use
2235 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2236 * from a fixed position in each ring buffer record.
2238 * As of this the changeset introducing this comment, this
2239 * isn't strictly needed, as the fields that can come before
2240 * PERF_SAMPLE_ID are all used, but we'll probably disable
2241 * some of those for things like copying the payload of
2242 * pointer syscall arguments, and for vfs_getname we don't
2243 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2244 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2246 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2247 perf_evlist__reset_sample_bit(evlist, ID);
2251 signal(SIGCHLD, sig_handler);
2252 signal(SIGINT, sig_handler);
2255 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2258 fprintf(trace->output, "Couldn't run the workload!\n");
2259 goto out_delete_evlist;
2263 err = perf_evlist__open(evlist);
2265 goto out_error_open;
2267 err = bpf__apply_obj_config();
2269 char errbuf[BUFSIZ];
2271 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2272 pr_err("ERROR: Apply config to BPF failed: %s\n",
2274 goto out_error_open;
2278 * Better not use !target__has_task() here because we need to cover the
2279 * case where no threads were specified in the command line, but a
2280 * workload was, and in that case we will fill in the thread_map when
2281 * we fork the workload in perf_evlist__prepare_workload.
2283 if (trace->filter_pids.nr > 0)
2284 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
/* System-wide: at least filter out perf trace's own pid. */
2285 else if (thread_map__pid(evlist->threads, 0) == -1)
2286 err = perf_evlist__set_filter_pid(evlist, getpid());
2291 if (trace->ev_qualifier_ids.nr > 0) {
2292 err = trace__set_ev_qualifier_filter(trace);
2296 pr_debug("event qualifier tracepoint filter: %s\n",
2297 trace->syscalls.events.sys_exit->filter);
2300 err = perf_evlist__apply_filters(evlist, &evsel);
2302 goto out_error_apply_filters;
2304 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2306 goto out_error_mmap;
/* With --delay, events are enabled only after the workload starts. */
2308 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2309 perf_evlist__enable(evlist);
2312 perf_evlist__start_workload(evlist);
2314 if (trace->opts.initial_delay) {
2315 usleep(trace->opts.initial_delay * 1000);
2316 perf_evlist__enable(evlist);
2319 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2320 evlist->threads->nr > 1 ||
2321 perf_evlist__first(evlist)->attr.inherit;
/* Main consume loop: drain every mmap'ed ring buffer. */
2323 before = trace->nr_events;
2325 for (i = 0; i < evlist->nr_mmaps; i++) {
2326 union perf_event *event;
2328 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2329 struct perf_sample sample;
2333 err = perf_evlist__parse_sample(evlist, event, &sample);
2335 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2339 trace__handle_event(trace, event, &sample);
2341 perf_evlist__mmap_consume(evlist, i);
/* On stop request, disable events but keep draining what's buffered. */
2346 if (done && !draining) {
2347 perf_evlist__disable(evlist);
2353 if (trace->nr_events == before) {
2354 int timeout = done ? 100 : -1;
2356 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2357 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2367 thread__zput(trace->current);
2369 perf_evlist__disable(evlist);
2373 trace__fprintf_thread_summary(trace, trace->output);
2375 if (trace->show_tool_stats) {
2376 fprintf(trace->output, "Stats:\n "
2377 " vfs_getname : %" PRIu64 "\n"
2378 " proc_getname: %" PRIu64 "\n",
2379 trace->stats.vfs_getname,
2380 trace->stats.proc_getname);
2385 perf_evlist__delete(evlist);
2386 trace->evlist = NULL;
2387 trace->live = false;
/* Labeled error handlers: print a specific message, then clean up. */
2390 char errbuf[BUFSIZ];
2392 out_error_sched_stat_runtime:
2393 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2396 out_error_raw_syscalls:
2397 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2401 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2405 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2408 fprintf(trace->output, "%s\n", errbuf);
2409 goto out_delete_evlist;
2411 out_error_apply_filters:
2412 fprintf(trace->output,
2413 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2414 evsel->filter, perf_evsel__name(evsel), errno,
2415 str_error_r(errno, errbuf, sizeof(errbuf)));
2416 goto out_delete_evlist;
2419 fprintf(trace->output, "Not enough memory to run!\n");
2420 goto out_delete_evlist;
2423 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2424 goto out_delete_evlist;
/*
 * Replay mode: instead of tracing live, read a previously recorded
 * perf.data file and pretty-print its events through the same output
 * machinery, optionally ending with the per-thread summary.
 */
2427 static int trace__replay(struct trace *trace)
2429 const struct perf_evsel_str_handler handlers[] = {
2430 { "probe:vfs_getname", trace__vfs_getname, },
2432 struct perf_data_file file = {
2434 .mode = PERF_DATA_MODE_READ,
2435 .force = trace->force,
2437 struct perf_session *session;
2438 struct perf_evsel *evsel;
	/* Wire up the perf_tool callbacks used while processing the file. */
2441 trace->tool.sample = trace__process_sample;
2442 trace->tool.mmap = perf_event__process_mmap;
2443 trace->tool.mmap2 = perf_event__process_mmap2;
2444 trace->tool.comm = perf_event__process_comm;
2445 trace->tool.exit = perf_event__process_exit;
2446 trace->tool.fork = perf_event__process_fork;
2447 trace->tool.attr = perf_event__process_attr;
2448 trace->tool.tracing_data = perf_event__process_tracing_data;
2449 trace->tool.build_id = perf_event__process_build_id;
2450 trace->tool.namespaces = perf_event__process_namespaces;
	/* Events in the file may be out of order; let the session resort them. */
2452 trace->tool.ordered_events = true;
2453 trace->tool.ordering_requires_timestamps = true;
2455 /* add tid to output */
2456 trace->multiple_threads = true;
2458 session = perf_session__new(&file, false, &trace->tool);
2459 if (session == NULL)
	/*
	 * Restrict symbol resolution to the recorded pid/tid targets, if any.
	 * NOTE(review): strdup() results are not checked here — presumably a
	 * NULL just means "no list"; confirm symbol_conf tolerates that.
	 */
2462 if (trace->opts.target.pid)
2463 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2465 if (trace->opts.target.tid)
2466 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2468 if (symbol__init(&session->header.env) < 0)
2471 trace->host = &session->machines.host;
2473 err = perf_session__set_tracepoints_handlers(session, handlers);
	/* Hook the syscall-entry tracepoint recorded in the file. */
2477 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2478 "raw_syscalls:sys_enter");
2479 /* older kernels have syscalls tp versus raw_syscalls */
2481 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2482 "syscalls:sys_enter");
2485 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2486 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2487 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
	/* Same for the syscall-exit tracepoint, with the same old-kernel fallback. */
2491 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2492 "raw_syscalls:sys_exit");
2494 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2495 "syscalls:sys_exit");
2497 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2498 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2499 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
	/* Route any recorded page-fault software events to the pgfault printer. */
2503 evlist__for_each_entry(session->evlist, evsel) {
2504 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2505 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2506 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2507 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2508 evsel->handler = trace__pgfault;
2513 err = perf_session__process_events(session);
2515 pr_err("Failed to process events, error %d", err);
2517 else if (trace->summary)
2518 trace__fprintf_thread_summary(trace, trace->output);
2521 perf_session__delete(session);
/*
 * Print the banner above the per-thread summary table.
 * Returns the number of characters written.
 */
2526 static size_t trace__fprintf_threads_header(FILE *fp)
2530 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Re-sort a thread's per-syscall stats (kept in an intlist keyed by syscall
 * id, with a struct stats hanging off ->priv) into an rb-tree ordered by
 * total time spent (msecs) so the summary can print the costliest syscalls
 * first.  See rb_resort.h for the DEFINE_RESORT_RB machinery.
 */
2535 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2536 struct stats *stats;
2541 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2542 struct stats *stats = source->priv;
2544 entry->syscall = source->i;
2545 entry->stats = stats;
2546 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0; /* calls * avg duration = total msecs */
/*
 * Print one thread's per-syscall statistics table (calls, total, min, avg,
 * max, stddev), sorted by total time descending via the syscall_stats
 * resort rb-tree above.  Returns the number of characters written.
 */
2549 static size_t thread__dump_stats(struct thread_trace *ttrace,
2550 struct trace *trace, FILE *fp)
2555 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2557 if (syscall_stats == NULL)
2560 printed += fprintf(fp, "\n");
2562 printed += fprintf(fp, "   syscall            calls    total       min       avg       max      stddev\n");
2563 printed += fprintf(fp, "                               (msec)    (msec)    (msec)    (msec)        (%%)\n");
2564 printed += fprintf(fp, "   --------------- -------- --------- --------- --------- ---------     ------\n");
2566 resort_rb__for_each_entry(nd, syscall_stats) {
2567 struct stats *stats = syscall_stats_entry->stats;
	/* Raw stats are in nanoseconds; convert to msec for display. */
2569 double min = (double)(stats->min) / NSEC_PER_MSEC;
2570 double max = (double)(stats->max) / NSEC_PER_MSEC;
2571 double avg = avg_stats(stats);
2573 u64 n = (u64) stats->n;
	/* Relative stddev in percent; guard the avg == 0 case. */
2575 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2576 avg /= NSEC_PER_MSEC;
	/* entry->syscall is the syscall id, used to look up its name. */
2578 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2579 printed += fprintf(fp, "   %-15s", sc->name);
2580 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2581 n, syscall_stats_entry->msecs, min, avg);
2582 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2586 resort_rb__delete(syscall_stats);
2587 printed += fprintf(fp, "\n\n");
/*
 * Print the one-line summary for a single thread (comm, tid, event count,
 * share of all events, page-fault counts, runtime) followed by its
 * per-syscall stats table.  Returns the number of characters written.
 */
2592 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2595 struct thread_trace *ttrace = thread__priv(thread);
	/* This thread's share of all events seen, in percent. */
2601 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2603 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2604 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2605 printed += fprintf(fp, "%.1f%%", ratio);
2607 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2609 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2611 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2612 else if (fputc('\n', fp) != EOF)
2615 printed += thread__dump_stats(ttrace, trace, fp);
/* Event count for a thread; NULL ttrace (no trace data yet) counts as zero. */
2620 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2622 return ttrace ? ttrace->nr_events : 0;
/*
 * Re-sort the machine's threads rb-tree by per-thread event count so the
 * summary lists the busiest threads last (comparison is '<'); see
 * rb_resort.h for the DEFINE_RESORT_RB machinery.
 */
2625 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2626 struct thread *thread;
2629 entry->thread = rb_entry(nd, struct thread, rb_node);
/*
 * Print the end-of-run summary: header, then one section per traced
 * thread, ordered by event count.  Returns the number of characters
 * written.
 */
2632 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2634 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2635 size_t printed = trace__fprintf_threads_header(fp);
2638 if (threads == NULL) {
2639 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2643 resort_rb__for_each_entry(nd, threads)
2644 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2646 resort_rb__delete(threads);
/*
 * --duration option callback: parse the threshold (in ms) below which
 * events are not printed.
 * NOTE(review): atof() silently yields 0.0 on malformed input; strtod()
 * with endptr checking would reject bad values — consider for a follow-up.
 */
2651 static int trace__set_duration(const struct option *opt, const char *str,
2652 int unset __maybe_unused)
2654 struct trace *trace = opt->value;
2656 trace->duration_filter = atof(str);
/*
 * --filter-pids option callback: parse a CSV list of pids to be filtered
 * out by the kernel.  Slot 0 is reserved for our own pid so perf trace
 * never traces itself; the user's pids follow.
 */
2660 static int trace__set_filter_pids(const struct option *opt, const char *str,
2661 int unset __maybe_unused)
2665 struct trace *trace = opt->value;
2667 * FIXME: introduce a intarray class, plain parse csv and create a
2668 * { int nr, int entries[] } struct...
2670 struct intlist *list = intlist__new(str);
	/* +1 for our own pid in entries[0]. */
2675 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2676 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2678 if (trace->filter_pids.entries == NULL)
2681 trace->filter_pids.entries[0] = getpid();
2683 for (i = 1; i < trace->filter_pids.nr; ++i)
2684 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
	/* The intlist was only needed for parsing; the array keeps the pids. */
2686 intlist__delete(list);
/*
 * Open the -o/--output file for writing.  A pre-existing non-empty file
 * is preserved by renaming it to "<filename>.old" first.  Returns 0 on
 * success or -errno from the failed fopen().
 */
2692 static int trace__open_output(struct trace *trace, const char *filename)
2696 if (!stat(filename, &st) && st.st_size) {
2697 char oldname[PATH_MAX];
2699 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2701 rename(filename, oldname);
2704 trace->output = fopen(filename, "w");
2706 return trace->output == NULL ? -errno : 0;
/*
 * -F/--pf option callback: translate "all"/"maj"/"min" into the
 * TRACE_PFMAJ/TRACE_PFMIN bits, OR-ing into the existing mask so the
 * option can be given more than once.
 */
2709 static int parse_pagefaults(const struct option *opt, const char *str,
2710 int unset __maybe_unused)
2712 int *trace_pgfaults = opt->value;
2714 if (strcmp(str, "all") == 0)
2715 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2716 else if (strcmp(str, "maj") == 0)
2717 *trace_pgfaults |= TRACE_PFMAJ;
2718 else if (strcmp(str, "min") == 0)
2719 *trace_pgfaults |= TRACE_PFMIN;
/* Install the same sample handler on every evsel in the list. */
2726 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2728 struct perf_evsel *evsel;
2730 evlist__for_each_entry(evlist, evsel)
2731 evsel->handler = handler;
2735  * XXX: Hackish: split the combined -e/--event argument into syscalls
2736  * (raw_syscalls:sys_{enter,exit}) and other events (tracepoints, HW, SW, etc.)
2737  * so the existing facilities (trace->ev_qualifier + parse_options()) can be
2737  * used unchanged.
2739  * It'd be better to introduce a parse_options() variant that would return a
2740  * list with the terms it didn't match to an event...
/*
 * -e/--event option callback: walk the comma-separated list and split each
 * term into one of two sub-lists — lists[1] for syscall names / strace
 * group files, lists[0] for everything else — then feed lists[1] to the
 * ev_qualifier strlist and lists[0] to the stock parse_events_option().
 */
2742 static int trace__parse_events_option(const struct option *opt, const char *str,
2743 int unset __maybe_unused)
2745 struct trace *trace = (struct trace *)opt->value;
2746 const char *s = str;
2747 char *sep = NULL, *lists[2] = { NULL, NULL, };
2748 int len = strlen(str), err = -1, list;
2749 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2750 char group_name[PATH_MAX];
2752 if (strace_groups_dir == NULL)
	/* A leading '!' (stripped before this point, presumably) negates the qualifier. */
2757 trace->not_ev_qualifier = true;
	/* Tokenize on ',' in place; s walks the terms. */
2761 if ((sep = strchr(s, ',')) != NULL)
	/* A known syscall name goes to the qualifier list... */
2765 if (syscalltbl__id(trace->sctbl, s) >= 0) {
	/* ...as does a readable strace group file under strace_groups_dir. */
2768 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2769 if (access(group_name, R_OK) == 0)
	/* Append to an already-started list... */
2774 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
	/* ...or start it; len (full input length) bounds any sub-list. */
2776 lists[list] = malloc(len);
2777 if (lists[list] == NULL)
2779 strcpy(lists[list], s);
2789 if (lists[1] != NULL) {
2790 struct strlist_config slist_config = {
2791 .dirname = strace_groups_dir,
2794 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2795 if (trace->ev_qualifier == NULL) {
2796 fputs("Not enough memory to parse event qualifier", trace->output);
2800 if (trace__validate_ev_qualifier(trace))
	/* Hand the non-syscall terms to the regular -e event parser. */
2807 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2808 "event selector. use 'perf list' to list available events",
2809 parse_events_option);
2810 err = parse_events_option(&o, lists[0], 0);
2819 int cmd_trace(int argc, const char **argv)
2821 const char *trace_usage[] = {
2822 "perf trace [<options>] [<command>]",
2823 "perf trace [<options>] -- <command> [<options>]",
2824 "perf trace record [<options>] [<command>]",
2825 "perf trace record [<options>] -- <command> [<options>]",
2828 struct trace trace = {
2837 .user_freq = UINT_MAX,
2838 .user_interval = ULLONG_MAX,
2839 .no_buffering = true,
2840 .mmap_pages = UINT_MAX,
2841 .proc_map_timeout = 500,
2845 .trace_syscalls = true,
2846 .kernel_syscallchains = false,
2847 .max_stack = UINT_MAX,
2849 const char *output_name = NULL;
2850 const struct option trace_options[] = {
2851 OPT_CALLBACK('e', "event", &trace, "event",
2852 "event/syscall selector. use 'perf list' to list available events",
2853 trace__parse_events_option),
2854 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2855 "show the thread COMM next to its id"),
2856 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2857 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2858 trace__parse_events_option),
2859 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2860 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2861 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2862 "trace events on existing process id"),
2863 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2864 "trace events on existing thread id"),
2865 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2866 "pids to filter (by the kernel)", trace__set_filter_pids),
2867 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2868 "system-wide collection from all CPUs"),
2869 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2870 "list of cpus to monitor"),
2871 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2872 "child tasks do not inherit counters"),
2873 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2874 "number of mmap data pages",
2875 perf_evlist__parse_mmap_pages),
2876 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2878 OPT_CALLBACK(0, "duration", &trace, "float",
2879 "show only events with duration > N.M ms",
2880 trace__set_duration),
2881 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2882 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2883 OPT_BOOLEAN('T', "time", &trace.full_time,
2884 "Show full timestamp, not time relative to first start"),
2885 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2886 "Show only syscall summary with statistics"),
2887 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2888 "Show all syscalls and summary with statistics"),
2889 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2890 "Trace pagefaults", parse_pagefaults, "maj"),
2891 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2892 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2893 OPT_CALLBACK(0, "call-graph", &trace.opts,
2894 "record_mode[,record_size]", record_callchain_help,
2895 &record_parse_callchain_opt),
2896 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2897 "Show the kernel callchains on the syscall exit path"),
2898 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2899 "Set the minimum stack depth when parsing the callchain, "
2900 "anything below the specified depth will be ignored."),
2901 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2902 "Set the maximum stack depth when parsing the callchain, "
2903 "anything beyond the specified depth will be ignored. "
2904 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2905 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2906 "per thread proc mmap processing timeout in ms"),
2907 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2908 "ms to wait before starting measurement after program "
2912 bool __maybe_unused max_stack_user_set = true;
2913 bool mmap_pages_user_set = true;
2914 const char * const trace_subcommands[] = { "record", NULL };
2918 signal(SIGSEGV, sighandler_dump_stack);
2919 signal(SIGFPE, sighandler_dump_stack);
2921 trace.evlist = perf_evlist__new();
2922 trace.sctbl = syscalltbl__new();
2924 if (trace.evlist == NULL || trace.sctbl == NULL) {
2925 pr_err("Not enough memory to run!\n");
2930 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2931 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2933 err = bpf__setup_stdout(trace.evlist);
2935 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2936 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2942 if (trace.trace_pgfaults) {
2943 trace.opts.sample_address = true;
2944 trace.opts.sample_time = true;
2947 if (trace.opts.mmap_pages == UINT_MAX)
2948 mmap_pages_user_set = false;
2950 if (trace.max_stack == UINT_MAX) {
2951 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2952 max_stack_user_set = false;
2955 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2956 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2957 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2960 if (callchain_param.enabled) {
2961 if (!mmap_pages_user_set && geteuid() == 0)
2962 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2964 symbol_conf.use_callchain = true;
2967 if (trace.evlist->nr_entries > 0)
2968 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2970 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2971 return trace__record(&trace, argc-1, &argv[1]);
2973 /* summary_only implies summary option, but don't overwrite summary if set */
2974 if (trace.summary_only)
2975 trace.summary = trace.summary_only;
2977 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2978 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2979 pr_err("Please specify something to trace.\n");
2983 if (!trace.trace_syscalls && trace.ev_qualifier) {
2984 pr_err("The -e option can't be used with --no-syscalls.\n");
2988 if (output_name != NULL) {
2989 err = trace__open_output(&trace, output_name);
2991 perror("failed to create output file");
2996 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2998 err = target__validate(&trace.opts.target);
3000 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3001 fprintf(trace.output, "%s", bf);
3005 err = target__parse_uid(&trace.opts.target);
3007 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3008 fprintf(trace.output, "%s", bf);
3012 if (!argc && target__none(&trace.opts.target))
3013 trace.opts.target.system_wide = true;
3016 err = trace__replay(&trace);
3018 err = trace__run(&trace, argc, argv);
3021 if (output_name != NULL)
3022 fclose(trace.output);