4 * Builtin 'trace' command:
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
16 * Released under the GPL v2. (and only v2, not any later version)
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace/beauty/beauty.h"
35 #include "trace-event.h"
36 #include "util/parse-events.h"
37 #include "util/bpf-loader.h"
38 #include "callchain.h"
39 #include "syscalltbl.h"
40 #include "rb_resort.h"
43 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
46 #include <linux/err.h>
47 #include <linux/filter.h>
48 #include <linux/audit.h>
49 #include <linux/kernel.h>
50 #include <linux/random.h>
51 #include <linux/stringify.h>
52 #include <linux/time64.h>
55 # define O_CLOEXEC 02000000
59 struct perf_tool tool;
60 struct syscalltbl *sctbl;
63 struct syscall *table;
65 struct perf_evsel *sys_enter,
69 struct record_opts opts;
70 struct perf_evlist *evlist;
72 struct thread *current;
75 unsigned long nr_events;
76 struct strlist *ev_qualifier;
85 double duration_filter;
91 unsigned int max_stack;
92 unsigned int min_stack;
93 bool not_ev_qualifier;
97 bool multiple_threads;
101 bool show_tool_stats;
103 bool kernel_syscallchains;
113 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
114 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
118 #define TP_UINT_FIELD(bits) \
119 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
122 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
131 #define TP_UINT_FIELD__SWAPPED(bits) \
132 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return bswap_##bits(value);\
139 TP_UINT_FIELD__SWAPPED(16);
140 TP_UINT_FIELD__SWAPPED(32);
141 TP_UINT_FIELD__SWAPPED(64);
143 static int tp_field__init_uint(struct tp_field *field,
144 struct format_field *format_field,
147 field->offset = format_field->offset;
149 switch (format_field->size) {
151 field->integer = tp_field__u8;
154 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
157 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
160 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
169 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
171 return sample->raw_data + field->offset;
174 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
176 field->offset = format_field->offset;
177 field->pointer = tp_field__ptr;
184 struct tp_field args, ret;
188 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
189 struct tp_field *field,
192 struct format_field *format_field = perf_evsel__field(evsel, name);
194 if (format_field == NULL)
197 return tp_field__init_uint(field, format_field, evsel->needs_swap);
200 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
201 ({ struct syscall_tp *sc = evsel->priv;\
202 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
204 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
205 struct tp_field *field,
208 struct format_field *format_field = perf_evsel__field(evsel, name);
210 if (format_field == NULL)
213 return tp_field__init_ptr(field, format_field);
216 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
217 ({ struct syscall_tp *sc = evsel->priv;\
218 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
220 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
223 perf_evsel__delete(evsel);
226 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
228 evsel->priv = malloc(sizeof(struct syscall_tp));
229 if (evsel->priv != NULL) {
230 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
233 evsel->handler = handler;
244 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
246 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
248 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
250 evsel = perf_evsel__newtp("syscalls", direction);
255 if (perf_evsel__init_syscall_tp(evsel, handler))
261 perf_evsel__delete_priv(evsel);
265 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
266 ({ struct syscall_tp *fields = evsel->priv; \
267 fields->name.integer(&fields->name, sample); })
269 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
270 ({ struct syscall_tp *fields = evsel->priv; \
271 fields->name.pointer(&fields->name, sample); })
276 const char **entries;
/* Define a named string table, optionally with an offset for its first index. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
290 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
292 struct syscall_arg *arg)
294 struct strarray *sa = arg->parm;
295 int idx = arg->val - sa->offset;
297 if (idx < 0 || idx >= sa->nr_entries)
298 return scnprintf(bf, size, intfmt, arg->val);
300 return scnprintf(bf, size, "%s", sa->entries[idx]);
303 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
304 struct syscall_arg *arg)
306 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
309 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 */
/* Like SCA_STRARRAY but with a hex fallback, used for ioctl cmd numbers. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
325 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
326 struct syscall_arg *arg);
328 #define SCA_FD syscall_arg__scnprintf_fd
331 #define AT_FDCWD -100
334 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
335 struct syscall_arg *arg)
340 return scnprintf(bf, size, "CWD");
342 return syscall_arg__scnprintf_fd(bf, size, arg);
345 #define SCA_FDAT syscall_arg__scnprintf_fd_at
347 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
348 struct syscall_arg *arg);
350 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
352 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
353 struct syscall_arg *arg)
355 return scnprintf(bf, size, "%#lx", arg->val);
358 #define SCA_HEX syscall_arg__scnprintf_hex
360 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
361 struct syscall_arg *arg)
363 return scnprintf(bf, size, "%d", arg->val);
366 #define SCA_INT syscall_arg__scnprintf_int
368 static const char *bpf_cmd[] = {
369 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
370 "MAP_GET_NEXT_KEY", "PROG_LOAD",
372 static DEFINE_STRARRAY(bpf_cmd);
374 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
375 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
377 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
378 static DEFINE_STRARRAY(itimers);
380 static const char *keyctl_options[] = {
381 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
382 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
383 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
384 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
385 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
387 static DEFINE_STRARRAY(keyctl_options);
389 static const char *whences[] = { "SET", "CUR", "END",
397 static DEFINE_STRARRAY(whences);
399 static const char *fcntl_cmds[] = {
400 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
401 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
402 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
405 static DEFINE_STRARRAY(fcntl_cmds);
407 static const char *rlimit_resources[] = {
408 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
409 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
412 static DEFINE_STRARRAY(rlimit_resources);
414 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
415 static DEFINE_STRARRAY(sighow);
417 static const char *clockid[] = {
418 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
419 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
420 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
422 static DEFINE_STRARRAY(clockid);
424 static const char *socket_families[] = {
425 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
426 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
427 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
428 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
429 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
430 "ALG", "NFC", "VSOCK",
432 static DEFINE_STRARRAY(socket_families);
434 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
435 struct syscall_arg *arg)
440 if (mode == F_OK) /* 0 */
441 return scnprintf(bf, size, "F");
443 if (mode & n##_OK) { \
444 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
454 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
459 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
461 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
462 struct syscall_arg *arg);
464 #define SCA_FILENAME syscall_arg__scnprintf_filename
466 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
467 struct syscall_arg *arg)
469 int printed = 0, flags = arg->val;
472 if (flags & O_##n) { \
473 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
482 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
487 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
489 #if defined(__i386__) || defined(__x86_64__)
491 * FIXME: Make this available to all arches.
493 #define TCGETS 0x5401
495 static const char *tioctls[] = {
496 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
497 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
498 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
499 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
500 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
501 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
502 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
503 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
504 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
505 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
506 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
507 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
508 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
509 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
510 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
513 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
514 #endif /* defined(__i386__) || defined(__x86_64__) */
516 #ifndef GRND_NONBLOCK
517 #define GRND_NONBLOCK 0x0001
520 #define GRND_RANDOM 0x0002
523 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
524 struct syscall_arg *arg)
526 int printed = 0, flags = arg->val;
529 if (flags & GRND_##n) { \
530 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
531 flags &= ~GRND_##n; \
539 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
544 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
/* Wire both the scnprintf method and its strarray parm into arg slot 'arg'. */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
550 #include "trace/beauty/eventfd.c"
551 #include "trace/beauty/flock.c"
552 #include "trace/beauty/futex_op.c"
553 #include "trace/beauty/mmap.c"
554 #include "trace/beauty/mode_t.c"
555 #include "trace/beauty/msg_flags.c"
556 #include "trace/beauty/open_flags.c"
557 #include "trace/beauty/perf_event_open.c"
558 #include "trace/beauty/pid.c"
559 #include "trace/beauty/sched_policy.c"
560 #include "trace/beauty/seccomp.c"
561 #include "trace/beauty/signum.c"
562 #include "trace/beauty/socket_type.c"
563 #include "trace/beauty/waitid_options.c"
565 static struct syscall_fmt {
568 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
575 { .name = "access", .errmsg = true,
576 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
577 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
578 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
579 { .name = "brk", .hexret = true,
580 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
581 { .name = "chdir", .errmsg = true, },
582 { .name = "chmod", .errmsg = true, },
583 { .name = "chroot", .errmsg = true, },
584 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
585 { .name = "clone", .errpid = true, },
586 { .name = "close", .errmsg = true,
587 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
588 { .name = "connect", .errmsg = true, },
589 { .name = "creat", .errmsg = true, },
590 { .name = "dup", .errmsg = true, },
591 { .name = "dup2", .errmsg = true, },
592 { .name = "dup3", .errmsg = true, },
593 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
594 { .name = "eventfd2", .errmsg = true,
595 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
596 { .name = "faccessat", .errmsg = true, },
597 { .name = "fadvise64", .errmsg = true, },
598 { .name = "fallocate", .errmsg = true, },
599 { .name = "fchdir", .errmsg = true, },
600 { .name = "fchmod", .errmsg = true, },
601 { .name = "fchmodat", .errmsg = true,
602 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
603 { .name = "fchown", .errmsg = true, },
604 { .name = "fchownat", .errmsg = true,
605 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
606 { .name = "fcntl", .errmsg = true,
607 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
608 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
609 { .name = "fdatasync", .errmsg = true, },
610 { .name = "flock", .errmsg = true,
611 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
612 { .name = "fsetxattr", .errmsg = true, },
613 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
614 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
615 { .name = "fstatfs", .errmsg = true, },
616 { .name = "fsync", .errmsg = true, },
617 { .name = "ftruncate", .errmsg = true, },
618 { .name = "futex", .errmsg = true,
619 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
620 { .name = "futimesat", .errmsg = true,
621 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
622 { .name = "getdents", .errmsg = true, },
623 { .name = "getdents64", .errmsg = true, },
624 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
625 { .name = "getpid", .errpid = true, },
626 { .name = "getpgid", .errpid = true, },
627 { .name = "getppid", .errpid = true, },
628 { .name = "getrandom", .errmsg = true,
629 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
630 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
631 { .name = "getxattr", .errmsg = true, },
632 { .name = "inotify_add_watch", .errmsg = true, },
633 { .name = "ioctl", .errmsg = true,
635 #if defined(__i386__) || defined(__x86_64__)
637 * FIXME: Make this available to all arches.
639 [1] = SCA_STRHEXARRAY, /* cmd */
640 [2] = SCA_HEX, /* arg */ },
641 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
643 [2] = SCA_HEX, /* arg */ }, },
645 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
646 { .name = "kill", .errmsg = true,
647 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
648 { .name = "lchown", .errmsg = true, },
649 { .name = "lgetxattr", .errmsg = true, },
650 { .name = "linkat", .errmsg = true,
651 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
652 { .name = "listxattr", .errmsg = true, },
653 { .name = "llistxattr", .errmsg = true, },
654 { .name = "lremovexattr", .errmsg = true, },
655 { .name = "lseek", .errmsg = true,
656 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
657 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
658 { .name = "lsetxattr", .errmsg = true, },
659 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
660 { .name = "lsxattr", .errmsg = true, },
661 { .name = "madvise", .errmsg = true,
662 .arg_scnprintf = { [0] = SCA_HEX, /* start */
663 [2] = SCA_MADV_BHV, /* behavior */ }, },
664 { .name = "mkdir", .errmsg = true, },
665 { .name = "mkdirat", .errmsg = true,
666 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
667 { .name = "mknod", .errmsg = true, },
668 { .name = "mknodat", .errmsg = true,
669 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
670 { .name = "mlock", .errmsg = true,
671 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
672 { .name = "mlockall", .errmsg = true,
673 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
674 { .name = "mmap", .hexret = true,
675 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
676 [2] = SCA_MMAP_PROT, /* prot */
677 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
678 { .name = "mprotect", .errmsg = true,
679 .arg_scnprintf = { [0] = SCA_HEX, /* start */
680 [2] = SCA_MMAP_PROT, /* prot */ }, },
681 { .name = "mq_unlink", .errmsg = true,
682 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
683 { .name = "mremap", .hexret = true,
684 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
685 [3] = SCA_MREMAP_FLAGS, /* flags */
686 [4] = SCA_HEX, /* new_addr */ }, },
687 { .name = "munlock", .errmsg = true,
688 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
689 { .name = "munmap", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
691 { .name = "name_to_handle_at", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
693 { .name = "newfstatat", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
695 { .name = "open", .errmsg = true,
696 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
697 { .name = "open_by_handle_at", .errmsg = true,
698 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
699 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
700 { .name = "openat", .errmsg = true,
701 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
702 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
703 { .name = "perf_event_open", .errmsg = true,
704 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
705 [3] = SCA_FD, /* group_fd */
706 [4] = SCA_PERF_FLAGS, /* flags */ }, },
707 { .name = "pipe2", .errmsg = true,
708 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
709 { .name = "poll", .errmsg = true, .timeout = true, },
710 { .name = "ppoll", .errmsg = true, .timeout = true, },
711 { .name = "pread", .errmsg = true, .alias = "pread64", },
712 { .name = "preadv", .errmsg = true, .alias = "pread", },
713 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
714 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
715 { .name = "pwritev", .errmsg = true, },
716 { .name = "read", .errmsg = true, },
717 { .name = "readlink", .errmsg = true, },
718 { .name = "readlinkat", .errmsg = true,
719 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
720 { .name = "readv", .errmsg = true, },
721 { .name = "recvfrom", .errmsg = true,
722 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
723 { .name = "recvmmsg", .errmsg = true,
724 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
725 { .name = "recvmsg", .errmsg = true,
726 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
727 { .name = "removexattr", .errmsg = true, },
728 { .name = "renameat", .errmsg = true,
729 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
730 { .name = "rmdir", .errmsg = true, },
731 { .name = "rt_sigaction", .errmsg = true,
732 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
733 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
734 { .name = "rt_sigqueueinfo", .errmsg = true,
735 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
736 { .name = "rt_tgsigqueueinfo", .errmsg = true,
737 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
738 { .name = "sched_getattr", .errmsg = true, },
739 { .name = "sched_setattr", .errmsg = true, },
740 { .name = "sched_setscheduler", .errmsg = true,
741 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
742 { .name = "seccomp", .errmsg = true,
743 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
744 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
745 { .name = "select", .errmsg = true, .timeout = true, },
746 { .name = "sendmmsg", .errmsg = true,
747 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
748 { .name = "sendmsg", .errmsg = true,
749 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
750 { .name = "sendto", .errmsg = true,
751 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
752 { .name = "set_tid_address", .errpid = true, },
753 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
754 { .name = "setpgid", .errmsg = true, },
755 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
756 { .name = "setxattr", .errmsg = true, },
757 { .name = "shutdown", .errmsg = true, },
758 { .name = "socket", .errmsg = true,
759 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
760 [1] = SCA_SK_TYPE, /* type */ },
761 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
762 { .name = "socketpair", .errmsg = true,
763 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
764 [1] = SCA_SK_TYPE, /* type */ },
765 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
766 { .name = "stat", .errmsg = true, .alias = "newstat", },
767 { .name = "statfs", .errmsg = true, },
768 { .name = "statx", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
770 [2] = SCA_STATX_FLAGS, /* flags */
771 [3] = SCA_STATX_MASK, /* mask */ }, },
772 { .name = "swapoff", .errmsg = true,
773 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
774 { .name = "swapon", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
776 { .name = "symlinkat", .errmsg = true,
777 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
778 { .name = "tgkill", .errmsg = true,
779 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
780 { .name = "tkill", .errmsg = true,
781 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
782 { .name = "truncate", .errmsg = true, },
783 { .name = "uname", .errmsg = true, .alias = "newuname", },
784 { .name = "unlinkat", .errmsg = true,
785 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
786 { .name = "utime", .errmsg = true, },
787 { .name = "utimensat", .errmsg = true,
788 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
789 { .name = "utimes", .errmsg = true, },
790 { .name = "vmsplice", .errmsg = true, },
791 { .name = "wait4", .errpid = true,
792 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
793 { .name = "waitid", .errpid = true,
794 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
795 { .name = "write", .errmsg = true, },
796 { .name = "writev", .errmsg = true, },
799 static int syscall_fmt__cmp(const void *name, const void *fmtp)
801 const struct syscall_fmt *fmt = fmtp;
802 return strcmp(name, fmt->name);
805 static struct syscall_fmt *syscall_fmt__find(const char *name)
807 const int nmemb = ARRAY_SIZE(syscall_fmts);
808 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
812 struct event_format *tp_format;
814 struct format_field *args;
817 struct syscall_fmt *fmt;
818 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
823 * We need to have this 'calculated' boolean because in some cases we really
824 * don't know what is the duration of a syscall, for instance, when we start
825 * a session and some threads are waiting for a syscall to finish, say 'poll',
826 * in which case all we can do is to print "( ? ) for duration and for the
829 static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
831 double duration = (double)t / NSEC_PER_MSEC;
832 size_t printed = fprintf(fp, "(");
835 printed += fprintf(fp, " ? ");
836 else if (duration >= 1.0)
837 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
838 else if (duration >= 0.01)
839 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
841 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
842 return printed + fprintf(fp, "): ");
846 * filename.ptr: The filename char pointer that will be vfs_getname'd
847 * filename.entry_str_pos: Where to insert the string translated from
848 * filename.ptr by the vfs_getname tracepoint/kprobe.
850 struct thread_trace {
853 unsigned long nr_events;
854 unsigned long pfmaj, pfmin;
859 short int entry_str_pos;
861 unsigned int namelen;
869 struct intlist *syscall_stats;
872 static struct thread_trace *thread_trace__new(void)
874 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
877 ttrace->paths.max = -1;
879 ttrace->syscall_stats = intlist__new(NULL);
884 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
886 struct thread_trace *ttrace;
891 if (thread__priv(thread) == NULL)
892 thread__set_priv(thread, thread_trace__new());
894 if (thread__priv(thread) == NULL)
897 ttrace = thread__priv(thread);
902 color_fprintf(fp, PERF_COLOR_RED,
903 "WARNING: not enough memory, dropping samples!\n");
/* Bit flags selecting major/minor page-fault events. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size of the per-thread buffer used to assemble the sys_enter line. */
static const size_t trace__entry_str_size = 2048;
912 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
914 struct thread_trace *ttrace = thread__priv(thread);
916 if (fd > ttrace->paths.max) {
917 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
922 if (ttrace->paths.max != -1) {
923 memset(npath + ttrace->paths.max + 1, 0,
924 (fd - ttrace->paths.max) * sizeof(char *));
926 memset(npath, 0, (fd + 1) * sizeof(char *));
929 ttrace->paths.table = npath;
930 ttrace->paths.max = fd;
933 ttrace->paths.table[fd] = strdup(pathname);
935 return ttrace->paths.table[fd] != NULL ? 0 : -1;
938 static int thread__read_fd_path(struct thread *thread, int fd)
940 char linkname[PATH_MAX], pathname[PATH_MAX];
944 if (thread->pid_ == thread->tid) {
945 scnprintf(linkname, sizeof(linkname),
946 "/proc/%d/fd/%d", thread->pid_, fd);
948 scnprintf(linkname, sizeof(linkname),
949 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
952 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
955 ret = readlink(linkname, pathname, sizeof(pathname));
957 if (ret < 0 || ret > st.st_size)
960 pathname[ret] = '\0';
961 return trace__set_fd_pathname(thread, fd, pathname);
964 static const char *thread__fd_path(struct thread *thread, int fd,
967 struct thread_trace *ttrace = thread__priv(thread);
975 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
978 ++trace->stats.proc_getname;
979 if (thread__read_fd_path(thread, fd))
983 return ttrace->paths.table[fd];
986 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
987 struct syscall_arg *arg)
990 size_t printed = scnprintf(bf, size, "%d", fd);
991 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
994 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
999 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1000 struct syscall_arg *arg)
1003 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1004 struct thread_trace *ttrace = thread__priv(arg->thread);
1006 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1007 zfree(&ttrace->paths.table[fd]);
1012 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1015 struct thread_trace *ttrace = thread__priv(thread);
1017 ttrace->filename.ptr = ptr;
1018 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1021 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1022 struct syscall_arg *arg)
1024 unsigned long ptr = arg->val;
1026 if (!arg->trace->vfs_getname)
1027 return scnprintf(bf, size, "%#x", ptr);
1029 thread__set_filename_pos(arg->thread, bf, ptr);
1033 static bool trace__filter_duration(struct trace *trace, double t)
1035 return t < (trace->duration_filter * NSEC_PER_MSEC);
1038 static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1040 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1042 return fprintf(fp, "%10.3f ", ts);
1046 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1047 * using ttrace->entry_time for a thread that receives a sys_exit without
1048 * first having received a sys_enter ("poll" issued before tracing session
1049 * starts, lost sys_enter exit due to ring buffer overflow).
1051 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1054 return __trace__fprintf_tstamp(trace, tstamp, fp);
1056 return fprintf(fp, " ? ");
/* Main-loop termination flags, set from the signal handler. */
static bool done = false;
static bool interrupted = false;	/* true only for SIGINT (Ctrl-C) */

static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1068 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1069 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
1071 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1072 printed += fprintf_duration(duration, duration_calculated, fp);
1074 if (trace->multiple_threads) {
1075 if (trace->show_comm)
1076 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1077 printed += fprintf(fp, "%d ", thread->tid);
1083 static int trace__process_event(struct trace *trace, struct machine *machine,
1084 union perf_event *event, struct perf_sample *sample)
1088 switch (event->header.type) {
1089 case PERF_RECORD_LOST:
1090 color_fprintf(trace->output, PERF_COLOR_RED,
1091 "LOST %" PRIu64 " events!\n", event->lost.lost);
1092 ret = machine__process_lost_event(machine, event, sample);
1095 ret = machine__process_event(machine, event, sample);
1102 static int trace__tool_process(struct perf_tool *tool,
1103 union perf_event *event,
1104 struct perf_sample *sample,
1105 struct machine *machine)
1107 struct trace *trace = container_of(tool, struct trace, tool);
1108 return trace__process_event(trace, machine, event, sample);
1111 static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1113 struct machine *machine = vmachine;
1115 if (machine->kptr_restrict_warned)
1118 if (symbol_conf.kptr_restrict) {
1119 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1120 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1121 "Kernel samples will not be resolved.\n");
1122 machine->kptr_restrict_warned = true;
1126 return machine__resolve_kernel_addr(vmachine, addrp, modp);
/*
 * Initialize symbol handling for live tracing: set up the host machine,
 * register the kptr_restrict-aware kernel address resolver and synthesize
 * already-running threads so their maps are known.
 */
1129 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1131 int err = symbol__init(NULL);
1136 trace->host = machine__new_host();
1137 if (trace->host == NULL)
1140 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
1143 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1144 evlist->threads, trace__tool_process, false,
1145 trace->opts.proc_map_timeout);
/*
 * Choose a pretty-printer for each argument of a syscall, either from the
 * hand-maintained syscall_fmts table or heuristically from the
 * tracepoint field's type/name (filenames, pointers, pids, modes, fds).
 */
1152 static int syscall__set_arg_fmts(struct syscall *sc)
1154 struct format_field *field;
1157 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1158 if (sc->arg_scnprintf == NULL)
1162 sc->arg_parm = sc->fmt->arg_parm;
1164 for (field = sc->args; field; field = field->next) {
 /* explicit per-syscall formatter wins over the heuristics below */
1165 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1166 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1167 else if (strcmp(field->type, "const char *") == 0 &&
1168 (strcmp(field->name, "filename") == 0 ||
1169 strcmp(field->name, "path") == 0 ||
1170 strcmp(field->name, "pathname") == 0))
1171 sc->arg_scnprintf[idx] = SCA_FILENAME;
1172 else if (field->flags & FIELD_IS_POINTER)
1173 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1174 else if (strcmp(field->type, "pid_t") == 0)
1175 sc->arg_scnprintf[idx] = SCA_PID;
1176 else if (strcmp(field->type, "umode_t") == 0)
1177 sc->arg_scnprintf[idx] = SCA_MODE_T;
 /* integer fields whose name ends in "fd" are file descriptors */
1178 else if ((strcmp(field->type, "int") == 0 ||
1179 strcmp(field->type, "unsigned int") == 0 ||
1180 strcmp(field->type, "long") == 0) &&
1181 (len = strlen(field->name)) >= 2 &&
1182 strcmp(field->name + len - 2, "fd") == 0) {
1184 * /sys/kernel/tracing/events/syscalls/sys_enter*
1185 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1190 sc->arg_scnprintf[idx] = SCA_FD;
/*
 * Lazily populate trace->syscalls.table[id]: grow the table if needed
 * (zeroing any newly exposed slots), look up the tracepoint format for
 * sys_enter_<name> (or its alias) and set per-argument formatters.
 * Returns 0 on success, negative on failure.
 */
1198 static int trace__read_syscall_info(struct trace *trace, int id)
1202 const char *name = syscalltbl__name(trace->sctbl, id);
1207 if (id > trace->syscalls.max) {
1208 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1210 if (nsyscalls == NULL)
 /* table already existed: only zero the newly added tail */
1213 if (trace->syscalls.max != -1) {
1214 memset(nsyscalls + trace->syscalls.max + 1, 0,
1215 (id - trace->syscalls.max) * sizeof(*sc));
 /* first allocation: zero the whole table */
1217 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1220 trace->syscalls.table = nsyscalls;
1221 trace->syscalls.max = id;
1224 sc = trace->syscalls.table + id;
1227 sc->fmt = syscall_fmt__find(sc->name);
1229 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1230 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
 /* retry with the alias used by some kernels for this syscall */
1232 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1233 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1234 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1237 if (IS_ERR(sc->tp_format))
1240 sc->args = sc->tp_format->format.fields;
1241 sc->nr_args = sc->tp_format->format.nr_fields;
1243 * Discard a leading '__syscall_nr' (or 'nr' on older kernels) field:
1244 * it only carries the syscall number, which we already have, so it
1245 * must not be printed as a regular argument.
1247 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1248 sc->args = sc->args->next;
1252 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1254 return syscall__set_arg_fmts(sc);
/*
 * Translate the user-supplied event qualifier names (-e list) into
 * syscall ids, reporting every invalid name before failing so the user
 * sees all mistakes at once.
 */
1257 static int trace__validate_ev_qualifier(struct trace *trace)
1260 struct str_node *pos;
1262 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1263 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1264 sizeof(trace->ev_qualifier_ids.entries[0]));
1266 if (trace->ev_qualifier_ids.entries == NULL) {
1267 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1275 strlist__for_each_entry(pos, trace->ev_qualifier) {
1276 const char *sc = pos->s;
1277 int id = syscalltbl__id(trace->sctbl, sc);
 /* unknown name: keep scanning so all bad entries get listed */
1281 fputs("Error:\tInvalid syscall ", trace->output);
1284 fputs(", ", trace->output);
1287 fputs(sc, trace->output);
1290 trace->ev_qualifier_ids.entries[i++] = id;
1294 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1295 "\nHint:\tand: 'man syscalls'\n", trace->output);
 /* on error, release the ids array and reset the count */
1296 zfree(&trace->ev_qualifier_ids.entries);
1297 trace->ev_qualifier_ids.nr = 0;
1304 * args is to be interpreted as a series of longs but we need to handle
1305 * 8-byte unaligned accesses. args points to raw_data within the event
1306 * and raw_data is guaranteed to be 8-byte unaligned because it is
1307 * preceded by raw_size which is a u32. So we need to copy args to a temp
1308 * variable to read it. Most notably this avoids extended load instructions
1309 * on unaligned addresses
/*
 * Format the syscall arguments into bf using the per-argument formatter
 * chosen by syscall__set_arg_fmts(), suppressing trailing zero-valued
 * arguments; falls back to raw "argN: value" output when the tracepoint
 * format file could not be read. Returns the number of chars written.
 */
1312 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1313 unsigned char *args, struct trace *trace,
1314 struct thread *thread)
1320 if (sc->args != NULL) {
1321 struct format_field *field;
1323 struct syscall_arg arg = {
1330 for (field = sc->args; field;
1331 field = field->next, ++arg.idx, bit <<= 1) {
1335 /* special care for unaligned accesses */
1336 p = args + sizeof(unsigned long) * arg.idx;
1337 memcpy(&val, p, sizeof(val));
1340 * Suppress this argument if its value is zero and
1341 * we don't have a string associated in an
1345 !(sc->arg_scnprintf &&
1346 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1347 sc->arg_parm[arg.idx]))
1350 printed += scnprintf(bf + printed, size - printed,
1351 "%s%s: ", printed ? ", " : "", field->name);
1352 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1355 arg.parm = sc->arg_parm[arg.idx];
1356 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1357 size - printed, &arg);
1359 printed += scnprintf(bf + printed, size - printed,
1363 } else if (IS_ERR(sc->tp_format)) {
1365 * If we managed to read the tracepoint /format file, then we
1366 * may end up not having any args, like with gettid(), so only
1367 * print the raw args when we didn't manage to read it.
1372 /* special care for unaligned accesses */
1373 p = args + sizeof(unsigned long) * i;
1374 memcpy(&val, p, sizeof(val));
1375 printed += scnprintf(bf + printed, size - printed,
1377 printed ? ", " : "", i, val);
/* Signature shared by all per-tracepoint sample handlers in this file. */
1385 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1386 union perf_event *event,
1387 struct perf_sample *sample);
/*
 * Return the syscall descriptor for 'id', reading it on first use via
 * trace__read_syscall_info(). Returns NULL (after logging) for invalid
 * ids or when the tracepoint information cannot be read.
 */
1389 static struct syscall *trace__syscall_info(struct trace *trace,
1390 struct perf_evsel *evsel, int id)
1396 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1397 * before that, leaving at a higher verbosity level till that is
1398 * explained. Reproduced with plain ftrace with:
1400 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1401 * grep "NR -1 " /t/trace_pipe
1403 * After generating some load on the machine.
1407 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1408 id, perf_evsel__name(evsel), ++n);
 /* lazily fill the table slot the first time this id is seen */
1413 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1414 trace__read_syscall_info(trace, id))
1417 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1420 return &trace->syscalls.table[id];
1424 fprintf(trace->output, "Problems reading syscall %d", id);
1425 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1426 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1427 fputs(" information\n", trace->output);
/*
 * Account one completed syscall in the per-thread, per-syscall-id stats
 * (used by the -S/--summary output), allocating the stats node lazily.
 */
1432 static void thread__update_stats(struct thread_trace *ttrace,
1433 int id, struct perf_sample *sample)
1435 struct int_node *inode;
1436 struct stats *stats;
1439 inode = intlist__findnew(ttrace->syscall_stats, id);
1443 stats = inode->priv;
1444 if (stats == NULL) {
1445 stats = malloc(sizeof(struct stats));
1449 inode->priv = stats;
 /* guard against clock skew: only use a forward-moving delta */
1452 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1453 duration = sample->time - ttrace->entry_time;
1455 update_stats(stats, duration);
/*
 * When a new event arrives while the current thread still has a pending
 * (unfinished) syscall entry, flush that entry with a "...)" marker so
 * output from different threads does not interleave on one line.
 */
1458 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1460 struct thread_trace *ttrace;
1464 if (trace->current == NULL)
1467 ttrace = thread__priv(trace->current);
1469 if (!ttrace->entry_pending)
1472 duration = sample->time - ttrace->entry_time;
1474 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
1475 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1476 ttrace->entry_pending = false;
/*
 * raw_syscalls:sys_enter handler: format the syscall name and arguments
 * into the thread's entry_str. The line is printed immediately only when
 * no duration filter/summary/min-stack option defers it to sys_exit.
 */
1481 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1482 union perf_event *event __maybe_unused,
1483 struct perf_sample *sample)
1488 struct thread *thread;
1489 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1490 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1491 struct thread_trace *ttrace;
1496 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1497 ttrace = thread__trace(thread, trace->output);
1501 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 /* lazily allocate the per-thread formatting buffer */
1503 if (ttrace->entry_str == NULL) {
1504 ttrace->entry_str = malloc(trace__entry_str_size);
1505 if (!ttrace->entry_str)
1509 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1510 trace__printf_interrupted_entry(trace, sample);
1512 ttrace->entry_time = sample->time;
1513 msg = ttrace->entry_str;
1514 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1516 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1517 args, trace, thread);
1520 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1521 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
1522 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
1525 ttrace->entry_pending = true;
1526 /* See trace__vfs_getname & trace__sys_exit */
1527 ttrace->filename.pending_open = false;
 /* track the most recent thread, taking/dropping references */
1530 if (trace->current != thread) {
1531 thread__put(trace->current);
1532 trace->current = thread__get(thread);
1536 thread__put(thread);
/*
 * Resolve the sample's callchain (up to trace->max_stack frames) into
 * 'cursor' for later printing.
 */
1540 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1541 struct perf_sample *sample,
1542 struct callchain_cursor *cursor)
1544 struct addr_location al;
1546 if (machine__resolve(trace->host, &al, sample) < 0 ||
1547 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
/* Print the previously resolved callchain, one symbolized frame per line. */
1553 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1555 /* TODO: user-configurable print_opts */
1556 const unsigned int print_opts = EVSEL__PRINT_SYM |
1558 EVSEL__PRINT_UNKNOWN_AS_ADDR;
 /* 38 = left margin width so frames line up under the trace columns */
1560 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
/*
 * raw_syscalls:sys_exit handler: compute the syscall duration, complete
 * (or reprint as "continued") the pending entry line, pretty-print the
 * return value according to the syscall's fmt (errno names, timeouts,
 * hex returns, child pids) and optionally append a callchain.
 */
1563 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1564 union perf_event *event __maybe_unused,
1565 struct perf_sample *sample)
1569 bool duration_calculated = false;
1570 struct thread *thread;
1571 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1572 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1573 struct thread_trace *ttrace;
1578 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1579 ttrace = thread__trace(thread, trace->output);
1584 thread__update_stats(ttrace, id, sample);
1586 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
 /* a successful open-like syscall: bind the fd to the getname'd path */
1588 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1589 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1590 ttrace->filename.pending_open = false;
1591 ++trace->stats.vfs_getname;
1594 if (ttrace->entry_time) {
1595 duration = sample->time - ttrace->entry_time;
1596 if (trace__filter_duration(trace, duration))
1598 duration_calculated = true;
1599 } else if (trace->duration_filter)
1602 if (sample->callchain) {
1603 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1604 if (callchain_ret == 0) {
 /* too shallow a stack: skip per --min-stack */
1605 if (callchain_cursor.nr < trace->min_stack)
1611 if (trace->summary_only)
1614 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
1616 if (ttrace->entry_pending) {
1617 fprintf(trace->output, "%-70s", ttrace->entry_str);
 /* entry was flushed earlier by an interleaving event */
1619 fprintf(trace->output, " ... [");
1620 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1621 fprintf(trace->output, "]: %s()", sc->name);
1624 if (sc->fmt == NULL) {
1626 fprintf(trace->output, ") = %ld", ret);
1627 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1628 char bf[STRERR_BUFSIZE];
1629 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
1630 *e = audit_errno_to_name(-ret);
1632 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1633 } else if (ret == 0 && sc->fmt->timeout)
1634 fprintf(trace->output, ") = 0 Timeout");
1635 else if (sc->fmt->hexret)
1636 fprintf(trace->output, ") = %#lx", ret);
1637 else if (sc->fmt->errpid) {
 /* return value is a pid (fork/clone/wait*): show the child comm */
1638 struct thread *child = machine__find_thread(trace->host, ret, ret);
1640 if (child != NULL) {
1641 fprintf(trace->output, ") = %ld", ret);
1642 if (child->comm_set)
1643 fprintf(trace->output, " (%s)", thread__comm_str(child));
1649 fputc('\n', trace->output);
1651 if (callchain_ret > 0)
1652 trace__fprintf_callchain(trace, sample);
1653 else if (callchain_ret < 0)
1654 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1656 ttrace->entry_pending = false;
1659 thread__put(thread);
/*
 * probe:vfs_getname handler: capture the pathname being resolved and,
 * when the thread has a pending entry_str with a placeholder position,
 * splice the filename into it (truncating from the left if the buffer
 * would overflow). The name is also kept for fd->path bookkeeping in
 * trace__sys_exit.
 */
1663 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1664 union perf_event *event __maybe_unused,
1665 struct perf_sample *sample)
1667 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1668 struct thread_trace *ttrace;
1669 size_t filename_len, entry_str_len, to_move;
1670 ssize_t remaining_space;
1672 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
1677 ttrace = thread__priv(thread);
1681 filename_len = strlen(filename);
1682 if (filename_len == 0)
 /* grow the per-thread name buffer as needed */
1685 if (ttrace->filename.namelen < filename_len) {
1686 char *f = realloc(ttrace->filename.name, filename_len + 1);
1691 ttrace->filename.namelen = filename_len;
1692 ttrace->filename.name = f;
1695 strcpy(ttrace->filename.name, filename);
1696 ttrace->filename.pending_open = true;
 /* no placeholder recorded in entry_str: nothing to splice */
1698 if (!ttrace->filename.ptr)
1701 entry_str_len = strlen(ttrace->entry_str);
1702 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1703 if (remaining_space <= 0)
 /* keep the tail of the path, which is the most informative part */
1706 if (filename_len > (size_t)remaining_space) {
1707 filename += filename_len - remaining_space;
1708 filename_len = remaining_space;
1711 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1712 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1713 memmove(pos + filename_len, pos, to_move);
1714 memcpy(pos, filename, filename_len);
1716 ttrace->filename.ptr = 0;
1717 ttrace->filename.entry_str_pos = 0;
1719 thread__put(thread);
/*
 * sched:sched_stat_runtime handler: accumulate per-thread and global
 * runtime (in ms) for the summary; on failure paths, dump the raw
 * tracepoint fields instead.
 */
1724 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1725 union perf_event *event __maybe_unused,
1726 struct perf_sample *sample)
1728 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1729 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1730 struct thread *thread = machine__findnew_thread(trace->host,
1733 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1738 ttrace->runtime_ms += runtime_ms;
1739 trace->runtime_ms += runtime_ms;
1741 thread__put(thread);
1745 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1747 perf_evsel__strval(evsel, sample, "comm"),
1748 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1750 perf_evsel__intval(evsel, sample, "vruntime"));
/*
 * print_binary() callback for BPF output events: emit printable bytes
 * as-is and everything else as '.'; all other binary-print phases are
 * intentionally ignored.
 */
1754 static void bpf_output__printer(enum binary_printer_ops op,
1755 unsigned int val, void *extra)
1757 FILE *output = extra;
1758 unsigned char ch = (unsigned char)val;
1761 case BINARY_PRINT_CHAR_DATA:
1762 fprintf(output, "%c", isprint(ch) ? ch : '.');
1764 case BINARY_PRINT_DATA_BEGIN:
1765 case BINARY_PRINT_LINE_BEGIN:
1766 case BINARY_PRINT_ADDR:
1767 case BINARY_PRINT_NUM_DATA:
1768 case BINARY_PRINT_NUM_PAD:
1769 case BINARY_PRINT_SEP:
1770 case BINARY_PRINT_CHAR_PAD:
1771 case BINARY_PRINT_LINE_END:
1772 case BINARY_PRINT_DATA_END:
/* Dump a BPF output event's raw payload as ASCII, 8 bytes per group. */
1778 static void bpf_output__fprintf(struct trace *trace,
1779 struct perf_sample *sample)
1781 print_binary(sample->raw_data, sample->raw_size, 8,
1782 bpf_output__printer, trace->output);
/*
 * Generic handler for non-syscall events added with --event: print a
 * timestamped line with the event name and its tracepoint/BPF payload,
 * plus an optional callchain.
 */
1785 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1786 union perf_event *event __maybe_unused,
1787 struct perf_sample *sample)
1789 int callchain_ret = 0;
1791 if (sample->callchain) {
1792 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1793 if (callchain_ret == 0) {
1794 if (callchain_cursor.nr < trace->min_stack)
1800 trace__printf_interrupted_entry(trace, sample);
1801 trace__fprintf_tstamp(trace, sample->time, trace->output);
 /* keep the duration column aligned with syscall lines */
1803 if (trace->trace_syscalls)
1804 fprintf(trace->output, "( ): ");
1806 fprintf(trace->output, "%s:", evsel->name);
1808 if (perf_evsel__is_bpf_output(evsel)) {
1809 bpf_output__fprintf(trace, sample);
1810 } else if (evsel->tp_format) {
1811 event_format__fprintf(evsel->tp_format, sample->cpu,
1812 sample->raw_data, sample->raw_size,
1816 fprintf(trace->output, ")\n");
1818 if (callchain_ret > 0)
1819 trace__fprintf_callchain(trace, sample);
1820 else if (callchain_ret < 0)
1821 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
/*
 * Print a resolved address location as "dso@symbol+offset", falling back
 * to the raw address when no map/symbol is available; verbose mode
 * forces both dso and symbol output.
 */
1826 static void print_location(FILE *f, struct perf_sample *sample,
1827 struct addr_location *al,
1828 bool print_dso, bool print_sym)
1831 if ((verbose > 0 || print_dso) && al->map)
1832 fprintf(f, "%s@", al->map->dso->long_name);
1834 if ((verbose > 0 || print_sym) && al->sym)
1835 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1836 al->addr - al->sym->start);
1838 fprintf(f, "0x%" PRIx64, al->addr);
1840 fprintf(f, "0x%" PRIx64, sample->addr);
/*
 * Page-fault event handler: print "majfault"/"minfault" lines showing
 * the faulting IP and the faulted data address ('d' = data map, falling
 * back to the function map), with an optional callchain.
 */
1843 static int trace__pgfault(struct trace *trace,
1844 struct perf_evsel *evsel,
1845 union perf_event *event __maybe_unused,
1846 struct perf_sample *sample)
1848 struct thread *thread;
1849 struct addr_location al;
1850 char map_type = 'd';
1851 struct thread_trace *ttrace;
1853 int callchain_ret = 0;
1855 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1857 if (sample->callchain) {
1858 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1859 if (callchain_ret == 0) {
1860 if (callchain_cursor.nr < trace->min_stack)
1866 ttrace = thread__trace(thread, trace->output);
 /* distinguish major faults for the counters and the output label */
1870 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1875 if (trace->summary_only)
1878 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
1881 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
1883 fprintf(trace->output, "%sfault [",
1884 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1887 print_location(trace->output, sample, &al, false, true);
1889 fprintf(trace->output, "] => ");
1891 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
 /* not in a data map: retry in the function (text) map */
1895 thread__find_addr_location(thread, sample->cpumode,
1896 MAP__FUNCTION, sample->addr, &al);
1904 print_location(trace->output, sample, &al, true, false);
1906 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1908 if (callchain_ret > 0)
1909 trace__fprintf_callchain(trace, sample);
1910 else if (callchain_ret < 0)
1911 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1915 thread__put(thread);
/*
 * Record the first usable sample timestamp as the base for relative
 * timestamps (unless --time/full_time is in effect).
 */
1919 static void trace__set_base_time(struct trace *trace,
1920 struct perf_evsel *evsel,
1921 struct perf_sample *sample)
1924 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1925 * and don't use sample->time unconditionally, we may end up having
1926 * some other event in the future without PERF_SAMPLE_TIME for good
1927 * reason, i.e. we may not be interested in its timestamps, just in
1928 * it taking place, picking some piece of information when it
1929 * appears in our event stream (vfs_getname comes to mind).
1931 if (trace->base_time == 0 && !trace->full_time &&
1932 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
1933 trace->base_time = sample->time;
/*
 * perf_tool sample callback used in replay mode: skip filtered threads,
 * establish the base timestamp and invoke the evsel's registered
 * tracepoint handler.
 */
1936 static int trace__process_sample(struct perf_tool *tool,
1937 union perf_event *event,
1938 struct perf_sample *sample,
1939 struct perf_evsel *evsel,
1940 struct machine *machine __maybe_unused)
1942 struct trace *trace = container_of(tool, struct trace, tool);
1943 struct thread *thread;
1946 tracepoint_handler handler = evsel->handler;
1948 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1949 if (thread && thread__is_filtered(thread))
1952 trace__set_base_time(trace, evsel, sample);
1956 handler(trace, evsel, event, sample);
1959 thread__put(thread);
/*
 * 'perf trace record': build an argv for 'perf record' with the syscall
 * raw tracepoints (falling back to the older syscalls:* names) and any
 * requested page-fault events, then hand off to cmd_record().
 */
1963 static int trace__record(struct trace *trace, int argc, const char **argv)
1965 unsigned int rec_argc, i, j;
1966 const char **rec_argv;
1967 const char * const record_args[] = {
1974 const char * const sc_args[] = { "-e", };
1975 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1976 const char * const majpf_args[] = { "-e", "major-faults" };
1977 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1978 const char * const minpf_args[] = { "-e", "minor-faults" };
1979 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1981 /* +1 is for the event string below */
1982 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1983 majpf_args_nr + minpf_args_nr + argc;
1984 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1986 if (rec_argv == NULL)
1990 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1991 rec_argv[j++] = record_args[i];
1993 if (trace->trace_syscalls) {
1994 for (i = 0; i < sc_args_nr; i++)
1995 rec_argv[j++] = sc_args[i];
1997 /* event string may be different for older kernels - e.g., RHEL6 */
1998 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1999 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2000 else if (is_valid_tracepoint("syscalls:sys_enter"))
2001 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2003 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2008 if (trace->trace_pgfaults & TRACE_PFMAJ)
2009 for (i = 0; i < majpf_args_nr; i++)
2010 rec_argv[j++] = majpf_args[i];
2012 if (trace->trace_pgfaults & TRACE_PFMIN)
2013 for (i = 0; i < minpf_args_nr; i++)
2014 rec_argv[j++] = minpf_args[i];
 /* append the user's remaining command line (workload, options) */
2016 for (i = 0; i < (unsigned int)argc; i++)
2017 rec_argv[j++] = argv[i];
2019 return cmd_record(j, rec_argv);
/* Forward declaration: the summary printer is needed by trace__run(). */
2022 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
/*
 * Try to add the probe:vfs_getname event (set up beforehand via
 * 'perf probe'); returns false when the probe or its "pathname" field
 * is not available, in which case pathname decoration is skipped.
 */
2024 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2026 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2031 if (perf_evsel__field(evsel, "pathname") == NULL) {
2032 perf_evsel__delete(evsel);
2036 evsel->handler = trace__vfs_getname;
2037 perf_evlist__add(evlist, evsel);
/*
 * Create a software page-fault evsel (major or minor, per 'config')
 * sampling every fault, with trace__pgfault as its handler.
 */
2041 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
2043 struct perf_evsel *evsel;
2044 struct perf_event_attr attr = {
2045 .type = PERF_TYPE_SOFTWARE,
2049 attr.config = config;
 /* sample every single fault, no period-based throttling */
2050 attr.sample_period = 1;
2052 event_attr_init(&attr);
2054 evsel = perf_evsel__new(&attr);
2056 evsel->handler = trace__pgfault;
/*
 * Live-mode event dispatcher: route non-sample records to
 * trace__process_event(), look up the evsel for samples and invoke its
 * handler, skipping tracepoint samples that arrive without a payload.
 */
2061 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2063 const u32 type = event->header.type;
2064 struct perf_evsel *evsel;
2066 if (type != PERF_RECORD_SAMPLE) {
2067 trace__process_event(trace, trace->host, event, sample);
2071 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2072 if (evsel == NULL) {
2073 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2077 trace__set_base_time(trace, evsel, sample);
2079 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2080 sample->raw_data == NULL) {
2081 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2082 perf_evsel__name(evsel), sample->tid,
2083 sample->cpu, sample->raw_size);
2085 tracepoint_handler handler = evsel->handler;
2086 handler(trace, evsel, event, sample);
/*
 * Create and add the raw_syscalls:sys_enter/sys_exit tracepoint evsels,
 * wiring their sample handlers and caching the 'args'/'ret' field
 * offsets. On callchain mode without --kernel-syscall-graph, kernel
 * frames are excluded from the sys_exit callchain.
 */
2090 static int trace__add_syscall_newtp(struct trace *trace)
2093 struct perf_evlist *evlist = trace->evlist;
2094 struct perf_evsel *sys_enter, *sys_exit;
2096 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2097 if (sys_enter == NULL)
2100 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2101 goto out_delete_sys_enter;
2103 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2104 if (sys_exit == NULL)
2105 goto out_delete_sys_enter;
2107 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2108 goto out_delete_sys_exit;
2110 perf_evlist__add(evlist, sys_enter);
2111 perf_evlist__add(evlist, sys_exit);
2113 if (callchain_param.enabled && !trace->kernel_syscallchains) {
2115 * We're interested only in the user space callchain
2116 * leading to the syscall, allow overriding that for
2117 * debugging reasons using --kernel_syscall_callchains
2119 sys_exit->attr.exclude_callchain_kernel = 1;
2122 trace->syscalls.events.sys_enter = sys_enter;
2123 trace->syscalls.events.sys_exit = sys_exit;
/* error unwinding: delete whatever was created, newest first */
2129 out_delete_sys_exit:
2130 perf_evsel__delete_priv(sys_exit);
2131 out_delete_sys_enter:
2132 perf_evsel__delete_priv(sys_enter);
/*
 * Build an "id in (...)" (or "not in", with -v's negation) tracepoint
 * filter from the validated qualifier ids and append it to both the
 * sys_enter and sys_exit evsels.
 */
2136 static int trace__set_ev_qualifier_filter(struct trace *trace)
2139 struct perf_evsel *sys_exit;
2140 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2141 trace->ev_qualifier_ids.nr,
2142 trace->ev_qualifier_ids.entries);
2147 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2149 sys_exit = trace->syscalls.events.sys_exit;
2150 err = perf_evsel__append_tp_filter(sys_exit, filter);
/*
 * Live tracing main loop: build the evlist (syscall tracepoints,
 * vfs_getname, page faults, sched_stat_runtime), configure callchains
 * and filters, open/mmap the events, optionally fork the workload, then
 * consume ring-buffer events until done, finishing with the summary.
 * Error labels at the bottom translate errno into user-facing messages.
 */
2161 static int trace__run(struct trace *trace, int argc, const char **argv)
2163 struct perf_evlist *evlist = trace->evlist;
2164 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
2166 unsigned long before;
2167 const bool forks = argc > 0;
2168 bool draining = false;
2172 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2173 goto out_error_raw_syscalls;
2175 if (trace->trace_syscalls)
2176 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2178 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2179 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2180 if (pgfault_maj == NULL)
2182 perf_evlist__add(evlist, pgfault_maj);
2185 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2186 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2187 if (pgfault_min == NULL)
2189 perf_evlist__add(evlist, pgfault_min);
2193 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2194 trace__sched_stat_runtime))
2195 goto out_error_sched_stat_runtime;
2197 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2199 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2200 goto out_delete_evlist;
2203 err = trace__symbols_init(trace, evlist);
2205 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2206 goto out_delete_evlist;
2209 perf_evlist__config(evlist, &trace->opts, NULL);
 /* enable callchains only on the evsels that need them */
2211 if (callchain_param.enabled) {
2212 bool use_identifier = false;
2214 if (trace->syscalls.events.sys_exit) {
2215 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2216 &trace->opts, &callchain_param);
2217 use_identifier = true;
2221 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2222 use_identifier = true;
2226 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2227 use_identifier = true;
2230 if (use_identifier) {
2232 * Now we have evsels with different sample_ids, use
2233 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2234 * from a fixed position in each ring buffer record.
2236 * As of this the changeset introducing this comment, this
2237 * isn't strictly needed, as the fields that can come before
2238 * PERF_SAMPLE_ID are all used, but we'll probably disable
2239 * some of those for things like copying the payload of
2240 * pointer syscall arguments, and for vfs_getname we don't
2241 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2242 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2244 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2245 perf_evlist__reset_sample_bit(evlist, ID);
2249 signal(SIGCHLD, sig_handler);
2250 signal(SIGINT, sig_handler);
 /* fork (but don't exec yet) the workload given on the command line */
2253 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2256 fprintf(trace->output, "Couldn't run the workload!\n");
2257 goto out_delete_evlist;
2261 err = perf_evlist__open(evlist);
2263 goto out_error_open;
2265 err = bpf__apply_obj_config();
2267 char errbuf[BUFSIZ];
2269 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2270 pr_err("ERROR: Apply config to BPF failed: %s\n",
2272 goto out_error_open;
2276 * Better not use !target__has_task() here because we need to cover the
2277 * case where no threads were specified in the command line, but a
2278 * workload was, and in that case we will fill in the thread_map when
2279 * we fork the workload in perf_evlist__prepare_workload.
2281 if (trace->filter_pids.nr > 0)
2282 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2283 else if (thread_map__pid(evlist->threads, 0) == -1)
 /* system-wide: at least filter out perf trace's own pid */
2284 err = perf_evlist__set_filter_pid(evlist, getpid());
2289 if (trace->ev_qualifier_ids.nr > 0) {
2290 err = trace__set_ev_qualifier_filter(trace);
2294 pr_debug("event qualifier tracepoint filter: %s\n",
2295 trace->syscalls.events.sys_exit->filter);
2298 err = perf_evlist__apply_filters(evlist, &evsel);
2300 goto out_error_apply_filters;
2302 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2304 goto out_error_mmap;
2306 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
2307 perf_evlist__enable(evlist);
2310 perf_evlist__start_workload(evlist);
2312 if (trace->opts.initial_delay) {
2313 usleep(trace->opts.initial_delay * 1000);
2314 perf_evlist__enable(evlist);
 /* show tids in the output when more than one thread can appear */
2317 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2318 evlist->threads->nr > 1 ||
2319 perf_evlist__first(evlist)->attr.inherit;
2321 before = trace->nr_events;
 /* drain every mmap'ed ring buffer */
2323 for (i = 0; i < evlist->nr_mmaps; i++) {
2324 union perf_event *event;
2326 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2327 struct perf_sample sample;
2331 err = perf_evlist__parse_sample(evlist, event, &sample);
2333 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2337 trace__handle_event(trace, event, &sample);
2339 perf_evlist__mmap_consume(evlist, i);
 /* on 'done', disable events and drain what's left before exiting */
2344 if (done && !draining) {
2345 perf_evlist__disable(evlist);
 /* nothing new arrived: poll (bounded when finishing up) */
2351 if (trace->nr_events == before) {
2352 int timeout = done ? 100 : -1;
2354 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2355 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2365 thread__zput(trace->current);
2367 perf_evlist__disable(evlist);
2371 trace__fprintf_thread_summary(trace, trace->output);
2373 if (trace->show_tool_stats) {
2374 fprintf(trace->output, "Stats:\n "
2375 " vfs_getname : %" PRIu64 "\n"
2376 " proc_getname: %" PRIu64 "\n",
2377 trace->stats.vfs_getname,
2378 trace->stats.proc_getname);
2383 perf_evlist__delete(evlist);
2384 trace->evlist = NULL;
2385 trace->live = false;
2388 char errbuf[BUFSIZ];
2390 out_error_sched_stat_runtime:
2391 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2394 out_error_raw_syscalls:
2395 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2399 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2403 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2406 fprintf(trace->output, "%s\n", errbuf);
2407 goto out_delete_evlist;
2409 out_error_apply_filters:
2410 fprintf(trace->output,
2411 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2412 evsel->filter, perf_evsel__name(evsel), errno,
2413 str_error_r(errno, errbuf, sizeof(errbuf)));
2414 goto out_delete_evlist;
2417 fprintf(trace->output, "Not enough memory to run!\n");
2418 goto out_delete_evlist;
2421 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2422 goto out_delete_evlist;
/*
 * trace__replay: 'perf trace -i <file>' mode.  Instead of live tracing,
 * open a previously recorded perf.data file, wire up the perf_tool
 * callbacks, resolve the syscall enter/exit tracepoints and replay the
 * recorded events through the same trace__* handlers.
 *
 * NOTE(review): many original lines (error checks, gotos, labels and the
 * final return) are missing from this excerpt, so the exact control flow
 * between the visible statements cannot be verified here.
 */
2425 static int trace__replay(struct trace *trace)
/* per-tracepoint handler table passed to perf_session__set_tracepoints_handlers() */
2427 const struct perf_evsel_str_handler handlers[] = {
2428 { "probe:vfs_getname", trace__vfs_getname, },
2430 struct perf_data_file file = {
2432 .mode = PERF_DATA_MODE_READ,
2433 .force = trace->force,
2435 struct perf_session *session;
2436 struct perf_evsel *evsel;
/* Route each recorded event type to the matching processing callback. */
2439 trace->tool.sample = trace__process_sample;
2440 trace->tool.mmap = perf_event__process_mmap;
2441 trace->tool.mmap2 = perf_event__process_mmap2;
2442 trace->tool.comm = perf_event__process_comm;
2443 trace->tool.exit = perf_event__process_exit;
2444 trace->tool.fork = perf_event__process_fork;
2445 trace->tool.attr = perf_event__process_attr;
2446 trace->tool.tracing_data = perf_event__process_tracing_data;
2447 trace->tool.build_id = perf_event__process_build_id;
2448 trace->tool.namespaces = perf_event__process_namespaces;
/* Replay must be time-ordered so enter/exit pairs match up. */
2450 trace->tool.ordered_events = true;
2451 trace->tool.ordering_requires_timestamps = true;
2453 /* add tid to output */
2454 trace->multiple_threads = true;
2456 session = perf_session__new(&file, false, &trace->tool);
2457 if (session == NULL)
/* Restrict symbol resolution to the recorded pids/tids, if any. */
2460 if (trace->opts.target.pid)
2461 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2463 if (trace->opts.target.tid)
2464 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2466 if (symbol__init(&session->header.env) < 0)
2469 trace->host = &session->machines.host;
2471 err = perf_session__set_tracepoints_handlers(session, handlers);
/* Prefer raw_syscalls:sys_enter; fall back for older kernels. */
2475 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2476 "raw_syscalls:sys_enter");
2477 /* older kernels have syscalls tp versus raw_syscalls */
2479 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2480 "syscalls:sys_enter");
2483 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2484 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2485 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
/* Same resolution + fallback for the syscall exit tracepoint. */
2489 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2490 "raw_syscalls:sys_exit");
2492 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2493 "syscalls:sys_exit");
2495 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2496 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2497 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
/* Hook the page-fault software events to the pagefault printer. */
2501 evlist__for_each_entry(session->evlist, evsel) {
2502 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2503 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2504 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2505 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2506 evsel->handler = trace__pgfault;
2511 err = perf_session__process_events(session);
2513 pr_err("Failed to process events, error %d", err);
2515 else if (trace->summary)
2516 trace__fprintf_thread_summary(trace, trace->output);
2519 perf_session__delete(session);
/* Print the banner line that precedes the per-thread summary table. */
2524 static size_t trace__fprintf_threads_header(FILE *fp)
2528 printed = fprintf(fp, "\n Summary of events:\n\n");
/*
 * Re-sort the per-thread syscall stats (an intlist keyed by syscall nr,
 * with a struct stats hung off ->priv) by total time spent: msecs is
 * call count * average duration, converted from ns to msec.
 * NOTE(review): traversal direction follows rb_resort.h semantics for
 * the 'a->msecs > b->msecs' comparison — confirm against rb_resort.h.
 */
2533 DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2534 struct stats *stats;
2539 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2540 struct stats *stats = source->priv;
2542 entry->syscall = source->i;
2543 entry->stats = stats;
/* msecs = n * avg(ns)/NSEC_PER_MSEC; 0 when no stats were collected */
2544 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
/*
 * Print one thread's per-syscall statistics table (calls, total, min,
 * avg, max, stddev%).  Durations are stored in nanoseconds and converted
 * to milliseconds for display.  Returns the number of characters printed.
 * NOTE(review): some original lines are missing from this excerpt
 * (declarations, early return, final return).
 */
2547 static size_t thread__dump_stats(struct thread_trace *ttrace,
2548 struct trace *trace, FILE *fp)
2553 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
2555 if (syscall_stats == NULL)
2558 printed += fprintf(fp, "\n");
2560 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2561 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2562 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
/* One row per syscall, in the order produced by the resort above. */
2564 resort_rb__for_each_entry(nd, syscall_stats) {
2565 struct stats *stats = syscall_stats_entry->stats;
/* ns -> msec for display */
2567 double min = (double)(stats->min) / NSEC_PER_MSEC;
2568 double max = (double)(stats->max) / NSEC_PER_MSEC;
2569 double avg = avg_stats(stats);
2571 u64 n = (u64) stats->n;
/* relative stddev in percent; guard against avg == 0 */
2573 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2574 avg /= NSEC_PER_MSEC;
/* map syscall number back to its name via the syscall table */
2576 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
2577 printed += fprintf(fp, " %-15s", sc->name);
2578 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2579 n, syscall_stats_entry->msecs, min, avg);
2580 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2584 resort_rb__delete(syscall_stats);
2585 printed += fprintf(fp, "\n\n");
/*
 * Print the one-line summary for a single thread (comm, tid, event
 * count, share of all events, fault counts, runtime) followed by its
 * per-syscall stats table.  Returns characters printed.
 * NOTE(review): the surrounding conditionals for the majfault/minfault/
 * runtime lines are missing from this excerpt — presumably they are
 * printed only when non-zero; confirm against the full source.
 */
2590 static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
2593 struct thread_trace *ttrace = thread__priv(thread);
/* this thread's share of all events, in percent */
2599 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2601 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2602 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2603 printed += fprintf(fp, "%.1f%%", ratio);
2605 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2607 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2609 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2610 else if (fputc('\n', fp) != EOF)
2613 printed += thread__dump_stats(ttrace, trace, fp);
/* NULL-safe accessor: a thread with no thread_trace has seen 0 events. */
2618 static unsigned long thread__nr_events(struct thread_trace *ttrace)
2620 return ttrace ? ttrace->nr_events : 0;
/*
 * Re-sort the machine's thread rbtree by per-thread event count
 * (comparison on thread__nr_events of each thread's private
 * thread_trace; NULL-safe).  Ordering direction follows rb_resort.h
 * semantics for the '<' comparison — NOTE(review): confirm against
 * rb_resort.h.
 */
2623 DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2624 struct thread *thread;
2627 entry->thread = rb_entry(nd, struct thread, rb_node);
/*
 * Print the end-of-run summary: header, then one entry per thread,
 * ordered by event count via the 'threads' resort rbtree above.
 * Returns characters printed.
 */
2630 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2632 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2633 size_t printed = trace__fprintf_threads_header(fp);
/* resort allocation failed: report and bail (error path lines missing here) */
2636 if (threads == NULL) {
2637 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2641 resort_rb__for_each_entry(nd, threads)
2642 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
2644 resort_rb__delete(threads);
/*
 * --duration option callback: parse the threshold (in ms) below which
 * events are not printed.
 * NOTE(review): atof() gives no error reporting — malformed input
 * silently becomes 0.0 (i.e. no filtering); strtod() would allow
 * rejecting bad input.
 */
2649 static int trace__set_duration(const struct option *opt, const char *str,
2650 int unset __maybe_unused)
2652 struct trace *trace = opt->value;
2654 trace->duration_filter = atof(str);
/*
 * --filter-pids option callback: parse a CSV list of pids into
 * trace->filter_pids.  Slot 0 is always our own pid so 'perf trace'
 * never traces itself; the parsed pids follow.
 * NOTE(review): intlist__new() NULL check and return statements are
 * missing from this excerpt.
 */
2658 static int trace__set_filter_pids(const struct option *opt, const char *str,
2659 int unset __maybe_unused)
2663 struct trace *trace = opt->value;
2665 * FIXME: introduce a intarray class, plain parse csv and create a
2666 * { int nr, int entries[] } struct...
2667 */
2668 struct intlist *list = intlist__new(str);
/* +1 reserves entry[0] for getpid() below */
2673 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2674 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2676 if (trace->filter_pids.entries == NULL)
2679 trace->filter_pids.entries[0] = getpid();
/* copy the user-supplied pids after our own */
2681 for (i = 1; i < trace->filter_pids.nr; ++i)
2682 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2684 intlist__delete(list);
/*
 * Open 'filename' for writing as trace->output.  A pre-existing,
 * non-empty file is first renamed to "<filename>.old" instead of being
 * clobbered.  Returns 0 on success, -errno if fopen() fails.
 */
2690 static int trace__open_output(struct trace *trace, const char *filename)
2694 if (!stat(filename, &st) && st.st_size) {
2695 char oldname[PATH_MAX];
2697 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
/* NOTE(review): rename() failure is ignored here */
2699 rename(filename, oldname);
2702 trace->output = fopen(filename, "w");
2704 return trace->output == NULL ? -errno : 0;
/*
 * -F/--pf option callback: map "all"/"maj"/"min" onto the TRACE_PFMAJ /
 * TRACE_PFMIN bits, OR-ing into the int pointed to by opt->value.
 * The handling of any other string (presumably an error return) is
 * missing from this excerpt.
 */
2707 static int parse_pagefaults(const struct option *opt, const char *str,
2708 int unset __maybe_unused)
2710 int *trace_pgfaults = opt->value;
2712 if (strcmp(str, "all") == 0)
2713 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2714 else if (strcmp(str, "maj") == 0)
2715 *trace_pgfaults |= TRACE_PFMAJ;
2716 else if (strcmp(str, "min") == 0)
2717 *trace_pgfaults |= TRACE_PFMIN;
/* Install the same sample handler on every evsel in the evlist. */
2724 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2726 struct perf_evsel *evsel;
2728 evlist__for_each_entry(evlist, evsel)
2729 evsel->handler = handler;
2733 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2734 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2735 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2737 * It'd be better to introduce a parse_options() variant that would return a
2738 * list with the terms it didn't match to an event...
/*
 * -e/--event/--expr callback: split a combined comma-separated spec into
 * two lists — lists[1] collects syscall names and strace group files
 * (becomes trace->ev_qualifier), lists[0] collects everything else and
 * is handed to the regular parse_events_option().  A leading '!'
 * (handled around line 2755) negates the qualifier.
 * NOTE(review): the tokenising loop structure, several gotos and the
 * cleanup/return path are missing from this excerpt.
 */
2740 static int trace__parse_events_option(const struct option *opt, const char *str,
2741 int unset __maybe_unused)
2743 struct trace *trace = (struct trace *)opt->value;
2744 const char *s = str;
2745 char *sep = NULL, *lists[2] = { NULL, NULL, };
2746 int len = strlen(str), err = -1, list;
2747 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2748 char group_name[PATH_MAX];
2750 if (strace_groups_dir == NULL)
/* '!' prefix: qualifier excludes rather than includes */
2755 trace->not_ev_qualifier = true;
/* walk the comma-separated tokens */
2759 if ((sep = strchr(s, ',')) != NULL)
/* token is a known syscall name -> qualifier list */
2763 if (syscalltbl__id(trace->sctbl, s) >= 0) {
/* or a readable strace group file under strace/groups/ */
2766 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2767 if (access(group_name, R_OK) == 0)
/* append ",token" to an already-started list... */
2772 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
/* ...or start it; len (full input length) bounds all tokens of one list */
2774 lists[list] = malloc(len);
2775 if (lists[list] == NULL)
2777 strcpy(lists[list], s);
/* qualifier tokens collected: build the strlist, resolving group
 * names against the strace groups dir */
2787 if (lists[1] != NULL) {
2788 struct strlist_config slist_config = {
2789 .dirname = strace_groups_dir,
2792 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2793 if (trace->ev_qualifier == NULL) {
2794 fputs("Not enough memory to parse event qualifier", trace->output);
2798 if (trace__validate_ev_qualifier(trace))
/* non-syscall tokens: forward to the stock -e parser */
2805 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2806 "event selector. use 'perf list' to list available events",
2807 parse_events_option);
2808 err = parse_events_option(&o, lists[0], 0);
2817 int cmd_trace(int argc, const char **argv)
2819 const char *trace_usage[] = {
2820 "perf trace [<options>] [<command>]",
2821 "perf trace [<options>] -- <command> [<options>]",
2822 "perf trace record [<options>] [<command>]",
2823 "perf trace record [<options>] -- <command> [<options>]",
2826 struct trace trace = {
2835 .user_freq = UINT_MAX,
2836 .user_interval = ULLONG_MAX,
2837 .no_buffering = true,
2838 .mmap_pages = UINT_MAX,
2839 .proc_map_timeout = 500,
2843 .trace_syscalls = true,
2844 .kernel_syscallchains = false,
2845 .max_stack = UINT_MAX,
2847 const char *output_name = NULL;
2848 const struct option trace_options[] = {
2849 OPT_CALLBACK('e', "event", &trace, "event",
2850 "event/syscall selector. use 'perf list' to list available events",
2851 trace__parse_events_option),
2852 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2853 "show the thread COMM next to its id"),
2854 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2855 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2856 trace__parse_events_option),
2857 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2858 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2859 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2860 "trace events on existing process id"),
2861 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2862 "trace events on existing thread id"),
2863 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2864 "pids to filter (by the kernel)", trace__set_filter_pids),
2865 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2866 "system-wide collection from all CPUs"),
2867 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2868 "list of cpus to monitor"),
2869 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2870 "child tasks do not inherit counters"),
2871 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2872 "number of mmap data pages",
2873 perf_evlist__parse_mmap_pages),
2874 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2876 OPT_CALLBACK(0, "duration", &trace, "float",
2877 "show only events with duration > N.M ms",
2878 trace__set_duration),
2879 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2880 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2881 OPT_BOOLEAN('T', "time", &trace.full_time,
2882 "Show full timestamp, not time relative to first start"),
2883 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2884 "Show only syscall summary with statistics"),
2885 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2886 "Show all syscalls and summary with statistics"),
2887 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2888 "Trace pagefaults", parse_pagefaults, "maj"),
2889 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2890 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2891 OPT_CALLBACK(0, "call-graph", &trace.opts,
2892 "record_mode[,record_size]", record_callchain_help,
2893 &record_parse_callchain_opt),
2894 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2895 "Show the kernel callchains on the syscall exit path"),
2896 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2897 "Set the minimum stack depth when parsing the callchain, "
2898 "anything below the specified depth will be ignored."),
2899 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2900 "Set the maximum stack depth when parsing the callchain, "
2901 "anything beyond the specified depth will be ignored. "
2902 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
2903 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2904 "per thread proc mmap processing timeout in ms"),
2905 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2906 "ms to wait before starting measurement after program "
2910 bool __maybe_unused max_stack_user_set = true;
2911 bool mmap_pages_user_set = true;
2912 const char * const trace_subcommands[] = { "record", NULL };
2916 signal(SIGSEGV, sighandler_dump_stack);
2917 signal(SIGFPE, sighandler_dump_stack);
2919 trace.evlist = perf_evlist__new();
2920 trace.sctbl = syscalltbl__new();
2922 if (trace.evlist == NULL || trace.sctbl == NULL) {
2923 pr_err("Not enough memory to run!\n");
2928 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2929 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2931 err = bpf__setup_stdout(trace.evlist);
2933 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2934 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2940 if (trace.trace_pgfaults) {
2941 trace.opts.sample_address = true;
2942 trace.opts.sample_time = true;
2945 if (trace.opts.mmap_pages == UINT_MAX)
2946 mmap_pages_user_set = false;
2948 if (trace.max_stack == UINT_MAX) {
2949 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
2950 max_stack_user_set = false;
2953 #ifdef HAVE_DWARF_UNWIND_SUPPORT
2954 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
2955 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2958 if (callchain_param.enabled) {
2959 if (!mmap_pages_user_set && geteuid() == 0)
2960 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2962 symbol_conf.use_callchain = true;
2965 if (trace.evlist->nr_entries > 0)
2966 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2968 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2969 return trace__record(&trace, argc-1, &argv[1]);
2971 /* summary_only implies summary option, but don't overwrite summary if set */
2972 if (trace.summary_only)
2973 trace.summary = trace.summary_only;
2975 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2976 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2977 pr_err("Please specify something to trace.\n");
2981 if (!trace.trace_syscalls && trace.ev_qualifier) {
2982 pr_err("The -e option can't be used with --no-syscalls.\n");
2986 if (output_name != NULL) {
2987 err = trace__open_output(&trace, output_name);
2989 perror("failed to create output file");
2994 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2996 err = target__validate(&trace.opts.target);
2998 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2999 fprintf(trace.output, "%s", bf);
3003 err = target__parse_uid(&trace.opts.target);
3005 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3006 fprintf(trace.output, "%s", bf);
3010 if (!argc && target__none(&trace.opts.target))
3011 trace.opts.target.system_wide = true;
3014 err = trace__replay(&trace);
3016 err = trace__run(&trace, argc, argv);
3019 if (output_name != NULL)
3020 fclose(trace.output);