 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/nmi.h>

#include <linux/trace.h>
#include <linux/sched/rt.h>

#include "trace_output.h"
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
53 bool ring_buffer_expanded;
56 * We need to change this state when a selftest is running.
57 * A selftest will lurk into the ring-buffer to count the
58 * entries inserted during the selftest although some concurrent
59 * insertions into the ring-buffer such as trace_printk could occurred
60 * at the same time, giving false positive or negative results.
62 static bool __read_mostly tracing_selftest_running;
65 * If a tracer is running, we do not want to run SELFTEST.
67 bool __read_mostly tracing_selftest_disabled;
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
static DEFINE_PER_CPU(bool, trace_cmdline_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly tracing_buffer_mask;
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It defaults to off, but you can enable it by either specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set it to 1 if you want to dump the buffers of all CPUs.
 * Set it to 2 if you want to dump only the buffer of the CPU that triggered
 * the oops.
 */
enum ftrace_dump_mode ftrace_dump_on_oops;

/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
127 unsigned long length;
130 union trace_enum_map_item;
132 struct trace_enum_map_tail {
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "enum_string"
137 union trace_enum_map_item *next;
138 const char *end; /* points to NULL */
141 static DEFINE_MUTEX(trace_enum_mutex);
144 * The trace_enum_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved enum_map items.
150 union trace_enum_map_item {
151 struct trace_enum_map map;
152 struct trace_enum_map_head head;
153 struct trace_enum_map_tail tail;
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
static int tracing_set_tracer(struct trace_array *tr, const char *buf);

#define MAX_TRACER_SIZE		100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;

static int __init set_cmdline_ftrace(char *str)
{
	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	ring_buffer_expanded = true;
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (*str++ != '=' || !*str) {
		ftrace_dump_on_oops = DUMP_ALL;
		return 1;
	}

	if (!strcmp("orig_cpu", str)) {
		ftrace_dump_on_oops = DUMP_ORIG;
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
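
/*
 * Usage sketch (illustrative, not from the original source): with the
 * __setup() handler above, the dump mode can be chosen from the kernel
 * command line:
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs
 *	ftrace_dump_on_oops=orig_cpu	dump only the CPU that oopsed
 *
 * The same knob is reachable at run time via
 * /proc/sys/kernel/ftrace_dump_on_oops (0 = off, 1 = all, 2 = orig_cpu).
 */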
static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);

static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 0;
}
__setup("trace_options=", set_trace_boot_options);

static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);
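
/*
 * Usage sketch (illustrative, not from the original source): the boot
 * parameters parsed above can be combined on one kernel command line,
 * for example:
 *
 *	ftrace=function trace_options=sym-offset trace_clock=global tp_printk
 *
 * which starts the function tracer at boot, adjusts its output options,
 * selects the "global" trace clock, and mirrors trace events to printk.
 */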
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS |					\
	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	TRACE_ITER_EVENT_FORK

/*
 * The global_trace is the descriptor that holds the top-level tracing
 * buffers for the live tracing.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

LIST_HEAD(ftrace_trace_arrays);
int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;
	int ret = -ENODEV;

	mutex_lock(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			ret = 0;
			break;
		}
	}
	mutex_unlock(&trace_types_lock);

	return ret;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

void trace_array_put(struct trace_array *this_tr)
{
	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}

int call_filter_check_discard(struct trace_event_call *call, void *rec,
			      struct ring_buffer *buffer,
			      struct ring_buffer_event *event)
{
	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
		__trace_event_discard_commit(buffer, event);
		return 1;
	}

	return 0;
}
void trace_free_pid_list(struct trace_pid_list *pid_list)
{
	vfree(pid_list->pids);
	kfree(pid_list);
}

/**
 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
 * @filtered_pids: The list of pids to check
 * @search_pid: The PID to find in @filtered_pids
 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
 */
bool
trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
{
	/*
	 * If pid_max changed after filtered_pids was created, we
	 * by default ignore all pids greater than the previous pid_max.
	 */
	if (search_pid >= filtered_pids->pid_max)
		return false;

	return test_bit(search_pid, filtered_pids->pids);
}

/**
 * trace_ignore_this_task - should a task be ignored for tracing
 * @filtered_pids: The list of pids to check
 * @task: The task that should be ignored if not filtered
 *
 * Checks if @task should be traced or not from @filtered_pids.
 * Returns true if @task should *NOT* be traced.
 * Returns false if @task should be traced.
 */
bool
trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
{
	/*
	 * Return false, because if filtered_pids does not exist,
	 * all pids are good to trace.
	 */
	if (!filtered_pids)
		return false;

	return !trace_find_filtered_pid(filtered_pids, task->pid);
}
/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
				  struct task_struct *self,
				  struct task_struct *task)
{
	if (!pid_list)
		return;

	/* For forks, we only add if the forking task is listed */
	if (self) {
		if (!trace_find_filtered_pid(pid_list, self->pid))
			return;
	}

	/* Sorry, but we don't support pid_max changing after setting */
	if (task->pid >= pid_list->pid_max)
		return;

	/* "self" is set for forks, and NULL for exits */
	if (self)
		set_bit(task->pid, pid_list->pids);
	else
		clear_bit(task->pid, pid_list->pids);
}

/**
 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
 * @pos: The position of the file
 *
 * This is used by the seq_file "next" operation to iterate the pids
 * listed in a trace_pid_list structure.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
{
	unsigned long pid = (unsigned long)v;

	(*pos)++;

	/* pid already is +1 of the actual previous bit */
	pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);

	/* Return pid + 1 to allow zero to be represented */
	if (pid < pid_list->pid_max)
		return (void *)(pid + 1);

	return NULL;
}
/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by seq_file "start" operation to start the iteration
 * of listed pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
{
	unsigned long pid;
	loff_t l = 0;

	pid = find_first_bit(pid_list->pids, pid_list->pid_max);
	if (pid >= pid_list->pid_max)
		return NULL;

	/* Return pid + 1 so that zero can be the exit value */
	for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
		;
	return (void *)pid;
}

/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}
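
/*
 * Usage sketch (illustrative, with hypothetical names; not from the
 * original source): the three helpers above slot directly into a
 * seq_file interface. A real user, such as the event pid filter, looks
 * its pid list up under RCU instead of the global used here:
 *
 *	static struct trace_pid_list *example_pid_list;
 *
 *	static void *example_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(example_pid_list, pos);
 *	}
 *
 *	static void *example_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(example_pid_list, v, pos);
 *	}
 *
 *	static void example_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= example_start,
 *		.next	= example_next,
 *		.stop	= example_stop,
 *		.show	= trace_pid_show,
 *	};
 */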
/* 128 should be much more than enough */
#define PID_BUF_SIZE		127

int trace_pid_write(struct trace_pid_list *filtered_pids,
		    struct trace_pid_list **new_pid_list,
		    const char __user *ubuf, size_t cnt)
{
	struct trace_pid_list *pid_list;
	struct trace_parser parser;
	unsigned long val;
	int nr_pids = 0;
	ssize_t read = 0;
	ssize_t ret = 0;
	loff_t pos;
	pid_t pid;

	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
		return -ENOMEM;

	/*
	 * Always recreate a new array. The write is an all or nothing
	 * operation. Always create a new array when adding new pids by
	 * the user. If the operation fails, then the current list is
	 * left unchanged.
	 */
	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);

	pid_list->pid_max = READ_ONCE(pid_max);

	/* Only truncating will shrink pid_max */
	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
		pid_list->pid_max = filtered_pids->pid_max;

	/* pid_max bits (one per pid), rounded up to whole bytes */
	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
	if (!pid_list->pids) {
		kfree(pid_list);
		return -ENOMEM;
	}

	if (filtered_pids) {
		/* copy the current bits to the new max */
		for_each_set_bit(pid, filtered_pids->pids,
				 filtered_pids->pid_max) {
			set_bit(pid, pid_list->pids);
			nr_pids++;
		}
	}

	while (cnt > 0) {

		pos = 0;

		ret = trace_get_user(&parser, ubuf, cnt, &pos);
		if (ret < 0 || !trace_parser_loaded(&parser))
			break;

		read += ret;
		ubuf += ret;
		cnt -= ret;

		parser.buffer[parser.idx] = 0;

		ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
			break;
		if (val >= pid_list->pid_max)
			break;

		pid = (pid_t)val;

		set_bit(pid, pid_list->pids);
		nr_pids++;

		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);

	if (ret < 0) {
		trace_free_pid_list(pid_list);
		return ret;
	}

	if (!nr_pids) {
		/* Cleared the list of pids */
		trace_free_pid_list(pid_list);
		read = ret;
		pid_list = NULL;
	}

	*new_pid_list = pid_list;

	return read;
}
static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer, cpu);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}

u64 ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been disabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
}
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a link list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * The ring buffer serializes readers, but that is low level protection.
 * The validity of the events (which are returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the event producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multi-process access to different cpu ring buffers
 * concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif
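
/*
 * Usage sketch (illustrative, not from the original source): a reader
 * of a single CPU buffer locks just that CPU, while whole-buffer
 * operations pass RING_BUFFER_ALL_CPUS; the rwsem above lets per-cpu
 * readers run in parallel while excluding whole-buffer access:
 *
 *	trace_access_lock(cpu);
 *	... consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 */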
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
714 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
718 int skip, int pc, struct pt_regs *regs);
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
723 int skip, int pc, struct pt_regs *regs)
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
729 int skip, int pc, struct pt_regs *regs)
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
739 struct trace_entry *ent = ring_buffer_event_data(event);
741 tracing_generic_entry_update(ent, flags, pc);
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
749 unsigned long flags, int pc)
751 struct ring_buffer_event *event;
753 event = ring_buffer_lock_reserve(buffer, len);
755 trace_event_setup(event, type, flags, pc);
static void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_on(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);

static __always_inline void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_cmdline_save, true);

	/* If this is the temp buffer, we need to commit fully */
	if (this_cpu_read(trace_buffered_event) == event) {
		/* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
		/* Release the temp buffer */
		this_cpu_dec(trace_buffered_event_cnt);
	} else
		ring_buffer_unlock_commit(buffer, event);
}
/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip:	  The address of the caller
 * @str:  The constant string to write
 * @size: The size of the string.
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	int alloc;
	int pc;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	pc = preempt_count();

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    irq_flags, pc);
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);

	return size;
}
EXPORT_SYMBOL_GPL(__trace_puts);
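
/*
 * Usage sketch (illustrative, not from the original source): callers
 * normally go through the trace_puts() macro from linux/kernel.h
 * rather than calling __trace_puts() directly; the macro supplies the
 * caller's address and the string length:
 *
 *	trace_puts("reached the slow path\n");
 *
 * Compile-time constant strings may instead be routed to
 * __trace_bputs() below, which records only the string's pointer.
 */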
/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip:	 The address of the caller
 * @str: The constant string whose pointer is written to the buffer
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct bputs_entry *entry;
	unsigned long irq_flags;
	int size = sizeof(struct bputs_entry);
	int pc;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	pc = preempt_count();

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    irq_flags, pc);
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip  = ip;
	entry->str = str;

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);

	return 1;
}
EXPORT_SYMBOL_GPL(__trace_bputs);
#ifdef CONFIG_TRACER_SNAPSHOT
/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

	if (in_nmi()) {
		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		internal_trace_puts("*** snapshot is being ignored        ***\n");
		return;
	}

	if (!tr->allocated_snapshot) {
		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
		internal_trace_puts("*** stopping trace here!   ***\n");
		tracing_off();
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id());
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
					struct trace_buffer *size_buf, int cpu_id);
static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);

static int alloc_snapshot(struct trace_array *tr)
{
	int ret;

	if (!tr->allocated_snapshot) {

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer. Instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}

/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = alloc_snapshot(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

	ret = tracing_alloc_snapshot();
	if (ret < 0)
		return;

	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
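
/*
 * Usage sketch (illustrative; hit_rare_condition() is a hypothetical
 * helper, not from the original source): code that wants the trace
 * leading up to a rare event can call tracing_alloc_snapshot() once
 * from sleepable context and then, when the condition fires (even in
 * atomic context):
 *
 *	if (unlikely(hit_rare_condition()))
 *		tracing_snapshot();
 *
 * or simply call tracing_snapshot_alloc() where sleeping is allowed.
 */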
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
EXPORT_SYMBOL_GPL(tracing_snapshot);
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
#endif /* CONFIG_TRACER_SNAPSHOT */
static void tracer_tracing_off(struct trace_array *tr)
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_off(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
	tracer_tracing_off(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_off);

void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning)
		tracing_off();
}

/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
int tracer_tracing_is_on(struct trace_array *tr)
{
	if (tr->trace_buffer.buffer)
		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
	return !tr->buffer_disabled;
}

/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
	return tracer_tracing_is_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_is_on);
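
/*
 * Usage sketch (illustrative; do_something_suspect() is a hypothetical
 * helper, not from the original source): kernel code can bracket a
 * suspect region so the ring buffer stops recording right after the
 * interesting activity:
 *
 *	tracing_on();
 *	do_something_suspect();
 *	tracing_off();
 *
 * The captured trace can then be read from the tracefs "trace" file,
 * and tracing_is_on() reports the current recording state.
 */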
static int __init set_buf_size(char *str)
{
	unsigned long buf_size;

	if (!str)
		return 0;
	buf_size = memparse(str, &str);
	/* nr_entries can not be zero */
	if (buf_size == 0)
		return 0;
	trace_buf_size = buf_size;
	return 1;
}
__setup("trace_buf_size=", set_buf_size);

static int __init set_tracing_thresh(char *str)
{
	unsigned long threshold;
	int ret;

	if (!str)
		return 0;
	ret = kstrtoul(str, 0, &threshold);
	if (ret < 0)
		return 0;
	tracing_thresh = threshold * 1000;
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the enums were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

static struct {
	u64 (*func)(void);
	const char *name;
	int in_ns;		/* is this clock in nanoseconds? */
} trace_clocks[] = {
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
	{ trace_clock_jiffies,		"uptime",	0 },
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
	{ ktime_get_boot_fast_ns,	"boot",		1 },
};
/**
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/**
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
	parser->buffer = NULL;
}
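
/*
 * Usage sketch (illustrative, not from the original source): a write()
 * handler that tokenizes user input brackets trace_get_user() (below)
 * with the two helpers above, in the same way trace_pid_write() does:
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		...
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 */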
/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		/* only spaces were written */
		if (isspace(ch)) {
			*ppos += read;
			ret = read;
			goto out;
		}

		parser->idx = 0;
	}

	/* read the non-space input */
	while (cnt && !isspace(ch)) {
		if (parser->idx < parser->size - 1)
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch)) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}

/* TODO add a seq_buf_to_buffer() */
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
{
	int len;

	if (trace_seq_used(s) <= s->seq.readpos)
		return -EBUSY;

	len = trace_seq_used(s) - s->seq.readpos;
	if (cnt > len)
		cnt = len;
	memcpy(buf, s->buffer + s->seq.readpos, cnt);

	s->seq.readpos += cnt;
	return cnt;
}

unsigned long __read_mostly	tracing_thresh;
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct trace_buffer *trace_buf = &tr->trace_buffer;
	struct trace_buffer *max_buf = &tr->max_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);

	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;

	max_data->saved_latency = tr->max_latency;
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;

	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
	max_data->pid = tsk->pid;
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
}

/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	struct ring_buffer *buf;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());

	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	buf = tr->trace_buffer.buffer;
	tr->trace_buffer.buffer = tr->max_buffer.buffer;
	tr->max_buffer.buffer = buf;

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}

/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr - tracer
 * @tsk - task with the latency
 * @cpu - the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
 */
void
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
	int ret;

	if (tr->stop_count)
		return;

	WARN_ON_ONCE(!irqs_disabled());
	if (!tr->allocated_snapshot) {
		/* Only the nop tracer should hit this when disabling */
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
		return;
	}

	arch_spin_lock(&tr->max_lock);

	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);

	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 */
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit in progress\n");
	}

	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);

	__update_max_tr(tr, tsk, cpu);
	arch_spin_unlock(&tr->max_lock);
}
#endif /* CONFIG_TRACER_MAX_TRACE */
static int wait_on_pipe(struct trace_iterator *iter, bool full)
{
	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return 0;

	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
				full);
}

#ifdef CONFIG_FTRACE_STARTUP_TEST
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;

	if (!type->selftest || tracing_selftest_disabled)
		return 0;

	/*
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
	 */
	tracing_reset_online_cpus(&tr->trace_buffer);

	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;

		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
#else
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
}
#endif /* CONFIG_FTRACE_STARTUP_TEST */
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

static void __init apply_trace_boot_options(void);

/**
 * register_tracer - register a tracer with the ftrace system.
 * @type - the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
int __init register_tracer(struct tracer *type)
{
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

	if (strlen(type->name) >= MAX_TRACER_SIZE) {
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

	mutex_lock(&trace_types_lock);

	tracing_selftest_running = true;

	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
			pr_info("Tracer %s already registered\n",
				type->name);
			ret = -1;
			goto out;
		}
	}

	if (!type->set_flag)
		type->set_flag = &dummy_set_flag;
	if (!type->flags) {
		/* allocate a dummy tracer_flags */
		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
		if (!type->flags) {
			ret = -ENOMEM;
			goto out;
		}
		type->flags->val = 0;
		type->flags->opts = dummy_tracer_opt;
	} else
		if (!type->flags->opts)
			type->flags->opts = dummy_tracer_opt;

	/* store the tracer for __set_tracer_option */
	type->flags->trace = type;

	ret = run_tracer_selftest(type);
	if (ret < 0)
		goto out;

	type->next = trace_types;
	trace_types = type;
	add_tracer_options(&global_trace, type);

 out:
	tracing_selftest_running = false;
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		goto out_unlock;

	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		goto out_unlock;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
	tracing_set_tracer(&global_trace, type->name);
	default_bootup_tracer = NULL;

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
	tracing_selftest_disabled = true;
#ifdef CONFIG_FTRACE_STARTUP_TEST
	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
	       type->name);
#endif

 out_unlock:
	return ret;
}
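
/*
 * Registration sketch (illustrative; "example" and its callbacks are
 * hypothetical and not from the original source, the field names
 * follow struct tracer in trace.h): a minimal tracer needs little more
 * than a name plus init/reset callbacks, registered from an __init
 * routine:
 *
 *	static int example_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_init,
 *		.reset	= example_reset,
 *	};
 *
 *	static int __init init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */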
void tracing_reset(struct trace_buffer *buf, int cpu)
{
	struct ring_buffer *buffer = buf->buffer;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_sched();
	ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}

void tracing_reset_online_cpus(struct trace_buffer *buf)
{
	struct ring_buffer *buffer = buf->buffer;
	int cpu;

	if (!buffer)
		return;

	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_sched();

	buf->time_start = buffer_ftrace_now(buf, buf->cpu);

	for_each_online_cpu(cpu)
		ring_buffer_reset_cpu(buffer, cpu);

	ring_buffer_record_enable(buffer);
}

/* Must have trace_types_lock held */
void tracing_reset_all_online_cpus(void)
{
	struct trace_array *tr;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
		tracing_reset_online_cpus(&tr->max_buffer);
#endif
	}
}
#define SAVED_CMDLINES_DEFAULT 128
#define NO_CMDLINE_MAP UINT_MAX
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
struct saved_cmdlines_buffer {
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	unsigned *map_cmdline_to_pid;
	unsigned cmdline_num;
	int cmdline_idx;
	char *saved_cmdlines;
};
static struct saved_cmdlines_buffer *savedcmd;

/* temporary disable recording */
static atomic_t trace_record_cmdline_disabled __read_mostly;

static inline char *get_saved_cmdlines(int idx)
{
	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}

static inline void set_cmdline(int idx, const char *cmdline)
{
	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}

static int allocate_cmdlines_buffer(unsigned int val,
				    struct saved_cmdlines_buffer *s)
{
	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
					GFP_KERNEL);
	if (!s->map_cmdline_to_pid)
		return -ENOMEM;

	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
	if (!s->saved_cmdlines) {
		kfree(s->map_cmdline_to_pid);
		return -ENOMEM;
	}

	s->cmdline_idx = 0;
	s->cmdline_num = val;
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return 0;
}

static int trace_create_savedcmd(void)
{
	int ret;

	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
	if (!savedcmd)
		return -ENOMEM;

	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
	if (ret < 0) {
		kfree(savedcmd);
		savedcmd = NULL;
		return -ENOMEM;
	}

	return 0;
}
int is_tracing_stopped(void)
{
	return global_trace.stop_count;
}

/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.
 */
void tracing_start(void)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (--global_trace.stop_count) {
		if (global_trace.stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			global_trace.stop_count = 0;
		}
		goto out;
	}

	/* Prevent the buffers from switching */
	arch_spin_lock(&global_trace.max_lock);

	buffer = global_trace.trace_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);
#endif

	arch_spin_unlock(&global_trace.max_lock);

 out:
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}

static void tracing_start_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	/* If global, we need to also start the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_start();

	raw_spin_lock_irqsave(&tr->start_lock, flags);

	if (--tr->stop_count) {
		if (tr->stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			tr->stop_count = 0;
		}
		goto out;
	}

	buffer = tr->trace_buffer.buffer;
	if (buffer)
		ring_buffer_record_enable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}

/**
 * tracing_stop - quick stop of the tracer
 *
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (global_trace.stop_count++)
		goto out;

	/* Prevent the buffers from switching */
	arch_spin_lock(&global_trace.max_lock);

	buffer = global_trace.trace_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);
#endif

	arch_spin_unlock(&global_trace.max_lock);

 out:
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}

static void tracing_stop_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	/* If global, we need to also stop the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_stop();

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	if (tr->stop_count++)
		goto out;

	buffer = tr->trace_buffer.buffer;
	if (buffer)
		ring_buffer_record_disable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
}
void trace_stop_cmdline_recording(void);

static int trace_save_cmdline(struct task_struct *tsk)
{
	unsigned pid, idx;

	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
		return 0;

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 */
	if (!arch_spin_trylock(&trace_cmdline_lock))
		return 0;

	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
	if (idx == NO_CMDLINE_MAP) {
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;

		/*
		 * Check whether the cmdline buffer at idx has a pid
		 * mapped. We are going to overwrite that entry so we
		 * need to clear the map_pid_to_cmdline. Otherwise we
		 * would read the new comm for the old pid.
		 */
		pid = savedcmd->map_cmdline_to_pid[idx];
		if (pid != NO_CMDLINE_MAP)
			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;

		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;

		savedcmd->cmdline_idx = idx;
	}

	set_cmdline(idx, tsk->comm);

	arch_spin_unlock(&trace_cmdline_lock);

	return 1;
}

static void __trace_find_cmdline(int pid, char comm[])
{
	unsigned map;

	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}

	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

	if (pid > PID_MAX_DEFAULT) {
		strcpy(comm, "<...>");
		return;
	}

	map = savedcmd->map_pid_to_cmdline[pid];
	if (map != NO_CMDLINE_MAP)
		strcpy(comm, get_saved_cmdlines(map));
	else
		strcpy(comm, "<...>");
}

void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);

	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

void tracing_record_cmdline(struct task_struct *tsk)
{
	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
		return;

	if (!__this_cpu_read(trace_cmdline_save))
		return;

	if (trace_save_cmdline(tsk))
		__this_cpu_write(trace_cmdline_save, false);
}
void
tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
			     int pc)
{
	struct task_struct *tsk = current;

	entry->preempt_count	= pc & 0xff;
	entry->pid		= (tsk) ? tsk->pid : 0;
	entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
#else
		TRACE_FLAG_IRQS_NOSUPPORT |
#endif
		((pc & NMI_MASK	   ) ? TRACE_FLAG_NMI	  : 0) |
		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
		((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
}
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);

struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned long flags, int pc)
{
	return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
}

DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
DEFINE_PER_CPU(int, trace_buffered_event_cnt);
static int trace_buffered_event_ref;
/**
 * trace_buffered_event_enable - enable buffering events
 *
 * When events are being filtered, it is quicker to use a temporary
 * buffer to write the event data into if there's a likely chance
 * that it will not be committed. The discard of the ring buffer
 * is not as fast as committing, and is much slower than copying
 * a commit.
 *
 * When an event is to be filtered, allocate per cpu buffers to
 * write the event data into, and if the event is filtered and discarded
 * it is simply dropped, otherwise, the entire data is to be committed
 * in one shot.
 */
void trace_buffered_event_enable(void)
{
	struct ring_buffer_event *event;
	struct page *page;
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (trace_buffered_event_ref++)
		return;

	for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
		if (!page)
			goto failed;

		event = page_address(page);
		memset(event, 0, sizeof(*event));

		per_cpu(trace_buffered_event, cpu) = event;

		preempt_disable();
		if (cpu == smp_processor_id() &&
		    this_cpu_read(trace_buffered_event) !=
		    per_cpu(trace_buffered_event, cpu))
			WARN_ON_ONCE(1);
		preempt_enable();
	}

	return;
 failed:
	trace_buffered_event_disable();
}

static void enable_trace_buffered_event(void *data)
{
	/* Probably not needed, but do it anyway */
	smp_rmb();
	this_cpu_dec(trace_buffered_event_cnt);
}

static void disable_trace_buffered_event(void *data)
{
	this_cpu_inc(trace_buffered_event_cnt);
}

/**
 * trace_buffered_event_disable - disable buffering events
 *
 * When a filter is removed, it is faster to not use the buffered
 * events, and to commit directly into the ring buffer. Free up
 * the temp buffers when there are no more users. This requires
 * special synchronization with current events.
 */
void trace_buffered_event_disable(void)
{
	int cpu;

	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));

	if (WARN_ON_ONCE(!trace_buffered_event_ref))
		return;

	if (--trace_buffered_event_ref)
		return;

	preempt_disable();
	/* For each CPU, set the buffer as used. */
	smp_call_function_many(tracing_buffer_mask,
			       disable_trace_buffered_event, NULL, 1);
	preempt_enable();

	/* Wait for all current users to finish */
	synchronize_sched();

	for_each_tracing_cpu(cpu) {
		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
		per_cpu(trace_buffered_event, cpu) = NULL;
	}
	/*
	 * Make sure trace_buffered_event is NULL before clearing
	 * trace_buffered_event_cnt.
	 */
	smp_wmb();

	preempt_disable();
	/* Do the work on each cpu */
	smp_call_function_many(tracing_buffer_mask,
			       enable_trace_buffered_event, NULL, 1);
	preempt_enable();
}
static struct ring_buffer *temp_buffer;

struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
			  struct trace_event_file *trace_file,
			  int type, unsigned long len,
			  unsigned long flags, int pc)
{
	struct ring_buffer_event *entry;
	int val;

	*current_rb = trace_file->tr->trace_buffer.buffer;

	if ((trace_file->flags &
	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
	    (entry = this_cpu_read(trace_buffered_event))) {
		/* Try to use the per cpu buffer first */
		val = this_cpu_inc_return(trace_buffered_event_cnt);
		if (val == 1) {
			trace_event_setup(entry, type, flags, pc);
			entry->array[0] = len;
			return entry;
		}
		this_cpu_dec(trace_buffered_event_cnt);
	}

	entry = __trace_buffer_lock_reserve(*current_rb,
					    type, len, flags, pc);
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursion
	 * safe and will not be recorded anywhere.
	 */
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
		*current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb,
						    type, len, flags, pc);
	}
	return entry;
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
static DEFINE_SPINLOCK(tracepoint_iter_lock);
static DEFINE_MUTEX(tracepoint_printk_mutex);

static void output_printk(struct trace_event_buffer *fbuffer)
{
	struct trace_event_call *event_call;
	struct trace_event *event;
	unsigned long flags;
	struct trace_iterator *iter = tracepoint_print_iter;

	/* We should never get here if iter is NULL */
	if (WARN_ON_ONCE(!iter))
		return;

	event_call = fbuffer->trace_file->event_call;
	if (!event_call || !event_call->event.funcs ||
	    !event_call->event.funcs->trace)
		return;

	event = &fbuffer->trace_file->event_call->event;

	spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
	iter->ent = fbuffer->entry;
	event_call->event.funcs->trace(iter, 0, event);
	trace_seq_putc(&iter->seq, 0);
	printk("%s", iter->seq.buffer);

	spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
}

int tracepoint_printk_sysctl(struct ctl_table *table, int write,
			     void __user *buffer, size_t *lenp,
			     loff_t *ppos)
{
	int save_tracepoint_printk;
	int ret;

	mutex_lock(&tracepoint_printk_mutex);
	save_tracepoint_printk = tracepoint_printk;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	/*
	 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_print_iter is not allocated
	 */
	if (!tracepoint_print_iter)
		tracepoint_printk = 0;

	if (save_tracepoint_printk == tracepoint_printk)
		goto out;

	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);

 out:
	mutex_unlock(&tracepoint_printk_mutex);

	return ret;
}
void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
{
	if (static_key_false(&tracepoint_printk_key.key))
		output_printk(fbuffer);

	event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
				    fbuffer->event, fbuffer->entry,
				    fbuffer->flags, fbuffer->pc);
}
EXPORT_SYMBOL_GPL(trace_event_buffer_commit);

void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct ring_buffer *buffer,
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs)
{
	__buffer_unlock_commit(buffer, event);

	/*
	 * If regs is not set, then skip the following callers:
	 *   trace_buffer_unlock_commit_regs
	 *   event_trigger_unlock_commit
	 *   trace_event_buffer_commit
	 *   trace_event_raw_event_sched_switch
	 * Note, we can still get here via blktrace, wakeup tracer
	 * and mmiotrace, but that's ok if they lose a function or
	 * two. They are not that meaningful.
	 */
	ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
	ftrace_trace_userstack(buffer, flags, pc);
}

/*
 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
 */
void
trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
				   struct ring_buffer_event *event)
{
	__buffer_unlock_commit(buffer, event);
}
static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	entry = ring_buffer_event_data(event);
	size = ring_buffer_event_length(event);
	export->write(entry, size);
}

static DEFINE_MUTEX(ftrace_export_lock);

static struct trace_export __rcu *ftrace_exports_list __read_mostly;

static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);

static inline void ftrace_exports_enable(void)
{
	static_branch_enable(&ftrace_exports_enabled);
}

static inline void ftrace_exports_disable(void)
{
	static_branch_disable(&ftrace_exports_enabled);
}

void ftrace_exports(struct ring_buffer_event *event)
{
	struct trace_export *export;

	preempt_disable_notrace();

	export = rcu_dereference_raw_notrace(ftrace_exports_list);
	while (export) {
		trace_process_export(export, event);

		export = rcu_dereference_raw_notrace(export->next);
	}

	preempt_enable_notrace();
}

static inline void
add_trace_export(struct trace_export **list, struct trace_export *export)
{
	rcu_assign_pointer(export->next, *list);
	/*
	 * We are entering export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included into the list.
	 */
	rcu_assign_pointer(*list, export);
}

static inline int
rm_trace_export(struct trace_export **list, struct trace_export *export)
{
	struct trace_export **p;

	for (p = list; *p != NULL; p = &(*p)->next)
		if (*p == export)
			break;

	if (*p != export)
		return -1;

	rcu_assign_pointer(*p, (*p)->next);

	return 0;
}

static inline void
add_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	if (*list == NULL)
		ftrace_exports_enable();

	add_trace_export(list, export);
}

static inline int
rm_ftrace_export(struct trace_export **list, struct trace_export *export)
{
	int ret;

	ret = rm_trace_export(list, export);
	if (*list == NULL)
		ftrace_exports_disable();

	return ret;
}

int register_ftrace_export(struct trace_export *export)
{
	if (WARN_ON_ONCE(!export->write))
		return -1;

	mutex_lock(&ftrace_export_lock);

	add_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return 0;
}
EXPORT_SYMBOL_GPL(register_ftrace_export);

int unregister_ftrace_export(struct trace_export *export)
{
	int ret;

	mutex_lock(&ftrace_export_lock);

	ret = rm_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);

	return ret;
}
EXPORT_SYMBOL_GPL(unregister_ftrace_export);
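
/*
 * Usage sketch (illustrative; my_export and its callback are
 * hypothetical, not from the original source): an exporter provides a
 * write() callback matching the two-argument form invoked by
 * trace_process_export() above, and registers it:
 *
 *	static void my_export_write(const void *buf, unsigned int len)
 *	{
 *		... forward the raw trace entry out of band ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */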
void
trace_function(struct trace_array *tr,
	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
	       int pc)
{
	struct trace_event_call *call = &event_function;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;

	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
					    flags, pc);
	if (!event)
		return;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

	if (!call_filter_check_discard(call, entry, buffer, event)) {
		if (static_branch_unlikely(&ftrace_exports_enabled))
			ftrace_exports(event);
		__buffer_unlock_commit(buffer, event);
	}
}

#ifdef CONFIG_STACKTRACE

#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
struct ftrace_stack {
	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
};

static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2378 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2380 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2381 unsigned long flags,
2382 int skip, int pc, struct pt_regs *regs)
2384 struct trace_event_call *call = &event_kernel_stack;
2385 struct ring_buffer_event *event;
2386 struct stack_entry *entry;
2387 struct stack_trace trace;
2389 int size = FTRACE_STACK_ENTRIES;
2391 trace.nr_entries = 0;
2395 * Add two, for this function and the call to save_stack_trace()
2396 * If regs is set, then these functions will not be in the way.
2402 * Since events can happen in NMIs there's no safe way to
2403 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2404 * or NMI comes in, it will just have to use the default
2405 * FTRACE_STACK_SIZE.
2407 preempt_disable_notrace();
2409 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2411 * We don't need any atomic variables, just a barrier.
2412 * If an interrupt comes in, we don't care, because it would
2413 * have exited and put the counter back to what we want.
 * We just need a barrier to keep gcc from moving things
 * around.
 */
barrier();

if (use_stack == 1) {
2419 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2420 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2423 save_stack_trace_regs(regs, &trace);
2425 save_stack_trace(&trace);
2427 if (trace.nr_entries > size)
2428 size = trace.nr_entries;
2430 /* From now on, use_stack is a boolean */
2433 size *= sizeof(unsigned long);
2435 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2436 sizeof(*entry) + size, flags, pc);
2439 entry = ring_buffer_event_data(event);
2441 memset(&entry->caller, 0, size);
2444 memcpy(&entry->caller, trace.entries,
2445 trace.nr_entries * sizeof(unsigned long));
2447 trace.max_entries = FTRACE_STACK_ENTRIES;
2448 trace.entries = entry->caller;
2450 save_stack_trace_regs(regs, &trace);
2452 save_stack_trace(&trace);
2455 entry->size = trace.nr_entries;
2457 if (!call_filter_check_discard(call, entry, buffer, event))
2458 __buffer_unlock_commit(buffer, event);
/* Again, don't let gcc optimize things here */
barrier();
__this_cpu_dec(ftrace_stack_reserve);
2464 preempt_enable_notrace();
2468 static inline void ftrace_trace_stack(struct trace_array *tr,
2469 struct ring_buffer *buffer,
2470 unsigned long flags,
2471 int skip, int pc, struct pt_regs *regs)
2473 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2476 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2479 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2482 __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2486 * trace_dump_stack - record a stack back trace in the trace buffer
2487 * @skip: Number of functions to skip (helper handlers)
2489 void trace_dump_stack(int skip)
2491 unsigned long flags;
2493 if (tracing_disabled || tracing_selftest_running)
2496 local_save_flags(flags);
 * Skip 3 more, seems to get us at the caller of
 * this function.
 */
skip += 3;
2503 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2504 flags, skip, preempt_count(), NULL);
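/*
 * Illustrative usage (example only): any kernel code can drop a
 * backtrace into the trace buffer to see how a path was reached:
 *
 *	trace_dump_stack(0);
 *
 * A non-zero skip trims that many helper frames off the top.
 */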
2507 static DEFINE_PER_CPU(int, user_stack_count);
2510 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2512 struct trace_event_call *call = &event_user_stack;
2513 struct ring_buffer_event *event;
2514 struct userstack_entry *entry;
2515 struct stack_trace trace;
2517 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
 * NMIs can not handle page faults, even with fix ups.
 * Saving the user stack can (and often does) fault.
2524 if (unlikely(in_nmi()))
2528 * prevent recursion, since the user stack tracing may
2529 * trigger other kernel events.
2532 if (__this_cpu_read(user_stack_count))
2535 __this_cpu_inc(user_stack_count);
2537 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2538 sizeof(*entry), flags, pc);
2540 goto out_drop_count;
2541 entry = ring_buffer_event_data(event);
2543 entry->tgid = current->tgid;
2544 memset(&entry->caller, 0, sizeof(entry->caller));
2546 trace.nr_entries = 0;
2547 trace.max_entries = FTRACE_STACK_ENTRIES;
2549 trace.entries = entry->caller;
2551 save_stack_trace_user(&trace);
2552 if (!call_filter_check_discard(call, entry, buffer, event))
2553 __buffer_unlock_commit(buffer, event);
2556 __this_cpu_dec(user_stack_count);
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
	/* The stack writer above takes the ring buffer, not the trace_array. */
	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
}
2568 #endif /* CONFIG_STACKTRACE */
2570 /* created for use with alloc_percpu */
2571 struct trace_buffer_struct {
2573 char buffer[4][TRACE_BUF_SIZE];
2576 static struct trace_buffer_struct *trace_percpu_buffer;
/*
 * This allows for lockless recording.  If we're nested too deeply, then
 * this returns NULL.
 */
2582 static char *get_trace_buf(void)
2584 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2586 if (!buffer || buffer->nesting >= 4)
2589 return &buffer->buffer[buffer->nesting++][0];
2592 static void put_trace_buf(void)
2594 this_cpu_dec(trace_percpu_buffer->nesting);
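/*
 * Sketch of the intended calling pattern (assumes preemption is
 * disabled around the pair, as trace_vbprintk() below does):
 *
 *	char *tbuf = get_trace_buf();
 *	if (tbuf) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuf ...
 *		put_trace_buf();
 *	}
 *
 * Each CPU can hold one buffer per nesting level (normal, softirq,
 * irq, NMI); a fifth nested user simply gets NULL.
 */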
2597 static int alloc_percpu_trace_buffer(void)
2599 struct trace_buffer_struct *buffers;
2601 buffers = alloc_percpu(struct trace_buffer_struct);
2602 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2605 trace_percpu_buffer = buffers;
2609 static int buffers_allocated;
2611 void trace_printk_init_buffers(void)
2613 if (buffers_allocated)
2616 if (alloc_percpu_trace_buffer())
2619 /* trace_printk() is for debug use only. Don't use it in production. */
2622 pr_warn("**********************************************************\n");
2623 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2625 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2627 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2628 pr_warn("** unsafe for production use. **\n");
2630 pr_warn("** If you see this message and you are not debugging **\n");
2631 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2633 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2634 pr_warn("**********************************************************\n");
2636 /* Expand the buffers to set size */
2637 tracing_update_buffers();
2639 buffers_allocated = 1;
2642 * trace_printk_init_buffers() can be called by modules.
2643 * If that happens, then we need to start cmdline recording
2644 * directly here. If the global_trace.buffer is already
2645 * allocated here, then this was called by module code.
2647 if (global_trace.trace_buffer.buffer)
2648 tracing_start_cmdline_record();
2651 void trace_printk_start_comm(void)
2653 /* Start tracing comms if trace printk is set */
2654 if (!buffers_allocated)
2656 tracing_start_cmdline_record();
2659 static void trace_printk_start_stop_comm(int enabled)
2661 if (!buffers_allocated)
2665 tracing_start_cmdline_record();
2667 tracing_stop_cmdline_record();
2671 * trace_vbprintk - write binary msg to tracing buffer
2674 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2676 struct trace_event_call *call = &event_bprint;
2677 struct ring_buffer_event *event;
2678 struct ring_buffer *buffer;
2679 struct trace_array *tr = &global_trace;
2680 struct bprint_entry *entry;
2681 unsigned long flags;
2683 int len = 0, size, pc;
2685 if (unlikely(tracing_selftest_running || tracing_disabled))
2688 /* Don't pollute graph traces with trace_vprintk internals */
2689 pause_graph_tracing();
2691 pc = preempt_count();
2692 preempt_disable_notrace();
2694 tbuffer = get_trace_buf();
2700 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2702 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2705 local_save_flags(flags);
2706 size = sizeof(*entry) + sizeof(u32) * len;
2707 buffer = tr->trace_buffer.buffer;
2708 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2712 entry = ring_buffer_event_data(event);
2716 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2717 if (!call_filter_check_discard(call, entry, buffer, event)) {
2718 __buffer_unlock_commit(buffer, event);
2719 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2726 preempt_enable_notrace();
2727 unpause_graph_tracing();
2731 EXPORT_SYMBOL_GPL(trace_vbprintk);
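/*
 * trace_vbprintk() is normally reached through the trace_printk()
 * macro, which records the format pointer plus binary-packed args and
 * defers the costly string formatting to read time, e.g. (example):
 *
 *	trace_printk("read %d bytes from %s\n", ret, name);
 */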
2734 __trace_array_vprintk(struct ring_buffer *buffer,
2735 unsigned long ip, const char *fmt, va_list args)
2737 struct trace_event_call *call = &event_print;
2738 struct ring_buffer_event *event;
2739 int len = 0, size, pc;
2740 struct print_entry *entry;
2741 unsigned long flags;
2744 if (tracing_disabled || tracing_selftest_running)
2747 /* Don't pollute graph traces with trace_vprintk internals */
2748 pause_graph_tracing();
2750 pc = preempt_count();
2751 preempt_disable_notrace();
2754 tbuffer = get_trace_buf();
2760 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2762 local_save_flags(flags);
2763 size = sizeof(*entry) + len + 1;
2764 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2768 entry = ring_buffer_event_data(event);
2771 memcpy(&entry->buf, tbuffer, len + 1);
2772 if (!call_filter_check_discard(call, entry, buffer, event)) {
2773 __buffer_unlock_commit(buffer, event);
2774 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2781 preempt_enable_notrace();
2782 unpause_graph_tracing();
2787 int trace_array_vprintk(struct trace_array *tr,
2788 unsigned long ip, const char *fmt, va_list args)
2790 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2793 int trace_array_printk(struct trace_array *tr,
2794 unsigned long ip, const char *fmt, ...)
2799 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2803 ret = trace_array_vprintk(tr, ip, fmt, ap);
2808 int trace_array_printk_buf(struct ring_buffer *buffer,
2809 unsigned long ip, const char *fmt, ...)
2814 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2818 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2823 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2825 return trace_array_vprintk(&global_trace, ip, fmt, args);
2827 EXPORT_SYMBOL_GPL(trace_vprintk);
2829 static void trace_iterator_increment(struct trace_iterator *iter)
2831 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2835 ring_buffer_read(buf_iter, NULL);
2838 static struct trace_entry *
2839 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2840 unsigned long *lost_events)
2842 struct ring_buffer_event *event;
2843 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2846 event = ring_buffer_iter_peek(buf_iter, ts);
2848 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2852 iter->ent_size = ring_buffer_event_length(event);
2853 return ring_buffer_event_data(event);
2859 static struct trace_entry *
2860 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2861 unsigned long *missing_events, u64 *ent_ts)
2863 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2864 struct trace_entry *ent, *next = NULL;
2865 unsigned long lost_events = 0, next_lost = 0;
2866 int cpu_file = iter->cpu_file;
2867 u64 next_ts = 0, ts;
 * If we are in a per_cpu trace file, don't bother iterating over
 * all the CPUs; peek directly at that one CPU's buffer.
 */
2876 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2877 if (ring_buffer_empty_cpu(buffer, cpu_file))
2879 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2881 *ent_cpu = cpu_file;
2886 for_each_tracing_cpu(cpu) {
2888 if (ring_buffer_empty_cpu(buffer, cpu))
2891 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2894 * Pick the entry with the smallest timestamp:
2896 if (ent && (!next || ts < next_ts)) {
2900 next_lost = lost_events;
2901 next_size = iter->ent_size;
2905 iter->ent_size = next_size;
2908 *ent_cpu = next_cpu;
2914 *missing_events = next_lost;
2919 /* Find the next real entry, without updating the iterator itself */
2920 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2921 int *ent_cpu, u64 *ent_ts)
2923 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2926 /* Find the next real entry, and increment the iterator to the next entry */
2927 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2929 iter->ent = __find_next_entry(iter, &iter->cpu,
2930 &iter->lost_events, &iter->ts);
2933 trace_iterator_increment(iter);
2935 return iter->ent ? iter : NULL;
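/*
 * Sketch of how readers drive this (see s_next() and the pipe code):
 * repeatedly advancing the iterator yields the next entry, merged by
 * timestamp across all per-cpu buffers:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 */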
2938 static void trace_consume(struct trace_iterator *iter)
2940 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2941 &iter->lost_events);
2944 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2946 struct trace_iterator *iter = m->private;
2950 WARN_ON_ONCE(iter->leftover);
2954 /* can't go backwards */
2959 ent = trace_find_next_entry_inc(iter);
2963 while (ent && iter->idx < i)
2964 ent = trace_find_next_entry_inc(iter);
2971 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2973 struct ring_buffer_event *event;
2974 struct ring_buffer_iter *buf_iter;
2975 unsigned long entries = 0;
2978 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2980 buf_iter = trace_buffer_iter(iter, cpu);
2984 ring_buffer_iter_reset(buf_iter);
2987 * We could have the case with the max latency tracers
2988 * that a reset never took place on a cpu. This is evident
2989 * by the timestamp being before the start of the buffer.
2991 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2992 if (ts >= iter->trace_buffer->time_start)
2995 ring_buffer_read(buf_iter, NULL);
2998 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
 * The current tracer is copied to avoid a global locking
 * all around.
 */
3005 static void *s_start(struct seq_file *m, loff_t *pos)
3007 struct trace_iterator *iter = m->private;
3008 struct trace_array *tr = iter->tr;
3009 int cpu_file = iter->cpu_file;
3015 * copy the tracer to avoid using a global lock all around.
3016 * iter->trace is a copy of current_trace, the pointer to the
3017 * name may be used instead of a strcmp(), as iter->trace->name
3018 * will point to the same string as current_trace->name.
3020 mutex_lock(&trace_types_lock);
3021 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3022 *iter->trace = *tr->current_trace;
3023 mutex_unlock(&trace_types_lock);
3025 #ifdef CONFIG_TRACER_MAX_TRACE
3026 if (iter->snapshot && iter->trace->use_max_tr)
3027 return ERR_PTR(-EBUSY);
3030 if (!iter->snapshot)
3031 atomic_inc(&trace_record_cmdline_disabled);
3033 if (*pos != iter->pos) {
3038 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3039 for_each_tracing_cpu(cpu)
3040 tracing_iter_reset(iter, cpu);
3042 tracing_iter_reset(iter, cpu_file);
3045 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3050 * If we overflowed the seq_file before, then we want
3051 * to just reuse the trace_seq buffer again.
3057 p = s_next(m, p, &l);
3061 trace_event_read_lock();
3062 trace_access_lock(cpu_file);
3066 static void s_stop(struct seq_file *m, void *p)
3068 struct trace_iterator *iter = m->private;
3070 #ifdef CONFIG_TRACER_MAX_TRACE
3071 if (iter->snapshot && iter->trace->use_max_tr)
3075 if (!iter->snapshot)
3076 atomic_dec(&trace_record_cmdline_disabled);
3078 trace_access_unlock(iter->cpu_file);
3079 trace_event_read_unlock();
3083 get_total_entries(struct trace_buffer *buf,
3084 unsigned long *total, unsigned long *entries)
3086 unsigned long count;
3092 for_each_tracing_cpu(cpu) {
3093 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3095 * If this buffer has skipped entries, then we hold all
3096 * entries for the trace and we need to ignore the
3097 * ones before the time stamp.
3099 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3100 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3101 /* total is the same as the entries */
3105 ring_buffer_overrun_cpu(buf->buffer, cpu);
3110 static void print_lat_help_header(struct seq_file *m)
seq_puts(m, "#                  _------=> CPU#            \n"
	    "#                 / _-----=> irqs-off        \n"
	    "#                | / _----=> need-resched    \n"
	    "#                || / _---=> hardirq/softirq \n"
	    "#                ||| / _--=> preempt-depth   \n"
	    "#                |||| /     delay            \n"
	    "#  cmd     pid   ||||| time  |   caller      \n"
	    "#     \\   /      |||||  \\    |   /         \n");
3122 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3124 unsigned long total;
3125 unsigned long entries;
3127 get_total_entries(buf, &total, &entries);
3128 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3129 entries, total, num_online_cpus());
3133 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3135 print_event_info(buf, m);
seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
	    "#              | |       |          |         |\n");
3140 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3142 print_event_info(buf, m);
seq_puts(m, "#                          _-----=> irqs-off\n"
	    "#                         / _----=> need-resched\n"
	    "#                        | / _---=> hardirq/softirq\n"
	    "#                        || / _--=> preempt-depth\n"
	    "#                        ||| /     delay\n"
	    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
	    "#              | |       |   ||||       |         |\n");
3153 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3155 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3156 struct trace_buffer *buf = iter->trace_buffer;
3157 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3158 struct tracer *type = iter->trace;
3159 unsigned long entries;
3160 unsigned long total;
3161 const char *name = "preemption";
3165 get_total_entries(buf, &total, &entries);
3167 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3169 seq_puts(m, "# -----------------------------------"
3170 "---------------------------------\n");
3171 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3172 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3173 nsecs_to_usecs(data->saved_latency),
3177 #if defined(CONFIG_PREEMPT_NONE)
3179 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3181 #elif defined(CONFIG_PREEMPT)
3186 /* These are reserved for later use */
3189 seq_printf(m, " #P:%d)\n", num_online_cpus());
3193 seq_puts(m, "# -----------------\n");
3194 seq_printf(m, "# | task: %.16s-%d "
3195 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3196 data->comm, data->pid,
3197 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3198 data->policy, data->rt_priority);
3199 seq_puts(m, "# -----------------\n");
3201 if (data->critical_start) {
3202 seq_puts(m, "# => started at: ");
3203 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3204 trace_print_seq(m, &iter->seq);
3205 seq_puts(m, "\n# => ended at: ");
3206 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3207 trace_print_seq(m, &iter->seq);
3208 seq_puts(m, "\n#\n");
3214 static void test_cpu_buff_start(struct trace_iterator *iter)
3216 struct trace_seq *s = &iter->seq;
3217 struct trace_array *tr = iter->tr;
3219 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3222 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3225 if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3228 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3232 cpumask_set_cpu(iter->cpu, iter->started);
3234 /* Don't print started cpu buffer for the first entry of the trace */
3236 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3240 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3242 struct trace_array *tr = iter->tr;
3243 struct trace_seq *s = &iter->seq;
3244 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3245 struct trace_entry *entry;
3246 struct trace_event *event;
3250 test_cpu_buff_start(iter);
3252 event = ftrace_find_event(entry->type);
3254 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3255 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3256 trace_print_lat_context(iter);
3258 trace_print_context(iter);
3261 if (trace_seq_has_overflowed(s))
3262 return TRACE_TYPE_PARTIAL_LINE;
3265 return event->funcs->trace(iter, sym_flags, event);
3267 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3269 return trace_handle_return(s);
3272 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3274 struct trace_array *tr = iter->tr;
3275 struct trace_seq *s = &iter->seq;
3276 struct trace_entry *entry;
3277 struct trace_event *event;
3281 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3282 trace_seq_printf(s, "%d %d %llu ",
3283 entry->pid, iter->cpu, iter->ts);
3285 if (trace_seq_has_overflowed(s))
3286 return TRACE_TYPE_PARTIAL_LINE;
3288 event = ftrace_find_event(entry->type);
3290 return event->funcs->raw(iter, 0, event);
3292 trace_seq_printf(s, "%d ?\n", entry->type);
3294 return trace_handle_return(s);
3297 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3299 struct trace_array *tr = iter->tr;
3300 struct trace_seq *s = &iter->seq;
3301 unsigned char newline = '\n';
3302 struct trace_entry *entry;
3303 struct trace_event *event;
3307 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3308 SEQ_PUT_HEX_FIELD(s, entry->pid);
3309 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3310 SEQ_PUT_HEX_FIELD(s, iter->ts);
3311 if (trace_seq_has_overflowed(s))
3312 return TRACE_TYPE_PARTIAL_LINE;
3315 event = ftrace_find_event(entry->type);
3317 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3318 if (ret != TRACE_TYPE_HANDLED)
3322 SEQ_PUT_FIELD(s, newline);
3324 return trace_handle_return(s);
3327 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3329 struct trace_array *tr = iter->tr;
3330 struct trace_seq *s = &iter->seq;
3331 struct trace_entry *entry;
3332 struct trace_event *event;
3336 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3337 SEQ_PUT_FIELD(s, entry->pid);
3338 SEQ_PUT_FIELD(s, iter->cpu);
3339 SEQ_PUT_FIELD(s, iter->ts);
3340 if (trace_seq_has_overflowed(s))
3341 return TRACE_TYPE_PARTIAL_LINE;
3344 event = ftrace_find_event(entry->type);
3345 return event ? event->funcs->binary(iter, 0, event) :
3349 int trace_empty(struct trace_iterator *iter)
3351 struct ring_buffer_iter *buf_iter;
3354 /* If we are looking at one CPU buffer, only check that one */
3355 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3356 cpu = iter->cpu_file;
3357 buf_iter = trace_buffer_iter(iter, cpu);
3359 if (!ring_buffer_iter_empty(buf_iter))
3362 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3368 for_each_tracing_cpu(cpu) {
3369 buf_iter = trace_buffer_iter(iter, cpu);
3371 if (!ring_buffer_iter_empty(buf_iter))
3374 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3382 /* Called with trace_event_read_lock() held. */
3383 enum print_line_t print_trace_line(struct trace_iterator *iter)
3385 struct trace_array *tr = iter->tr;
3386 unsigned long trace_flags = tr->trace_flags;
3387 enum print_line_t ret;
3389 if (iter->lost_events) {
3390 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3391 iter->cpu, iter->lost_events);
3392 if (trace_seq_has_overflowed(&iter->seq))
3393 return TRACE_TYPE_PARTIAL_LINE;
3396 if (iter->trace && iter->trace->print_line) {
3397 ret = iter->trace->print_line(iter);
3398 if (ret != TRACE_TYPE_UNHANDLED)
3402 if (iter->ent->type == TRACE_BPUTS &&
3403 trace_flags & TRACE_ITER_PRINTK &&
3404 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3405 return trace_print_bputs_msg_only(iter);
3407 if (iter->ent->type == TRACE_BPRINT &&
3408 trace_flags & TRACE_ITER_PRINTK &&
3409 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3410 return trace_print_bprintk_msg_only(iter);
3412 if (iter->ent->type == TRACE_PRINT &&
3413 trace_flags & TRACE_ITER_PRINTK &&
3414 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3415 return trace_print_printk_msg_only(iter);
3417 if (trace_flags & TRACE_ITER_BIN)
3418 return print_bin_fmt(iter);
3420 if (trace_flags & TRACE_ITER_HEX)
3421 return print_hex_fmt(iter);
3423 if (trace_flags & TRACE_ITER_RAW)
3424 return print_raw_fmt(iter);
3426 return print_trace_fmt(iter);
3429 void trace_latency_header(struct seq_file *m)
3431 struct trace_iterator *iter = m->private;
3432 struct trace_array *tr = iter->tr;
3434 /* print nothing if the buffers are empty */
3435 if (trace_empty(iter))
3438 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3439 print_trace_header(m, iter);
3441 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3442 print_lat_help_header(m);
3445 void trace_default_header(struct seq_file *m)
3447 struct trace_iterator *iter = m->private;
3448 struct trace_array *tr = iter->tr;
3449 unsigned long trace_flags = tr->trace_flags;
3451 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3454 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3455 /* print nothing if the buffers are empty */
3456 if (trace_empty(iter))
3458 print_trace_header(m, iter);
3459 if (!(trace_flags & TRACE_ITER_VERBOSE))
3460 print_lat_help_header(m);
3462 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3463 if (trace_flags & TRACE_ITER_IRQ_INFO)
3464 print_func_help_header_irq(iter->trace_buffer, m);
3466 print_func_help_header(iter->trace_buffer, m);
3471 static void test_ftrace_alive(struct seq_file *m)
3473 if (!ftrace_is_dead())
3475 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3476 "# MAY BE MISSING FUNCTION EVENTS\n");
3479 #ifdef CONFIG_TRACER_MAX_TRACE
3480 static void show_snapshot_main_help(struct seq_file *m)
3482 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3483 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3484 "# Takes a snapshot of the main buffer.\n"
3485 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
"# (Doesn't have to be '2'; works with any number that\n"
"# is not a '0' or '1')\n");
3490 static void show_snapshot_percpu_help(struct seq_file *m)
3492 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3493 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3494 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3495 "# Takes a snapshot of the main buffer for this cpu.\n");
3497 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3498 "# Must use main snapshot file to allocate.\n");
3500 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
"# (Doesn't have to be '2'; works with any number that\n"
"# is not a '0' or '1')\n");
3505 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3507 if (iter->tr->allocated_snapshot)
3508 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3510 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3512 seq_puts(m, "# Snapshot commands:\n");
3513 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3514 show_snapshot_main_help(m);
3516 show_snapshot_percpu_help(m);
3519 /* Should never be called */
3520 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3523 static int s_show(struct seq_file *m, void *v)
3525 struct trace_iterator *iter = v;
3528 if (iter->ent == NULL) {
3530 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3532 test_ftrace_alive(m);
3534 if (iter->snapshot && trace_empty(iter))
3535 print_snapshot_help(m, iter);
3536 else if (iter->trace && iter->trace->print_header)
3537 iter->trace->print_header(m);
3539 trace_default_header(m);
3541 } else if (iter->leftover) {
3543 * If we filled the seq_file buffer earlier, we
3544 * want to just show it now.
3546 ret = trace_print_seq(m, &iter->seq);
3548 /* ret should this time be zero, but you never know */
3549 iter->leftover = ret;
3552 print_trace_line(iter);
3553 ret = trace_print_seq(m, &iter->seq);
3555 * If we overflow the seq_file buffer, then it will
3556 * ask us for this data again at start up.
3558 * ret is 0 if seq_file write succeeded.
3561 iter->leftover = ret;
 * Should be used after trace_array_get(); trace_types_lock
 * ensures that i_cdev was already initialized.
 */
3571 static inline int tracing_get_cpu(struct inode *inode)
3573 if (inode->i_cdev) /* See trace_create_cpu_file() */
3574 return (long)inode->i_cdev - 1;
3575 return RING_BUFFER_ALL_CPUS;
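/*
 * The matching encoding (sketch, see trace_create_cpu_file()): the
 * per-cpu files store cpu + 1 in i_cdev, so that a NULL i_cdev means
 * "all CPUs" rather than CPU 0:
 *
 *	d_inode(dentry)->i_cdev = (void *)((long)cpu + 1);
 */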
3578 static const struct seq_operations tracer_seq_ops = {
3585 static struct trace_iterator *
3586 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3588 struct trace_array *tr = inode->i_private;
3589 struct trace_iterator *iter;
3592 if (tracing_disabled)
3593 return ERR_PTR(-ENODEV);
3595 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3597 return ERR_PTR(-ENOMEM);
3599 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3601 if (!iter->buffer_iter)
3605 * We make a copy of the current tracer to avoid concurrent
3606 * changes on it while we are reading.
3608 mutex_lock(&trace_types_lock);
3609 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3613 *iter->trace = *tr->current_trace;
3615 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3620 #ifdef CONFIG_TRACER_MAX_TRACE
3621 /* Currently only the top directory has a snapshot */
3622 if (tr->current_trace->print_max || snapshot)
3623 iter->trace_buffer = &tr->max_buffer;
3626 iter->trace_buffer = &tr->trace_buffer;
3627 iter->snapshot = snapshot;
3629 iter->cpu_file = tracing_get_cpu(inode);
3630 mutex_init(&iter->mutex);
3632 /* Notify the tracer early; before we stop tracing. */
3633 if (iter->trace && iter->trace->open)
3634 iter->trace->open(iter);
3636 /* Annotate start of buffers if we had overruns */
3637 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3638 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3640 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3641 if (trace_clocks[tr->clock_id].in_ns)
3642 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3644 /* stop the trace while dumping if we are not opening "snapshot" */
3645 if (!iter->snapshot)
3646 tracing_stop_tr(tr);
3648 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3649 for_each_tracing_cpu(cpu) {
3650 iter->buffer_iter[cpu] =
3651 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3653 ring_buffer_read_prepare_sync();
3654 for_each_tracing_cpu(cpu) {
3655 ring_buffer_read_start(iter->buffer_iter[cpu]);
3656 tracing_iter_reset(iter, cpu);
3659 cpu = iter->cpu_file;
3660 iter->buffer_iter[cpu] =
3661 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3662 ring_buffer_read_prepare_sync();
3663 ring_buffer_read_start(iter->buffer_iter[cpu]);
3664 tracing_iter_reset(iter, cpu);
3667 mutex_unlock(&trace_types_lock);
3672 mutex_unlock(&trace_types_lock);
3674 kfree(iter->buffer_iter);
3676 seq_release_private(inode, file);
3677 return ERR_PTR(-ENOMEM);
3680 int tracing_open_generic(struct inode *inode, struct file *filp)
3682 if (tracing_disabled)
3685 filp->private_data = inode->i_private;
3689 bool tracing_is_disabled(void)
return (tracing_disabled) ? true : false;
3695 * Open and update trace_array ref count.
3696 * Must have the current trace_array passed to it.
3698 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3700 struct trace_array *tr = inode->i_private;
3702 if (tracing_disabled)
3705 if (trace_array_get(tr) < 0)
3708 filp->private_data = inode->i_private;
3713 static int tracing_release(struct inode *inode, struct file *file)
3715 struct trace_array *tr = inode->i_private;
3716 struct seq_file *m = file->private_data;
3717 struct trace_iterator *iter;
3720 if (!(file->f_mode & FMODE_READ)) {
3721 trace_array_put(tr);
3725 /* Writes do not use seq_file */
3727 mutex_lock(&trace_types_lock);
3729 for_each_tracing_cpu(cpu) {
3730 if (iter->buffer_iter[cpu])
3731 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3734 if (iter->trace && iter->trace->close)
3735 iter->trace->close(iter);
3737 if (!iter->snapshot)
3738 /* reenable tracing if it was previously enabled */
3739 tracing_start_tr(tr);
3741 __trace_array_put(tr);
3743 mutex_unlock(&trace_types_lock);
3745 mutex_destroy(&iter->mutex);
3746 free_cpumask_var(iter->started);
3748 kfree(iter->buffer_iter);
3749 seq_release_private(inode, file);
3754 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3756 struct trace_array *tr = inode->i_private;
3758 trace_array_put(tr);
3762 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3764 struct trace_array *tr = inode->i_private;
3766 trace_array_put(tr);
3768 return single_release(inode, file);
3771 static int tracing_open(struct inode *inode, struct file *file)
3773 struct trace_array *tr = inode->i_private;
3774 struct trace_iterator *iter;
3777 if (trace_array_get(tr) < 0)
3780 /* If this file was open for write, then erase contents */
3781 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3782 int cpu = tracing_get_cpu(inode);
3784 if (cpu == RING_BUFFER_ALL_CPUS)
3785 tracing_reset_online_cpus(&tr->trace_buffer);
3787 tracing_reset(&tr->trace_buffer, cpu);
3790 if (file->f_mode & FMODE_READ) {
3791 iter = __tracing_open(inode, file, false);
3793 ret = PTR_ERR(iter);
3794 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3795 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3799 trace_array_put(tr);
3805 * Some tracers are not suitable for instance buffers.
3806 * A tracer is always available for the global array (toplevel)
3807 * or if it explicitly states that it is.
3810 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3812 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3815 /* Find the next tracer that this trace array may use */
3816 static struct tracer *
3817 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3819 while (t && !trace_ok_for_array(t, tr))
3826 t_next(struct seq_file *m, void *v, loff_t *pos)
3828 struct trace_array *tr = m->private;
3829 struct tracer *t = v;
3834 t = get_tracer_for_array(tr, t->next);
3839 static void *t_start(struct seq_file *m, loff_t *pos)
3841 struct trace_array *tr = m->private;
3845 mutex_lock(&trace_types_lock);
3847 t = get_tracer_for_array(tr, trace_types);
3848 for (; t && l < *pos; t = t_next(m, t, &l))
3854 static void t_stop(struct seq_file *m, void *p)
3856 mutex_unlock(&trace_types_lock);
3859 static int t_show(struct seq_file *m, void *v)
3861 struct tracer *t = v;
3866 seq_puts(m, t->name);
3875 static const struct seq_operations show_traces_seq_ops = {
3882 static int show_traces_open(struct inode *inode, struct file *file)
3884 struct trace_array *tr = inode->i_private;
3888 if (tracing_disabled)
3891 ret = seq_open(file, &show_traces_seq_ops);
3895 m = file->private_data;
3902 tracing_write_stub(struct file *filp, const char __user *ubuf,
3903 size_t count, loff_t *ppos)
3908 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3912 if (file->f_mode & FMODE_READ)
3913 ret = seq_lseek(file, offset, whence);
3915 file->f_pos = ret = 0;
3920 static const struct file_operations tracing_fops = {
3921 .open = tracing_open,
3923 .write = tracing_write_stub,
3924 .llseek = tracing_lseek,
3925 .release = tracing_release,
3928 static const struct file_operations show_traces_fops = {
3929 .open = show_traces_open,
3931 .release = seq_release,
3932 .llseek = seq_lseek,
3936 * The tracer itself will not take this lock, but still we want
3937 * to provide a consistent cpumask to user-space:
3939 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3942 * Temporary storage for the character representation of the
3943 * CPU bitmask (and one more byte for the newline):
3945 static char mask_str[NR_CPUS + 1];
3948 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3949 size_t count, loff_t *ppos)
3951 struct trace_array *tr = file_inode(filp)->i_private;
3954 mutex_lock(&tracing_cpumask_update_lock);
3956 len = snprintf(mask_str, count, "%*pb\n",
3957 cpumask_pr_args(tr->tracing_cpumask));
3962 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3965 mutex_unlock(&tracing_cpumask_update_lock);
3971 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3972 size_t count, loff_t *ppos)
3974 struct trace_array *tr = file_inode(filp)->i_private;
3975 cpumask_var_t tracing_cpumask_new;
3978 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3981 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3985 mutex_lock(&tracing_cpumask_update_lock);
3987 local_irq_disable();
3988 arch_spin_lock(&tr->max_lock);
3989 for_each_tracing_cpu(cpu) {
3991 * Increase/decrease the disabled counter if we are
3992 * about to flip a bit in the cpumask:
3994 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3995 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3996 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3997 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3999 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4000 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4001 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4002 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4005 arch_spin_unlock(&tr->max_lock);
4008 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4010 mutex_unlock(&tracing_cpumask_update_lock);
4011 free_cpumask_var(tracing_cpumask_new);
4016 free_cpumask_var(tracing_cpumask_new);
4021 static const struct file_operations tracing_cpumask_fops = {
4022 .open = tracing_open_generic_tr,
4023 .read = tracing_cpumask_read,
4024 .write = tracing_cpumask_write,
4025 .release = tracing_release_generic_tr,
4026 .llseek = generic_file_llseek,
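/*
 * Usage (illustrative): limit tracing to CPUs 0-2 with a hex mask,
 *
 *	echo 7 > tracing_cpumask
 *
 * Bits cleared by the write disable per-cpu recording on the fly, and
 * bits newly set re-enable it, as done in tracing_cpumask_write().
 */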
4029 static int tracing_trace_options_show(struct seq_file *m, void *v)
4031 struct tracer_opt *trace_opts;
4032 struct trace_array *tr = m->private;
4036 mutex_lock(&trace_types_lock);
4037 tracer_flags = tr->current_trace->flags->val;
4038 trace_opts = tr->current_trace->flags->opts;
4040 for (i = 0; trace_options[i]; i++) {
4041 if (tr->trace_flags & (1 << i))
4042 seq_printf(m, "%s\n", trace_options[i]);
4044 seq_printf(m, "no%s\n", trace_options[i]);
4047 for (i = 0; trace_opts[i].name; i++) {
4048 if (tracer_flags & trace_opts[i].bit)
4049 seq_printf(m, "%s\n", trace_opts[i].name);
4051 seq_printf(m, "no%s\n", trace_opts[i].name);
4053 mutex_unlock(&trace_types_lock);
4058 static int __set_tracer_option(struct trace_array *tr,
4059 struct tracer_flags *tracer_flags,
4060 struct tracer_opt *opts, int neg)
4062 struct tracer *trace = tracer_flags->trace;
4065 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4070 tracer_flags->val &= ~opts->bit;
4072 tracer_flags->val |= opts->bit;
4076 /* Try to assign a tracer specific option */
4077 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4079 struct tracer *trace = tr->current_trace;
4080 struct tracer_flags *tracer_flags = trace->flags;
4081 struct tracer_opt *opts = NULL;
4084 for (i = 0; tracer_flags->opts[i].name; i++) {
4085 opts = &tracer_flags->opts[i];
4087 if (strcmp(cmp, opts->name) == 0)
4088 return __set_tracer_option(tr, trace->flags, opts, neg);
4094 /* Some tracers require overwrite to stay enabled */
4095 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4097 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4103 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4105 /* do nothing if flag is already set */
4106 if (!!(tr->trace_flags & mask) == !!enabled)
4109 /* Give the tracer a chance to approve the change */
4110 if (tr->current_trace->flag_changed)
4111 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4115 tr->trace_flags |= mask;
4117 tr->trace_flags &= ~mask;
4119 if (mask == TRACE_ITER_RECORD_CMD)
4120 trace_event_enable_cmd_record(enabled);
4122 if (mask == TRACE_ITER_EVENT_FORK)
4123 trace_event_follow_fork(tr, enabled);
4125 if (mask == TRACE_ITER_OVERWRITE) {
4126 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4127 #ifdef CONFIG_TRACER_MAX_TRACE
4128 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4132 if (mask == TRACE_ITER_PRINTK) {
4133 trace_printk_start_stop_comm(enabled);
4134 trace_printk_control(enabled);
4140 static int trace_set_options(struct trace_array *tr, char *option)
4146 size_t orig_len = strlen(option);
4148 cmp = strstrip(option);
4150 if (strncmp(cmp, "no", 2) == 0) {
4155 mutex_lock(&trace_types_lock);
4157 for (i = 0; trace_options[i]; i++) {
4158 if (strcmp(cmp, trace_options[i]) == 0) {
4159 ret = set_tracer_flag(tr, 1 << i, !neg);
4164 /* If no option could be set, test the specific tracer options */
4165 if (!trace_options[i])
4166 ret = set_tracer_option(tr, cmp, neg);
4168 mutex_unlock(&trace_types_lock);
4171 * If the first trailing whitespace is replaced with '\0' by strstrip,
4172 * turn it back into a space.
4174 if (orig_len > strlen(option))
4175 option[strlen(option)] = ' ';
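/*
 * Example option strings accepted above (illustrative):
 *
 *	echo sym-offset > trace_options		(set a core flag)
 *	echo nosym-offset > trace_options	(clear it again)
 *
 * Names that match no core flag fall through to the current tracer's
 * private options via set_tracer_option().
 */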
4180 static void __init apply_trace_boot_options(void)
4182 char *buf = trace_boot_options_buf;
4186 option = strsep(&buf, ",");
4192 trace_set_options(&global_trace, option);
4194 /* Put back the comma to allow this to be called again */
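/*
 * Boot-time equivalent (illustrative): the same option strings can be
 * passed comma-separated on the kernel command line, e.g.
 *
 *	trace_options=stacktrace,noirq-info
 */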
4201 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4202 size_t cnt, loff_t *ppos)
4204 struct seq_file *m = filp->private_data;
4205 struct trace_array *tr = m->private;
4209 if (cnt >= sizeof(buf))
4212 if (copy_from_user(buf, ubuf, cnt))
4217 ret = trace_set_options(tr, buf);
4226 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4228 struct trace_array *tr = inode->i_private;
4231 if (tracing_disabled)
4234 if (trace_array_get(tr) < 0)
4237 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4239 trace_array_put(tr);
4244 static const struct file_operations tracing_iter_fops = {
4245 .open = tracing_trace_options_open,
4247 .llseek = seq_lseek,
4248 .release = tracing_single_release_tr,
4249 .write = tracing_trace_options_write,
4252 static const char readme_msg[] =
4253 "tracing mini-HOWTO:\n\n"
4254 "# echo 0 > tracing_on : quick way to disable tracing\n"
4255 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4256 " Important files:\n"
4257 " trace\t\t\t- The static contents of the buffer\n"
4258 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4259 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4260 " current_tracer\t- function and latency tracers\n"
4261 " available_tracers\t- list of configured tracers for current_tracer\n"
4262 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4263 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
" trace_clock\t\t- change the clock used to order events\n"
4265 " local: Per cpu clock but may not be synced across CPUs\n"
4266 " global: Synced across CPUs but slows tracing down.\n"
4267 " counter: Not a clock, but just an increment\n"
4268 " uptime: Jiffy counter from time of boot\n"
4269 " perf: Same clock that perf events use\n"
4270 #ifdef CONFIG_X86_64
4271 " x86-tsc: TSC cycle counter\n"
"\n trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
"\n trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
4275 " tracing_cpumask\t- Limit which CPUs to trace\n"
4276 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4277 "\t\t\t Remove sub-buffer with rmdir\n"
4278 " trace_options\t\t- Set format or modify how tracing happens\n"
"\t\t\t Disable an option by prefixing 'no' to the\n"
"\t\t\t option name\n"
4281 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4282 #ifdef CONFIG_DYNAMIC_FTRACE
4283 "\n available_filter_functions - list of functions that can be filtered on\n"
4284 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4285 "\t\t\t functions\n"
4286 "\t accepts: func_full_name or glob-matching-pattern\n"
4287 "\t modules: Can select a group via module\n"
4288 "\t Format: :mod:<module-name>\n"
4289 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4290 "\t triggers: a command to perform when function is hit\n"
4291 "\t Format: <function>:<trigger>[:count]\n"
4292 "\t trigger: traceon, traceoff\n"
4293 "\t\t enable_event:<system>:<event>\n"
4294 "\t\t disable_event:<system>:<event>\n"
4295 #ifdef CONFIG_STACKTRACE
4298 #ifdef CONFIG_TRACER_SNAPSHOT
4303 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4304 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4305 "\t The first one will disable tracing every time do_fault is hit\n"
4306 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4307 "\t The first time do trap is hit and it disables tracing, the\n"
4308 "\t counter will decrement to 2. If tracing is already disabled,\n"
4309 "\t the counter will not decrement. It only decrements when the\n"
4310 "\t trigger did work\n"
4311 "\t To remove trigger without count:\n"
4312 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4313 "\t To remove trigger with a count:\n"
4314 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4315 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4316 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4317 "\t modules: Can select a group via module command :mod:\n"
4318 "\t Does not accept triggers\n"
4319 #endif /* CONFIG_DYNAMIC_FTRACE */
4320 #ifdef CONFIG_FUNCTION_TRACER
4321 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4324 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4325 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4326 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4327 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4329 #ifdef CONFIG_TRACER_SNAPSHOT
4330 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4331 "\t\t\t snapshot buffer. Read the contents for more\n"
4332 "\t\t\t information\n"
4334 #ifdef CONFIG_STACK_TRACER
4335 " stack_trace\t\t- Shows the max stack trace when active\n"
4336 " stack_max_size\t- Shows current max stack size that was traced\n"
4337 "\t\t\t Write into this file to reset the max size (trigger a\n"
4338 "\t\t\t new trace)\n"
4339 #ifdef CONFIG_DYNAMIC_FTRACE
4340 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4343 #endif /* CONFIG_STACK_TRACER */
4344 #ifdef CONFIG_KPROBE_EVENTS
4345 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4346 "\t\t\t Write into this file to define/undefine new trace events.\n"
4348 #ifdef CONFIG_UPROBE_EVENTS
4349 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4350 "\t\t\t Write into this file to define/undefine new trace events.\n"
4352 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4353 "\t accepts: event-definitions (one definition per line)\n"
4354 "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4355 "\t -:[<group>/]<event>\n"
4356 #ifdef CONFIG_KPROBE_EVENTS
4357 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
"\t place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4360 #ifdef CONFIG_UPROBE_EVENTS
4361 "\t place: <path>:<offset>\n"
4363 "\t args: <name>=fetcharg[:type]\n"
4364 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4365 "\t $stack<index>, $stack, $retval, $comm\n"
4366 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4367 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4369 " events/\t\t- Directory containing all trace event subsystems:\n"
4370 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4371 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4372 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4374 " filter\t\t- If set, only events passing filter are traced\n"
4375 " events/<system>/<event>/\t- Directory containing control files for\n"
4377 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4378 " filter\t\t- If set, only events passing filter are traced\n"
4379 " trigger\t\t- If set, a command to perform when event is hit\n"
4380 "\t Format: <trigger>[:count][if <filter>]\n"
4381 "\t trigger: traceon, traceoff\n"
4382 "\t enable_event:<system>:<event>\n"
4383 "\t disable_event:<system>:<event>\n"
4384 #ifdef CONFIG_HIST_TRIGGERS
4385 "\t enable_hist:<system>:<event>\n"
4386 "\t disable_hist:<system>:<event>\n"
4388 #ifdef CONFIG_STACKTRACE
4391 #ifdef CONFIG_TRACER_SNAPSHOT
4394 #ifdef CONFIG_HIST_TRIGGERS
4395 "\t\t hist (see below)\n"
4397 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4398 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4399 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4400 "\t events/block/block_unplug/trigger\n"
4401 "\t The first disables tracing every time block_unplug is hit.\n"
4402 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4403 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4404 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
"\t Like function triggers, the counter is only decremented if the\n"
"\t trigger actually enabled or disabled tracing.\n"
4407 "\t To remove a trigger without a count:\n"
4408 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4409 "\t To remove a trigger with a count:\n"
4410 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4411 "\t Filters can be ignored when removing a trigger.\n"
4412 #ifdef CONFIG_HIST_TRIGGERS
4413 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4414 "\t Format: hist:keys=<field1[,field2,...]>\n"
4415 "\t [:values=<field1[,field2,...]>]\n"
4416 "\t [:sort=<field1[,field2,...]>]\n"
4417 "\t [:size=#entries]\n"
4418 "\t [:pause][:continue][:clear]\n"
4419 "\t [:name=histname1]\n"
4420 "\t [if <filter>]\n\n"
4421 "\t When a matching event is hit, an entry is added to a hash\n"
4422 "\t table using the key(s) and value(s) named, and the value of a\n"
4423 "\t sum called 'hitcount' is incremented. Keys and values\n"
4424 "\t correspond to fields in the event's format description. Keys\n"
4425 "\t can be any field, or the special string 'stacktrace'.\n"
4426 "\t Compound keys consisting of up to two fields can be specified\n"
4427 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4428 "\t fields. Sort keys consisting of up to two fields can be\n"
4429 "\t specified using the 'sort' keyword. The sort direction can\n"
4430 "\t be modified by appending '.descending' or '.ascending' to a\n"
4431 "\t sort field. The 'size' parameter can be used to specify more\n"
4432 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4433 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4434 "\t its histogram data will be shared with other triggers of the\n"
4435 "\t same name, and trigger hits will update this common data.\n\n"
4436 "\t Reading the 'hist' file for the event will dump the hash\n"
4437 "\t table in its entirety to stdout. If there are multiple hist\n"
4438 "\t triggers attached to an event, there will be a table for each\n"
4439 "\t trigger in the output. The table displayed for a named\n"
4440 "\t trigger will be the same as any other instance having the\n"
4441 "\t same name. The default format used to display a given field\n"
4442 "\t can be modified by appending any of the following modifiers\n"
4443 "\t to the field name, as applicable:\n\n"
4444 "\t .hex display a number as a hex value\n"
4445 "\t .sym display an address as a symbol\n"
4446 "\t .sym-offset display an address as a symbol and offset\n"
4447 "\t .execname display a common_pid as a program name\n"
4448 "\t .syscall display a syscall id as a syscall name\n\n"
4449 "\t .log2 display log2 value rather than raw number\n\n"
4450 "\t The 'pause' parameter can be used to pause an existing hist\n"
4451 "\t trigger or to start a hist trigger but not log any events\n"
4452 "\t until told to do so. 'continue' can be used to start or\n"
4453 "\t restart a paused hist trigger.\n\n"
4454 "\t The 'clear' parameter will clear the contents of a running\n"
4455 "\t hist trigger and leave its current paused/active state\n"
4457 "\t The enable_hist and disable_hist triggers can be used to\n"
4458 "\t have one event conditionally start and stop another event's\n"
"\t already-attached hist trigger. The syntax is analogous to\n"
4460 "\t the enable_event and disable_event triggers.\n"
4465 tracing_readme_read(struct file *filp, char __user *ubuf,
4466 size_t cnt, loff_t *ppos)
4468 return simple_read_from_buffer(ubuf, cnt, ppos,
4469 readme_msg, strlen(readme_msg));
4472 static const struct file_operations tracing_readme_fops = {
4473 .open = tracing_open_generic,
4474 .read = tracing_readme_read,
4475 .llseek = generic_file_llseek,
4478 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4480 unsigned int *ptr = v;
4482 if (*pos || m->count)
4487 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4489 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4498 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4504 arch_spin_lock(&trace_cmdline_lock);
4506 v = &savedcmd->map_cmdline_to_pid[0];
4508 v = saved_cmdlines_next(m, v, &l);
4516 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4518 arch_spin_unlock(&trace_cmdline_lock);
4522 static int saved_cmdlines_show(struct seq_file *m, void *v)
4524 char buf[TASK_COMM_LEN];
4525 unsigned int *pid = v;
4527 __trace_find_cmdline(*pid, buf);
4528 seq_printf(m, "%d %s\n", *pid, buf);
4532 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4533 .start = saved_cmdlines_start,
4534 .next = saved_cmdlines_next,
4535 .stop = saved_cmdlines_stop,
4536 .show = saved_cmdlines_show,
4539 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4541 if (tracing_disabled)
4544 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4547 static const struct file_operations tracing_saved_cmdlines_fops = {
4548 .open = tracing_saved_cmdlines_open,
4550 .llseek = seq_lseek,
4551 .release = seq_release,
4555 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4556 size_t cnt, loff_t *ppos)
4561 arch_spin_lock(&trace_cmdline_lock);
4562 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4563 arch_spin_unlock(&trace_cmdline_lock);
4565 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4568 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4570 kfree(s->saved_cmdlines);
4571 kfree(s->map_cmdline_to_pid);
4575 static int tracing_resize_saved_cmdlines(unsigned int val)
4577 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4579 s = kmalloc(sizeof(*s), GFP_KERNEL);
4583 if (allocate_cmdlines_buffer(val, s) < 0) {
4588 arch_spin_lock(&trace_cmdline_lock);
4589 savedcmd_temp = savedcmd;
4591 arch_spin_unlock(&trace_cmdline_lock);
4592 free_saved_cmdlines_buffer(savedcmd_temp);
4598 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4599 size_t cnt, loff_t *ppos)
4604 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4608 /* must have at least 1 entry or less than PID_MAX_DEFAULT */
4609 if (!val || val > PID_MAX_DEFAULT)
4612 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4621 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4622 .open = tracing_open_generic,
4623 .read = tracing_saved_cmdlines_size_read,
4624 .write = tracing_saved_cmdlines_size_write,
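/*
 * Usage (illustrative):
 *
 *	echo 1024 > saved_cmdlines_size
 *
 * resizes the comm/pid map to 1024 entries; reading the file shows the
 * current size.  Values outside 1..PID_MAX_DEFAULT are rejected above.
 */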
4627 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4628 static union trace_enum_map_item *
4629 update_enum_map(union trace_enum_map_item *ptr)
4631 if (!ptr->map.enum_string) {
4632 if (ptr->tail.next) {
4633 ptr = ptr->tail.next;
4634 /* Set ptr to the next real item (skip head) */
4642 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4644 union trace_enum_map_item *ptr = v;
4647 * Paranoid! If ptr points to end, we don't want to increment past it.
4648 * This really should never happen.
4650 ptr = update_enum_map(ptr);
4651 if (WARN_ON_ONCE(!ptr))
4658 ptr = update_enum_map(ptr);
4663 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4665 union trace_enum_map_item *v;
4668 mutex_lock(&trace_enum_mutex);
4670 v = trace_enum_maps;
4674 while (v && l < *pos) {
4675 v = enum_map_next(m, v, &l);
4681 static void enum_map_stop(struct seq_file *m, void *v)
4683 mutex_unlock(&trace_enum_mutex);
4686 static int enum_map_show(struct seq_file *m, void *v)
4688 union trace_enum_map_item *ptr = v;
4690 seq_printf(m, "%s %ld (%s)\n",
4691 ptr->map.enum_string, ptr->map.enum_value,
4697 static const struct seq_operations tracing_enum_map_seq_ops = {
4698 .start = enum_map_start,
4699 .next = enum_map_next,
4700 .stop = enum_map_stop,
4701 .show = enum_map_show,
4704 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4706 if (tracing_disabled)
4709 return seq_open(filp, &tracing_enum_map_seq_ops);
4712 static const struct file_operations tracing_enum_map_fops = {
4713 .open = tracing_enum_map_open,
4715 .llseek = seq_lseek,
4716 .release = seq_release,
4719 static inline union trace_enum_map_item *
4720 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4722 /* Return tail of array given the head */
4723 return ptr + ptr->head.length + 1;
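/*
 * Layout of one chunk in the trace_enum_maps list (sketch):
 *
 *	[ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
 *
 * which is why the tail sits head.length + 1 items past the head.
 */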
4727 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4730 struct trace_enum_map **stop;
4731 struct trace_enum_map **map;
4732 union trace_enum_map_item *map_array;
4733 union trace_enum_map_item *ptr;
4738 * The trace_enum_maps contains the map plus a head and tail item,
4739 * where the head holds the module and length of array, and the
4740 * tail holds a pointer to the next list.
4742 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4744 pr_warn("Unable to allocate trace enum mapping\n");
4748 mutex_lock(&trace_enum_mutex);
4750 if (!trace_enum_maps)
4751 trace_enum_maps = map_array;
4753 ptr = trace_enum_maps;
4755 ptr = trace_enum_jmp_to_tail(ptr);
4756 if (!ptr->tail.next)
4758 ptr = ptr->tail.next;
4761 ptr->tail.next = map_array;
4763 map_array->head.mod = mod;
4764 map_array->head.length = len;
4767 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4768 map_array->map = **map;
4771 memset(map_array, 0, sizeof(*map_array));
4773 mutex_unlock(&trace_enum_mutex);
4776 static void trace_create_enum_file(struct dentry *d_tracer)
4778 trace_create_file("enum_map", 0444, d_tracer,
4779 NULL, &tracing_enum_map_fops);
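/*
 * Reading the resulting file yields one "name value (system)" triple
 * per line, per enum_map_show() above. Illustrative output:
 *
 *	# cat /sys/kernel/tracing/enum_map
 *	HI_SOFTIRQ 0 (irq_vectors)
 */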
4782 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4783 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4784 static inline void trace_insert_enum_map_file(struct module *mod,
4785 struct trace_enum_map **start, int len) { }
4786 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4788 static void trace_insert_enum_map(struct module *mod,
4789 struct trace_enum_map **start, int len)
4791 struct trace_enum_map **map;
4798 trace_event_enum_update(map, len);
4800 trace_insert_enum_map_file(mod, start, len);
4804 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4805 size_t cnt, loff_t *ppos)
4807 struct trace_array *tr = filp->private_data;
4808 char buf[MAX_TRACER_SIZE+2];
4811 mutex_lock(&trace_types_lock);
4812 r = sprintf(buf, "%s\n", tr->current_trace->name);
4813 mutex_unlock(&trace_types_lock);
4815 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4818 int tracer_init(struct tracer *t, struct trace_array *tr)
4820 tracing_reset_online_cpus(&tr->trace_buffer);
4824 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4828 for_each_tracing_cpu(cpu)
4829 per_cpu_ptr(buf->data, cpu)->entries = val;
4832 #ifdef CONFIG_TRACER_MAX_TRACE
/* resize @trace_buf's buffer to the size of @size_buf's entries */
4834 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4835 struct trace_buffer *size_buf, int cpu_id)
4839 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4840 for_each_tracing_cpu(cpu) {
4841 ret = ring_buffer_resize(trace_buf->buffer,
4842 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4845 per_cpu_ptr(trace_buf->data, cpu)->entries =
4846 per_cpu_ptr(size_buf->data, cpu)->entries;
4849 ret = ring_buffer_resize(trace_buf->buffer,
4850 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4852 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4853 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4858 #endif /* CONFIG_TRACER_MAX_TRACE */
4860 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4861 unsigned long size, int cpu)
* If the kernel or the user changes the size of the ring buffer,
4867 * we use the size that was given, and we can forget about
4868 * expanding it later.
4870 ring_buffer_expanded = true;
4872 /* May be called before buffers are initialized */
4873 if (!tr->trace_buffer.buffer)
4876 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4880 #ifdef CONFIG_TRACER_MAX_TRACE
4881 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4882 !tr->current_trace->use_max_tr)
4885 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4887 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4888 &tr->trace_buffer, cpu);
* AARGH! We are left with a max buffer of a
* different size than the main buffer!
* The max buffer is our "snapshot" buffer.
* When a tracer needs a snapshot (one of the
* latency tracers), it swaps the max buffer
* with the saved snapshot. We succeeded in
* updating the size of the main buffer, but failed
* to update the size of the max buffer. And when we
* tried to reset the main buffer to the original
* size, we failed there too. This is very unlikely
* to happen, but if it does, warn and kill all
* tracing.
*/
WARN_ON(1);
4905 tracing_disabled = 1;
4910 if (cpu == RING_BUFFER_ALL_CPUS)
4911 set_buffer_entries(&tr->max_buffer, size);
4913 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4916 #endif /* CONFIG_TRACER_MAX_TRACE */
4918 if (cpu == RING_BUFFER_ALL_CPUS)
4919 set_buffer_entries(&tr->trace_buffer, size);
4921 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4926 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4927 unsigned long size, int cpu_id)
4931 mutex_lock(&trace_types_lock);
4933 if (cpu_id != RING_BUFFER_ALL_CPUS) {
/* make sure this cpu is enabled in the mask */
4935 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4941 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4946 mutex_unlock(&trace_types_lock);
4953 * tracing_update_buffers - used by tracing facility to expand ring buffers
* To save memory when tracing is never used on a system that has it
* configured in, the ring buffers are set to a minimum size. But once
* a user starts to use the tracing facility, then they need to grow
* to their default size.
4960 * This function is to be called when a tracer is about to be used.
4962 int tracing_update_buffers(void)
4966 mutex_lock(&trace_types_lock);
4967 if (!ring_buffer_expanded)
4968 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4969 RING_BUFFER_ALL_CPUS);
4970 mutex_unlock(&trace_types_lock);
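/*
 * Illustrative call site (a sketch, not a quote of any one caller):
 * code that is about to enable an event or tracer expands the
 * buffers first:
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 */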
4975 struct trace_option_dentry;
4978 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4981 * Used to clear out the tracer before deletion of an instance.
4982 * Must have trace_types_lock held.
4984 static void tracing_set_nop(struct trace_array *tr)
4986 if (tr->current_trace == &nop_trace)
4989 tr->current_trace->enabled--;
4991 if (tr->current_trace->reset)
4992 tr->current_trace->reset(tr);
4994 tr->current_trace = &nop_trace;
4997 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
4999 /* Only enable if the directory has been created already. */
5003 create_trace_option_files(tr, t);
5006 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5009 #ifdef CONFIG_TRACER_MAX_TRACE
5014 mutex_lock(&trace_types_lock);
5016 if (!ring_buffer_expanded) {
5017 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5018 RING_BUFFER_ALL_CPUS);
5024 for (t = trace_types; t; t = t->next) {
5025 if (strcmp(t->name, buf) == 0)
5032 if (t == tr->current_trace)
5035 /* Some tracers are only allowed for the top level buffer */
5036 if (!trace_ok_for_array(t, tr)) {
5041 /* If trace pipe files are being read, we can't change the tracer */
5042 if (tr->current_trace->ref) {
5047 trace_branch_disable();
5049 tr->current_trace->enabled--;
5051 if (tr->current_trace->reset)
5052 tr->current_trace->reset(tr);
5054 /* Current trace needs to be nop_trace before synchronize_sched */
5055 tr->current_trace = &nop_trace;
5057 #ifdef CONFIG_TRACER_MAX_TRACE
5058 had_max_tr = tr->allocated_snapshot;
5060 if (had_max_tr && !t->use_max_tr) {
5062 * We need to make sure that the update_max_tr sees that
5063 * current_trace changed to nop_trace to keep it from
5064 * swapping the buffers after we resize it.
* The update_max_tr is called with interrupts disabled,
* so a synchronize_sched() is sufficient.
*/
5068 synchronize_sched();
5073 #ifdef CONFIG_TRACER_MAX_TRACE
5074 if (t->use_max_tr && !had_max_tr) {
5075 ret = alloc_snapshot(tr);
5082 ret = tracer_init(t, tr);
5087 tr->current_trace = t;
5088 tr->current_trace->enabled++;
5089 trace_branch_enable(tr);
5091 mutex_unlock(&trace_types_lock);
5097 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5098 size_t cnt, loff_t *ppos)
5100 struct trace_array *tr = filp->private_data;
5101 char buf[MAX_TRACER_SIZE+1];
5108 if (cnt > MAX_TRACER_SIZE)
5109 cnt = MAX_TRACER_SIZE;
5111 if (copy_from_user(buf, ubuf, cnt))
/* strip trailing whitespace. */
5117 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5120 err = tracing_set_tracer(tr, buf);
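/*
 * Illustrative use from user space: select a tracer by writing its
 * name to current_tracer (assuming tracefs at /sys/kernel/tracing):
 *
 *	echo function > /sys/kernel/tracing/current_tracer
 */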
5130 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5131 size_t cnt, loff_t *ppos)
5136 r = snprintf(buf, sizeof(buf), "%ld\n",
5137 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5138 if (r > sizeof(buf))
5140 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5144 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5145 size_t cnt, loff_t *ppos)
5150 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5160 tracing_thresh_read(struct file *filp, char __user *ubuf,
5161 size_t cnt, loff_t *ppos)
5163 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5167 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5168 size_t cnt, loff_t *ppos)
5170 struct trace_array *tr = filp->private_data;
5173 mutex_lock(&trace_types_lock);
5174 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5178 if (tr->current_trace->update_thresh) {
5179 ret = tr->current_trace->update_thresh(tr);
5186 mutex_unlock(&trace_types_lock);
5191 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5194 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5195 size_t cnt, loff_t *ppos)
5197 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5201 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5202 size_t cnt, loff_t *ppos)
5204 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5209 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5211 struct trace_array *tr = inode->i_private;
5212 struct trace_iterator *iter;
5215 if (tracing_disabled)
5218 if (trace_array_get(tr) < 0)
5221 mutex_lock(&trace_types_lock);
5223 /* create a buffer to store the information to pass to userspace */
5224 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5227 __trace_array_put(tr);
5231 trace_seq_init(&iter->seq);
5232 iter->trace = tr->current_trace;
5234 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5239 /* trace pipe does not show start of buffer */
5240 cpumask_setall(iter->started);
5242 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5243 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5245 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5246 if (trace_clocks[tr->clock_id].in_ns)
5247 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5250 iter->trace_buffer = &tr->trace_buffer;
5251 iter->cpu_file = tracing_get_cpu(inode);
5252 mutex_init(&iter->mutex);
5253 filp->private_data = iter;
5255 if (iter->trace->pipe_open)
5256 iter->trace->pipe_open(iter);
5258 nonseekable_open(inode, filp);
5260 tr->current_trace->ref++;
5262 mutex_unlock(&trace_types_lock);
5268 __trace_array_put(tr);
5269 mutex_unlock(&trace_types_lock);
5273 static int tracing_release_pipe(struct inode *inode, struct file *file)
5275 struct trace_iterator *iter = file->private_data;
5276 struct trace_array *tr = inode->i_private;
5278 mutex_lock(&trace_types_lock);
5280 tr->current_trace->ref--;
5282 if (iter->trace->pipe_close)
5283 iter->trace->pipe_close(iter);
5285 mutex_unlock(&trace_types_lock);
5287 free_cpumask_var(iter->started);
5288 mutex_destroy(&iter->mutex);
5291 trace_array_put(tr);
5297 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5299 struct trace_array *tr = iter->tr;
/* Iterators are static; they are either filled or empty */
5302 if (trace_buffer_iter(iter, iter->cpu_file))
5303 return POLLIN | POLLRDNORM;
5305 if (tr->trace_flags & TRACE_ITER_BLOCK)
5307 * Always select as readable when in blocking mode
5309 return POLLIN | POLLRDNORM;
5311 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5316 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5318 struct trace_iterator *iter = filp->private_data;
5320 return trace_poll(iter, filp, poll_table);
5323 /* Must be called with iter->mutex held. */
5324 static int tracing_wait_pipe(struct file *filp)
5326 struct trace_iterator *iter = filp->private_data;
5329 while (trace_empty(iter)) {
5331 if ((filp->f_flags & O_NONBLOCK)) {
* We block until we read something. If tracing is disabled
* but we have never read anything, we keep blocking. This
* allows a user to cat this file, and then enable tracing.
* But after we have read something, we give an EOF when
* tracing is again disabled.
5342 * iter->pos will be 0 if we haven't read anything.
5344 if (!tracing_is_on() && iter->pos)
5347 mutex_unlock(&iter->mutex);
5349 ret = wait_on_pipe(iter, false);
5351 mutex_lock(&iter->mutex);
5364 tracing_read_pipe(struct file *filp, char __user *ubuf,
5365 size_t cnt, loff_t *ppos)
5367 struct trace_iterator *iter = filp->private_data;
* Avoid more than one consumer on a single file descriptor.
* This is just a matter of trace coherency; the ring buffer itself
* is protected.
*/
5375 mutex_lock(&iter->mutex);
5377 /* return any leftover data */
5378 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5382 trace_seq_init(&iter->seq);
5384 if (iter->trace->read) {
5385 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5391 sret = tracing_wait_pipe(filp);
5395 /* stop when tracing is finished */
5396 if (trace_empty(iter)) {
5401 if (cnt >= PAGE_SIZE)
5402 cnt = PAGE_SIZE - 1;
5404 /* reset all but tr, trace, and overruns */
5405 memset(&iter->seq, 0,
5406 sizeof(struct trace_iterator) -
5407 offsetof(struct trace_iterator, seq));
5408 cpumask_clear(iter->started);
5411 trace_event_read_lock();
5412 trace_access_lock(iter->cpu_file);
5413 while (trace_find_next_entry_inc(iter) != NULL) {
5414 enum print_line_t ret;
5415 int save_len = iter->seq.seq.len;
5417 ret = print_trace_line(iter);
5418 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5419 /* don't print partial lines */
5420 iter->seq.seq.len = save_len;
5423 if (ret != TRACE_TYPE_NO_CONSUME)
5424 trace_consume(iter);
5426 if (trace_seq_used(&iter->seq) >= cnt)
* Setting the full flag means we reached the trace_seq buffer
* size, and we should have left via the partial-output condition
* above. If we get here, one of the trace_seq_* functions is not
* being used properly.
*/
5434 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5437 trace_access_unlock(iter->cpu_file);
5438 trace_event_read_unlock();
5440 /* Now copy what we have to the user */
5441 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5442 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5443 trace_seq_init(&iter->seq);
5446 * If there was nothing to send to user, in spite of consuming trace
5447 * entries, go back to wait for more entries.
5453 mutex_unlock(&iter->mutex);
5458 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5461 __free_page(spd->pages[idx]);
5464 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5466 .confirm = generic_pipe_buf_confirm,
5467 .release = generic_pipe_buf_release,
5468 .steal = generic_pipe_buf_steal,
5469 .get = generic_pipe_buf_get,
5473 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5479 /* Seq buffer is page-sized, exactly what we need. */
5481 save_len = iter->seq.seq.len;
5482 ret = print_trace_line(iter);
5484 if (trace_seq_has_overflowed(&iter->seq)) {
5485 iter->seq.seq.len = save_len;
5490 * This should not be hit, because it should only
5491 * be set if the iter->seq overflowed. But check it
5492 * anyway to be safe.
5494 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5495 iter->seq.seq.len = save_len;
5499 count = trace_seq_used(&iter->seq) - save_len;
5502 iter->seq.seq.len = save_len;
5506 if (ret != TRACE_TYPE_NO_CONSUME)
5507 trace_consume(iter);
5509 if (!trace_find_next_entry_inc(iter)) {
5519 static ssize_t tracing_splice_read_pipe(struct file *filp,
5521 struct pipe_inode_info *pipe,
5525 struct page *pages_def[PIPE_DEF_BUFFERS];
5526 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5527 struct trace_iterator *iter = filp->private_data;
5528 struct splice_pipe_desc spd = {
5530 .partial = partial_def,
5531 .nr_pages = 0, /* This gets updated below. */
5532 .nr_pages_max = PIPE_DEF_BUFFERS,
5534 .ops = &tracing_pipe_buf_ops,
5535 .spd_release = tracing_spd_release_pipe,
5541 if (splice_grow_spd(pipe, &spd))
5544 mutex_lock(&iter->mutex);
5546 if (iter->trace->splice_read) {
5547 ret = iter->trace->splice_read(iter, filp,
5548 ppos, pipe, len, flags);
5553 ret = tracing_wait_pipe(filp);
5557 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5562 trace_event_read_lock();
5563 trace_access_lock(iter->cpu_file);
5565 /* Fill as many pages as possible. */
5566 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5567 spd.pages[i] = alloc_page(GFP_KERNEL);
5571 rem = tracing_fill_pipe_page(rem, iter);
5573 /* Copy the data into the page, so we can start over. */
5574 ret = trace_seq_to_buffer(&iter->seq,
5575 page_address(spd.pages[i]),
5576 trace_seq_used(&iter->seq));
5578 __free_page(spd.pages[i]);
5581 spd.partial[i].offset = 0;
5582 spd.partial[i].len = trace_seq_used(&iter->seq);
5584 trace_seq_init(&iter->seq);
5587 trace_access_unlock(iter->cpu_file);
5588 trace_event_read_unlock();
5589 mutex_unlock(&iter->mutex);
5594 ret = splice_to_pipe(pipe, &spd);
5598 splice_shrink_spd(&spd);
5602 mutex_unlock(&iter->mutex);
5607 tracing_entries_read(struct file *filp, char __user *ubuf,
5608 size_t cnt, loff_t *ppos)
5610 struct inode *inode = file_inode(filp);
5611 struct trace_array *tr = inode->i_private;
5612 int cpu = tracing_get_cpu(inode);
5617 mutex_lock(&trace_types_lock);
5619 if (cpu == RING_BUFFER_ALL_CPUS) {
5620 int cpu, buf_size_same;
/* check if all cpu sizes are the same */
5626 for_each_tracing_cpu(cpu) {
5627 /* fill in the size from first enabled cpu */
5629 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5630 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5636 if (buf_size_same) {
5637 if (!ring_buffer_expanded)
5638 r = sprintf(buf, "%lu (expanded: %lu)\n",
5640 trace_buf_size >> 10);
5642 r = sprintf(buf, "%lu\n", size >> 10);
5644 r = sprintf(buf, "X\n");
5646 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5648 mutex_unlock(&trace_types_lock);
5650 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5655 tracing_entries_write(struct file *filp, const char __user *ubuf,
5656 size_t cnt, loff_t *ppos)
5658 struct inode *inode = file_inode(filp);
5659 struct trace_array *tr = inode->i_private;
5663 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5667 /* must have at least 1 entry */
5671 /* value is in KB */
5673 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5683 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5684 size_t cnt, loff_t *ppos)
5686 struct trace_array *tr = filp->private_data;
5689 unsigned long size = 0, expanded_size = 0;
5691 mutex_lock(&trace_types_lock);
5692 for_each_tracing_cpu(cpu) {
5693 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5694 if (!ring_buffer_expanded)
5695 expanded_size += trace_buf_size >> 10;
5697 if (ring_buffer_expanded)
5698 r = sprintf(buf, "%lu\n", size);
5700 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5701 mutex_unlock(&trace_types_lock);
5703 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5707 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5708 size_t cnt, loff_t *ppos)
* There is no need to read what the user has written; this function
* is just to make sure that there is no error when "echo" is used.
*/
5721 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5723 struct trace_array *tr = inode->i_private;
5725 /* disable tracing ? */
5726 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5727 tracer_tracing_off(tr);
5728 /* resize the ring buffer to 0 */
5729 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5731 trace_array_put(tr);
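/*
 * Illustrative use from user space: closing a writer on free_buffer
 * shrinks the ring buffer to zero (and, if the disable_on_free option
 * is set, turns tracing off first):
 *
 *	echo > /sys/kernel/tracing/free_buffer
 */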
5737 tracing_mark_write(struct file *filp, const char __user *ubuf,
5738 size_t cnt, loff_t *fpos)
5740 struct trace_array *tr = filp->private_data;
5741 struct ring_buffer_event *event;
5742 struct ring_buffer *buffer;
5743 struct print_entry *entry;
5744 unsigned long irq_flags;
5745 const char faulted[] = "<faulted>";
5750 /* Used in tracing_mark_raw_write() as well */
5751 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5753 if (tracing_disabled)
5756 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5759 if (cnt > TRACE_BUF_SIZE)
5760 cnt = TRACE_BUF_SIZE;
5762 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5764 local_save_flags(irq_flags);
5765 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5767 /* If less than "<faulted>", then make sure we can still add that */
5768 if (cnt < FAULTED_SIZE)
5769 size += FAULTED_SIZE - cnt;
5771 buffer = tr->trace_buffer.buffer;
5772 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5773 irq_flags, preempt_count());
5774 if (unlikely(!event))
5775 /* Ring buffer disabled, return as if not open for write */
5778 entry = ring_buffer_event_data(event);
5779 entry->ip = _THIS_IP_;
5781 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5783 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5790 if (entry->buf[cnt - 1] != '\n') {
5791 entry->buf[cnt] = '\n';
5792 entry->buf[cnt + 1] = '\0';
5794 entry->buf[cnt] = '\0';
5796 __buffer_unlock_commit(buffer, event);
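/*
 * Illustrative use from user space: annotate the trace with a text
 * marker:
 *
 *	echo "hit checkpoint A" > /sys/kernel/tracing/trace_marker
 */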
5804 /* Limit it for now to 3K (including tag) */
5805 #define RAW_DATA_MAX_SIZE (1024*3)
5808 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5809 size_t cnt, loff_t *fpos)
5811 struct trace_array *tr = filp->private_data;
5812 struct ring_buffer_event *event;
5813 struct ring_buffer *buffer;
5814 struct raw_data_entry *entry;
5815 const char faulted[] = "<faulted>";
5816 unsigned long irq_flags;
5821 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5823 if (tracing_disabled)
5826 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5829 /* The marker must at least have a tag id */
5830 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5833 if (cnt > TRACE_BUF_SIZE)
5834 cnt = TRACE_BUF_SIZE;
5836 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5838 local_save_flags(irq_flags);
5839 size = sizeof(*entry) + cnt;
5840 if (cnt < FAULT_SIZE_ID)
5841 size += FAULT_SIZE_ID - cnt;
5843 buffer = tr->trace_buffer.buffer;
5844 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5845 irq_flags, preempt_count());
5847 /* Ring buffer disabled, return as if not open for write */
5850 entry = ring_buffer_event_data(event);
5852 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5855 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5860 __buffer_unlock_commit(buffer, event);
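/*
 * Illustrative user-space writer (a sketch): the record handed to
 * trace_marker_raw must begin with a 4-byte tag id, followed by the
 * raw payload. Here fd is assumed to be an open descriptor on
 * trace_marker_raw and 42 an arbitrary example tag:
 *
 *	struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *	write(fd, &rec, sizeof(rec));
 */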
5868 static int tracing_clock_show(struct seq_file *m, void *v)
5870 struct trace_array *tr = m->private;
5873 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5875 "%s%s%s%s", i ? " " : "",
5876 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5877 i == tr->clock_id ? "]" : "");
5883 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5887 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5888 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5891 if (i == ARRAY_SIZE(trace_clocks))
5894 mutex_lock(&trace_types_lock);
5898 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
* The new clock may not be consistent with the previous clock.
5902 * Reset the buffer so that it doesn't have incomparable timestamps.
5904 tracing_reset_online_cpus(&tr->trace_buffer);
5906 #ifdef CONFIG_TRACER_MAX_TRACE
5907 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5908 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5909 tracing_reset_online_cpus(&tr->max_buffer);
5912 mutex_unlock(&trace_types_lock);
5917 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5918 size_t cnt, loff_t *fpos)
5920 struct seq_file *m = filp->private_data;
5921 struct trace_array *tr = m->private;
5923 const char *clockstr;
5926 if (cnt >= sizeof(buf))
5929 if (copy_from_user(buf, ubuf, cnt))
5934 clockstr = strstrip(buf);
5936 ret = tracing_set_clock(tr, clockstr);
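/*
 * Illustrative use from user space: switch the trace clock and read
 * back the selection (the active clock is shown in brackets by
 * tracing_clock_show() above):
 *
 *	echo global > /sys/kernel/tracing/trace_clock
 *	cat /sys/kernel/tracing/trace_clock
 */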
5945 static int tracing_clock_open(struct inode *inode, struct file *file)
5947 struct trace_array *tr = inode->i_private;
5950 if (tracing_disabled)
5953 if (trace_array_get(tr))
5956 ret = single_open(file, tracing_clock_show, inode->i_private);
5958 trace_array_put(tr);
5963 struct ftrace_buffer_info {
5964 struct trace_iterator iter;
5969 #ifdef CONFIG_TRACER_SNAPSHOT
5970 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5972 struct trace_array *tr = inode->i_private;
5973 struct trace_iterator *iter;
5977 if (trace_array_get(tr) < 0)
5980 if (file->f_mode & FMODE_READ) {
5981 iter = __tracing_open(inode, file, true);
5983 ret = PTR_ERR(iter);
5985 /* Writes still need the seq_file to hold the private data */
5987 m = kzalloc(sizeof(*m), GFP_KERNEL);
5990 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5998 iter->trace_buffer = &tr->max_buffer;
5999 iter->cpu_file = tracing_get_cpu(inode);
6001 file->private_data = m;
6005 trace_array_put(tr);
6011 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6014 struct seq_file *m = filp->private_data;
6015 struct trace_iterator *iter = m->private;
6016 struct trace_array *tr = iter->tr;
6020 ret = tracing_update_buffers();
6024 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6028 mutex_lock(&trace_types_lock);
6030 if (tr->current_trace->use_max_tr) {
6037 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6041 if (tr->allocated_snapshot)
6045 /* Only allow per-cpu swap if the ring buffer supports it */
6046 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6047 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6052 if (!tr->allocated_snapshot) {
6053 ret = alloc_snapshot(tr);
6057 local_irq_disable();
6058 /* Now, we're going to swap */
6059 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6060 update_max_tr(tr, current, smp_processor_id());
6062 update_max_tr_single(tr, current, iter->cpu_file);
6066 if (tr->allocated_snapshot) {
6067 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6068 tracing_reset_online_cpus(&tr->max_buffer);
6070 tracing_reset(&tr->max_buffer, iter->cpu_file);
6080 mutex_unlock(&trace_types_lock);
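/*
 * Illustrative use from user space, matching the value handling
 * above:
 *
 *	echo 1 > /sys/kernel/tracing/snapshot	# allocate if needed, then swap
 *	cat /sys/kernel/tracing/snapshot	# read the frozen buffer
 *	echo 0 > /sys/kernel/tracing/snapshot	# free the spare buffer
 */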
6084 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6086 struct seq_file *m = file->private_data;
6089 ret = tracing_release(inode, file);
6091 if (file->f_mode & FMODE_READ)
6094 /* If write only, the seq_file is just a stub */
6102 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6103 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6104 size_t count, loff_t *ppos);
6105 static int tracing_buffers_release(struct inode *inode, struct file *file);
6106 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6107 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6109 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6111 struct ftrace_buffer_info *info;
6114 ret = tracing_buffers_open(inode, filp);
6118 info = filp->private_data;
6120 if (info->iter.trace->use_max_tr) {
6121 tracing_buffers_release(inode, filp);
6125 info->iter.snapshot = true;
6126 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6131 #endif /* CONFIG_TRACER_SNAPSHOT */
6134 static const struct file_operations tracing_thresh_fops = {
6135 .open = tracing_open_generic,
6136 .read = tracing_thresh_read,
6137 .write = tracing_thresh_write,
6138 .llseek = generic_file_llseek,
6141 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6142 static const struct file_operations tracing_max_lat_fops = {
6143 .open = tracing_open_generic,
6144 .read = tracing_max_lat_read,
6145 .write = tracing_max_lat_write,
6146 .llseek = generic_file_llseek,
6150 static const struct file_operations set_tracer_fops = {
6151 .open = tracing_open_generic,
6152 .read = tracing_set_trace_read,
6153 .write = tracing_set_trace_write,
6154 .llseek = generic_file_llseek,
6157 static const struct file_operations tracing_pipe_fops = {
6158 .open = tracing_open_pipe,
6159 .poll = tracing_poll_pipe,
6160 .read = tracing_read_pipe,
6161 .splice_read = tracing_splice_read_pipe,
6162 .release = tracing_release_pipe,
6163 .llseek = no_llseek,
6166 static const struct file_operations tracing_entries_fops = {
6167 .open = tracing_open_generic_tr,
6168 .read = tracing_entries_read,
6169 .write = tracing_entries_write,
6170 .llseek = generic_file_llseek,
6171 .release = tracing_release_generic_tr,
6174 static const struct file_operations tracing_total_entries_fops = {
6175 .open = tracing_open_generic_tr,
6176 .read = tracing_total_entries_read,
6177 .llseek = generic_file_llseek,
6178 .release = tracing_release_generic_tr,
6181 static const struct file_operations tracing_free_buffer_fops = {
6182 .open = tracing_open_generic_tr,
6183 .write = tracing_free_buffer_write,
6184 .release = tracing_free_buffer_release,
6187 static const struct file_operations tracing_mark_fops = {
6188 .open = tracing_open_generic_tr,
6189 .write = tracing_mark_write,
6190 .llseek = generic_file_llseek,
6191 .release = tracing_release_generic_tr,
6194 static const struct file_operations tracing_mark_raw_fops = {
6195 .open = tracing_open_generic_tr,
6196 .write = tracing_mark_raw_write,
6197 .llseek = generic_file_llseek,
6198 .release = tracing_release_generic_tr,
6201 static const struct file_operations trace_clock_fops = {
6202 .open = tracing_clock_open,
6204 .llseek = seq_lseek,
6205 .release = tracing_single_release_tr,
6206 .write = tracing_clock_write,
6209 #ifdef CONFIG_TRACER_SNAPSHOT
6210 static const struct file_operations snapshot_fops = {
6211 .open = tracing_snapshot_open,
6213 .write = tracing_snapshot_write,
6214 .llseek = tracing_lseek,
6215 .release = tracing_snapshot_release,
6218 static const struct file_operations snapshot_raw_fops = {
6219 .open = snapshot_raw_open,
6220 .read = tracing_buffers_read,
6221 .release = tracing_buffers_release,
6222 .splice_read = tracing_buffers_splice_read,
6223 .llseek = no_llseek,
6226 #endif /* CONFIG_TRACER_SNAPSHOT */
6228 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6230 struct trace_array *tr = inode->i_private;
6231 struct ftrace_buffer_info *info;
6234 if (tracing_disabled)
6237 if (trace_array_get(tr) < 0)
6240 info = kzalloc(sizeof(*info), GFP_KERNEL);
6242 trace_array_put(tr);
6246 mutex_lock(&trace_types_lock);
6249 info->iter.cpu_file = tracing_get_cpu(inode);
6250 info->iter.trace = tr->current_trace;
6251 info->iter.trace_buffer = &tr->trace_buffer;
6253 /* Force reading ring buffer for first read */
6254 info->read = (unsigned int)-1;
6256 filp->private_data = info;
6258 tr->current_trace->ref++;
6260 mutex_unlock(&trace_types_lock);
6262 ret = nonseekable_open(inode, filp);
6264 trace_array_put(tr);
6270 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6272 struct ftrace_buffer_info *info = filp->private_data;
6273 struct trace_iterator *iter = &info->iter;
6275 return trace_poll(iter, filp, poll_table);
6279 tracing_buffers_read(struct file *filp, char __user *ubuf,
6280 size_t count, loff_t *ppos)
6282 struct ftrace_buffer_info *info = filp->private_data;
6283 struct trace_iterator *iter = &info->iter;
6290 #ifdef CONFIG_TRACER_MAX_TRACE
6291 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6296 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6301 /* Do we have previous read data to read? */
6302 if (info->read < PAGE_SIZE)
6306 trace_access_lock(iter->cpu_file);
6307 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6311 trace_access_unlock(iter->cpu_file);
6314 if (trace_empty(iter)) {
6315 if ((filp->f_flags & O_NONBLOCK))
6318 ret = wait_on_pipe(iter, false);
6329 size = PAGE_SIZE - info->read;
6333 ret = copy_to_user(ubuf, info->spare + info->read, size);
6345 static int tracing_buffers_release(struct inode *inode, struct file *file)
6347 struct ftrace_buffer_info *info = file->private_data;
6348 struct trace_iterator *iter = &info->iter;
6350 mutex_lock(&trace_types_lock);
6352 iter->tr->current_trace->ref--;
6354 __trace_array_put(iter->tr);
6357 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6360 mutex_unlock(&trace_types_lock);
6366 struct ring_buffer *buffer;
6371 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6372 struct pipe_buffer *buf)
6374 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6379 ring_buffer_free_read_page(ref->buffer, ref->page);
6384 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6385 struct pipe_buffer *buf)
6387 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6392 /* Pipe buffer operations for a buffer. */
6393 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6395 .confirm = generic_pipe_buf_confirm,
6396 .release = buffer_pipe_buf_release,
6397 .steal = generic_pipe_buf_steal,
6398 .get = buffer_pipe_buf_get,
* Callback from splice_to_pipe(), used to release pages left in the
* spd if we errored out while filling the pipe.
*/
6405 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6407 struct buffer_ref *ref =
6408 (struct buffer_ref *)spd->partial[i].private;
6413 ring_buffer_free_read_page(ref->buffer, ref->page);
6415 spd->partial[i].private = 0;
6419 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6420 struct pipe_inode_info *pipe, size_t len,
6423 struct ftrace_buffer_info *info = file->private_data;
6424 struct trace_iterator *iter = &info->iter;
6425 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6426 struct page *pages_def[PIPE_DEF_BUFFERS];
6427 struct splice_pipe_desc spd = {
6429 .partial = partial_def,
6430 .nr_pages_max = PIPE_DEF_BUFFERS,
6432 .ops = &buffer_pipe_buf_ops,
6433 .spd_release = buffer_spd_release,
6435 struct buffer_ref *ref;
6436 int entries, size, i;
6439 #ifdef CONFIG_TRACER_MAX_TRACE
6440 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6444 if (*ppos & (PAGE_SIZE - 1))
6447 if (len & (PAGE_SIZE - 1)) {
6448 if (len < PAGE_SIZE)
6453 if (splice_grow_spd(pipe, &spd))
6457 trace_access_lock(iter->cpu_file);
6458 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6460 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6464 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6471 ref->buffer = iter->trace_buffer->buffer;
6472 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6479 r = ring_buffer_read_page(ref->buffer, &ref->page,
6480 len, iter->cpu_file, 1);
6482 ring_buffer_free_read_page(ref->buffer, ref->page);
* zero out any leftover data; this is going to
* user land.
*/
6491 size = ring_buffer_page_len(ref->page);
6492 if (size < PAGE_SIZE)
6493 memset(ref->page + size, 0, PAGE_SIZE - size);
6495 page = virt_to_page(ref->page);
6497 spd.pages[i] = page;
6498 spd.partial[i].len = PAGE_SIZE;
6499 spd.partial[i].offset = 0;
6500 spd.partial[i].private = (unsigned long)ref;
6504 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6507 trace_access_unlock(iter->cpu_file);
6510 /* did we read anything? */
6511 if (!spd.nr_pages) {
6516 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6519 ret = wait_on_pipe(iter, true);
6526 ret = splice_to_pipe(pipe, &spd);
6528 splice_shrink_spd(&spd);
6533 static const struct file_operations tracing_buffers_fops = {
6534 .open = tracing_buffers_open,
6535 .read = tracing_buffers_read,
6536 .poll = tracing_buffers_poll,
6537 .release = tracing_buffers_release,
6538 .splice_read = tracing_buffers_splice_read,
6539 .llseek = no_llseek,
6543 tracing_stats_read(struct file *filp, char __user *ubuf,
6544 size_t count, loff_t *ppos)
6546 struct inode *inode = file_inode(filp);
6547 struct trace_array *tr = inode->i_private;
6548 struct trace_buffer *trace_buf = &tr->trace_buffer;
6549 int cpu = tracing_get_cpu(inode);
6550 struct trace_seq *s;
6552 unsigned long long t;
6553 unsigned long usec_rem;
6555 s = kmalloc(sizeof(*s), GFP_KERNEL);
6561 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6562 trace_seq_printf(s, "entries: %ld\n", cnt);
6564 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6565 trace_seq_printf(s, "overrun: %ld\n", cnt);
6567 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6568 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6570 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6571 trace_seq_printf(s, "bytes: %ld\n", cnt);
6573 if (trace_clocks[tr->clock_id].in_ns) {
6574 /* local or global for trace_clock */
6575 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6576 usec_rem = do_div(t, USEC_PER_SEC);
6577 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6580 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6581 usec_rem = do_div(t, USEC_PER_SEC);
6582 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6584 /* counter or tsc mode for trace_clock */
6585 trace_seq_printf(s, "oldest event ts: %llu\n",
6586 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6588 trace_seq_printf(s, "now ts: %llu\n",
6589 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6592 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6593 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6595 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6596 trace_seq_printf(s, "read events: %ld\n", cnt);
6598 count = simple_read_from_buffer(ubuf, count, ppos,
6599 s->buffer, trace_seq_used(s));
6606 static const struct file_operations tracing_stats_fops = {
6607 .open = tracing_open_generic_tr,
6608 .read = tracing_stats_read,
6609 .llseek = generic_file_llseek,
6610 .release = tracing_release_generic_tr,
6613 #ifdef CONFIG_DYNAMIC_FTRACE
6615 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6621 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6622 size_t cnt, loff_t *ppos)
6624 static char ftrace_dyn_info_buffer[1024];
6625 static DEFINE_MUTEX(dyn_info_mutex);
6626 unsigned long *p = filp->private_data;
6627 char *buf = ftrace_dyn_info_buffer;
6628 int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6631 mutex_lock(&dyn_info_mutex);
6632 r = sprintf(buf, "%ld ", *p);
6634 r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6637 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6639 mutex_unlock(&dyn_info_mutex);
6644 static const struct file_operations tracing_dyn_info_fops = {
6645 .open = tracing_open_generic,
6646 .read = tracing_read_dyn_info,
6647 .llseek = generic_file_llseek,
6649 #endif /* CONFIG_DYNAMIC_FTRACE */
6651 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6653 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6659 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6661 unsigned long *count = (long *)data;
6673 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6674 struct ftrace_probe_ops *ops, void *data)
6676 long count = (long)data;
6678 seq_printf(m, "%ps:", (void *)ip);
6680 seq_puts(m, "snapshot");
6683 seq_puts(m, ":unlimited\n");
6685 seq_printf(m, ":count=%ld\n", count);
6690 static struct ftrace_probe_ops snapshot_probe_ops = {
6691 .func = ftrace_snapshot,
6692 .print = ftrace_snapshot_print,
6695 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6696 .func = ftrace_count_snapshot,
6697 .print = ftrace_snapshot_print,
6701 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6702 char *glob, char *cmd, char *param, int enable)
6704 struct ftrace_probe_ops *ops;
6705 void *count = (void *)-1;
6709 /* hash funcs only work with set_ftrace_filter */
6713 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6715 if (glob[0] == '!') {
6716 unregister_ftrace_function_probe_func(glob+1, ops);
6723 number = strsep(¶m, ":");
6725 if (!strlen(number))
* We use the callback data field (which is a pointer)
* as our counter.
*/
6732 ret = kstrtoul(number, 0, (unsigned long *)&count);
6737 ret = register_ftrace_function_probe(glob, ops, count);
6740 alloc_snapshot(&global_trace);
6742 return ret < 0 ? ret : 0;
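/*
 * Illustrative use from user space: arm a snapshot that fires the
 * first time schedule() is hit:
 *
 *	echo 'schedule:snapshot:1' > /sys/kernel/tracing/set_ftrace_filter
 */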
6745 static struct ftrace_func_command ftrace_snapshot_cmd = {
6747 .func = ftrace_trace_snapshot_callback,
6750 static __init int register_snapshot_cmd(void)
6752 return register_ftrace_command(&ftrace_snapshot_cmd);
6755 static inline __init int register_snapshot_cmd(void) { return 0; }
6756 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6758 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6760 if (WARN_ON(!tr->dir))
6761 return ERR_PTR(-ENODEV);
6763 /* Top directory uses NULL as the parent */
6764 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6767 /* All sub buffers have a descriptor */
6771 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6773 struct dentry *d_tracer;
6776 return tr->percpu_dir;
6778 d_tracer = tracing_get_dentry(tr);
6779 if (IS_ERR(d_tracer))
6782 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6784 WARN_ONCE(!tr->percpu_dir,
6785 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6787 return tr->percpu_dir;
6790 static struct dentry *
6791 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6792 void *data, long cpu, const struct file_operations *fops)
6794 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6796 if (ret) /* See tracing_get_cpu() */
6797 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6802 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6804 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6805 struct dentry *d_cpu;
6806 char cpu_dir[30]; /* 30 characters should be more than enough */
6811 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6812 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6814 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6818 /* per cpu trace_pipe */
6819 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6820 tr, cpu, &tracing_pipe_fops);
6823 trace_create_cpu_file("trace", 0644, d_cpu,
6824 tr, cpu, &tracing_fops);
6826 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6827 tr, cpu, &tracing_buffers_fops);
6829 trace_create_cpu_file("stats", 0444, d_cpu,
6830 tr, cpu, &tracing_stats_fops);
6832 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6833 tr, cpu, &tracing_entries_fops);
6835 #ifdef CONFIG_TRACER_SNAPSHOT
6836 trace_create_cpu_file("snapshot", 0644, d_cpu,
6837 tr, cpu, &snapshot_fops);
6839 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6840 tr, cpu, &snapshot_raw_fops);
6844 #ifdef CONFIG_FTRACE_SELFTEST
6845 /* Let selftest have access to static functions in this file */
6846 #include "trace_selftest.c"
6850 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6853 struct trace_option_dentry *topt = filp->private_data;
6856 if (topt->flags->val & topt->opt->bit)
6861 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6865 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6868 struct trace_option_dentry *topt = filp->private_data;
6872 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6876 if (val != 0 && val != 1)
6879 if (!!(topt->flags->val & topt->opt->bit) != val) {
6880 mutex_lock(&trace_types_lock);
6881 ret = __set_tracer_option(topt->tr, topt->flags,
6883 mutex_unlock(&trace_types_lock);
6894 static const struct file_operations trace_options_fops = {
6895 .open = tracing_open_generic,
6896 .read = trace_options_read,
6897 .write = trace_options_write,
6898 .llseek = generic_file_llseek,
6902 * In order to pass in both the trace_array descriptor as well as the index
6903 * to the flag that the trace option file represents, the trace_array
6904 * has a character array of trace_flags_index[], which holds the index
6905 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6906 * The address of this character array is passed to the flag option file
6907 * read/write callbacks.
* In order to extract both the index and the trace_array descriptor,
* get_tr_index() uses the following algorithm:
*
*	idx = *ptr;
*
* As the pointer itself contains the address of the index (remember
* index[1] == 1), dereferencing it yields the index.
*
* Then, to get the trace_array descriptor, we subtract that index
* from the ptr, which gets us to the start of the index array itself:
*
*	ptr - idx == &index[0]
*
* Then a simple container_of() from that pointer gets us to the
* trace_array descriptor.
6925 static void get_tr_index(void *data, struct trace_array **ptr,
6926 unsigned int *pindex)
6928 *pindex = *(unsigned char *)data;
6930 *ptr = container_of(data - *pindex, struct trace_array,
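/*
 * Worked example (illustrative): if data points at
 * tr->trace_flags_index[3], then *pindex becomes 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() recovers tr.
 */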
6935 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6938 void *tr_index = filp->private_data;
6939 struct trace_array *tr;
6943 get_tr_index(tr_index, &tr, &index);
6945 if (tr->trace_flags & (1 << index))
6950 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6954 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6957 void *tr_index = filp->private_data;
6958 struct trace_array *tr;
6963 get_tr_index(tr_index, &tr, &index);
6965 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6969 if (val != 0 && val != 1)
6972 mutex_lock(&trace_types_lock);
6973 ret = set_tracer_flag(tr, 1 << index, val);
6974 mutex_unlock(&trace_types_lock);
6984 static const struct file_operations trace_options_core_fops = {
6985 .open = tracing_open_generic,
6986 .read = trace_options_core_read,
6987 .write = trace_options_core_write,
6988 .llseek = generic_file_llseek,
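/*
 * Illustrative use from user space: each core flag appears as a file
 * under options/ that accepts only 0 or 1:
 *
 *	echo 1 > /sys/kernel/tracing/options/print-parent
 */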
6991 struct dentry *trace_create_file(const char *name,
6993 struct dentry *parent,
6995 const struct file_operations *fops)
6999 ret = tracefs_create_file(name, mode, parent, data, fops);
7001 pr_warn("Could not create tracefs '%s' entry\n", name);
7007 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7009 struct dentry *d_tracer;
7014 d_tracer = tracing_get_dentry(tr);
7015 if (IS_ERR(d_tracer))
7018 tr->options = tracefs_create_dir("options", d_tracer);
7020 pr_warn("Could not create tracefs directory 'options'\n");
7028 create_trace_option_file(struct trace_array *tr,
7029 struct trace_option_dentry *topt,
7030 struct tracer_flags *flags,
7031 struct tracer_opt *opt)
7033 struct dentry *t_options;
7035 t_options = trace_options_init_dentry(tr);
7039 topt->flags = flags;
7043 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7044 &trace_options_fops);
7049 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7051 struct trace_option_dentry *topts;
7052 struct trace_options *tr_topts;
7053 struct tracer_flags *flags;
7054 struct tracer_opt *opts;
7061 flags = tracer->flags;
7063 if (!flags || !flags->opts)
7067 * If this is an instance, only create flags for tracers
7068 * the instance may have.
7070 if (!trace_ok_for_array(tracer, tr))
7073 for (i = 0; i < tr->nr_topts; i++) {
/* Make sure there are no duplicate flags. */
7075 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7081 for (cnt = 0; opts[cnt].name; cnt++)
7084 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7088 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7095 tr->topts = tr_topts;
7096 tr->topts[tr->nr_topts].tracer = tracer;
7097 tr->topts[tr->nr_topts].topts = topts;
7100 for (cnt = 0; opts[cnt].name; cnt++) {
7101 create_trace_option_file(tr, &topts[cnt], flags,
7103 WARN_ONCE(topts[cnt].entry == NULL,
7104 "Failed to create trace option: %s",
7109 static struct dentry *
7110 create_trace_option_core_file(struct trace_array *tr,
7111 const char *option, long index)
7113 struct dentry *t_options;
7115 t_options = trace_options_init_dentry(tr);
7119 return trace_create_file(option, 0644, t_options,
7120 (void *)&tr->trace_flags_index[index],
7121 &trace_options_core_fops);
7124 static void create_trace_options_dir(struct trace_array *tr)
7126 struct dentry *t_options;
7127 bool top_level = tr == &global_trace;
7130 t_options = trace_options_init_dentry(tr);
7134 for (i = 0; trace_options[i]; i++) {
7136 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7137 create_trace_option_core_file(tr, trace_options[i], i);
7142 rb_simple_read(struct file *filp, char __user *ubuf,
7143 size_t cnt, loff_t *ppos)
7145 struct trace_array *tr = filp->private_data;
7149 r = tracer_tracing_is_on(tr);
7150 r = sprintf(buf, "%d\n", r);
7152 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7156 rb_simple_write(struct file *filp, const char __user *ubuf,
7157 size_t cnt, loff_t *ppos)
7159 struct trace_array *tr = filp->private_data;
7160 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7164 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7169 mutex_lock(&trace_types_lock);
7171 tracer_tracing_on(tr);
7172 if (tr->current_trace->start)
7173 tr->current_trace->start(tr);
7175 tracer_tracing_off(tr);
7176 if (tr->current_trace->stop)
7177 tr->current_trace->stop(tr);
7179 mutex_unlock(&trace_types_lock);
7187 static const struct file_operations rb_simple_fops = {
7188 .open = tracing_open_generic_tr,
7189 .read = rb_simple_read,
7190 .write = rb_simple_write,
7191 .release = tracing_release_generic_tr,
7192 .llseek = default_llseek,
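/*
 * Illustrative use from user space: tracing_on gates writes to the
 * ring buffer without tearing anything down:
 *
 *	echo 0 > /sys/kernel/tracing/tracing_on	# pause
 *	echo 1 > /sys/kernel/tracing/tracing_on	# resume
 */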
7195 struct dentry *trace_instance_dir;
7198 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7201 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7203 enum ring_buffer_flags rb_flags;
7205 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7209 buf->buffer = ring_buffer_alloc(size, rb_flags);
7213 buf->data = alloc_percpu(struct trace_array_cpu);
7215 ring_buffer_free(buf->buffer);
7219 /* Allocate the first page for all buffers */
7220 set_buffer_entries(&tr->trace_buffer,
7221 ring_buffer_size(tr->trace_buffer.buffer, 0));
7226 static int allocate_trace_buffers(struct trace_array *tr, int size)
7230 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7234 #ifdef CONFIG_TRACER_MAX_TRACE
7235 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7236 allocate_snapshot ? size : 1);
7238 ring_buffer_free(tr->trace_buffer.buffer);
7239 free_percpu(tr->trace_buffer.data);
7242 tr->allocated_snapshot = allocate_snapshot;
7245 * Only the top level trace array gets its snapshot allocated
7246 * from the kernel command line.
7248 allocate_snapshot = false;
7253 static void free_trace_buffer(struct trace_buffer *buf)
7256 ring_buffer_free(buf->buffer);
7258 free_percpu(buf->data);
7263 static void free_trace_buffers(struct trace_array *tr)
7268 free_trace_buffer(&tr->trace_buffer);
7270 #ifdef CONFIG_TRACER_MAX_TRACE
7271 free_trace_buffer(&tr->max_buffer);
7275 static void init_trace_flags_index(struct trace_array *tr)
7279 /* Used by the trace options files */
7280 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7281 tr->trace_flags_index[i] = i;
7284 static void __update_tracer_options(struct trace_array *tr)
7288 for (t = trace_types; t; t = t->next)
7289 add_tracer_options(tr, t);
7292 static void update_tracer_options(struct trace_array *tr)
7294 mutex_lock(&trace_types_lock);
7295 __update_tracer_options(tr);
7296 mutex_unlock(&trace_types_lock);
7299 static int instance_mkdir(const char *name)
7301 struct trace_array *tr;
7304 mutex_lock(&trace_types_lock);
7307 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7308 if (tr->name && strcmp(tr->name, name) == 0)
7313 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7317 tr->name = kstrdup(name, GFP_KERNEL);
7321 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7324 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7326 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7328 raw_spin_lock_init(&tr->start_lock);
7330 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7332 tr->current_trace = &nop_trace;
7334 INIT_LIST_HEAD(&tr->systems);
7335 INIT_LIST_HEAD(&tr->events);
7337 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7340 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7344 ret = event_trace_add_tracer(tr->dir, tr);
7346 tracefs_remove_recursive(tr->dir);
7350 init_tracer_tracefs(tr, tr->dir);
7351 init_trace_flags_index(tr);
7352 __update_tracer_options(tr);
7354 list_add(&tr->list, &ftrace_trace_arrays);
7356 mutex_unlock(&trace_types_lock);
7361 free_trace_buffers(tr);
7362 free_cpumask_var(tr->tracing_cpumask);
7367 mutex_unlock(&trace_types_lock);
7373 static int instance_rmdir(const char *name)
7375 struct trace_array *tr;
7380 mutex_lock(&trace_types_lock);
7383 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7384 if (tr->name && strcmp(tr->name, name) == 0) {
7393 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7396 list_del(&tr->list);
7398 /* Disable all the flags that were enabled coming in */
7399 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7400 if ((1 << i) & ZEROED_TRACE_FLAGS)
7401 set_tracer_flag(tr, 1 << i, 0);
7404 tracing_set_nop(tr);
7405 event_trace_del_tracer(tr);
7406 ftrace_destroy_function_files(tr);
7407 tracefs_remove_recursive(tr->dir);
7408 free_trace_buffers(tr);
7410 for (i = 0; i < tr->nr_topts; i++) {
7411 kfree(tr->topts[i].topts);
7421 mutex_unlock(&trace_types_lock);
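/*
 * Illustrative use from user space: instances are created and removed
 * with plain mkdir/rmdir under the instances directory:
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 */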
7426 static __init void create_trace_instances(struct dentry *d_tracer)
7428 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7431 if (WARN_ON(!trace_instance_dir))
7436 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7440 trace_create_file("available_tracers", 0444, d_tracer,
7441 tr, &show_traces_fops);
7443 trace_create_file("current_tracer", 0644, d_tracer,
7444 tr, &set_tracer_fops);
7446 trace_create_file("tracing_cpumask", 0644, d_tracer,
7447 tr, &tracing_cpumask_fops);
7449 trace_create_file("trace_options", 0644, d_tracer,
7450 tr, &tracing_iter_fops);
7452 trace_create_file("trace", 0644, d_tracer,
7455 trace_create_file("trace_pipe", 0444, d_tracer,
7456 tr, &tracing_pipe_fops);
7458 trace_create_file("buffer_size_kb", 0644, d_tracer,
7459 tr, &tracing_entries_fops);
7461 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7462 tr, &tracing_total_entries_fops);
7464 trace_create_file("free_buffer", 0200, d_tracer,
7465 tr, &tracing_free_buffer_fops);
7467 trace_create_file("trace_marker", 0220, d_tracer,
7468 tr, &tracing_mark_fops);
7470 trace_create_file("trace_marker_raw", 0220, d_tracer,
7471 tr, &tracing_mark_raw_fops);
7473 trace_create_file("trace_clock", 0644, d_tracer, tr,
7476 trace_create_file("tracing_on", 0644, d_tracer,
7477 tr, &rb_simple_fops);
7479 create_trace_options_dir(tr);
7481 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7482 trace_create_file("tracing_max_latency", 0644, d_tracer,
7483 &tr->max_latency, &tracing_max_lat_fops);
7486 if (ftrace_create_function_files(tr, d_tracer))
7487 WARN(1, "Could not allocate function filter files");
7489 #ifdef CONFIG_TRACER_SNAPSHOT
7490 trace_create_file("snapshot", 0644, d_tracer,
7491 tr, &snapshot_fops);
7494 for_each_tracing_cpu(cpu)
7495 tracing_init_tracefs_percpu(tr, cpu);
7497 ftrace_init_tracefs(tr, d_tracer);
static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
7502 struct vfsmount *mnt;
7503 struct file_system_type *type;
7506 * To maintain backward compatibility for tools that mount
7507 * debugfs to get to the tracing facility, tracefs is automatically
7508 * mounted to the debugfs/tracing directory.
7510 type = get_fs_type("tracefs");
7513 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7514 put_filesystem(type);
7523 * tracing_init_dentry - initialize top level trace array
7525 * This is called when creating files or directories in the tracing
7526 * directory. It is called via fs_initcall() by any of the boot up code
7527 * and expects to return the dentry of the top level tracing directory.
7529 struct dentry *tracing_init_dentry(void)
7531 struct trace_array *tr = &global_trace;
7533 /* The top level trace array uses NULL as parent */
7537 if (WARN_ON(!tracefs_initialized()) ||
7538 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7539 WARN_ON(!debugfs_initialized())))
7540 return ERR_PTR(-ENODEV);
7543 * As there may still be users that expect the tracing
7544 * files to exist in debugfs/tracing, we must automount
7545 * the tracefs file system there, so older tools still
* work with the newer kernel.
7548 tr->dir = debugfs_create_automount("tracing", NULL,
7549 trace_automount, NULL);
7551 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7552 return ERR_PTR(-ENOMEM);
7558 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7559 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7561 static void __init trace_enum_init(void)
7565 len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7566 trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7569 #ifdef CONFIG_MODULES
7570 static void trace_module_add_enums(struct module *mod)
7572 if (!mod->num_trace_enums)
* Modules with bad taint do not have events created; do
* not bother with enums either.
*/
7579 if (trace_module_has_bad_taint(mod))
7582 trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
#ifdef CONFIG_TRACE_ENUM_MAP_FILE
static void trace_module_remove_enums(struct module *mod)
{
	union trace_enum_map_item *map;
	union trace_enum_map_item **last = &trace_enum_maps;

	if (!mod->num_trace_enums)
		return;

	mutex_lock(&trace_enum_mutex);

	map = trace_enum_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_enum_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	*last = trace_enum_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_enum_mutex);
}
#else
static inline void trace_module_remove_enums(struct module *mod) { }
#endif /* CONFIG_TRACE_ENUM_MAP_FILE */
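
/*
 * With CONFIG_TRACE_ENUM_MAP_FILE the collected maps are also exported
 * through the "enum_map" file in tracefs, which is why a module unload
 * must unlink its entries from the list walked above.
 */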
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_enums(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_enums(mod);
		break;
	}

	return 0;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */
static __init int tracer_init_tracefs(void)
{
	struct dentry *d_tracer;

	trace_access_lock_init();

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	init_tracer_tracefs(&global_trace, d_tracer);
	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);

	trace_create_file("tracing_thresh", 0644, d_tracer,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, d_tracer,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, d_tracer,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_enum_init();

	trace_create_enum_file(d_tracer);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif

	create_trace_instances(d_tracer);

	update_tracer_options(&global_trace);

	return 0;
}
static int trace_panic_handler(struct notifier_block *this,
			       unsigned long event, void *unused)
{
	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);
	return NOTIFY_OK;
}

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_panic_handler,
	.next = NULL,
	.priority = 150 /* priority: INT_MAX >= x >= 0 */
};

static int trace_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	switch (val) {
	case DIE_OOPS:
		if (ftrace_dump_on_oops)
			ftrace_dump(ftrace_dump_on_oops);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.priority = 200
};
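
/*
 * Notifiers run in descending priority order, so the relatively high
 * priorities above get the ftrace dump out before most other panic/die
 * handlers have a chance to run.
 */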
/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}
void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->trace_buffer = &global_trace.trace_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}
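
/*
 * ftrace_dump() below can be entered from oops, panic, or sysrq-z
 * context, so it takes no sleeping locks: it serializes callers with
 * the dump_running count, disables the per-cpu buffers by hand, and
 * touches the NMI watchdog while printing.
 */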
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read the
	 * next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		/* reset all but tr, trace, and overruns */
		memset(&iter.seq, 0,
		       sizeof(struct trace_iterator) -
		       offsetof(struct trace_iterator, seq));
		iter.iter_flags |= TRACE_FILE_LAT_FMT;
		iter.pos = -1;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
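
/*
 * A sketch of how other kernel code might use the export above; the
 * condition and flag are hypothetical, and the real cost is that every
 * CPU's buffer is dumped to the console:
 *
 *	if (WARN_ON(device_wedged))
 *		ftrace_dump(DUMP_ALL);
 */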
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocate some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
		WARN_ON(1);
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}
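
/*
 * trace_init() is called from start_kernel(), well before initcalls
 * run, so only the buffers and boot options are set up here; the
 * tracefs files are created later via fs_initcall() at the bottom of
 * this file.
 */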
void __init trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (WARN_ON(!tracepoint_print_iter))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();
	trace_event_init();
}
__init static int clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall(clear_boot_tracer);