1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops
114  * Set it to 1 to dump the buffers of all CPUs, or to 2 to dump only
115  * the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
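/*
 * Usage sketch (illustrative, derived from the comment above): dump all
 * CPU buffers on an oops by booting with "ftrace_dump_on_oops" on the
 * kernel command line, or at run time:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * Use "ftrace_dump_on_oops=orig_cpu" (or echo 2 to the proc file) to
 * dump only the buffer of the CPU that triggered the oops.
 */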
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * than "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
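/*
 * Worked example: the +500 before the divide rounds to the nearest
 * microsecond, e.g.:
 *
 *	ns2usecs(1499) == 1	(1499 + 500 = 1999, / 1000 -> 1)
 *	ns2usecs(1500) == 2	(1500 + 500 = 2000, / 1000 -> 2)
 */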
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
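/*
 * Sketch of the intended caller pattern (a minimal illustration of how
 * the event commit paths use this helper): reserve an event, fill it in,
 * then only commit it if the per-call filter does not discard it:
 *
 *	entry = ring_buffer_event_data(event);
 *	entry->ip = ip;
 *	if (!call_filter_check_discard(call, entry, buffer, event))
 *		__buffer_unlock_commit(buffer, event);
 */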
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_filter_add_remove_task - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * If adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork and tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which would happen on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
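/*
 * Sketch (illustrative only; "my_pid_list", "p_start", "p_next" and
 * "p_stop" are hypothetical stand-ins for what a real user provides):
 * the three helpers above are meant to back a seq_file:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(my_pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(my_pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */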
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always create a new array; the write is an all-or-nothing
492          * operation. A fresh array is built whenever the user adds new
493          * pids, so that if the operation fails, the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been enabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" so it can be used in fast paths such
592  * as the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to the low value of 16384.
613  * If a dump on oops happens, it is much appreciated not to
614  * have to wait for all that output. Anyway, this is configurable
615  * at both boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * Serialize access to the ring buffer.
631  *
632  * The ring buffer serializes readers, but that is only low level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page used for
642  *      splice_read, and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish between read-only and read-consume
648  * access. Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
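/*
 * Sketch of how a reader brackets its access with these primitives
 * (a minimal illustration, not copied from any particular caller):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events of that cpu (or of all cpus when
 *	    cpu == RING_BUFFER_ALL_CPUS) ...
 *	trace_access_unlock(cpu);
 */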
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled but we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
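/*
 * Minimal usage sketch (illustrative): callers normally go through the
 * trace_puts() wrapper macro, but a direct call looks like:
 *
 *	__trace_puts(_THIS_IP_, "hello\n", 6);
 *
 * where @size matches the length of the string being written.
 */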
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string to write to the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, snapshot can not be used when the tracer uses it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * trace_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, this will stop tracing,
939  * basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
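/*
 * Usage sketch (illustrative; assumes the snapshot buffer was allocated
 * beforehand, e.g. with tracing_snapshot_alloc() or
 * "echo 1 > /sys/kernel/debug/tracing/snapshot"):
 *
 *	if (unlikely(some_error_condition))
 *		tracing_snapshot();
 *
 * "some_error_condition" is a placeholder for whatever event the caller
 * wants to capture while tracing continues in the live buffer.
 */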
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer; instead, we resize it because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to trace_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races where it gets disabled but we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
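/*
 * Sketch of the intended pairing with tracing_on() (illustrative):
 * bracket a region of interest so that only the events leading up to
 * tracing_off() remain in the ring buffer for later inspection:
 *
 *	tracing_on();
 *	... code being debugged ...
 *	tracing_off();
 */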
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show the real state of the ring buffer
1087  * @tr: the trace array to check
1088  *
1089  * Shows the real state of the ring buffer: whether it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the enums were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
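/*
 * Run-time example (illustrative): the clock used for timestamps can be
 * switched to any of the names above via the trace_clock file, e.g.:
 *
 *	echo global > /sys/kernel/debug/tracing/trace_clock
 *
 * "local" is the default; "global" is slower but ordered across CPUs.
 */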
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * The parser is not finished with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this task's comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr - tracer
1380  * @tsk - task with the latency
1381  * @cpu - the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 early_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type - the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since this will break it. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
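
/*
 * Illustrative sketch (not part of this file): registering a minimal
 * tracer from an __init path.  Only ->name (and its length) is required
 * by the checks above; the other struct tracer callbacks are optional
 * for registration itself.
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *	};
 *
 *	static int __init example_tracer_init(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(example_tracer_init);
 */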
1661
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
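/*
 * Cache of recently seen task comms: map_pid_to_cmdline and
 * map_cmdline_to_pid translate between a pid and a slot in the
 * saved_cmdlines array (cmdline_num slots of TASK_COMM_LEN bytes
 * each), and cmdline_idx tracks the most recently filled slot so
 * slots are recycled round-robin.
 */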
1715 struct saved_cmdlines_buffer {
1716         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717         unsigned *map_cmdline_to_pid;
1718         unsigned cmdline_num;
1719         int cmdline_idx;
1720         char *saved_cmdlines;
1721 };
1722 static struct saved_cmdlines_buffer *savedcmd;
1723
1724 /* temporarily disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1726
1727 static inline char *get_saved_cmdlines(int idx)
1728 {
1729         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1730 }
1731
1732 static inline void set_cmdline(int idx, const char *cmdline)
1733 {
1734         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1735 }
1736
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738                                     struct saved_cmdlines_buffer *s)
1739 {
1740         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1741                                         GFP_KERNEL);
1742         if (!s->map_cmdline_to_pid)
1743                 return -ENOMEM;
1744
1745         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746         if (!s->saved_cmdlines) {
1747                 kfree(s->map_cmdline_to_pid);
1748                 return -ENOMEM;
1749         }
1750
1751         s->cmdline_idx = 0;
1752         s->cmdline_num = val;
1753         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754                sizeof(s->map_pid_to_cmdline));
1755         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756                val * sizeof(*s->map_cmdline_to_pid));
1757
1758         return 0;
1759 }
1760
1761 static int trace_create_savedcmd(void)
1762 {
1763         int ret;
1764
1765         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1766         if (!savedcmd)
1767                 return -ENOMEM;
1768
1769         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1770         if (ret < 0) {
1771                 kfree(savedcmd);
1772                 savedcmd = NULL;
1773                 return -ENOMEM;
1774         }
1775
1776         return 0;
1777 }
1778
1779 int is_tracing_stopped(void)
1780 {
1781         return global_trace.stop_count;
1782 }
1783
1784 /**
1785  * tracing_start - quick start of the tracer
1786  *
1787  * If tracing is enabled but was stopped by tracing_stop,
1788  * this will start the tracer back up.
1789  */
1790 void tracing_start(void)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         if (tracing_disabled)
1796                 return;
1797
1798         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799         if (--global_trace.stop_count) {
1800                 if (global_trace.stop_count < 0) {
1801                         /* Someone screwed up their debugging */
1802                         WARN_ON_ONCE(1);
1803                         global_trace.stop_count = 0;
1804                 }
1805                 goto out;
1806         }
1807
1808         /* Prevent the buffers from switching */
1809         arch_spin_lock(&global_trace.max_lock);
1810
1811         buffer = global_trace.trace_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_enable(buffer);
1814
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816         buffer = global_trace.max_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819 #endif
1820
1821         arch_spin_unlock(&global_trace.max_lock);
1822
1823  out:
1824         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1825 }
1826
1827 static void tracing_start_tr(struct trace_array *tr)
1828 {
1829         struct ring_buffer *buffer;
1830         unsigned long flags;
1831
1832         if (tracing_disabled)
1833                 return;
1834
1835         /* If global, we need to also start the max tracer */
1836         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837                 return tracing_start();
1838
1839         raw_spin_lock_irqsave(&tr->start_lock, flags);
1840
1841         if (--tr->stop_count) {
1842                 if (tr->stop_count < 0) {
1843                         /* Someone screwed up their debugging */
1844                         WARN_ON_ONCE(1);
1845                         tr->stop_count = 0;
1846                 }
1847                 goto out;
1848         }
1849
1850         buffer = tr->trace_buffer.buffer;
1851         if (buffer)
1852                 ring_buffer_record_enable(buffer);
1853
1854  out:
1855         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1856 }
1857
1858 /**
1859  * tracing_stop - quick stop of the tracer
1860  *
1861  * Light weight way to stop tracing. Use in conjunction with
1862  * tracing_start.
1863  */
1864 void tracing_stop(void)
1865 {
1866         struct ring_buffer *buffer;
1867         unsigned long flags;
1868
1869         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870         if (global_trace.stop_count++)
1871                 goto out;
1872
1873         /* Prevent the buffers from switching */
1874         arch_spin_lock(&global_trace.max_lock);
1875
1876         buffer = global_trace.trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_disable(buffer);
1879
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881         buffer = global_trace.max_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884 #endif
1885
1886         arch_spin_unlock(&global_trace.max_lock);
1887
1888  out:
1889         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1890 }
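
/*
 * Illustrative pairing (not from this file): tracing_stop() and
 * tracing_start() nest via stop_count, so a debugging path can
 * briefly freeze the buffers around an inspection:
 *
 *	tracing_stop();
 *	... dump or inspect the trace buffers ...
 *	tracing_start();
 */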
1891
1892 static void tracing_stop_tr(struct trace_array *tr)
1893 {
1894         struct ring_buffer *buffer;
1895         unsigned long flags;
1896
1897         /* If global, we need to also stop the max tracer */
1898         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899                 return tracing_stop();
1900
1901         raw_spin_lock_irqsave(&tr->start_lock, flags);
1902         if (tr->stop_count++)
1903                 goto out;
1904
1905         buffer = tr->trace_buffer.buffer;
1906         if (buffer)
1907                 ring_buffer_record_disable(buffer);
1908
1909  out:
1910         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1911 }
1912
1913 void trace_stop_cmdline_recording(void);
1914
1915 static int trace_save_cmdline(struct task_struct *tsk)
1916 {
1917         unsigned pid, idx;
1918
1919         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1920                 return 0;
1921
1922         /*
1923          * It's not the end of the world if we don't get
1924          * the lock, but we also don't want to spin
1925          * nor do we want to disable interrupts,
1926          * so if we miss here, then better luck next time.
1927          */
1928         if (!arch_spin_trylock(&trace_cmdline_lock))
1929                 return 0;
1930
1931         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1932         if (idx == NO_CMDLINE_MAP) {
1933                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934
1935                 /*
1936                  * Check whether the cmdline buffer at idx has a pid
1937                  * mapped. We are going to overwrite that entry so we
1938                  * need to clear the map_pid_to_cmdline. Otherwise we
1939                  * would read the new comm for the old pid.
1940                  */
1941                 pid = savedcmd->map_cmdline_to_pid[idx];
1942                 if (pid != NO_CMDLINE_MAP)
1943                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1944
1945                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1946                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1947
1948                 savedcmd->cmdline_idx = idx;
1949         }
1950
1951         set_cmdline(idx, tsk->comm);
1952
1953         arch_spin_unlock(&trace_cmdline_lock);
1954
1955         return 1;
1956 }
1957
1958 static void __trace_find_cmdline(int pid, char comm[])
1959 {
1960         unsigned map;
1961
1962         if (!pid) {
1963                 strcpy(comm, "<idle>");
1964                 return;
1965         }
1966
1967         if (WARN_ON_ONCE(pid < 0)) {
1968                 strcpy(comm, "<XXX>");
1969                 return;
1970         }
1971
1972         if (pid > PID_MAX_DEFAULT) {
1973                 strcpy(comm, "<...>");
1974                 return;
1975         }
1976
1977         map = savedcmd->map_pid_to_cmdline[pid];
1978         if (map != NO_CMDLINE_MAP)
1979                 strcpy(comm, get_saved_cmdlines(map));
1980         else
1981                 strcpy(comm, "<...>");
1982 }
1983
1984 void trace_find_cmdline(int pid, char comm[])
1985 {
1986         preempt_disable();
1987         arch_spin_lock(&trace_cmdline_lock);
1988
1989         __trace_find_cmdline(pid, comm);
1990
1991         arch_spin_unlock(&trace_cmdline_lock);
1992         preempt_enable();
1993 }
1994
1995 void tracing_record_cmdline(struct task_struct *tsk)
1996 {
1997         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1998                 return;
1999
2000         if (!__this_cpu_read(trace_cmdline_save))
2001                 return;
2002
2003         if (trace_save_cmdline(tsk))
2004                 __this_cpu_write(trace_cmdline_save, false);
2005 }
2006
2007 /*
2008  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2009  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2010  * simplifies those functions and keeps them in sync.
2011  */
2012 enum print_line_t trace_handle_return(struct trace_seq *s)
2013 {
2014         return trace_seq_has_overflowed(s) ?
2015                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2016 }
2017 EXPORT_SYMBOL_GPL(trace_handle_return);
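
/*
 * Typical use in an event print handler (illustrative; the concrete
 * handlers live in trace_output.c and the generated event code):
 *
 *	trace_seq_printf(&iter->seq, "...", ...);
 *	return trace_handle_return(&iter->seq);
 */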
2018
2019 void
2020 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2021                              int pc)
2022 {
2023         struct task_struct *tsk = current;
2024
2025         entry->preempt_count            = pc & 0xff;
2026         entry->pid                      = (tsk) ? tsk->pid : 0;
2027         entry->flags =
2028 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2029                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2030 #else
2031                 TRACE_FLAG_IRQS_NOSUPPORT |
2032 #endif
2033                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2034                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2035                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2036                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2037                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2038 }
2039 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
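
/*
 * Illustrative caller pattern (the 'entry' variable is hypothetical):
 * the flags/pc pair is normally captured right at the trace point and
 * handed to this helper to fill in the common fields:
 *
 *	unsigned long flags;
 *	int pc = preempt_count();
 *
 *	local_save_flags(flags);
 *	tracing_generic_entry_update(&entry->ent, flags, pc);
 */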
2040
2041 struct ring_buffer_event *
2042 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2043                           int type,
2044                           unsigned long len,
2045                           unsigned long flags, int pc)
2046 {
2047         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2048 }
2049
2050 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2051 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2052 static int trace_buffered_event_ref;
2053
2054 /**
2055  * trace_buffered_event_enable - enable buffering events
2056  *
2057  * When events are being filtered, it is quicker to use a temporary
2058  * buffer to write the event data into if there's a likely chance
2059  * that it will not be committed. The discard of the ring buffer
2060  * is not as fast as committing, and is much slower than copying
2061  * a commit.
2062  *
2063  * When an event is to be filtered, allocate per cpu buffers to
2064  * write the event data into, and if the event is filtered and discarded
2065  * write the event data into; if the event is filtered and discarded,
2066  * it is simply dropped, otherwise the entire data is committed
2067  * in one shot.
2068 void trace_buffered_event_enable(void)
2069 {
2070         struct ring_buffer_event *event;
2071         struct page *page;
2072         int cpu;
2073
2074         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2075
2076         if (trace_buffered_event_ref++)
2077                 return;
2078
2079         for_each_tracing_cpu(cpu) {
2080                 page = alloc_pages_node(cpu_to_node(cpu),
2081                                         GFP_KERNEL | __GFP_NORETRY, 0);
2082                 if (!page)
2083                         goto failed;
2084
2085                 event = page_address(page);
2086                 memset(event, 0, sizeof(*event));
2087
2088                 per_cpu(trace_buffered_event, cpu) = event;
2089
2090                 preempt_disable();
2091                 if (cpu == smp_processor_id() &&
2092                     this_cpu_read(trace_buffered_event) !=
2093                     per_cpu(trace_buffered_event, cpu))
2094                         WARN_ON_ONCE(1);
2095                 preempt_enable();
2096         }
2097
2098         return;
2099  failed:
2100         trace_buffered_event_disable();
2101 }
2102
2103 static void enable_trace_buffered_event(void *data)
2104 {
2105         /* Probably not needed, but do it anyway */
2106         smp_rmb();
2107         this_cpu_dec(trace_buffered_event_cnt);
2108 }
2109
2110 static void disable_trace_buffered_event(void *data)
2111 {
2112         this_cpu_inc(trace_buffered_event_cnt);
2113 }
2114
2115 /**
2116  * trace_buffered_event_disable - disable buffering events
2117  *
2118  * When a filter is removed, it is faster to not use the buffered
2119  * events, and to commit directly into the ring buffer. Free up
2120  * the temp buffers when there are no more users. This requires
2121  * special synchronization with current events.
2122  */
2123 void trace_buffered_event_disable(void)
2124 {
2125         int cpu;
2126
2127         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2128
2129         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2130                 return;
2131
2132         if (--trace_buffered_event_ref)
2133                 return;
2134
2135         preempt_disable();
2136         /* For each CPU, set the buffer as used. */
2137         smp_call_function_many(tracing_buffer_mask,
2138                                disable_trace_buffered_event, NULL, 1);
2139         preempt_enable();
2140
2141         /* Wait for all current users to finish */
2142         synchronize_sched();
2143
2144         for_each_tracing_cpu(cpu) {
2145                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2146                 per_cpu(trace_buffered_event, cpu) = NULL;
2147         }
2148         /*
2149          * Make sure trace_buffered_event is NULL before clearing
2150          * trace_buffered_event_cnt.
2151          */
2152         smp_wmb();
2153
2154         preempt_disable();
2155         /* Do the work on each cpu */
2156         smp_call_function_many(tracing_buffer_mask,
2157                                enable_trace_buffered_event, NULL, 1);
2158         preempt_enable();
2159 }
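
/*
 * Illustrative pairing (not from this file): both calls are reference
 * counted and expect event_mutex to be held, so a filter attach path
 * enables buffering and the matching detach path disables it again:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 *	...
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */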
2160
2161 static struct ring_buffer *temp_buffer;
2162
2163 struct ring_buffer_event *
2164 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2165                           struct trace_event_file *trace_file,
2166                           int type, unsigned long len,
2167                           unsigned long flags, int pc)
2168 {
2169         struct ring_buffer_event *entry;
2170         int val;
2171
2172         *current_rb = trace_file->tr->trace_buffer.buffer;
2173
2174         if ((trace_file->flags &
2175              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2176             (entry = this_cpu_read(trace_buffered_event))) {
2177                 /* Try to use the per cpu buffer first */
2178                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2179                 if (val == 1) {
2180                         trace_event_setup(entry, type, flags, pc);
2181                         entry->array[0] = len;
2182                         return entry;
2183                 }
2184                 this_cpu_dec(trace_buffered_event_cnt);
2185         }
2186
2187         entry = __trace_buffer_lock_reserve(*current_rb,
2188                                             type, len, flags, pc);
2189         /*
2190          * If tracing is off, but we have triggers enabled
2191          * we still need to look at the event data. Use the temp_buffer
2192          * to store the trace event for the trigger to use. It's recursion
2193          * safe and will not be recorded anywhere.
2194          */
2195         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2196                 *current_rb = temp_buffer;
2197                 entry = __trace_buffer_lock_reserve(*current_rb,
2198                                                     type, len, flags, pc);
2199         }
2200         return entry;
2201 }
2202 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2203
2204 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2205 static DEFINE_MUTEX(tracepoint_printk_mutex);
2206
2207 static void output_printk(struct trace_event_buffer *fbuffer)
2208 {
2209         struct trace_event_call *event_call;
2210         struct trace_event *event;
2211         unsigned long flags;
2212         struct trace_iterator *iter = tracepoint_print_iter;
2213
2214         /* We should never get here if iter is NULL */
2215         if (WARN_ON_ONCE(!iter))
2216                 return;
2217
2218         event_call = fbuffer->trace_file->event_call;
2219         if (!event_call || !event_call->event.funcs ||
2220             !event_call->event.funcs->trace)
2221                 return;
2222
2223         event = &fbuffer->trace_file->event_call->event;
2224
2225         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2226         trace_seq_init(&iter->seq);
2227         iter->ent = fbuffer->entry;
2228         event_call->event.funcs->trace(iter, 0, event);
2229         trace_seq_putc(&iter->seq, 0);
2230         printk("%s", iter->seq.buffer);
2231
2232         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2233 }
2234
2235 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2236                              void __user *buffer, size_t *lenp,
2237                              loff_t *ppos)
2238 {
2239         int save_tracepoint_printk;
2240         int ret;
2241
2242         mutex_lock(&tracepoint_printk_mutex);
2243         save_tracepoint_printk = tracepoint_printk;
2244
2245         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2246
2247         /*
2248          * This will force exiting early, as tracepoint_printk
2249          * is always zero when tracepoint_print_iter is not allocated.
2250          */
2251         if (!tracepoint_print_iter)
2252                 tracepoint_printk = 0;
2253
2254         if (save_tracepoint_printk == tracepoint_printk)
2255                 goto out;
2256
2257         if (tracepoint_printk)
2258                 static_key_enable(&tracepoint_printk_key.key);
2259         else
2260                 static_key_disable(&tracepoint_printk_key.key);
2261
2262  out:
2263         mutex_unlock(&tracepoint_printk_mutex);
2264
2265         return ret;
2266 }
2267
2268 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2269 {
2270         if (static_key_false(&tracepoint_printk_key.key))
2271                 output_printk(fbuffer);
2272
2273         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2274                                     fbuffer->event, fbuffer->entry,
2275                                     fbuffer->flags, fbuffer->pc);
2276 }
2277 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2278
2279 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2280                                      struct ring_buffer *buffer,
2281                                      struct ring_buffer_event *event,
2282                                      unsigned long flags, int pc,
2283                                      struct pt_regs *regs)
2284 {
2285         __buffer_unlock_commit(buffer, event);
2286
2287         /*
2288          * If regs is not set, then skip the following callers:
2289          *   trace_buffer_unlock_commit_regs
2290          *   event_trigger_unlock_commit
2291          *   trace_event_buffer_commit
2292          *   trace_event_raw_event_sched_switch
2293          * Note, we can still get here via blktrace, wakeup tracer
2294          * and mmiotrace, but that's ok if they lose a function or
2295          * two. They are not that meaningful.
2296          */
2297         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2298         ftrace_trace_userstack(buffer, flags, pc);
2299 }
2300
2301 /*
2302  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2303  */
2304 void
2305 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2306                                    struct ring_buffer_event *event)
2307 {
2308         __buffer_unlock_commit(buffer, event);
2309 }
2310
2311 static void
2312 trace_process_export(struct trace_export *export,
2313                struct ring_buffer_event *event)
2314 {
2315         struct trace_entry *entry;
2316         unsigned int size = 0;
2317
2318         entry = ring_buffer_event_data(event);
2319         size = ring_buffer_event_length(event);
2320         export->write(entry, size);
2321 }
2322
2323 static DEFINE_MUTEX(ftrace_export_lock);
2324
2325 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2326
2327 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2328
2329 static inline void ftrace_exports_enable(void)
2330 {
2331         static_branch_enable(&ftrace_exports_enabled);
2332 }
2333
2334 static inline void ftrace_exports_disable(void)
2335 {
2336         static_branch_disable(&ftrace_exports_enabled);
2337 }
2338
2339 void ftrace_exports(struct ring_buffer_event *event)
2340 {
2341         struct trace_export *export;
2342
2343         preempt_disable_notrace();
2344
2345         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2346         while (export) {
2347                 trace_process_export(export, event);
2348                 export = rcu_dereference_raw_notrace(export->next);
2349         }
2350
2351         preempt_enable_notrace();
2352 }
2353
2354 static inline void
2355 add_trace_export(struct trace_export **list, struct trace_export *export)
2356 {
2357         rcu_assign_pointer(export->next, *list);
2358         /*
2359          * We are entering export into the list but another
2360          * CPU might be walking that list. We need to make sure
2361          * the export->next pointer is valid before another CPU sees
2362          * the export pointer included into the list.
2363          */
2364         rcu_assign_pointer(*list, export);
2365 }
2366
2367 static inline int
2368 rm_trace_export(struct trace_export **list, struct trace_export *export)
2369 {
2370         struct trace_export **p;
2371
2372         for (p = list; *p != NULL; p = &(*p)->next)
2373                 if (*p == export)
2374                         break;
2375
2376         if (*p != export)
2377                 return -1;
2378
2379         rcu_assign_pointer(*p, (*p)->next);
2380
2381         return 0;
2382 }
2383
2384 static inline void
2385 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2386 {
2387         if (*list == NULL)
2388                 ftrace_exports_enable();
2389
2390         add_trace_export(list, export);
2391 }
2392
2393 static inline int
2394 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2395 {
2396         int ret;
2397
2398         ret = rm_trace_export(list, export);
2399         if (*list == NULL)
2400                 ftrace_exports_disable();
2401
2402         return ret;
2403 }
2404
2405 int register_ftrace_export(struct trace_export *export)
2406 {
2407         if (WARN_ON_ONCE(!export->write))
2408                 return -1;
2409
2410         mutex_lock(&ftrace_export_lock);
2411
2412         add_ftrace_export(&ftrace_exports_list, export);
2413
2414         mutex_unlock(&ftrace_export_lock);
2415
2416         return 0;
2417 }
2418 EXPORT_SYMBOL_GPL(register_ftrace_export);
2419
2420 int unregister_ftrace_export(struct trace_export *export)
2421 {
2422         int ret;
2423
2424         mutex_lock(&ftrace_export_lock);
2425
2426         ret = rm_ftrace_export(&ftrace_exports_list, export);
2427
2428         mutex_unlock(&ftrace_export_lock);
2429
2430         return ret;
2431 }
2432 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
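
/*
 * Illustrative sketch (not from this file): an ftrace export forwards
 * each function trace entry to a ->write() callback.  The callback
 * name and body are made up here; the exact prototype is the one
 * declared for struct trace_export in <linux/trace.h>.
 *
 *	static void example_export_write(const void *buf, unsigned int len)
 *	{
 *		... push the raw entry to another consumer ...
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */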
2433
2434 void
2435 trace_function(struct trace_array *tr,
2436                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2437                int pc)
2438 {
2439         struct trace_event_call *call = &event_function;
2440         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2441         struct ring_buffer_event *event;
2442         struct ftrace_entry *entry;
2443
2444         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2445                                             flags, pc);
2446         if (!event)
2447                 return;
2448         entry   = ring_buffer_event_data(event);
2449         entry->ip                       = ip;
2450         entry->parent_ip                = parent_ip;
2451
2452         if (!call_filter_check_discard(call, entry, buffer, event)) {
2453                 if (static_branch_unlikely(&ftrace_exports_enabled))
2454                         ftrace_exports(event);
2455                 __buffer_unlock_commit(buffer, event);
2456         }
2457 }
2458
2459 #ifdef CONFIG_STACKTRACE
2460
2461 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2462 struct ftrace_stack {
2463         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2464 };
2465
2466 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2467 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2468
2469 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2470                                  unsigned long flags,
2471                                  int skip, int pc, struct pt_regs *regs)
2472 {
2473         struct trace_event_call *call = &event_kernel_stack;
2474         struct ring_buffer_event *event;
2475         struct stack_entry *entry;
2476         struct stack_trace trace;
2477         int use_stack;
2478         int size = FTRACE_STACK_ENTRIES;
2479
2480         trace.nr_entries        = 0;
2481         trace.skip              = skip;
2482
2483         /*
2484          * Add two, for this function and the call to save_stack_trace()
2485          * If regs is set, then these functions will not be in the way.
2486          */
2487         if (!regs)
2488                 trace.skip += 2;
2489
2490         /*
2491          * Since events can happen in NMIs there's no safe way to
2492          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2493          * or NMI comes in, it will just have to use the default
2494          * FTRACE_STACK_ENTRIES size.
2495          */
2496         preempt_disable_notrace();
2497
2498         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2499         /*
2500          * We don't need any atomic variables, just a barrier.
2501          * If an interrupt comes in, we don't care, because it would
2502          * have exited and put the counter back to what we want.
2503          * We just need a barrier to keep gcc from moving things
2504          * around.
2505          */
2506         barrier();
2507         if (use_stack == 1) {
2508                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2509                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2510
2511                 if (regs)
2512                         save_stack_trace_regs(regs, &trace);
2513                 else
2514                         save_stack_trace(&trace);
2515
2516                 if (trace.nr_entries > size)
2517                         size = trace.nr_entries;
2518         } else
2519                 /* From now on, use_stack is a boolean */
2520                 use_stack = 0;
2521
2522         size *= sizeof(unsigned long);
2523
2524         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2525                                             sizeof(*entry) + size, flags, pc);
2526         if (!event)
2527                 goto out;
2528         entry = ring_buffer_event_data(event);
2529
2530         memset(&entry->caller, 0, size);
2531
2532         if (use_stack)
2533                 memcpy(&entry->caller, trace.entries,
2534                        trace.nr_entries * sizeof(unsigned long));
2535         else {
2536                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2537                 trace.entries           = entry->caller;
2538                 if (regs)
2539                         save_stack_trace_regs(regs, &trace);
2540                 else
2541                         save_stack_trace(&trace);
2542         }
2543
2544         entry->size = trace.nr_entries;
2545
2546         if (!call_filter_check_discard(call, entry, buffer, event))
2547                 __buffer_unlock_commit(buffer, event);
2548
2549  out:
2550         /* Again, don't let gcc optimize things here */
2551         barrier();
2552         __this_cpu_dec(ftrace_stack_reserve);
2553         preempt_enable_notrace();
2554
2555 }
2556
2557 static inline void ftrace_trace_stack(struct trace_array *tr,
2558                                       struct ring_buffer *buffer,
2559                                       unsigned long flags,
2560                                       int skip, int pc, struct pt_regs *regs)
2561 {
2562         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2563                 return;
2564
2565         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2566 }
2567
2568 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2569                    int pc)
2570 {
2571         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2572 }
2573
2574 /**
2575  * trace_dump_stack - record a stack back trace in the trace buffer
2576  * @skip: Number of functions to skip (helper handlers)
2577  */
2578 void trace_dump_stack(int skip)
2579 {
2580         unsigned long flags;
2581
2582         if (tracing_disabled || tracing_selftest_running)
2583                 return;
2584
2585         local_save_flags(flags);
2586
2587         /*
2588          * Skip 3 more, seems to get us at the caller of
2589          * this function.
2590          */
2591         skip += 3;
2592         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2593                              flags, skip, preempt_count(), NULL);
2594 }
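
/*
 * Illustrative use (not from this file): drop trace_dump_stack(0) into
 * code being debugged to record how a given point was reached, without
 * stopping the tracer:
 *
 *	trace_dump_stack(0);
 */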
2595
2596 static DEFINE_PER_CPU(int, user_stack_count);
2597
2598 void
2599 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2600 {
2601         struct trace_event_call *call = &event_user_stack;
2602         struct ring_buffer_event *event;
2603         struct userstack_entry *entry;
2604         struct stack_trace trace;
2605
2606         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2607                 return;
2608
2609         /*
2610          * NMIs can not handle page faults, even with fixups.
2611          * Saving the user stack can (and often does) fault.
2612          */
2613         if (unlikely(in_nmi()))
2614                 return;
2615
2616         /*
2617          * prevent recursion, since the user stack tracing may
2618          * trigger other kernel events.
2619          */
2620         preempt_disable();
2621         if (__this_cpu_read(user_stack_count))
2622                 goto out;
2623
2624         __this_cpu_inc(user_stack_count);
2625
2626         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2627                                             sizeof(*entry), flags, pc);
2628         if (!event)
2629                 goto out_drop_count;
2630         entry   = ring_buffer_event_data(event);
2631
2632         entry->tgid             = current->tgid;
2633         memset(&entry->caller, 0, sizeof(entry->caller));
2634
2635         trace.nr_entries        = 0;
2636         trace.max_entries       = FTRACE_STACK_ENTRIES;
2637         trace.skip              = 0;
2638         trace.entries           = entry->caller;
2639
2640         save_stack_trace_user(&trace);
2641         if (!call_filter_check_discard(call, entry, buffer, event))
2642                 __buffer_unlock_commit(buffer, event);
2643
2644  out_drop_count:
2645         __this_cpu_dec(user_stack_count);
2646  out:
2647         preempt_enable();
2648 }
2649
2650 #ifdef UNUSED
2651 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2652 {
2653         ftrace_trace_userstack(tr, flags, preempt_count());
2654 }
2655 #endif /* UNUSED */
2656
2657 #endif /* CONFIG_STACKTRACE */
2658
2659 /* created for use with alloc_percpu */
2660 struct trace_buffer_struct {
2661         int nesting;
2662         char buffer[4][TRACE_BUF_SIZE];
2663 };
2664
2665 static struct trace_buffer_struct *trace_percpu_buffer;
2666
2667 /*
2668  * This allows for lockless recording.  If we're nested too deeply, then
2669  * this returns NULL.
2670  */
2671 static char *get_trace_buf(void)
2672 {
2673         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2674
2675         if (!buffer || buffer->nesting >= 4)
2676                 return NULL;
2677
2678         return &buffer->buffer[buffer->nesting++][0];
2679 }
2680
2681 static void put_trace_buf(void)
2682 {
2683         this_cpu_dec(trace_percpu_buffer->nesting);
2684 }
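
/*
 * Illustrative pairing, mirroring trace_vbprintk() below: the get/put
 * must run with preemption disabled so the per-cpu nesting counter
 * stays consistent:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		... format up to TRACE_BUF_SIZE bytes into buf ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */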
2685
2686 static int alloc_percpu_trace_buffer(void)
2687 {
2688         struct trace_buffer_struct *buffers;
2689
2690         buffers = alloc_percpu(struct trace_buffer_struct);
2691         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2692                 return -ENOMEM;
2693
2694         trace_percpu_buffer = buffers;
2695         return 0;
2696 }
2697
2698 static int buffers_allocated;
2699
2700 void trace_printk_init_buffers(void)
2701 {
2702         if (buffers_allocated)
2703                 return;
2704
2705         if (alloc_percpu_trace_buffer())
2706                 return;
2707
2708         /* trace_printk() is for debug use only. Don't use it in production. */
2709
2710         pr_warn("\n");
2711         pr_warn("**********************************************************\n");
2712         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2713         pr_warn("**                                                      **\n");
2714         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2715         pr_warn("**                                                      **\n");
2716         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2717         pr_warn("** unsafe for production use.                           **\n");
2718         pr_warn("**                                                      **\n");
2719         pr_warn("** If you see this message and you are not debugging    **\n");
2720         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2721         pr_warn("**                                                      **\n");
2722         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2723         pr_warn("**********************************************************\n");
2724
2725         /* Expand the buffers to set size */
2726         tracing_update_buffers();
2727
2728         buffers_allocated = 1;
2729
2730         /*
2731          * trace_printk_init_buffers() can be called by modules.
2732          * If that happens, then we need to start cmdline recording
2733          * directly here. If the global_trace.trace_buffer.buffer is
2734          * already allocated, then this was called by module code.
2735          */
2736         if (global_trace.trace_buffer.buffer)
2737                 tracing_start_cmdline_record();
2738 }
2739
2740 void trace_printk_start_comm(void)
2741 {
2742         /* Start tracing comms if trace printk is set */
2743         if (!buffers_allocated)
2744                 return;
2745         tracing_start_cmdline_record();
2746 }
2747
2748 static void trace_printk_start_stop_comm(int enabled)
2749 {
2750         if (!buffers_allocated)
2751                 return;
2752
2753         if (enabled)
2754                 tracing_start_cmdline_record();
2755         else
2756                 tracing_stop_cmdline_record();
2757 }
2758
2759 /**
2760  * trace_vbprintk - write binary msg to tracing buffer
2761  * @ip: the address of the caller; @fmt: the format string; @args: va_list of arguments for @fmt
2762  */
2763 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2764 {
2765         struct trace_event_call *call = &event_bprint;
2766         struct ring_buffer_event *event;
2767         struct ring_buffer *buffer;
2768         struct trace_array *tr = &global_trace;
2769         struct bprint_entry *entry;
2770         unsigned long flags;
2771         char *tbuffer;
2772         int len = 0, size, pc;
2773
2774         if (unlikely(tracing_selftest_running || tracing_disabled))
2775                 return 0;
2776
2777         /* Don't pollute graph traces with trace_vprintk internals */
2778         pause_graph_tracing();
2779
2780         pc = preempt_count();
2781         preempt_disable_notrace();
2782
2783         tbuffer = get_trace_buf();
2784         if (!tbuffer) {
2785                 len = 0;
2786                 goto out_nobuffer;
2787         }
2788
2789         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2790
2791         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2792                 goto out;
2793
2794         local_save_flags(flags);
2795         size = sizeof(*entry) + sizeof(u32) * len;
2796         buffer = tr->trace_buffer.buffer;
2797         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2798                                             flags, pc);
2799         if (!event)
2800                 goto out;
2801         entry = ring_buffer_event_data(event);
2802         entry->ip                       = ip;
2803         entry->fmt                      = fmt;
2804
2805         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2806         if (!call_filter_check_discard(call, entry, buffer, event)) {
2807                 __buffer_unlock_commit(buffer, event);
2808                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2809         }
2810
2811 out:
2812         put_trace_buf();
2813
2814 out_nobuffer:
2815         preempt_enable_notrace();
2816         unpause_graph_tracing();
2817
2818         return len;
2819 }
2820 EXPORT_SYMBOL_GPL(trace_vbprintk);
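
/*
 * Illustrative varargs wrapper (hypothetical, mirroring the va_list
 * handling of trace_array_printk() below); callers normally reach
 * trace_vbprintk() through the trace_printk() machinery rather than
 * directly:
 *
 *	static int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */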
2821
2822 static int
2823 __trace_array_vprintk(struct ring_buffer *buffer,
2824                       unsigned long ip, const char *fmt, va_list args)
2825 {
2826         struct trace_event_call *call = &event_print;
2827         struct ring_buffer_event *event;
2828         int len = 0, size, pc;
2829         struct print_entry *entry;
2830         unsigned long flags;
2831         char *tbuffer;
2832
2833         if (tracing_disabled || tracing_selftest_running)
2834                 return 0;
2835
2836         /* Don't pollute graph traces with trace_vprintk internals */
2837         pause_graph_tracing();
2838
2839         pc = preempt_count();
2840         preempt_disable_notrace();
2841
2842
2843         tbuffer = get_trace_buf();
2844         if (!tbuffer) {
2845                 len = 0;
2846                 goto out_nobuffer;
2847         }
2848
2849         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2850
2851         local_save_flags(flags);
2852         size = sizeof(*entry) + len + 1;
2853         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2854                                             flags, pc);
2855         if (!event)
2856                 goto out;
2857         entry = ring_buffer_event_data(event);
2858         entry->ip = ip;
2859
2860         memcpy(&entry->buf, tbuffer, len + 1);
2861         if (!call_filter_check_discard(call, entry, buffer, event)) {
2862                 __buffer_unlock_commit(buffer, event);
2863                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2864         }
2865
2866 out:
2867         put_trace_buf();
2868
2869 out_nobuffer:
2870         preempt_enable_notrace();
2871         unpause_graph_tracing();
2872
2873         return len;
2874 }
2875
2876 int trace_array_vprintk(struct trace_array *tr,
2877                         unsigned long ip, const char *fmt, va_list args)
2878 {
2879         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2880 }
2881
2882 int trace_array_printk(struct trace_array *tr,
2883                        unsigned long ip, const char *fmt, ...)
2884 {
2885         int ret;
2886         va_list ap;
2887
2888         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2889                 return 0;
2890
2891         va_start(ap, fmt);
2892         ret = trace_array_vprintk(tr, ip, fmt, ap);
2893         va_end(ap);
2894         return ret;
2895 }
2896
2897 int trace_array_printk_buf(struct ring_buffer *buffer,
2898                            unsigned long ip, const char *fmt, ...)
2899 {
2900         int ret;
2901         va_list ap;
2902
2903         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2904                 return 0;
2905
2906         va_start(ap, fmt);
2907         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2908         va_end(ap);
2909         return ret;
2910 }
2911
2912 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2913 {
2914         return trace_array_vprintk(&global_trace, ip, fmt, args);
2915 }
2916 EXPORT_SYMBOL_GPL(trace_vprintk);
2917
2918 static void trace_iterator_increment(struct trace_iterator *iter)
2919 {
2920         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2921
2922         iter->idx++;
2923         if (buf_iter)
2924                 ring_buffer_read(buf_iter, NULL);
2925 }
2926
2927 static struct trace_entry *
2928 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2929                 unsigned long *lost_events)
2930 {
2931         struct ring_buffer_event *event;
2932         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2933
2934         if (buf_iter)
2935                 event = ring_buffer_iter_peek(buf_iter, ts);
2936         else
2937                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2938                                          lost_events);
2939
2940         if (event) {
2941                 iter->ent_size = ring_buffer_event_length(event);
2942                 return ring_buffer_event_data(event);
2943         }
2944         iter->ent_size = 0;
2945         return NULL;
2946 }
2947
2948 static struct trace_entry *
2949 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2950                   unsigned long *missing_events, u64 *ent_ts)
2951 {
2952         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2953         struct trace_entry *ent, *next = NULL;
2954         unsigned long lost_events = 0, next_lost = 0;
2955         int cpu_file = iter->cpu_file;
2956         u64 next_ts = 0, ts;
2957         int next_cpu = -1;
2958         int next_size = 0;
2959         int cpu;
2960
2961         /*
2962          * If we are in a per_cpu trace file, don't bother iterating over
2963          * all CPUs and peek at that CPU directly.
2964          */
2965         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2966                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2967                         return NULL;
2968                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2969                 if (ent_cpu)
2970                         *ent_cpu = cpu_file;
2971
2972                 return ent;
2973         }
2974
2975         for_each_tracing_cpu(cpu) {
2976
2977                 if (ring_buffer_empty_cpu(buffer, cpu))
2978                         continue;
2979
2980                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2981
2982                 /*
2983                  * Pick the entry with the smallest timestamp:
2984                  */
2985                 if (ent && (!next || ts < next_ts)) {
2986                         next = ent;
2987                         next_cpu = cpu;
2988                         next_ts = ts;
2989                         next_lost = lost_events;
2990                         next_size = iter->ent_size;
2991                 }
2992         }
2993
2994         iter->ent_size = next_size;
2995
2996         if (ent_cpu)
2997                 *ent_cpu = next_cpu;
2998
2999         if (ent_ts)
3000                 *ent_ts = next_ts;
3001
3002         if (missing_events)
3003                 *missing_events = next_lost;
3004
3005         return next;
3006 }
3007
3008 /* Find the next real entry, without updating the iterator itself */
3009 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3010                                           int *ent_cpu, u64 *ent_ts)
3011 {
3012         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3013 }
3014
3015 /* Find the next real entry, and increment the iterator to the next entry */
3016 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3017 {
3018         iter->ent = __find_next_entry(iter, &iter->cpu,
3019                                       &iter->lost_events, &iter->ts);
3020
3021         if (iter->ent)
3022                 trace_iterator_increment(iter);
3023
3024         return iter->ent ? iter : NULL;
3025 }
3026
3027 static void trace_consume(struct trace_iterator *iter)
3028 {
3029         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3030                             &iter->lost_events);
3031 }
3032
3033 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3034 {
3035         struct trace_iterator *iter = m->private;
3036         int i = (int)*pos;
3037         void *ent;
3038
3039         WARN_ON_ONCE(iter->leftover);
3040
3041         (*pos)++;
3042
3043         /* can't go backwards */
3044         if (iter->idx > i)
3045                 return NULL;
3046
3047         if (iter->idx < 0)
3048                 ent = trace_find_next_entry_inc(iter);
3049         else
3050                 ent = iter;
3051
3052         while (ent && iter->idx < i)
3053                 ent = trace_find_next_entry_inc(iter);
3054
3055         iter->pos = *pos;
3056
3057         return ent;
3058 }
3059
3060 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3061 {
3062         struct ring_buffer_event *event;
3063         struct ring_buffer_iter *buf_iter;
3064         unsigned long entries = 0;
3065         u64 ts;
3066
3067         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3068
3069         buf_iter = trace_buffer_iter(iter, cpu);
3070         if (!buf_iter)
3071                 return;
3072
3073         ring_buffer_iter_reset(buf_iter);
3074
3075         /*
3076          * We could have the case with the max latency tracers
3077          * that a reset never took place on a cpu. This is evidenced
3078          * by the timestamp being before the start of the buffer.
3079          */
3080         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3081                 if (ts >= iter->trace_buffer->time_start)
3082                         break;
3083                 entries++;
3084                 ring_buffer_read(buf_iter, NULL);
3085         }
3086
3087         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3088 }
3089
3090 /*
3091  * The current tracer is copied to avoid global locking
3092  * all around.
3093  */
3094 static void *s_start(struct seq_file *m, loff_t *pos)
3095 {
3096         struct trace_iterator *iter = m->private;
3097         struct trace_array *tr = iter->tr;
3098         int cpu_file = iter->cpu_file;
3099         void *p = NULL;
3100         loff_t l = 0;
3101         int cpu;
3102
3103         /*
3104          * copy the tracer to avoid using a global lock all around.
3105          * iter->trace is a copy of current_trace, the pointer to the
3106          * name may be used instead of a strcmp(), as iter->trace->name
3107          * will point to the same string as current_trace->name.
3108          */
3109         mutex_lock(&trace_types_lock);
3110         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3111                 *iter->trace = *tr->current_trace;
3112         mutex_unlock(&trace_types_lock);
3113
3114 #ifdef CONFIG_TRACER_MAX_TRACE
3115         if (iter->snapshot && iter->trace->use_max_tr)
3116                 return ERR_PTR(-EBUSY);
3117 #endif
3118
3119         if (!iter->snapshot)
3120                 atomic_inc(&trace_record_cmdline_disabled);
3121
3122         if (*pos != iter->pos) {
3123                 iter->ent = NULL;
3124                 iter->cpu = 0;
3125                 iter->idx = -1;
3126
3127                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3128                         for_each_tracing_cpu(cpu)
3129                                 tracing_iter_reset(iter, cpu);
3130                 } else
3131                         tracing_iter_reset(iter, cpu_file);
3132
3133                 iter->leftover = 0;
3134                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3135                         ;
3136
3137         } else {
3138                 /*
3139                  * If we overflowed the seq_file before, then we want
3140                  * to just reuse the trace_seq buffer again.
3141                  */
3142                 if (iter->leftover)
3143                         p = iter;
3144                 else {
3145                         l = *pos - 1;
3146                         p = s_next(m, p, &l);
3147                 }
3148         }
3149
3150         trace_event_read_lock();
3151         trace_access_lock(cpu_file);
3152         return p;
3153 }
3154
3155 static void s_stop(struct seq_file *m, void *p)
3156 {
3157         struct trace_iterator *iter = m->private;
3158
3159 #ifdef CONFIG_TRACER_MAX_TRACE
3160         if (iter->snapshot && iter->trace->use_max_tr)
3161                 return;
3162 #endif
3163
3164         if (!iter->snapshot)
3165                 atomic_dec(&trace_record_cmdline_disabled);
3166
3167         trace_access_unlock(iter->cpu_file);
3168         trace_event_read_unlock();
3169 }
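
/*
 * s_start()/s_next()/s_stop() are the seq_file iteration hooks used by
 * the trace file readers; they are wired into a struct seq_operations
 * together with the corresponding show callback defined later in this
 * file.
 */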
3170
3171 static void
3172 get_total_entries(struct trace_buffer *buf,
3173                   unsigned long *total, unsigned long *entries)
3174 {
3175         unsigned long count;
3176         int cpu;
3177
3178         *total = 0;
3179         *entries = 0;
3180
3181         for_each_tracing_cpu(cpu) {
3182                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3183                 /*
3184                  * If this buffer has skipped entries, then we hold all
3185                  * entries for the trace and we need to ignore the
3186                  * ones before the time stamp.
3187                  */
3188                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3189                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3190                         /* total is the same as the entries */
3191                         *total += count;
3192                 } else
3193                         *total += count +
3194                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3195                 *entries += count;
3196         }
3197 }
3198
3199 static void print_lat_help_header(struct seq_file *m)
3200 {
3201         seq_puts(m, "#                  _------=> CPU#            \n"
3202                     "#                 / _-----=> irqs-off        \n"
3203                     "#                | / _----=> need-resched    \n"
3204                     "#                || / _---=> hardirq/softirq \n"
3205                     "#                ||| / _--=> preempt-depth   \n"
3206                     "#                |||| /     delay            \n"
3207                     "#  cmd     pid   ||||| time  |   caller      \n"
3208                     "#     \\   /      |||||  \\    |   /         \n");
3209 }
3210
3211 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3212 {
3213         unsigned long total;
3214         unsigned long entries;
3215
3216         get_total_entries(buf, &total, &entries);
3217         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3218                    entries, total, num_online_cpus());
3219         seq_puts(m, "#\n");
3220 }
3221
3222 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3223 {
3224         print_event_info(buf, m);
3225         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3226                     "#              | |       |          |         |\n");
3227 }
3228
3229 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3230 {
3231         print_event_info(buf, m);
3232         seq_puts(m, "#                              _-----=> irqs-off\n"
3233                     "#                             / _----=> need-resched\n"
3234                     "#                            | / _---=> hardirq/softirq\n"
3235                     "#                            || / _--=> preempt-depth\n"
3236                     "#                            ||| /     delay\n"
3237                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3238                     "#              | |       |   ||||       |         |\n");
3239 }
3240
3241 void
3242 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3243 {
3244         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3245         struct trace_buffer *buf = iter->trace_buffer;
3246         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3247         struct tracer *type = iter->trace;
3248         unsigned long entries;
3249         unsigned long total;
3250         const char *name = "preemption";
3251
3252         name = type->name;
3253
3254         get_total_entries(buf, &total, &entries);
3255
3256         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3257                    name, UTS_RELEASE);
3258         seq_puts(m, "# -----------------------------------"
3259                  "---------------------------------\n");
3260         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3261                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3262                    nsecs_to_usecs(data->saved_latency),
3263                    entries,
3264                    total,
3265                    buf->cpu,
3266 #if defined(CONFIG_PREEMPT_NONE)
3267                    "server",
3268 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3269                    "desktop",
3270 #elif defined(CONFIG_PREEMPT)
3271                    "preempt",
3272 #else
3273                    "unknown",
3274 #endif
3275                    /* These are reserved for later use */
3276                    0, 0, 0, 0);
3277 #ifdef CONFIG_SMP
3278         seq_printf(m, " #P:%d)\n", num_online_cpus());
3279 #else
3280         seq_puts(m, ")\n");
3281 #endif
3282         seq_puts(m, "#    -----------------\n");
3283         seq_printf(m, "#    | task: %.16s-%d "
3284                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3285                    data->comm, data->pid,
3286                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3287                    data->policy, data->rt_priority);
3288         seq_puts(m, "#    -----------------\n");
3289
3290         if (data->critical_start) {
3291                 seq_puts(m, "#  => started at: ");
3292                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3293                 trace_print_seq(m, &iter->seq);
3294                 seq_puts(m, "\n#  => ended at:   ");
3295                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3296                 trace_print_seq(m, &iter->seq);
3297                 seq_puts(m, "\n#\n");
3298         }
3299
3300         seq_puts(m, "#\n");
3301 }
3302
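/*
 * When buffer overruns were detected, mark in the output where each
 * CPU's buffer begins, so it is clear that earlier events on that CPU
 * may have been lost.
 */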
3303 static void test_cpu_buff_start(struct trace_iterator *iter)
3304 {
3305         struct trace_seq *s = &iter->seq;
3306         struct trace_array *tr = iter->tr;
3307
3308         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3309                 return;
3310
3311         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3312                 return;
3313
3314         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3315                 return;
3316
3317         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3318                 return;
3319
3320         if (iter->started)
3321                 cpumask_set_cpu(iter->cpu, iter->started);
3322
3323         /* Don't print started cpu buffer for the first entry of the trace */
3324         if (iter->idx > 1)
3325                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3326                                 iter->cpu);
3327 }
3328
3329 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3330 {
3331         struct trace_array *tr = iter->tr;
3332         struct trace_seq *s = &iter->seq;
3333         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3334         struct trace_entry *entry;
3335         struct trace_event *event;
3336
3337         entry = iter->ent;
3338
3339         test_cpu_buff_start(iter);
3340
3341         event = ftrace_find_event(entry->type);
3342
3343         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3344                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3345                         trace_print_lat_context(iter);
3346                 else
3347                         trace_print_context(iter);
3348         }
3349
3350         if (trace_seq_has_overflowed(s))
3351                 return TRACE_TYPE_PARTIAL_LINE;
3352
3353         if (event)
3354                 return event->funcs->trace(iter, sym_flags, event);
3355
3356         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3357
3358         return trace_handle_return(s);
3359 }
3360
3361 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3362 {
3363         struct trace_array *tr = iter->tr;
3364         struct trace_seq *s = &iter->seq;
3365         struct trace_entry *entry;
3366         struct trace_event *event;
3367
3368         entry = iter->ent;
3369
3370         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3371                 trace_seq_printf(s, "%d %d %llu ",
3372                                  entry->pid, iter->cpu, iter->ts);
3373
3374         if (trace_seq_has_overflowed(s))
3375                 return TRACE_TYPE_PARTIAL_LINE;
3376
3377         event = ftrace_find_event(entry->type);
3378         if (event)
3379                 return event->funcs->raw(iter, 0, event);
3380
3381         trace_seq_printf(s, "%d ?\n", entry->type);
3382
3383         return trace_handle_return(s);
3384 }
3385
3386 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3387 {
3388         struct trace_array *tr = iter->tr;
3389         struct trace_seq *s = &iter->seq;
3390         unsigned char newline = '\n';
3391         struct trace_entry *entry;
3392         struct trace_event *event;
3393
3394         entry = iter->ent;
3395
3396         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3397                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3398                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3399                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3400                 if (trace_seq_has_overflowed(s))
3401                         return TRACE_TYPE_PARTIAL_LINE;
3402         }
3403
3404         event = ftrace_find_event(entry->type);
3405         if (event) {
3406                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3407                 if (ret != TRACE_TYPE_HANDLED)
3408                         return ret;
3409         }
3410
3411         SEQ_PUT_FIELD(s, newline);
3412
3413         return trace_handle_return(s);
3414 }
3415
3416 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3417 {
3418         struct trace_array *tr = iter->tr;
3419         struct trace_seq *s = &iter->seq;
3420         struct trace_entry *entry;
3421         struct trace_event *event;
3422
3423         entry = iter->ent;
3424
3425         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3426                 SEQ_PUT_FIELD(s, entry->pid);
3427                 SEQ_PUT_FIELD(s, iter->cpu);
3428                 SEQ_PUT_FIELD(s, iter->ts);
3429                 if (trace_seq_has_overflowed(s))
3430                         return TRACE_TYPE_PARTIAL_LINE;
3431         }
3432
3433         event = ftrace_find_event(entry->type);
3434         return event ? event->funcs->binary(iter, 0, event) :
3435                 TRACE_TYPE_HANDLED;
3436 }
3437
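/* Return 1 if there is nothing left to read in the selected CPU buffer(s), 0 otherwise. */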
3438 int trace_empty(struct trace_iterator *iter)
3439 {
3440         struct ring_buffer_iter *buf_iter;
3441         int cpu;
3442
3443         /* If we are looking at one CPU buffer, only check that one */
3444         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3445                 cpu = iter->cpu_file;
3446                 buf_iter = trace_buffer_iter(iter, cpu);
3447                 if (buf_iter) {
3448                         if (!ring_buffer_iter_empty(buf_iter))
3449                                 return 0;
3450                 } else {
3451                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3452                                 return 0;
3453                 }
3454                 return 1;
3455         }
3456
3457         for_each_tracing_cpu(cpu) {
3458                 buf_iter = trace_buffer_iter(iter, cpu);
3459                 if (buf_iter) {
3460                         if (!ring_buffer_iter_empty(buf_iter))
3461                                 return 0;
3462                 } else {
3463                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3464                                 return 0;
3465                 }
3466         }
3467
3468         return 1;
3469 }
3470
3471 /*  Called with trace_event_read_lock() held. */
3472 enum print_line_t print_trace_line(struct trace_iterator *iter)
3473 {
3474         struct trace_array *tr = iter->tr;
3475         unsigned long trace_flags = tr->trace_flags;
3476         enum print_line_t ret;
3477
3478         if (iter->lost_events) {
3479                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3480                                  iter->cpu, iter->lost_events);
3481                 if (trace_seq_has_overflowed(&iter->seq))
3482                         return TRACE_TYPE_PARTIAL_LINE;
3483         }
3484
3485         if (iter->trace && iter->trace->print_line) {
3486                 ret = iter->trace->print_line(iter);
3487                 if (ret != TRACE_TYPE_UNHANDLED)
3488                         return ret;
3489         }
3490
3491         if (iter->ent->type == TRACE_BPUTS &&
3492                         trace_flags & TRACE_ITER_PRINTK &&
3493                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3494                 return trace_print_bputs_msg_only(iter);
3495
3496         if (iter->ent->type == TRACE_BPRINT &&
3497                         trace_flags & TRACE_ITER_PRINTK &&
3498                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3499                 return trace_print_bprintk_msg_only(iter);
3500
3501         if (iter->ent->type == TRACE_PRINT &&
3502                         trace_flags & TRACE_ITER_PRINTK &&
3503                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3504                 return trace_print_printk_msg_only(iter);
3505
3506         if (trace_flags & TRACE_ITER_BIN)
3507                 return print_bin_fmt(iter);
3508
3509         if (trace_flags & TRACE_ITER_HEX)
3510                 return print_hex_fmt(iter);
3511
3512         if (trace_flags & TRACE_ITER_RAW)
3513                 return print_raw_fmt(iter);
3514
3515         return print_trace_fmt(iter);
3516 }
3517
3518 void trace_latency_header(struct seq_file *m)
3519 {
3520         struct trace_iterator *iter = m->private;
3521         struct trace_array *tr = iter->tr;
3522
3523         /* print nothing if the buffers are empty */
3524         if (trace_empty(iter))
3525                 return;
3526
3527         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3528                 print_trace_header(m, iter);
3529
3530         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3531                 print_lat_help_header(m);
3532 }
3533
3534 void trace_default_header(struct seq_file *m)
3535 {
3536         struct trace_iterator *iter = m->private;
3537         struct trace_array *tr = iter->tr;
3538         unsigned long trace_flags = tr->trace_flags;
3539
3540         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3541                 return;
3542
3543         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3544                 /* print nothing if the buffers are empty */
3545                 if (trace_empty(iter))
3546                         return;
3547                 print_trace_header(m, iter);
3548                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3549                         print_lat_help_header(m);
3550         } else {
3551                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3552                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3553                                 print_func_help_header_irq(iter->trace_buffer, m);
3554                         else
3555                                 print_func_help_header(iter->trace_buffer, m);
3556                 }
3557         }
3558 }
3559
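/* Emit a warning header if function tracing disabled itself after detecting a problem. */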
3560 static void test_ftrace_alive(struct seq_file *m)
3561 {
3562         if (!ftrace_is_dead())
3563                 return;
3564         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3565                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3566 }
3567
3568 #ifdef CONFIG_TRACER_MAX_TRACE
3569 static void show_snapshot_main_help(struct seq_file *m)
3570 {
3571         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3572                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3573                     "#                      Takes a snapshot of the main buffer.\n"
3574                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3575                     "#                      (Doesn't have to be '2'; works with any number that\n"
3576                     "#                       is not a '0' or '1')\n");
3577 }
3578
3579 static void show_snapshot_percpu_help(struct seq_file *m)
3580 {
3581         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3582 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3583         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3584                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3585 #else
3586         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3587                     "#                     Must use main snapshot file to allocate.\n");
3588 #endif
3589         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3590                     "#                      (Doesn't have to be '2'; works with any number that\n"
3591                     "#                       is not a '0' or '1')\n");
3592 }
3593
3594 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3595 {
3596         if (iter->tr->allocated_snapshot)
3597                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3598         else
3599                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3600
3601         seq_puts(m, "# Snapshot commands:\n");
3602         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3603                 show_snapshot_main_help(m);
3604         else
3605                 show_snapshot_percpu_help(m);
3606 }
3607 #else
3608 /* Should never be called */
3609 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3610 #endif
3611
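/*
 * seq_file ->show() callback for the "trace" file: prints the header
 * block on the first call (iter->ent == NULL) and one formatted trace
 * line per entry afterwards.
 */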
3612 static int s_show(struct seq_file *m, void *v)
3613 {
3614         struct trace_iterator *iter = v;
3615         int ret;
3616
3617         if (iter->ent == NULL) {
3618                 if (iter->tr) {
3619                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3620                         seq_puts(m, "#\n");
3621                         test_ftrace_alive(m);
3622                 }
3623                 if (iter->snapshot && trace_empty(iter))
3624                         print_snapshot_help(m, iter);
3625                 else if (iter->trace && iter->trace->print_header)
3626                         iter->trace->print_header(m);
3627                 else
3628                         trace_default_header(m);
3629
3630         } else if (iter->leftover) {
3631                 /*
3632                  * If we filled the seq_file buffer earlier, we
3633                  * want to just show it now.
3634                  */
3635                 ret = trace_print_seq(m, &iter->seq);
3636
3637                 /* ret should this time be zero, but you never know */
3638                 iter->leftover = ret;
3639
3640         } else {
3641                 print_trace_line(iter);
3642                 ret = trace_print_seq(m, &iter->seq);
3643                 /*
3644                  * If we overflow the seq_file buffer, then it will
3645                  * ask us for this data again at start up.
3646                  * Use that instead.
3647                  *  ret is 0 if seq_file write succeeded.
3648                  *        -1 otherwise.
3649                  */
3650                 iter->leftover = ret;
3651         }
3652
3653         return 0;
3654 }
3655
3656 /*
3657  * Should be used after trace_array_get(), trace_types_lock
3658  * ensures that i_cdev was already initialized.
3659  */
3660 static inline int tracing_get_cpu(struct inode *inode)
3661 {
3662         if (inode->i_cdev) /* See trace_create_cpu_file() */
3663                 return (long)inode->i_cdev - 1;
3664         return RING_BUFFER_ALL_CPUS;
3665 }
3666
3667 static const struct seq_operations tracer_seq_ops = {
3668         .start          = s_start,
3669         .next           = s_next,
3670         .stop           = s_stop,
3671         .show           = s_show,
3672 };
3673
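/*
 * Set up a trace_iterator for reading the trace buffer through the
 * "trace" (or "snapshot") file, preparing a ring buffer iterator for
 * each CPU that will be read.
 */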
3674 static struct trace_iterator *
3675 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3676 {
3677         struct trace_array *tr = inode->i_private;
3678         struct trace_iterator *iter;
3679         int cpu;
3680
3681         if (tracing_disabled)
3682                 return ERR_PTR(-ENODEV);
3683
3684         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3685         if (!iter)
3686                 return ERR_PTR(-ENOMEM);
3687
3688         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3689                                     GFP_KERNEL);
3690         if (!iter->buffer_iter)
3691                 goto release;
3692
3693         /*
3694          * We make a copy of the current tracer to avoid concurrent
3695          * changes on it while we are reading.
3696          */
3697         mutex_lock(&trace_types_lock);
3698         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3699         if (!iter->trace)
3700                 goto fail;
3701
3702         *iter->trace = *tr->current_trace;
3703
3704         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3705                 goto fail;
3706
3707         iter->tr = tr;
3708
3709 #ifdef CONFIG_TRACER_MAX_TRACE
3710         /* Currently only the top directory has a snapshot */
3711         if (tr->current_trace->print_max || snapshot)
3712                 iter->trace_buffer = &tr->max_buffer;
3713         else
3714 #endif
3715                 iter->trace_buffer = &tr->trace_buffer;
3716         iter->snapshot = snapshot;
3717         iter->pos = -1;
3718         iter->cpu_file = tracing_get_cpu(inode);
3719         mutex_init(&iter->mutex);
3720
3721         /* Notify the tracer early; before we stop tracing. */
3722         if (iter->trace && iter->trace->open)
3723                 iter->trace->open(iter);
3724
3725         /* Annotate start of buffers if we had overruns */
3726         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3727                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3728
3729         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3730         if (trace_clocks[tr->clock_id].in_ns)
3731                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3732
3733         /* stop the trace while dumping if we are not opening "snapshot" */
3734         if (!iter->snapshot)
3735                 tracing_stop_tr(tr);
3736
3737         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3738                 for_each_tracing_cpu(cpu) {
3739                         iter->buffer_iter[cpu] =
3740                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3741                 }
3742                 ring_buffer_read_prepare_sync();
3743                 for_each_tracing_cpu(cpu) {
3744                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3745                         tracing_iter_reset(iter, cpu);
3746                 }
3747         } else {
3748                 cpu = iter->cpu_file;
3749                 iter->buffer_iter[cpu] =
3750                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3751                 ring_buffer_read_prepare_sync();
3752                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3753                 tracing_iter_reset(iter, cpu);
3754         }
3755
3756         mutex_unlock(&trace_types_lock);
3757
3758         return iter;
3759
3760  fail:
3761         mutex_unlock(&trace_types_lock);
3762         kfree(iter->trace);
3763         kfree(iter->buffer_iter);
3764 release:
3765         seq_release_private(inode, file);
3766         return ERR_PTR(-ENOMEM);
3767 }
3768
3769 int tracing_open_generic(struct inode *inode, struct file *filp)
3770 {
3771         if (tracing_disabled)
3772                 return -ENODEV;
3773
3774         filp->private_data = inode->i_private;
3775         return 0;
3776 }
3777
3778 bool tracing_is_disabled(void)
3779 {
3780         return tracing_disabled ? true : false;
3781 }
3782
3783 /*
3784  * Open and update trace_array ref count.
3785  * Must have the current trace_array passed to it.
3786  */
3787 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3788 {
3789         struct trace_array *tr = inode->i_private;
3790
3791         if (tracing_disabled)
3792                 return -ENODEV;
3793
3794         if (trace_array_get(tr) < 0)
3795                 return -ENODEV;
3796
3797         filp->private_data = inode->i_private;
3798
3799         return 0;
3800 }
3801
3802 static int tracing_release(struct inode *inode, struct file *file)
3803 {
3804         struct trace_array *tr = inode->i_private;
3805         struct seq_file *m = file->private_data;
3806         struct trace_iterator *iter;
3807         int cpu;
3808
3809         if (!(file->f_mode & FMODE_READ)) {
3810                 trace_array_put(tr);
3811                 return 0;
3812         }
3813
3814         /* Writes do not use seq_file */
3815         iter = m->private;
3816         mutex_lock(&trace_types_lock);
3817
3818         for_each_tracing_cpu(cpu) {
3819                 if (iter->buffer_iter[cpu])
3820                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3821         }
3822
3823         if (iter->trace && iter->trace->close)
3824                 iter->trace->close(iter);
3825
3826         if (!iter->snapshot)
3827                 /* reenable tracing if it was previously enabled */
3828                 tracing_start_tr(tr);
3829
3830         __trace_array_put(tr);
3831
3832         mutex_unlock(&trace_types_lock);
3833
3834         mutex_destroy(&iter->mutex);
3835         free_cpumask_var(iter->started);
3836         kfree(iter->trace);
3837         kfree(iter->buffer_iter);
3838         seq_release_private(inode, file);
3839
3840         return 0;
3841 }
3842
3843 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3844 {
3845         struct trace_array *tr = inode->i_private;
3846
3847         trace_array_put(tr);
3848         return 0;
3849 }
3850
3851 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3852 {
3853         struct trace_array *tr = inode->i_private;
3854
3855         trace_array_put(tr);
3856
3857         return single_release(inode, file);
3858 }
3859
3860 static int tracing_open(struct inode *inode, struct file *file)
3861 {
3862         struct trace_array *tr = inode->i_private;
3863         struct trace_iterator *iter;
3864         int ret = 0;
3865
3866         if (trace_array_get(tr) < 0)
3867                 return -ENODEV;
3868
3869         /* If this file was open for write, then erase contents */
3870         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3871                 int cpu = tracing_get_cpu(inode);
3872
3873                 if (cpu == RING_BUFFER_ALL_CPUS)
3874                         tracing_reset_online_cpus(&tr->trace_buffer);
3875                 else
3876                         tracing_reset(&tr->trace_buffer, cpu);
3877         }
3878
3879         if (file->f_mode & FMODE_READ) {
3880                 iter = __tracing_open(inode, file, false);
3881                 if (IS_ERR(iter))
3882                         ret = PTR_ERR(iter);
3883                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3884                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3885         }
3886
3887         if (ret < 0)
3888                 trace_array_put(tr);
3889
3890         return ret;
3891 }
3892
3893 /*
3894  * Some tracers are not suitable for instance buffers.
3895  * A tracer is always available for the global array (toplevel)
3896  * or if it explicitly states that it is.
3897  */
3898 static bool
3899 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3900 {
3901         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3902 }
3903
3904 /* Find the next tracer that this trace array may use */
3905 static struct tracer *
3906 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3907 {
3908         while (t && !trace_ok_for_array(t, tr))
3909                 t = t->next;
3910
3911         return t;
3912 }
3913
3914 static void *
3915 t_next(struct seq_file *m, void *v, loff_t *pos)
3916 {
3917         struct trace_array *tr = m->private;
3918         struct tracer *t = v;
3919
3920         (*pos)++;
3921
3922         if (t)
3923                 t = get_tracer_for_array(tr, t->next);
3924
3925         return t;
3926 }
3927
3928 static void *t_start(struct seq_file *m, loff_t *pos)
3929 {
3930         struct trace_array *tr = m->private;
3931         struct tracer *t;
3932         loff_t l = 0;
3933
3934         mutex_lock(&trace_types_lock);
3935
3936         t = get_tracer_for_array(tr, trace_types);
3937         for (; t && l < *pos; t = t_next(m, t, &l))
3938                 ;
3939
3940         return t;
3941 }
3942
3943 static void t_stop(struct seq_file *m, void *p)
3944 {
3945         mutex_unlock(&trace_types_lock);
3946 }
3947
3948 static int t_show(struct seq_file *m, void *v)
3949 {
3950         struct tracer *t = v;
3951
3952         if (!t)
3953                 return 0;
3954
3955         seq_puts(m, t->name);
3956         if (t->next)
3957                 seq_putc(m, ' ');
3958         else
3959                 seq_putc(m, '\n');
3960
3961         return 0;
3962 }
3963
3964 static const struct seq_operations show_traces_seq_ops = {
3965         .start          = t_start,
3966         .next           = t_next,
3967         .stop           = t_stop,
3968         .show           = t_show,
3969 };
3970
3971 static int show_traces_open(struct inode *inode, struct file *file)
3972 {
3973         struct trace_array *tr = inode->i_private;
3974         struct seq_file *m;
3975         int ret;
3976
3977         if (tracing_disabled)
3978                 return -ENODEV;
3979
3980         ret = seq_open(file, &show_traces_seq_ops);
3981         if (ret)
3982                 return ret;
3983
3984         m = file->private_data;
3985         m->private = tr;
3986
3987         return 0;
3988 }
3989
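/*
 * Writes to the "trace" file are accepted but discarded here; the
 * buffer is actually cleared at open time via O_TRUNC (see tracing_open()).
 */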
3990 static ssize_t
3991 tracing_write_stub(struct file *filp, const char __user *ubuf,
3992                    size_t count, loff_t *ppos)
3993 {
3994         return count;
3995 }
3996
3997 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3998 {
3999         int ret;
4000
4001         if (file->f_mode & FMODE_READ)
4002                 ret = seq_lseek(file, offset, whence);
4003         else
4004                 file->f_pos = ret = 0;
4005
4006         return ret;
4007 }
4008
4009 static const struct file_operations tracing_fops = {
4010         .open           = tracing_open,
4011         .read           = seq_read,
4012         .write          = tracing_write_stub,
4013         .llseek         = tracing_lseek,
4014         .release        = tracing_release,
4015 };
4016
4017 static const struct file_operations show_traces_fops = {
4018         .open           = show_traces_open,
4019         .read           = seq_read,
4020         .release        = seq_release,
4021         .llseek         = seq_lseek,
4022 };
4023
4024 /*
4025  * The tracer itself will not take this lock, but still we want
4026  * to provide a consistent cpumask to user-space:
4027  */
4028 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4029
4030 /*
4031  * Temporary storage for the character representation of the
4032  * CPU bitmask (and one more byte for the newline):
4033  */
4034 static char mask_str[NR_CPUS + 1];
4035
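/* Report the set of CPUs currently being traced as a bitmask string. */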
4036 static ssize_t
4037 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4038                      size_t count, loff_t *ppos)
4039 {
4040         struct trace_array *tr = file_inode(filp)->i_private;
4041         int len;
4042
4043         mutex_lock(&tracing_cpumask_update_lock);
4044
4045         len = snprintf(mask_str, count, "%*pb\n",
4046                        cpumask_pr_args(tr->tracing_cpumask));
4047         if (len >= count) {
4048                 count = -EINVAL;
4049                 goto out_err;
4050         }
4051         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4052
4053 out_err:
4054         mutex_unlock(&tracing_cpumask_update_lock);
4055
4056         return count;
4057 }
4058
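/*
 * Update the set of traced CPUs from a user-supplied bitmask, e.g.
 * "echo 3 > tracing_cpumask" limits tracing to CPUs 0 and 1.  Recording
 * is disabled/enabled per CPU for every bit that changes.
 */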
4059 static ssize_t
4060 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4061                       size_t count, loff_t *ppos)
4062 {
4063         struct trace_array *tr = file_inode(filp)->i_private;
4064         cpumask_var_t tracing_cpumask_new;
4065         int err, cpu;
4066
4067         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4068                 return -ENOMEM;
4069
4070         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4071         if (err)
4072                 goto err_unlock;
4073
4074         mutex_lock(&tracing_cpumask_update_lock);
4075
4076         local_irq_disable();
4077         arch_spin_lock(&tr->max_lock);
4078         for_each_tracing_cpu(cpu) {
4079                 /*
4080                  * Increase/decrease the disabled counter if we are
4081                  * about to flip a bit in the cpumask:
4082                  */
4083                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4084                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4085                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4086                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4087                 }
4088                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4089                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4090                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4091                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4092                 }
4093         }
4094         arch_spin_unlock(&tr->max_lock);
4095         local_irq_enable();
4096
4097         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4098
4099         mutex_unlock(&tracing_cpumask_update_lock);
4100         free_cpumask_var(tracing_cpumask_new);
4101
4102         return count;
4103
4104 err_unlock:
4105         free_cpumask_var(tracing_cpumask_new);
4106
4107         return err;
4108 }
4109
4110 static const struct file_operations tracing_cpumask_fops = {
4111         .open           = tracing_open_generic_tr,
4112         .read           = tracing_cpumask_read,
4113         .write          = tracing_cpumask_write,
4114         .release        = tracing_release_generic_tr,
4115         .llseek         = generic_file_llseek,
4116 };
4117
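/* List every trace option (core and tracer-specific); disabled options are prefixed with "no". */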
4118 static int tracing_trace_options_show(struct seq_file *m, void *v)
4119 {
4120         struct tracer_opt *trace_opts;
4121         struct trace_array *tr = m->private;
4122         u32 tracer_flags;
4123         int i;
4124
4125         mutex_lock(&trace_types_lock);
4126         tracer_flags = tr->current_trace->flags->val;
4127         trace_opts = tr->current_trace->flags->opts;
4128
4129         for (i = 0; trace_options[i]; i++) {
4130                 if (tr->trace_flags & (1 << i))
4131                         seq_printf(m, "%s\n", trace_options[i]);
4132                 else
4133                         seq_printf(m, "no%s\n", trace_options[i]);
4134         }
4135
4136         for (i = 0; trace_opts[i].name; i++) {
4137                 if (tracer_flags & trace_opts[i].bit)
4138                         seq_printf(m, "%s\n", trace_opts[i].name);
4139                 else
4140                         seq_printf(m, "no%s\n", trace_opts[i].name);
4141         }
4142         mutex_unlock(&trace_types_lock);
4143
4144         return 0;
4145 }
4146
4147 static int __set_tracer_option(struct trace_array *tr,
4148                                struct tracer_flags *tracer_flags,
4149                                struct tracer_opt *opts, int neg)
4150 {
4151         struct tracer *trace = tracer_flags->trace;
4152         int ret;
4153
4154         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4155         if (ret)
4156                 return ret;
4157
4158         if (neg)
4159                 tracer_flags->val &= ~opts->bit;
4160         else
4161                 tracer_flags->val |= opts->bit;
4162         return 0;
4163 }
4164
4165 /* Try to assign a tracer specific option */
4166 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4167 {
4168         struct tracer *trace = tr->current_trace;
4169         struct tracer_flags *tracer_flags = trace->flags;
4170         struct tracer_opt *opts = NULL;
4171         int i;
4172
4173         for (i = 0; tracer_flags->opts[i].name; i++) {
4174                 opts = &tracer_flags->opts[i];
4175
4176                 if (strcmp(cmp, opts->name) == 0)
4177                         return __set_tracer_option(tr, trace->flags, opts, neg);
4178         }
4179
4180         return -EINVAL;
4181 }
4182
4183 /* Some tracers require overwrite to stay enabled */
4184 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4185 {
4186         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4187                 return -1;
4188
4189         return 0;
4190 }
4191
4192 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4193 {
4194         /* do nothing if flag is already set */
4195         if (!!(tr->trace_flags & mask) == !!enabled)
4196                 return 0;
4197
4198         /* Give the tracer a chance to approve the change */
4199         if (tr->current_trace->flag_changed)
4200                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4201                         return -EINVAL;
4202
4203         if (enabled)
4204                 tr->trace_flags |= mask;
4205         else
4206                 tr->trace_flags &= ~mask;
4207
4208         if (mask == TRACE_ITER_RECORD_CMD)
4209                 trace_event_enable_cmd_record(enabled);
4210
4211         if (mask == TRACE_ITER_EVENT_FORK)
4212                 trace_event_follow_fork(tr, enabled);
4213
4214         if (mask == TRACE_ITER_FUNC_FORK)
4215                 ftrace_pid_follow_fork(tr, enabled);
4216
4217         if (mask == TRACE_ITER_OVERWRITE) {
4218                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4219 #ifdef CONFIG_TRACER_MAX_TRACE
4220                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4221 #endif
4222         }
4223
4224         if (mask == TRACE_ITER_PRINTK) {
4225                 trace_printk_start_stop_comm(enabled);
4226                 trace_printk_control(enabled);
4227         }
4228
4229         return 0;
4230 }
4231
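/*
 * Apply a single option name, optionally prefixed with "no" to clear it,
 * e.g. "noprint-parent".  Core options are tried first, then the current
 * tracer's own options.
 */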
4232 static int trace_set_options(struct trace_array *tr, char *option)
4233 {
4234         char *cmp;
4235         int neg = 0;
4236         int ret = -ENODEV;
4237         int i;
4238         size_t orig_len = strlen(option);
4239
4240         cmp = strstrip(option);
4241
4242         if (strncmp(cmp, "no", 2) == 0) {
4243                 neg = 1;
4244                 cmp += 2;
4245         }
4246
4247         mutex_lock(&trace_types_lock);
4248
4249         for (i = 0; trace_options[i]; i++) {
4250                 if (strcmp(cmp, trace_options[i]) == 0) {
4251                         ret = set_tracer_flag(tr, 1 << i, !neg);
4252                         break;
4253                 }
4254         }
4255
4256         /* If no option could be set, test the specific tracer options */
4257         if (!trace_options[i])
4258                 ret = set_tracer_option(tr, cmp, neg);
4259
4260         mutex_unlock(&trace_types_lock);
4261
4262         /*
4263          * If the first trailing whitespace is replaced with '\0' by strstrip,
4264          * turn it back into a space.
4265          */
4266         if (orig_len > strlen(option))
4267                 option[strlen(option)] = ' ';
4268
4269         return ret;
4270 }
4271
4272 static void __init apply_trace_boot_options(void)
4273 {
4274         char *buf = trace_boot_options_buf;
4275         char *option;
4276
4277         while (true) {
4278                 option = strsep(&buf, ",");
4279
4280                 if (!option)
4281                         break;
4282
4283                 if (*option)
4284                         trace_set_options(&global_trace, option);
4285
4286                 /* Put back the comma to allow this to be called again */
4287                 if (buf)
4288                         *(buf - 1) = ',';
4289         }
4290 }
4291
4292 static ssize_t
4293 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4294                         size_t cnt, loff_t *ppos)
4295 {
4296         struct seq_file *m = filp->private_data;
4297         struct trace_array *tr = m->private;
4298         char buf[64];
4299         int ret;
4300
4301         if (cnt >= sizeof(buf))
4302                 return -EINVAL;
4303
4304         if (copy_from_user(buf, ubuf, cnt))
4305                 return -EFAULT;
4306
4307         buf[cnt] = 0;
4308
4309         ret = trace_set_options(tr, buf);
4310         if (ret < 0)
4311                 return ret;
4312
4313         *ppos += cnt;
4314
4315         return cnt;
4316 }
4317
4318 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4319 {
4320         struct trace_array *tr = inode->i_private;
4321         int ret;
4322
4323         if (tracing_disabled)
4324                 return -ENODEV;
4325
4326         if (trace_array_get(tr) < 0)
4327                 return -ENODEV;
4328
4329         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4330         if (ret < 0)
4331                 trace_array_put(tr);
4332
4333         return ret;
4334 }
4335
4336 static const struct file_operations tracing_iter_fops = {
4337         .open           = tracing_trace_options_open,
4338         .read           = seq_read,
4339         .llseek         = seq_lseek,
4340         .release        = tracing_single_release_tr,
4341         .write          = tracing_trace_options_write,
4342 };
4343
4344 static const char readme_msg[] =
4345         "tracing mini-HOWTO:\n\n"
4346         "# echo 0 > tracing_on : quick way to disable tracing\n"
4347         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4348         " Important files:\n"
4349         "  trace\t\t\t- The static contents of the buffer\n"
4350         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
4351         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4352         "  current_tracer\t- function and latency tracers\n"
4353         "  available_tracers\t- list of configured tracers for current_tracer\n"
4354         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4355         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4356         "  trace_clock\t\t- change the clock used to order events\n"
4357         "       local:   Per cpu clock but may not be synced across CPUs\n"
4358         "      global:   Synced across CPUs but slows tracing down.\n"
4359         "     counter:   Not a clock, but just an increment\n"
4360         "      uptime:   Jiffy counter from time of boot\n"
4361         "        perf:   Same clock that perf events use\n"
4362 #ifdef CONFIG_X86_64
4363         "     x86-tsc:   TSC cycle counter\n"
4364 #endif
4365         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4366         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4367         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4368         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4369         "\t\t\t  Remove sub-buffer with rmdir\n"
4370         "  trace_options\t\t- Set format or modify how tracing happens\n"
4371         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4372         "\t\t\t  option name\n"
4373         "  saved_cmdlines_size\t- echo the number of comm-pid entries to keep in here\n"
4374 #ifdef CONFIG_DYNAMIC_FTRACE
4375         "\n  available_filter_functions - list of functions that can be filtered on\n"
4376         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4377         "\t\t\t  functions\n"
4378         "\t     accepts: func_full_name or glob-matching-pattern\n"
4379         "\t     modules: Can select a group via module\n"
4380         "\t      Format: :mod:<module-name>\n"
4381         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4382         "\t    triggers: a command to perform when function is hit\n"
4383         "\t      Format: <function>:<trigger>[:count]\n"
4384         "\t     trigger: traceon, traceoff\n"
4385         "\t\t      enable_event:<system>:<event>\n"
4386         "\t\t      disable_event:<system>:<event>\n"
4387 #ifdef CONFIG_STACKTRACE
4388         "\t\t      stacktrace\n"
4389 #endif
4390 #ifdef CONFIG_TRACER_SNAPSHOT
4391         "\t\t      snapshot\n"
4392 #endif
4393         "\t\t      dump\n"
4394         "\t\t      cpudump\n"
4395         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4396         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4397         "\t     The first one will disable tracing every time do_fault is hit\n"
4398         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4399         "\t       The first time do_trap is hit and it disables tracing, the\n"
4400         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4401         "\t       the counter will not decrement. It only decrements when the\n"
4402         "\t       trigger did work\n"
4403         "\t     To remove a trigger without a count:\n"
4404         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4405         "\t     To remove a trigger with a count:\n"
4406         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4407         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4408         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4409         "\t    modules: Can select a group via module command :mod:\n"
4410         "\t    Does not accept triggers\n"
4411 #endif /* CONFIG_DYNAMIC_FTRACE */
4412 #ifdef CONFIG_FUNCTION_TRACER
4413         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4414         "\t\t    (function)\n"
4415 #endif
4416 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4417         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4418         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4419         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4420 #endif
4421 #ifdef CONFIG_TRACER_SNAPSHOT
4422         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4423         "\t\t\t  snapshot buffer. Read the contents for more\n"
4424         "\t\t\t  information\n"
4425 #endif
4426 #ifdef CONFIG_STACK_TRACER
4427         "  stack_trace\t\t- Shows the max stack trace when active\n"
4428         "  stack_max_size\t- Shows current max stack size that was traced\n"
4429         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4430         "\t\t\t  new trace)\n"
4431 #ifdef CONFIG_DYNAMIC_FTRACE
4432         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4433         "\t\t\t  traces\n"
4434 #endif
4435 #endif /* CONFIG_STACK_TRACER */
4436 #ifdef CONFIG_KPROBE_EVENTS
4437         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4438         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4439 #endif
4440 #ifdef CONFIG_UPROBE_EVENTS
4441         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4442         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4443 #endif
4444 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4445         "\t  accepts: event-definitions (one definition per line)\n"
4446         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4447         "\t           -:[<group>/]<event>\n"
4448 #ifdef CONFIG_KPROBE_EVENTS
4449         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4450 #endif
4451 #ifdef CONFIG_UPROBE_EVENTS
4452         "\t    place: <path>:<offset>\n"
4453 #endif
4454         "\t     args: <name>=fetcharg[:type]\n"
4455         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4456         "\t           $stack<index>, $stack, $retval, $comm\n"
4457         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4458         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4459 #endif
4460         "  events/\t\t- Directory containing all trace event subsystems:\n"
4461         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4462         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4463         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4464         "\t\t\t  events\n"
4465         "      filter\t\t- If set, only events passing filter are traced\n"
4466         "  events/<system>/<event>/\t- Directory containing control files for\n"
4467         "\t\t\t  <event>:\n"
4468         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4469         "      filter\t\t- If set, only events passing filter are traced\n"
4470         "      trigger\t\t- If set, a command to perform when event is hit\n"
4471         "\t    Format: <trigger>[:count][if <filter>]\n"
4472         "\t   trigger: traceon, traceoff\n"
4473         "\t            enable_event:<system>:<event>\n"
4474         "\t            disable_event:<system>:<event>\n"
4475 #ifdef CONFIG_HIST_TRIGGERS
4476         "\t            enable_hist:<system>:<event>\n"
4477         "\t            disable_hist:<system>:<event>\n"
4478 #endif
4479 #ifdef CONFIG_STACKTRACE
4480         "\t\t    stacktrace\n"
4481 #endif
4482 #ifdef CONFIG_TRACER_SNAPSHOT
4483         "\t\t    snapshot\n"
4484 #endif
4485 #ifdef CONFIG_HIST_TRIGGERS
4486         "\t\t    hist (see below)\n"
4487 #endif
4488         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4489         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4490         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4491         "\t                  events/block/block_unplug/trigger\n"
4492         "\t   The first disables tracing every time block_unplug is hit.\n"
4493         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4494         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4495         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4496         "\t   Like function triggers, the counter is only decremented if it\n"
4497         "\t    enabled or disabled tracing.\n"
4498         "\t   To remove a trigger without a count:\n"
4499         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4500         "\t   To remove a trigger with a count:\n"
4501         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4502         "\t   Filters can be ignored when removing a trigger.\n"
4503 #ifdef CONFIG_HIST_TRIGGERS
4504         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4505         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4506         "\t            [:values=<field1[,field2,...]>]\n"
4507         "\t            [:sort=<field1[,field2,...]>]\n"
4508         "\t            [:size=#entries]\n"
4509         "\t            [:pause][:continue][:clear]\n"
4510         "\t            [:name=histname1]\n"
4511         "\t            [if <filter>]\n\n"
4512         "\t    When a matching event is hit, an entry is added to a hash\n"
4513         "\t    table using the key(s) and value(s) named, and the value of a\n"
4514         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4515         "\t    correspond to fields in the event's format description.  Keys\n"
4516         "\t    can be any field, or the special string 'stacktrace'.\n"
4517         "\t    Compound keys consisting of up to two fields can be specified\n"
4518         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4519         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4520         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4521         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4522         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4523         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4524         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4525         "\t    its histogram data will be shared with other triggers of the\n"
4526         "\t    same name, and trigger hits will update this common data.\n\n"
4527         "\t    Reading the 'hist' file for the event will dump the hash\n"
4528         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4529         "\t    triggers attached to an event, there will be a table for each\n"
4530         "\t    trigger in the output.  The table displayed for a named\n"
4531         "\t    trigger will be the same as any other instance having the\n"
4532         "\t    same name.  The default format used to display a given field\n"
4533         "\t    can be modified by appending any of the following modifiers\n"
4534         "\t    to the field name, as applicable:\n\n"
4535         "\t            .hex        display a number as a hex value\n"
4536         "\t            .sym        display an address as a symbol\n"
4537         "\t            .sym-offset display an address as a symbol and offset\n"
4538         "\t            .execname   display a common_pid as a program name\n"
4539         "\t            .syscall    display a syscall id as a syscall name\n"
4540         "\t            .log2       display log2 value rather than raw number\n\n"
4541         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4542         "\t    trigger or to start a hist trigger but not log any events\n"
4543         "\t    until told to do so.  'continue' can be used to start or\n"
4544         "\t    restart a paused hist trigger.\n\n"
4545         "\t    The 'clear' parameter will clear the contents of a running\n"
4546         "\t    hist trigger and leave its current paused/active state\n"
4547         "\t    unchanged.\n\n"
4548         "\t    The enable_hist and disable_hist triggers can be used to\n"
4549         "\t    have one event conditionally start and stop another event's\n"
4550         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4551         "\t    the enable_event and disable_event triggers.\n"
4552 #endif
4553 ;
4554
4555 static ssize_t
4556 tracing_readme_read(struct file *filp, char __user *ubuf,
4557                        size_t cnt, loff_t *ppos)
4558 {
4559         return simple_read_from_buffer(ubuf, cnt, ppos,
4560                                         readme_msg, strlen(readme_msg));
4561 }
4562
4563 static const struct file_operations tracing_readme_fops = {
4564         .open           = tracing_open_generic,
4565         .read           = tracing_readme_read,
4566         .llseek         = generic_file_llseek,
4567 };
4568
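/* seq_file iterator over the saved pid -> comm mappings (the saved_cmdlines file). */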
4569 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4570 {
4571         unsigned int *ptr = v;
4572
4573         if (*pos || m->count)
4574                 ptr++;
4575
4576         (*pos)++;
4577
4578         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4579              ptr++) {
4580                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4581                         continue;
4582
4583                 return ptr;
4584         }
4585
4586         return NULL;
4587 }
4588
4589 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4590 {
4591         void *v;
4592         loff_t l = 0;
4593
4594         preempt_disable();
4595         arch_spin_lock(&trace_cmdline_lock);
4596
4597         v = &savedcmd->map_cmdline_to_pid[0];
4598         while (l <= *pos) {
4599                 v = saved_cmdlines_next(m, v, &l);
4600                 if (!v)
4601                         return NULL;
4602         }
4603
4604         return v;
4605 }
4606
4607 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4608 {
4609         arch_spin_unlock(&trace_cmdline_lock);
4610         preempt_enable();
4611 }
4612
4613 static int saved_cmdlines_show(struct seq_file *m, void *v)
4614 {
4615         char buf[TASK_COMM_LEN];
4616         unsigned int *pid = v;
4617
4618         __trace_find_cmdline(*pid, buf);
4619         seq_printf(m, "%d %s\n", *pid, buf);
4620         return 0;
4621 }
4622
4623 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4624         .start          = saved_cmdlines_start,
4625         .next           = saved_cmdlines_next,
4626         .stop           = saved_cmdlines_stop,
4627         .show           = saved_cmdlines_show,
4628 };
4629
4630 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4631 {
4632         if (tracing_disabled)
4633                 return -ENODEV;
4634
4635         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4636 }
4637
4638 static const struct file_operations tracing_saved_cmdlines_fops = {
4639         .open           = tracing_saved_cmdlines_open,
4640         .read           = seq_read,
4641         .llseek         = seq_lseek,
4642         .release        = seq_release,
4643 };
4644
4645 static ssize_t
4646 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4647                                  size_t cnt, loff_t *ppos)
4648 {
4649         char buf[64];
4650         int r;
4651
4652         arch_spin_lock(&trace_cmdline_lock);
4653         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4654         arch_spin_unlock(&trace_cmdline_lock);
4655
4656         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4657 }
4658
4659 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4660 {
4661         kfree(s->saved_cmdlines);
4662         kfree(s->map_cmdline_to_pid);
4663         kfree(s);
4664 }
4665
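/* Replace the saved-cmdlines buffer with a newly allocated one holding 'val' entries. */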
4666 static int tracing_resize_saved_cmdlines(unsigned int val)
4667 {
4668         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4669
4670         s = kmalloc(sizeof(*s), GFP_KERNEL);
4671         if (!s)
4672                 return -ENOMEM;
4673
4674         if (allocate_cmdlines_buffer(val, s) < 0) {
4675                 kfree(s);
4676                 return -ENOMEM;
4677         }
4678
4679         arch_spin_lock(&trace_cmdline_lock);
4680         savedcmd_temp = savedcmd;
4681         savedcmd = s;
4682         arch_spin_unlock(&trace_cmdline_lock);
4683         free_saved_cmdlines_buffer(savedcmd_temp);
4684
4685         return 0;
4686 }
4687
4688 static ssize_t
4689 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4690                                   size_t cnt, loff_t *ppos)
4691 {
4692         unsigned long val;
4693         int ret;
4694
4695         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4696         if (ret)
4697                 return ret;
4698
4699         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
4700         if (!val || val > PID_MAX_DEFAULT)
4701                 return -EINVAL;
4702
4703         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4704         if (ret < 0)
4705                 return ret;
4706
4707         *ppos += cnt;
4708
4709         return cnt;
4710 }
4711
4712 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4713         .open           = tracing_open_generic,
4714         .read           = tracing_saved_cmdlines_size_read,
4715         .write          = tracing_saved_cmdlines_size_write,
4716 };
4717
4718 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4719 static union trace_enum_map_item *
4720 update_enum_map(union trace_enum_map_item *ptr)
4721 {
4722         if (!ptr->map.enum_string) {
4723                 if (ptr->tail.next) {
4724                         ptr = ptr->tail.next;
4725                         /* Set ptr to the next real item (skip head) */
4726                         ptr++;
4727                 } else
4728                         return NULL;
4729         }
4730         return ptr;
4731 }
4732
4733 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4734 {
4735         union trace_enum_map_item *ptr = v;
4736
4737         /*
4738          * Paranoid! If ptr points to end, we don't want to increment past it.
4739          * This really should never happen.
4740          */
4741         ptr = update_enum_map(ptr);
4742         if (WARN_ON_ONCE(!ptr))
4743                 return NULL;
4744
4745         ptr++;
4746
4747         (*pos)++;
4748
4749         ptr = update_enum_map(ptr);
4750
4751         return ptr;
4752 }
4753
4754 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4755 {
4756         union trace_enum_map_item *v;
4757         loff_t l = 0;
4758
4759         mutex_lock(&trace_enum_mutex);
4760
4761         v = trace_enum_maps;
4762         if (v)
4763                 v++;
4764
4765         while (v && l < *pos) {
4766                 v = enum_map_next(m, v, &l);
4767         }
4768
4769         return v;
4770 }
4771
4772 static void enum_map_stop(struct seq_file *m, void *v)
4773 {
4774         mutex_unlock(&trace_enum_mutex);
4775 }
4776
4777 static int enum_map_show(struct seq_file *m, void *v)
4778 {
4779         union trace_enum_map_item *ptr = v;
4780
4781         seq_printf(m, "%s %ld (%s)\n",
4782                    ptr->map.enum_string, ptr->map.enum_value,
4783                    ptr->map.system);
4784
4785         return 0;
4786 }
4787
4788 static const struct seq_operations tracing_enum_map_seq_ops = {
4789         .start          = enum_map_start,
4790         .next           = enum_map_next,
4791         .stop           = enum_map_stop,
4792         .show           = enum_map_show,
4793 };
4794
4795 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4796 {
4797         if (tracing_disabled)
4798                 return -ENODEV;
4799
4800         return seq_open(filp, &tracing_enum_map_seq_ops);
4801 }
4802
4803 static const struct file_operations tracing_enum_map_fops = {
4804         .open           = tracing_enum_map_open,
4805         .read           = seq_read,
4806         .llseek         = seq_lseek,
4807         .release        = seq_release,
4808 };
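
/*
 * Illustrative output format of the file backed by tracing_enum_map_fops,
 * one line per map as produced by enum_map_show() above:
 *
 *	<enum_string> <enum_value> (<system>)
 *
 * e.g. an entry with enum_string "HI_SOFTIRQ", value 0 and system "irq"
 * would print as "HI_SOFTIRQ 0 (irq)".  The names are only an example of
 * the format, not values taken from this file.
 */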
4809
4810 static inline union trace_enum_map_item *
4811 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4812 {
4813         /* Return tail of array given the head */
4814         return ptr + ptr->head.length + 1;
4815 }
4816
4817 static void
4818 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4819                            int len)
4820 {
4821         struct trace_enum_map **stop;
4822         struct trace_enum_map **map;
4823         union trace_enum_map_item *map_array;
4824         union trace_enum_map_item *ptr;
4825
4826         stop = start + len;
4827
4828         /*
4829          * The trace_enum_maps contains the map plus a head and tail item,
4830          * where the head holds the module and length of array, and the
4831          * tail holds a pointer to the next list.
4832          */
4833         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4834         if (!map_array) {
4835                 pr_warn("Unable to allocate trace enum mapping\n");
4836                 return;
4837         }
4838
4839         mutex_lock(&trace_enum_mutex);
4840
4841         if (!trace_enum_maps)
4842                 trace_enum_maps = map_array;
4843         else {
4844                 ptr = trace_enum_maps;
4845                 for (;;) {
4846                         ptr = trace_enum_jmp_to_tail(ptr);
4847                         if (!ptr->tail.next)
4848                                 break;
4849                         ptr = ptr->tail.next;
4850
4851                 }
4852                 ptr->tail.next = map_array;
4853         }
4854         map_array->head.mod = mod;
4855         map_array->head.length = len;
4856         map_array++;
4857
4858         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4859                 map_array->map = **map;
4860                 map_array++;
4861         }
4862         memset(map_array, 0, sizeof(*map_array));
4863
4864         mutex_unlock(&trace_enum_mutex);
4865 }
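
/*
 * Illustrative layout of one map_array allocation made above
 * (len + 2 union entries):
 *
 *	[0]        head:  .mod = mod, .length = len
 *	[1..len]   map:   one struct trace_enum_map copied per entry
 *	[len + 1]  tail:  zeroed; .next later links to the next allocation
 *
 * trace_enum_jmp_to_tail() jumps from the head to index len + 1, which is
 * how the per-module arrays are walked as a list.
 */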
4866
4867 static void trace_create_enum_file(struct dentry *d_tracer)
4868 {
4869         trace_create_file("enum_map", 0444, d_tracer,
4870                           NULL, &tracing_enum_map_fops);
4871 }
4872
4873 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4874 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4875 static inline void trace_insert_enum_map_file(struct module *mod,
4876                               struct trace_enum_map **start, int len) { }
4877 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4878
4879 static void trace_insert_enum_map(struct module *mod,
4880                                   struct trace_enum_map **start, int len)
4881 {
4882         struct trace_enum_map **map;
4883
4884         if (len <= 0)
4885                 return;
4886
4887         map = start;
4888
4889         trace_event_enum_update(map, len);
4890
4891         trace_insert_enum_map_file(mod, start, len);
4892 }
4893
4894 static ssize_t
4895 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4896                        size_t cnt, loff_t *ppos)
4897 {
4898         struct trace_array *tr = filp->private_data;
4899         char buf[MAX_TRACER_SIZE+2];
4900         int r;
4901
4902         mutex_lock(&trace_types_lock);
4903         r = sprintf(buf, "%s\n", tr->current_trace->name);
4904         mutex_unlock(&trace_types_lock);
4905
4906         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4907 }
4908
4909 int tracer_init(struct tracer *t, struct trace_array *tr)
4910 {
4911         tracing_reset_online_cpus(&tr->trace_buffer);
4912         return t->init(tr);
4913 }
4914
4915 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4916 {
4917         int cpu;
4918
4919         for_each_tracing_cpu(cpu)
4920                 per_cpu_ptr(buf->data, cpu)->entries = val;
4921 }
4922
4923 #ifdef CONFIG_TRACER_MAX_TRACE
4924 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4925 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4926                                         struct trace_buffer *size_buf, int cpu_id)
4927 {
4928         int cpu, ret = 0;
4929
4930         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4931                 for_each_tracing_cpu(cpu) {
4932                         ret = ring_buffer_resize(trace_buf->buffer,
4933                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4934                         if (ret < 0)
4935                                 break;
4936                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4937                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4938                 }
4939         } else {
4940                 ret = ring_buffer_resize(trace_buf->buffer,
4941                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4942                 if (ret == 0)
4943                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4944                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4945         }
4946
4947         return ret;
4948 }
4949 #endif /* CONFIG_TRACER_MAX_TRACE */
4950
4951 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4952                                         unsigned long size, int cpu)
4953 {
4954         int ret;
4955
4956         /*
4957          * If kernel or user changes the size of the ring buffer
4958          * we use the size that was given, and we can forget about
4959          * expanding it later.
4960          */
4961         ring_buffer_expanded = true;
4962
4963         /* May be called before buffers are initialized */
4964         if (!tr->trace_buffer.buffer)
4965                 return 0;
4966
4967         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4968         if (ret < 0)
4969                 return ret;
4970
4971 #ifdef CONFIG_TRACER_MAX_TRACE
4972         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4973             !tr->current_trace->use_max_tr)
4974                 goto out;
4975
4976         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4977         if (ret < 0) {
4978                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4979                                                      &tr->trace_buffer, cpu);
4980                 if (r < 0) {
4981                         /*
4982                          * AARGH! We are left with different
4983                          * size max buffer!!!!
4984                          * The max buffer is our "snapshot" buffer.
4985                          * When a tracer needs a snapshot (one of the
4986                          * latency tracers), it swaps the max buffer
4987                          * with the saved snapshot. We succeeded in
4988                          * updating the size of the main buffer, but failed
4989                          * to update the size of the max buffer. But when we tried
4990                          * to reset the main buffer to the original size, we
4991                          * failed there too. This is very unlikely to
4992                          * happen, but if it does, warn and kill all
4993                          * tracing.
4994                          */
4995                         WARN_ON(1);
4996                         tracing_disabled = 1;
4997                 }
4998                 return ret;
4999         }
5000
5001         if (cpu == RING_BUFFER_ALL_CPUS)
5002                 set_buffer_entries(&tr->max_buffer, size);
5003         else
5004                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5005
5006  out:
5007 #endif /* CONFIG_TRACER_MAX_TRACE */
5008
5009         if (cpu == RING_BUFFER_ALL_CPUS)
5010                 set_buffer_entries(&tr->trace_buffer, size);
5011         else
5012                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5013
5014         return ret;
5015 }
5016
5017 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5018                                           unsigned long size, int cpu_id)
5019 {
5020         int ret = size;
5021
5022         mutex_lock(&trace_types_lock);
5023
5024         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5025                 /* make sure this cpu is enabled in the mask */
5026                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5027                         ret = -EINVAL;
5028                         goto out;
5029                 }
5030         }
5031
5032         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5033         if (ret < 0)
5034                 ret = -ENOMEM;
5035
5036 out:
5037         mutex_unlock(&trace_types_lock);
5038
5039         return ret;
5040 }
5041
5042
5043 /**
5044  * tracing_update_buffers - used by tracing facility to expand ring buffers
5045  *
5046  * To save memory when tracing is never used on a system where it is
5047  * configured in, the ring buffers are set to a minimum size. But once
5048  * a user starts to use the tracing facility, the buffers need to grow
5049  * to their default size.
5050  *
5051  * This function is to be called when a tracer is about to be used.
5052  */
5053 int tracing_update_buffers(void)
5054 {
5055         int ret = 0;
5056
5057         mutex_lock(&trace_types_lock);
5058         if (!ring_buffer_expanded)
5059                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5060                                                 RING_BUFFER_ALL_CPUS);
5061         mutex_unlock(&trace_types_lock);
5062
5063         return ret;
5064 }
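
/*
 * Sketch of the expected calling pattern (the actual call sites live in
 * other tracing files and are assumed here):
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *	... only then enable the tracer or event ...
 *
 * This keeps the boot-time buffers tiny until tracing is actually used.
 */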
5065
5066 struct trace_option_dentry;
5067
5068 static void
5069 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5070
5071 /*
5072  * Used to clear out the tracer before deletion of an instance.
5073  * Must have trace_types_lock held.
5074  */
5075 static void tracing_set_nop(struct trace_array *tr)
5076 {
5077         if (tr->current_trace == &nop_trace)
5078                 return;
5079
5080         tr->current_trace->enabled--;
5081
5082         if (tr->current_trace->reset)
5083                 tr->current_trace->reset(tr);
5084
5085         tr->current_trace = &nop_trace;
5086 }
5087
5088 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5089 {
5090         /* Only enable if the directory has been created already. */
5091         if (!tr->dir)
5092                 return;
5093
5094         create_trace_option_files(tr, t);
5095 }
5096
5097 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5098 {
5099         struct tracer *t;
5100 #ifdef CONFIG_TRACER_MAX_TRACE
5101         bool had_max_tr;
5102 #endif
5103         int ret = 0;
5104
5105         mutex_lock(&trace_types_lock);
5106
5107         if (!ring_buffer_expanded) {
5108                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5109                                                 RING_BUFFER_ALL_CPUS);
5110                 if (ret < 0)
5111                         goto out;
5112                 ret = 0;
5113         }
5114
5115         for (t = trace_types; t; t = t->next) {
5116                 if (strcmp(t->name, buf) == 0)
5117                         break;
5118         }
5119         if (!t) {
5120                 ret = -EINVAL;
5121                 goto out;
5122         }
5123         if (t == tr->current_trace)
5124                 goto out;
5125
5126         /* Some tracers are only allowed for the top level buffer */
5127         if (!trace_ok_for_array(t, tr)) {
5128                 ret = -EINVAL;
5129                 goto out;
5130         }
5131
5132         /* If trace pipe files are being read, we can't change the tracer */
5133         if (tr->current_trace->ref) {
5134                 ret = -EBUSY;
5135                 goto out;
5136         }
5137
5138         trace_branch_disable();
5139
5140         tr->current_trace->enabled--;
5141
5142         if (tr->current_trace->reset)
5143                 tr->current_trace->reset(tr);
5144
5145         /* Current trace needs to be nop_trace before synchronize_sched */
5146         tr->current_trace = &nop_trace;
5147
5148 #ifdef CONFIG_TRACER_MAX_TRACE
5149         had_max_tr = tr->allocated_snapshot;
5150
5151         if (had_max_tr && !t->use_max_tr) {
5152                 /*
5153                  * We need to make sure that the update_max_tr sees that
5154                  * current_trace changed to nop_trace to keep it from
5155                  * swapping the buffers after we resize it.
5156                  * The update_max_tr is called with interrupts disabled,
5157                  * so a synchronize_sched() is sufficient.
5158                  */
5159                 synchronize_sched();
5160                 free_snapshot(tr);
5161         }
5162 #endif
5163
5164 #ifdef CONFIG_TRACER_MAX_TRACE
5165         if (t->use_max_tr && !had_max_tr) {
5166                 ret = alloc_snapshot(tr);
5167                 if (ret < 0)
5168                         goto out;
5169         }
5170 #endif
5171
5172         if (t->init) {
5173                 ret = tracer_init(t, tr);
5174                 if (ret)
5175                         goto out;
5176         }
5177
5178         tr->current_trace = t;
5179         tr->current_trace->enabled++;
5180         trace_branch_enable(tr);
5181  out:
5182         mutex_unlock(&trace_types_lock);
5183
5184         return ret;
5185 }
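
/*
 * Illustrative use of the interface built on tracing_set_tracer(): on a
 * typical system the file served by set_tracer_fops is "current_tracer"
 * under the tracefs mount (the path below is an assumption):
 *
 *	# echo function > /sys/kernel/debug/tracing/current_tracer
 *	# cat /sys/kernel/debug/tracing/current_tracer
 *	function
 *
 * Writing an unknown tracer name fails with -EINVAL, and the write is
 * rejected with -EBUSY while readers of the pipe files hold a reference.
 */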
5186
5187 static ssize_t
5188 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5189                         size_t cnt, loff_t *ppos)
5190 {
5191         struct trace_array *tr = filp->private_data;
5192         char buf[MAX_TRACER_SIZE+1];
5193         int i;
5194         size_t ret;
5195         int err;
5196
5197         ret = cnt;
5198
5199         if (cnt > MAX_TRACER_SIZE)
5200                 cnt = MAX_TRACER_SIZE;
5201
5202         if (copy_from_user(buf, ubuf, cnt))
5203                 return -EFAULT;
5204
5205         buf[cnt] = 0;
5206
5207         /* strip ending whitespace. */
5208         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5209                 buf[i] = 0;
5210
5211         err = tracing_set_tracer(tr, buf);
5212         if (err)
5213                 return err;
5214
5215         *ppos += ret;
5216
5217         return ret;
5218 }
5219
5220 static ssize_t
5221 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5222                    size_t cnt, loff_t *ppos)
5223 {
5224         char buf[64];
5225         int r;
5226
5227         r = snprintf(buf, sizeof(buf), "%ld\n",
5228                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5229         if (r > sizeof(buf))
5230                 r = sizeof(buf);
5231         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5232 }
5233
5234 static ssize_t
5235 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5236                     size_t cnt, loff_t *ppos)
5237 {
5238         unsigned long val;
5239         int ret;
5240
5241         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5242         if (ret)
5243                 return ret;
5244
5245         *ptr = val * 1000;
5246
5247         return cnt;
5248 }
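
/*
 * Worked example: these helpers expose values in microseconds while
 * storing them in nanoseconds.  Writing "500" through
 * tracing_nsecs_write() stores 500 * 1000 = 500000ns, and
 * tracing_nsecs_read() prints it back as "500".  A stored value of
 * (unsigned long)-1 is shown as "-1" (i.e. unset).
 */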
5249
5250 static ssize_t
5251 tracing_thresh_read(struct file *filp, char __user *ubuf,
5252                     size_t cnt, loff_t *ppos)
5253 {
5254         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5255 }
5256
5257 static ssize_t
5258 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5259                      size_t cnt, loff_t *ppos)
5260 {
5261         struct trace_array *tr = filp->private_data;
5262         int ret;
5263
5264         mutex_lock(&trace_types_lock);
5265         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5266         if (ret < 0)
5267                 goto out;
5268
5269         if (tr->current_trace->update_thresh) {
5270                 ret = tr->current_trace->update_thresh(tr);
5271                 if (ret < 0)
5272                         goto out;
5273         }
5274
5275         ret = cnt;
5276 out:
5277         mutex_unlock(&trace_types_lock);
5278
5279         return ret;
5280 }
5281
5282 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5283
5284 static ssize_t
5285 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5286                      size_t cnt, loff_t *ppos)
5287 {
5288         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5289 }
5290
5291 static ssize_t
5292 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5293                       size_t cnt, loff_t *ppos)
5294 {
5295         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5296 }
5297
5298 #endif
5299
5300 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5301 {
5302         struct trace_array *tr = inode->i_private;
5303         struct trace_iterator *iter;
5304         int ret = 0;
5305
5306         if (tracing_disabled)
5307                 return -ENODEV;
5308
5309         if (trace_array_get(tr) < 0)
5310                 return -ENODEV;
5311
5312         mutex_lock(&trace_types_lock);
5313
5314         /* create a buffer to store the information to pass to userspace */
5315         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5316         if (!iter) {
5317                 ret = -ENOMEM;
5318                 __trace_array_put(tr);
5319                 goto out;
5320         }
5321
5322         trace_seq_init(&iter->seq);
5323         iter->trace = tr->current_trace;
5324
5325         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5326                 ret = -ENOMEM;
5327                 goto fail;
5328         }
5329
5330         /* trace pipe does not show start of buffer */
5331         cpumask_setall(iter->started);
5332
5333         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5334                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5335
5336         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5337         if (trace_clocks[tr->clock_id].in_ns)
5338                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5339
5340         iter->tr = tr;
5341         iter->trace_buffer = &tr->trace_buffer;
5342         iter->cpu_file = tracing_get_cpu(inode);
5343         mutex_init(&iter->mutex);
5344         filp->private_data = iter;
5345
5346         if (iter->trace->pipe_open)
5347                 iter->trace->pipe_open(iter);
5348
5349         nonseekable_open(inode, filp);
5350
5351         tr->current_trace->ref++;
5352 out:
5353         mutex_unlock(&trace_types_lock);
5354         return ret;
5355
5356 fail:
5357         kfree(iter->trace);
5358         kfree(iter);
5359         __trace_array_put(tr);
5360         mutex_unlock(&trace_types_lock);
5361         return ret;
5362 }
5363
5364 static int tracing_release_pipe(struct inode *inode, struct file *file)
5365 {
5366         struct trace_iterator *iter = file->private_data;
5367         struct trace_array *tr = inode->i_private;
5368
5369         mutex_lock(&trace_types_lock);
5370
5371         tr->current_trace->ref--;
5372
5373         if (iter->trace->pipe_close)
5374                 iter->trace->pipe_close(iter);
5375
5376         mutex_unlock(&trace_types_lock);
5377
5378         free_cpumask_var(iter->started);
5379         mutex_destroy(&iter->mutex);
5380         kfree(iter);
5381
5382         trace_array_put(tr);
5383
5384         return 0;
5385 }
5386
5387 static unsigned int
5388 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5389 {
5390         struct trace_array *tr = iter->tr;
5391
5392         /* Iterators are static, they should be filled or empty */
5393         if (trace_buffer_iter(iter, iter->cpu_file))
5394                 return POLLIN | POLLRDNORM;
5395
5396         if (tr->trace_flags & TRACE_ITER_BLOCK)
5397                 /*
5398                  * Always select as readable when in blocking mode
5399                  */
5400                 return POLLIN | POLLRDNORM;
5401         else
5402                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5403                                              filp, poll_table);
5404 }
5405
5406 static unsigned int
5407 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5408 {
5409         struct trace_iterator *iter = filp->private_data;
5410
5411         return trace_poll(iter, filp, poll_table);
5412 }
5413
5414 /* Must be called with iter->mutex held. */
5415 static int tracing_wait_pipe(struct file *filp)
5416 {
5417         struct trace_iterator *iter = filp->private_data;
5418         int ret;
5419
5420         while (trace_empty(iter)) {
5421
5422                 if ((filp->f_flags & O_NONBLOCK)) {
5423                         return -EAGAIN;
5424                 }
5425
5426                 /*
5427                  * We block until we read something and tracing is disabled.
5428                  * We still block if tracing is disabled, but we have never
5429                  * read anything. This allows a user to cat this file, and
5430                  * then enable tracing. But after we have read something,
5431                  * we give an EOF when tracing is again disabled.
5432                  *
5433                  * iter->pos will be 0 if we haven't read anything.
5434                  */
5435                 if (!tracing_is_on() && iter->pos)
5436                         break;
5437
5438                 mutex_unlock(&iter->mutex);
5439
5440                 ret = wait_on_pipe(iter, false);
5441
5442                 mutex_lock(&iter->mutex);
5443
5444                 if (ret)
5445                         return ret;
5446         }
5447
5448         return 1;
5449 }
5450
5451 /*
5452  * Consumer reader.
5453  */
5454 static ssize_t
5455 tracing_read_pipe(struct file *filp, char __user *ubuf,
5456                   size_t cnt, loff_t *ppos)
5457 {
5458         struct trace_iterator *iter = filp->private_data;
5459         ssize_t sret;
5460
5461         /*
5462          * Avoid more than one consumer on a single file descriptor.
5463          * This is just a matter of trace coherency; the ring buffer itself
5464          * is protected.
5465          */
5466         mutex_lock(&iter->mutex);
5467
5468         /* return any leftover data */
5469         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5470         if (sret != -EBUSY)
5471                 goto out;
5472
5473         trace_seq_init(&iter->seq);
5474
5475         if (iter->trace->read) {
5476                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5477                 if (sret)
5478                         goto out;
5479         }
5480
5481 waitagain:
5482         sret = tracing_wait_pipe(filp);
5483         if (sret <= 0)
5484                 goto out;
5485
5486         /* stop when tracing is finished */
5487         if (trace_empty(iter)) {
5488                 sret = 0;
5489                 goto out;
5490         }
5491
5492         if (cnt >= PAGE_SIZE)
5493                 cnt = PAGE_SIZE - 1;
5494
5495         /* reset all but tr, trace, and overruns */
5496         memset(&iter->seq, 0,
5497                sizeof(struct trace_iterator) -
5498                offsetof(struct trace_iterator, seq));
5499         cpumask_clear(iter->started);
5500         iter->pos = -1;
5501
5502         trace_event_read_lock();
5503         trace_access_lock(iter->cpu_file);
5504         while (trace_find_next_entry_inc(iter) != NULL) {
5505                 enum print_line_t ret;
5506                 int save_len = iter->seq.seq.len;
5507
5508                 ret = print_trace_line(iter);
5509                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5510                         /* don't print partial lines */
5511                         iter->seq.seq.len = save_len;
5512                         break;
5513                 }
5514                 if (ret != TRACE_TYPE_NO_CONSUME)
5515                         trace_consume(iter);
5516
5517                 if (trace_seq_used(&iter->seq) >= cnt)
5518                         break;
5519
5520                 /*
5521                  * Setting the full flag means we reached the trace_seq buffer
5522                  * size and we should have left via the partial-line condition above.
5523                  * One of the trace_seq_* functions is not used properly.
5524                  */
5525                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5526                           iter->ent->type);
5527         }
5528         trace_access_unlock(iter->cpu_file);
5529         trace_event_read_unlock();
5530
5531         /* Now copy what we have to the user */
5532         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5533         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5534                 trace_seq_init(&iter->seq);
5535
5536         /*
5537          * If there was nothing to send to user, in spite of consuming trace
5538          * entries, go back to wait for more entries.
5539          */
5540         if (sret == -EBUSY)
5541                 goto waitagain;
5542
5543 out:
5544         mutex_unlock(&iter->mutex);
5545
5546         return sret;
5547 }
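
/*
 * Illustrative consumer of this interface (trace_pipe on typical
 * setups; the path and buffer size are assumptions):
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_pipe", O_RDONLY);
 *	char line[4096];
 *	ssize_t n;
 *
 *	while ((n = read(fd, line, sizeof(line))) > 0)
 *		write(STDOUT_FILENO, line, n);
 *
 * Each read consumes the entries it returns, and an empty buffer blocks
 * (see tracing_wait_pipe()) unless the file was opened O_NONBLOCK.
 */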
5548
5549 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5550                                      unsigned int idx)
5551 {
5552         __free_page(spd->pages[idx]);
5553 }
5554
5555 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5556         .can_merge              = 0,
5557         .confirm                = generic_pipe_buf_confirm,
5558         .release                = generic_pipe_buf_release,
5559         .steal                  = generic_pipe_buf_steal,
5560         .get                    = generic_pipe_buf_get,
5561 };
5562
5563 static size_t
5564 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5565 {
5566         size_t count;
5567         int save_len;
5568         int ret;
5569
5570         /* Seq buffer is page-sized, exactly what we need. */
5571         for (;;) {
5572                 save_len = iter->seq.seq.len;
5573                 ret = print_trace_line(iter);
5574
5575                 if (trace_seq_has_overflowed(&iter->seq)) {
5576                         iter->seq.seq.len = save_len;
5577                         break;
5578                 }
5579
5580                 /*
5581                  * This should not be hit, because it should only
5582                  * be set if the iter->seq overflowed. But check it
5583                  * anyway to be safe.
5584                  */
5585                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5586                         iter->seq.seq.len = save_len;
5587                         break;
5588                 }
5589
5590                 count = trace_seq_used(&iter->seq) - save_len;
5591                 if (rem < count) {
5592                         rem = 0;
5593                         iter->seq.seq.len = save_len;
5594                         break;
5595                 }
5596
5597                 if (ret != TRACE_TYPE_NO_CONSUME)
5598                         trace_consume(iter);
5599                 rem -= count;
5600                 if (!trace_find_next_entry_inc(iter))   {
5601                         rem = 0;
5602                         iter->ent = NULL;
5603                         break;
5604                 }
5605         }
5606
5607         return rem;
5608 }
5609
5610 static ssize_t tracing_splice_read_pipe(struct file *filp,
5611                                         loff_t *ppos,
5612                                         struct pipe_inode_info *pipe,
5613                                         size_t len,
5614                                         unsigned int flags)
5615 {
5616         struct page *pages_def[PIPE_DEF_BUFFERS];
5617         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5618         struct trace_iterator *iter = filp->private_data;
5619         struct splice_pipe_desc spd = {
5620                 .pages          = pages_def,
5621                 .partial        = partial_def,
5622                 .nr_pages       = 0, /* This gets updated below. */
5623                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5624                 .flags          = flags,
5625                 .ops            = &tracing_pipe_buf_ops,
5626                 .spd_release    = tracing_spd_release_pipe,
5627         };
5628         ssize_t ret;
5629         size_t rem;
5630         unsigned int i;
5631
5632         if (splice_grow_spd(pipe, &spd))
5633                 return -ENOMEM;
5634
5635         mutex_lock(&iter->mutex);
5636
5637         if (iter->trace->splice_read) {
5638                 ret = iter->trace->splice_read(iter, filp,
5639                                                ppos, pipe, len, flags);
5640                 if (ret)
5641                         goto out_err;
5642         }
5643
5644         ret = tracing_wait_pipe(filp);
5645         if (ret <= 0)
5646                 goto out_err;
5647
5648         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5649                 ret = -EFAULT;
5650                 goto out_err;
5651         }
5652
5653         trace_event_read_lock();
5654         trace_access_lock(iter->cpu_file);
5655
5656         /* Fill as many pages as possible. */
5657         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5658                 spd.pages[i] = alloc_page(GFP_KERNEL);
5659                 if (!spd.pages[i])
5660                         break;
5661
5662                 rem = tracing_fill_pipe_page(rem, iter);
5663
5664                 /* Copy the data into the page, so we can start over. */
5665                 ret = trace_seq_to_buffer(&iter->seq,
5666                                           page_address(spd.pages[i]),
5667                                           trace_seq_used(&iter->seq));
5668                 if (ret < 0) {
5669                         __free_page(spd.pages[i]);
5670                         break;
5671                 }
5672                 spd.partial[i].offset = 0;
5673                 spd.partial[i].len = trace_seq_used(&iter->seq);
5674
5675                 trace_seq_init(&iter->seq);
5676         }
5677
5678         trace_access_unlock(iter->cpu_file);
5679         trace_event_read_unlock();
5680         mutex_unlock(&iter->mutex);
5681
5682         spd.nr_pages = i;
5683
5684         if (i)
5685                 ret = splice_to_pipe(pipe, &spd);
5686         else
5687                 ret = 0;
5688 out:
5689         splice_shrink_spd(&spd);
5690         return ret;
5691
5692 out_err:
5693         mutex_unlock(&iter->mutex);
5694         goto out;
5695 }
5696
5697 static ssize_t
5698 tracing_entries_read(struct file *filp, char __user *ubuf,
5699                      size_t cnt, loff_t *ppos)
5700 {
5701         struct inode *inode = file_inode(filp);
5702         struct trace_array *tr = inode->i_private;
5703         int cpu = tracing_get_cpu(inode);
5704         char buf[64];
5705         int r = 0;
5706         ssize_t ret;
5707
5708         mutex_lock(&trace_types_lock);
5709
5710         if (cpu == RING_BUFFER_ALL_CPUS) {
5711                 int cpu, buf_size_same;
5712                 unsigned long size;
5713
5714                 size = 0;
5715                 buf_size_same = 1;
5716                 /* check if all cpu sizes are same */
5717                 for_each_tracing_cpu(cpu) {
5718                         /* fill in the size from first enabled cpu */
5719                         if (size == 0)
5720                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5721                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5722                                 buf_size_same = 0;
5723                                 break;
5724                         }
5725                 }
5726
5727                 if (buf_size_same) {
5728                         if (!ring_buffer_expanded)
5729                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5730                                             size >> 10,
5731                                             trace_buf_size >> 10);
5732                         else
5733                                 r = sprintf(buf, "%lu\n", size >> 10);
5734                 } else
5735                         r = sprintf(buf, "X\n");
5736         } else
5737                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5738
5739         mutex_unlock(&trace_types_lock);
5740
5741         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5742         return ret;
5743 }
5744
5745 static ssize_t
5746 tracing_entries_write(struct file *filp, const char __user *ubuf,
5747                       size_t cnt, loff_t *ppos)
5748 {
5749         struct inode *inode = file_inode(filp);
5750         struct trace_array *tr = inode->i_private;
5751         unsigned long val;
5752         int ret;
5753
5754         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5755         if (ret)
5756                 return ret;
5757
5758         /* must have at least 1 entry */
5759         if (!val)
5760                 return -EINVAL;
5761
5762         /* value is in KB */
5763         val <<= 10;
5764         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5765         if (ret < 0)
5766                 return ret;
5767
5768         *ppos += cnt;
5769
5770         return cnt;
5771 }
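
/*
 * Example (illustrative; "buffer_size_kb" is the file these fops usually
 * back, and the path is an assumption):
 *
 *	# echo 1408 > /sys/kernel/debug/tracing/buffer_size_kb
 *
 * Values are in kilobytes (val <<= 10 above) and apply per CPU when
 * written to the top-level file, or to a single CPU when written through
 * the corresponding per_cpu/cpu<N>/ file.
 */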
5772
5773 static ssize_t
5774 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5775                                 size_t cnt, loff_t *ppos)
5776 {
5777         struct trace_array *tr = filp->private_data;
5778         char buf[64];
5779         int r, cpu;
5780         unsigned long size = 0, expanded_size = 0;
5781
5782         mutex_lock(&trace_types_lock);
5783         for_each_tracing_cpu(cpu) {
5784                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5785                 if (!ring_buffer_expanded)
5786                         expanded_size += trace_buf_size >> 10;
5787         }
5788         if (ring_buffer_expanded)
5789                 r = sprintf(buf, "%lu\n", size);
5790         else
5791                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5792         mutex_unlock(&trace_types_lock);
5793
5794         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5795 }
5796
5797 static ssize_t
5798 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5799                           size_t cnt, loff_t *ppos)
5800 {
5801         /*
5802          * There is no need to read what the user has written; this function
5803          * just makes sure that there is no error when "echo" is used.
5804          */
5805
5806         *ppos += cnt;
5807
5808         return cnt;
5809 }
5810
5811 static int
5812 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5813 {
5814         struct trace_array *tr = inode->i_private;
5815
5816         /* disable tracing? */
5817         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5818                 tracer_tracing_off(tr);
5819         /* resize the ring buffer to 0 */
5820         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5821
5822         trace_array_put(tr);
5823
5824         return 0;
5825 }
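
/*
 * Note on the semantics implemented above (usually exposed as the
 * "free_buffer" file, an assumption about the name): the write side only
 * swallows input so that "echo > free_buffer" succeeds.  The real work
 * happens on release, which optionally turns tracing off via
 * TRACE_ITER_STOP_ON_FREE and then shrinks the ring buffer to zero
 * entries on all CPUs.
 */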
5826
5827 static ssize_t
5828 tracing_mark_write(struct file *filp, const char __user *ubuf,
5829                                         size_t cnt, loff_t *fpos)
5830 {
5831         struct trace_array *tr = filp->private_data;
5832         struct ring_buffer_event *event;
5833         struct ring_buffer *buffer;
5834         struct print_entry *entry;
5835         unsigned long irq_flags;
5836         const char faulted[] = "<faulted>";
5837         ssize_t written;
5838         int size;
5839         int len;
5840
5841 /* Used in tracing_mark_raw_write() as well */
5842 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5843
5844         if (tracing_disabled)
5845                 return -EINVAL;
5846
5847         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5848                 return -EINVAL;
5849
5850         if (cnt > TRACE_BUF_SIZE)
5851                 cnt = TRACE_BUF_SIZE;
5852
5853         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5854
5855         local_save_flags(irq_flags);
5856         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5857
5858         /* If less than "<faulted>", then make sure we can still add that */
5859         if (cnt < FAULTED_SIZE)
5860                 size += FAULTED_SIZE - cnt;
5861
5862         buffer = tr->trace_buffer.buffer;
5863         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5864                                             irq_flags, preempt_count());
5865         if (unlikely(!event))
5866                 /* Ring buffer disabled, return as if not open for write */
5867                 return -EBADF;
5868
5869         entry = ring_buffer_event_data(event);
5870         entry->ip = _THIS_IP_;
5871
5872         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5873         if (len) {
5874                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5875                 cnt = FAULTED_SIZE;
5876                 written = -EFAULT;
5877         } else
5878                 written = cnt;
5879         len = cnt;
5880
5881         if (entry->buf[cnt - 1] != '\n') {
5882                 entry->buf[cnt] = '\n';
5883                 entry->buf[cnt + 1] = '\0';
5884         } else
5885                 entry->buf[cnt] = '\0';
5886
5887         __buffer_unlock_commit(buffer, event);
5888
5889         if (written > 0)
5890                 *fpos += written;
5891
5892         return written;
5893 }
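
/*
 * Illustrative user of this write handler (the trace_marker file on
 * typical setups; the path is an assumption):
 *
 *	int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY);
 *
 *	write(fd, "hello from userspace", 20);
 *
 * The string shows up as a TRACE_PRINT entry; a trailing newline is added
 * if missing, and if the copy from user space faults the entry records
 * "<faulted>" and the write returns -EFAULT.
 */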
5894
5895 /* Limit it for now to 3K (including tag) */
5896 #define RAW_DATA_MAX_SIZE (1024*3)
5897
5898 static ssize_t
5899 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5900                                         size_t cnt, loff_t *fpos)
5901 {
5902         struct trace_array *tr = filp->private_data;
5903         struct ring_buffer_event *event;
5904         struct ring_buffer *buffer;
5905         struct raw_data_entry *entry;
5906         const char faulted[] = "<faulted>";
5907         unsigned long irq_flags;
5908         ssize_t written;
5909         int size;
5910         int len;
5911
5912 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5913
5914         if (tracing_disabled)
5915                 return -EINVAL;
5916
5917         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5918                 return -EINVAL;
5919
5920         /* The marker must at least have a tag id */
5921         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5922                 return -EINVAL;
5923
5924         if (cnt > TRACE_BUF_SIZE)
5925                 cnt = TRACE_BUF_SIZE;
5926
5927         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5928
5929         local_save_flags(irq_flags);
5930         size = sizeof(*entry) + cnt;
5931         if (cnt < FAULT_SIZE_ID)
5932                 size += FAULT_SIZE_ID - cnt;
5933
5934         buffer = tr->trace_buffer.buffer;
5935         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5936                                             irq_flags, preempt_count());
5937         if (!event)
5938                 /* Ring buffer disabled, return as if not open for write */
5939                 return -EBADF;
5940
5941         entry = ring_buffer_event_data(event);
5942
5943         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5944         if (len) {
5945                 entry->id = -1;
5946                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5947                 written = -EFAULT;
5948         } else
5949                 written = cnt;
5950
5951         __buffer_unlock_commit(buffer, event);
5952
5953         if (written > 0)
5954                 *fpos += written;
5955
5956         return written;
5957 }
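
/*
 * Illustrative payload for this handler (trace_marker_raw on typical
 * setups): the first 4 bytes are a user-defined tag id, the rest is
 * opaque binary data, e.g.:
 *
 *	struct {
 *		unsigned int	id;
 *		char		data[16];
 *	} rec = { .id = 42, .data = "raw payload" };
 *
 *	write(fd, &rec, sizeof(rec));
 *
 * Writes smaller than sizeof(unsigned int) or larger than
 * RAW_DATA_MAX_SIZE are rejected with -EINVAL.
 */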
5958
5959 static int tracing_clock_show(struct seq_file *m, void *v)
5960 {
5961         struct trace_array *tr = m->private;
5962         int i;
5963
5964         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5965                 seq_printf(m,
5966                         "%s%s%s%s", i ? " " : "",
5967                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5968                         i == tr->clock_id ? "]" : "");
5969         seq_putc(m, '\n');
5970
5971         return 0;
5972 }
5973
5974 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5975 {
5976         int i;
5977
5978         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5979                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5980                         break;
5981         }
5982         if (i == ARRAY_SIZE(trace_clocks))
5983                 return -EINVAL;
5984
5985         mutex_lock(&trace_types_lock);
5986
5987         tr->clock_id = i;
5988
5989         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5990
5991         /*
5992          * New clock may not be consistent with the previous clock.
5993          * Reset the buffer so that it doesn't have incomparable timestamps.
5994          */
5995         tracing_reset_online_cpus(&tr->trace_buffer);
5996
5997 #ifdef CONFIG_TRACER_MAX_TRACE
5998         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5999                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6000         tracing_reset_online_cpus(&tr->max_buffer);
6001 #endif
6002
6003         mutex_unlock(&trace_types_lock);
6004
6005         return 0;
6006 }
6007
6008 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6009                                    size_t cnt, loff_t *fpos)
6010 {
6011         struct seq_file *m = filp->private_data;
6012         struct trace_array *tr = m->private;
6013         char buf[64];
6014         const char *clockstr;
6015         int ret;
6016
6017         if (cnt >= sizeof(buf))
6018                 return -EINVAL;
6019
6020         if (copy_from_user(buf, ubuf, cnt))
6021                 return -EFAULT;
6022
6023         buf[cnt] = 0;
6024
6025         clockstr = strstrip(buf);
6026
6027         ret = tracing_set_clock(tr, clockstr);
6028         if (ret)
6029                 return ret;
6030
6031         *fpos += cnt;
6032
6033         return cnt;
6034 }
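
/*
 * Example interaction with the clock file built on the two functions
 * above (usually "trace_clock"; the path and exact clock list are
 * assumptions, the real list comes from trace_clocks[]):
 *
 *	# cat trace_clock
 *	[local] global counter ...
 *	# echo global > trace_clock
 *
 * The current clock is shown in brackets, and switching clocks resets
 * the buffers so timestamps stay comparable.
 */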
6035
6036 static int tracing_clock_open(struct inode *inode, struct file *file)
6037 {
6038         struct trace_array *tr = inode->i_private;
6039         int ret;
6040
6041         if (tracing_disabled)
6042                 return -ENODEV;
6043
6044         if (trace_array_get(tr))
6045                 return -ENODEV;
6046
6047         ret = single_open(file, tracing_clock_show, inode->i_private);
6048         if (ret < 0)
6049                 trace_array_put(tr);
6050
6051         return ret;
6052 }
6053
6054 struct ftrace_buffer_info {
6055         struct trace_iterator   iter;
6056         void                    *spare;
6057         unsigned int            spare_cpu;
6058         unsigned int            read;
6059 };
6060
6061 #ifdef CONFIG_TRACER_SNAPSHOT
6062 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6063 {
6064         struct trace_array *tr = inode->i_private;
6065         struct trace_iterator *iter;
6066         struct seq_file *m;
6067         int ret = 0;
6068
6069         if (trace_array_get(tr) < 0)
6070                 return -ENODEV;
6071
6072         if (file->f_mode & FMODE_READ) {
6073                 iter = __tracing_open(inode, file, true);
6074                 if (IS_ERR(iter))
6075                         ret = PTR_ERR(iter);
6076         } else {
6077                 /* Writes still need the seq_file to hold the private data */
6078                 ret = -ENOMEM;
6079                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6080                 if (!m)
6081                         goto out;
6082                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6083                 if (!iter) {
6084                         kfree(m);
6085                         goto out;
6086                 }
6087                 ret = 0;
6088
6089                 iter->tr = tr;
6090                 iter->trace_buffer = &tr->max_buffer;
6091                 iter->cpu_file = tracing_get_cpu(inode);
6092                 m->private = iter;
6093                 file->private_data = m;
6094         }
6095 out:
6096         if (ret < 0)
6097                 trace_array_put(tr);
6098
6099         return ret;
6100 }
6101
6102 static ssize_t
6103 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6104                        loff_t *ppos)
6105 {
6106         struct seq_file *m = filp->private_data;
6107         struct trace_iterator *iter = m->private;
6108         struct trace_array *tr = iter->tr;
6109         unsigned long val;
6110         int ret;
6111
6112         ret = tracing_update_buffers();
6113         if (ret < 0)
6114                 return ret;
6115
6116         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6117         if (ret)
6118                 return ret;
6119
6120         mutex_lock(&trace_types_lock);
6121
6122         if (tr->current_trace->use_max_tr) {
6123                 ret = -EBUSY;
6124                 goto out;
6125         }
6126
6127         switch (val) {
6128         case 0:
6129                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6130                         ret = -EINVAL;
6131                         break;
6132                 }
6133                 if (tr->allocated_snapshot)
6134                         free_snapshot(tr);
6135                 break;
6136         case 1:
6137 /* Only allow per-cpu swap if the ring buffer supports it */
6138 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6139                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6140                         ret = -EINVAL;
6141                         break;
6142                 }
6143 #endif
6144                 if (!tr->allocated_snapshot) {
6145                         ret = alloc_snapshot(tr);
6146                         if (ret < 0)
6147                                 break;
6148                 }
6149                 local_irq_disable();
6150                 /* Now, we're going to swap */
6151                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6152                         update_max_tr(tr, current, smp_processor_id());
6153                 else
6154                         update_max_tr_single(tr, current, iter->cpu_file);
6155                 local_irq_enable();
6156                 break;
6157         default:
6158                 if (tr->allocated_snapshot) {
6159                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6160                                 tracing_reset_online_cpus(&tr->max_buffer);
6161                         else
6162                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6163                 }
6164                 break;
6165         }
6166
6167         if (ret >= 0) {
6168                 *ppos += cnt;
6169                 ret = cnt;
6170         }
6171 out:
6172         mutex_unlock(&trace_types_lock);
6173         return ret;
6174 }
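
/*
 * Summary of the values accepted above (typically via the "snapshot"
 * file):
 *
 *	echo 0 > snapshot	frees the snapshot buffer (all-CPUs file only)
 *	echo 1 > snapshot	allocates it if needed and takes a snapshot
 *	echo 2 > snapshot	(or any other value) clears the snapshot
 *				buffer without taking a new one
 */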
6175
6176 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6177 {
6178         struct seq_file *m = file->private_data;
6179         int ret;
6180
6181         ret = tracing_release(inode, file);
6182
6183         if (file->f_mode & FMODE_READ)
6184                 return ret;
6185
6186         /* If write only, the seq_file is just a stub */
6187         if (m)
6188                 kfree(m->private);
6189         kfree(m);
6190
6191         return 0;
6192 }
6193
6194 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6195 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6196                                     size_t count, loff_t *ppos);
6197 static int tracing_buffers_release(struct inode *inode, struct file *file);
6198 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6199                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6200
6201 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6202 {
6203         struct ftrace_buffer_info *info;
6204         int ret;
6205
6206         ret = tracing_buffers_open(inode, filp);
6207         if (ret < 0)
6208                 return ret;
6209
6210         info = filp->private_data;
6211
6212         if (info->iter.trace->use_max_tr) {
6213                 tracing_buffers_release(inode, filp);
6214                 return -EBUSY;
6215         }
6216
6217         info->iter.snapshot = true;
6218         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6219
6220         return ret;
6221 }
6222
6223 #endif /* CONFIG_TRACER_SNAPSHOT */
6224
6225
6226 static const struct file_operations tracing_thresh_fops = {
6227         .open           = tracing_open_generic,
6228         .read           = tracing_thresh_read,
6229         .write          = tracing_thresh_write,
6230         .llseek         = generic_file_llseek,
6231 };
6232
6233 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6234 static const struct file_operations tracing_max_lat_fops = {
6235         .open           = tracing_open_generic,
6236         .read           = tracing_max_lat_read,
6237         .write          = tracing_max_lat_write,
6238         .llseek         = generic_file_llseek,
6239 };
6240 #endif
6241
6242 static const struct file_operations set_tracer_fops = {
6243         .open           = tracing_open_generic,
6244         .read           = tracing_set_trace_read,
6245         .write          = tracing_set_trace_write,
6246         .llseek         = generic_file_llseek,
6247 };
6248
6249 static const struct file_operations tracing_pipe_fops = {
6250         .open           = tracing_open_pipe,
6251         .poll           = tracing_poll_pipe,
6252         .read           = tracing_read_pipe,
6253         .splice_read    = tracing_splice_read_pipe,
6254         .release        = tracing_release_pipe,
6255         .llseek         = no_llseek,
6256 };
6257
6258 static const struct file_operations tracing_entries_fops = {
6259         .open           = tracing_open_generic_tr,
6260         .read           = tracing_entries_read,
6261         .write          = tracing_entries_write,
6262         .llseek         = generic_file_llseek,
6263         .release        = tracing_release_generic_tr,
6264 };
6265
6266 static const struct file_operations tracing_total_entries_fops = {
6267         .open           = tracing_open_generic_tr,
6268         .read           = tracing_total_entries_read,
6269         .llseek         = generic_file_llseek,
6270         .release        = tracing_release_generic_tr,
6271 };
6272
6273 static const struct file_operations tracing_free_buffer_fops = {
6274         .open           = tracing_open_generic_tr,
6275         .write          = tracing_free_buffer_write,
6276         .release        = tracing_free_buffer_release,
6277 };
6278
6279 static const struct file_operations tracing_mark_fops = {
6280         .open           = tracing_open_generic_tr,
6281         .write          = tracing_mark_write,
6282         .llseek         = generic_file_llseek,
6283         .release        = tracing_release_generic_tr,
6284 };
6285
6286 static const struct file_operations tracing_mark_raw_fops = {
6287         .open           = tracing_open_generic_tr,
6288         .write          = tracing_mark_raw_write,
6289         .llseek         = generic_file_llseek,
6290         .release        = tracing_release_generic_tr,
6291 };
6292
6293 static const struct file_operations trace_clock_fops = {
6294         .open           = tracing_clock_open,
6295         .read           = seq_read,
6296         .llseek         = seq_lseek,
6297         .release        = tracing_single_release_tr,
6298         .write          = tracing_clock_write,
6299 };
6300
6301 #ifdef CONFIG_TRACER_SNAPSHOT
6302 static const struct file_operations snapshot_fops = {
6303         .open           = tracing_snapshot_open,
6304         .read           = seq_read,
6305         .write          = tracing_snapshot_write,
6306         .llseek         = tracing_lseek,
6307         .release        = tracing_snapshot_release,
6308 };
6309
6310 static const struct file_operations snapshot_raw_fops = {
6311         .open           = snapshot_raw_open,
6312         .read           = tracing_buffers_read,
6313         .release        = tracing_buffers_release,
6314         .splice_read    = tracing_buffers_splice_read,
6315         .llseek         = no_llseek,
6316 };
6317
6318 #endif /* CONFIG_TRACER_SNAPSHOT */
6319
6320 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6321 {
6322         struct trace_array *tr = inode->i_private;
6323         struct ftrace_buffer_info *info;
6324         int ret;
6325
6326         if (tracing_disabled)
6327                 return -ENODEV;
6328
6329         if (trace_array_get(tr) < 0)
6330                 return -ENODEV;
6331
6332         info = kzalloc(sizeof(*info), GFP_KERNEL);
6333         if (!info) {
6334                 trace_array_put(tr);
6335                 return -ENOMEM;
6336         }
6337
6338         mutex_lock(&trace_types_lock);
6339
6340         info->iter.tr           = tr;
6341         info->iter.cpu_file     = tracing_get_cpu(inode);
6342         info->iter.trace        = tr->current_trace;
6343         info->iter.trace_buffer = &tr->trace_buffer;
6344         info->spare             = NULL;
6345         /* Force reading ring buffer for first read */
6346         info->read              = (unsigned int)-1;
6347
6348         filp->private_data = info;
6349
6350         tr->current_trace->ref++;
6351
6352         mutex_unlock(&trace_types_lock);
6353
6354         ret = nonseekable_open(inode, filp);
6355         if (ret < 0)
6356                 trace_array_put(tr);
6357
6358         return ret;
6359 }
6360
6361 static unsigned int
6362 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6363 {
6364         struct ftrace_buffer_info *info = filp->private_data;
6365         struct trace_iterator *iter = &info->iter;
6366
6367         return trace_poll(iter, filp, poll_table);
6368 }
6369
6370 static ssize_t
6371 tracing_buffers_read(struct file *filp, char __user *ubuf,
6372                      size_t count, loff_t *ppos)
6373 {
6374         struct ftrace_buffer_info *info = filp->private_data;
6375         struct trace_iterator *iter = &info->iter;
6376         ssize_t ret;
6377         ssize_t size;
6378
6379         if (!count)
6380                 return 0;
6381
6382 #ifdef CONFIG_TRACER_MAX_TRACE
6383         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6384                 return -EBUSY;
6385 #endif
6386
6387         if (!info->spare) {
6388                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6389                                                           iter->cpu_file);
6390                 info->spare_cpu = iter->cpu_file;
6391         }
6392         if (!info->spare)
6393                 return -ENOMEM;
6394
6395         /* Do we have previous read data to read? */
6396         if (info->read < PAGE_SIZE)
6397                 goto read;
6398
6399  again:
6400         trace_access_lock(iter->cpu_file);
6401         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6402                                     &info->spare,
6403                                     count,
6404                                     iter->cpu_file, 0);
6405         trace_access_unlock(iter->cpu_file);
6406
6407         if (ret < 0) {
6408                 if (trace_empty(iter)) {
6409                         if ((filp->f_flags & O_NONBLOCK))
6410                                 return -EAGAIN;
6411
6412                         ret = wait_on_pipe(iter, false);
6413                         if (ret)
6414                                 return ret;
6415
6416                         goto again;
6417                 }
6418                 return 0;
6419         }
6420
6421         info->read = 0;
6422  read:
6423         size = PAGE_SIZE - info->read;
6424         if (size > count)
6425                 size = count;
6426
6427         ret = copy_to_user(ubuf, info->spare + info->read, size);
6428         if (ret == size)
6429                 return -EFAULT;
6430
6431         size -= ret;
6432
6433         *ppos += size;
6434         info->read += size;
6435
6436         return size;
6437 }
6438
6439 static int tracing_buffers_release(struct inode *inode, struct file *file)
6440 {
6441         struct ftrace_buffer_info *info = file->private_data;
6442         struct trace_iterator *iter = &info->iter;
6443
6444         mutex_lock(&trace_types_lock);
6445
6446         iter->tr->current_trace->ref--;
6447
6448         __trace_array_put(iter->tr);
6449
6450         if (info->spare)
6451                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6452                                            info->spare_cpu, info->spare);
6453         kfree(info);
6454
6455         mutex_unlock(&trace_types_lock);
6456
6457         return 0;
6458 }
6459
6460 struct buffer_ref {
6461         struct ring_buffer      *buffer;
6462         void                    *page;
6463         int                     cpu;
6464         int                     ref;
6465 };
6466
6467 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6468                                     struct pipe_buffer *buf)
6469 {
6470         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6471
6472         if (--ref->ref)
6473                 return;
6474
6475         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6476         kfree(ref);
6477         buf->private = 0;
6478 }
6479
6480 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6481                                 struct pipe_buffer *buf)
6482 {
6483         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6484
6485         ref->ref++;
6486 }
6487
6488 /* Pipe buffer operations for a buffer. */
6489 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6490         .can_merge              = 0,
6491         .confirm                = generic_pipe_buf_confirm,
6492         .release                = buffer_pipe_buf_release,
6493         .steal                  = generic_pipe_buf_steal,
6494         .get                    = buffer_pipe_buf_get,
6495 };
6496
6497 /*
6498  * Callback from splice_to_pipe(): release any pages still left in the
6499  * spd in case we errored out while filling the pipe.
6500  */
6501 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6502 {
6503         struct buffer_ref *ref =
6504                 (struct buffer_ref *)spd->partial[i].private;
6505
6506         if (--ref->ref)
6507                 return;
6508
6509         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6510         kfree(ref);
6511         spd->partial[i].private = 0;
6512 }
6513
6514 static ssize_t
6515 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6516                             struct pipe_inode_info *pipe, size_t len,
6517                             unsigned int flags)
6518 {
6519         struct ftrace_buffer_info *info = file->private_data;
6520         struct trace_iterator *iter = &info->iter;
6521         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6522         struct page *pages_def[PIPE_DEF_BUFFERS];
6523         struct splice_pipe_desc spd = {
6524                 .pages          = pages_def,
6525                 .partial        = partial_def,
6526                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6527                 .flags          = flags,
6528                 .ops            = &buffer_pipe_buf_ops,
6529                 .spd_release    = buffer_spd_release,
6530         };
6531         struct buffer_ref *ref;
6532         int entries, size, i;
6533         ssize_t ret = 0;
6534
6535 #ifdef CONFIG_TRACER_MAX_TRACE
6536         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6537                 return -EBUSY;
6538 #endif
6539
6540         if (*ppos & (PAGE_SIZE - 1))
6541                 return -EINVAL;
6542
6543         if (len & (PAGE_SIZE - 1)) {
6544                 if (len < PAGE_SIZE)
6545                         return -EINVAL;
6546                 len &= PAGE_MASK;
6547         }
6548
6549         if (splice_grow_spd(pipe, &spd))
6550                 return -ENOMEM;
6551
6552  again:
6553         trace_access_lock(iter->cpu_file);
6554         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6555
6556         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6557                 struct page *page;
6558                 int r;
6559
6560                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6561                 if (!ref) {
6562                         ret = -ENOMEM;
6563                         break;
6564                 }
6565
6566                 ref->ref = 1;
6567                 ref->buffer = iter->trace_buffer->buffer;
6568                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6569                 if (!ref->page) {
6570                         ret = -ENOMEM;
6571                         kfree(ref);
6572                         break;
6573                 }
6574                 ref->cpu = iter->cpu_file;
6575
6576                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6577                                           len, iter->cpu_file, 1);
6578                 if (r < 0) {
6579                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6580                                                    ref->page);
6581                         kfree(ref);
6582                         break;
6583                 }
6584
6585                 /*
6586                  * Zero out any leftover data, since this page is
6587                  * going to user land.
6588                  */
6589                 size = ring_buffer_page_len(ref->page);
6590                 if (size < PAGE_SIZE)
6591                         memset(ref->page + size, 0, PAGE_SIZE - size);
6592
6593                 page = virt_to_page(ref->page);
6594
6595                 spd.pages[i] = page;
6596                 spd.partial[i].len = PAGE_SIZE;
6597                 spd.partial[i].offset = 0;
6598                 spd.partial[i].private = (unsigned long)ref;
6599                 spd.nr_pages++;
6600                 *ppos += PAGE_SIZE;
6601
6602                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6603         }
6604
6605         trace_access_unlock(iter->cpu_file);
6606         spd.nr_pages = i;
6607
6608         /* did we read anything? */
6609         if (!spd.nr_pages) {
6610                 if (ret)
6611                         goto out;
6612
6613                 ret = -EAGAIN;
6614                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6615                         goto out;
6616
6617                 ret = wait_on_pipe(iter, true);
6618                 if (ret)
6619                         goto out;
6620
6621                 goto again;
6622         }
6623
6624         ret = splice_to_pipe(pipe, &spd);
6625 out:
6626         splice_shrink_spd(&spd);
6627
6628         return ret;
6629 }
6630
6631 static const struct file_operations tracing_buffers_fops = {
6632         .open           = tracing_buffers_open,
6633         .read           = tracing_buffers_read,
6634         .poll           = tracing_buffers_poll,
6635         .release        = tracing_buffers_release,
6636         .splice_read    = tracing_buffers_splice_read,
6637         .llseek         = no_llseek,
6638 };
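
/*
 * Note (summary of the checks above, not from the upstream file): these
 * fops back the per_cpu/cpuN/trace_pipe_raw files created below, and the
 * splice path hands whole ring-buffer pages to user space.  Callers must
 * therefore pass a page-aligned offset, and a length that is not a
 * multiple of PAGE_SIZE must be at least PAGE_SIZE and is rounded down,
 * as enforced at the top of tracing_buffers_splice_read().
 */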
6639
6640 static ssize_t
6641 tracing_stats_read(struct file *filp, char __user *ubuf,
6642                    size_t count, loff_t *ppos)
6643 {
6644         struct inode *inode = file_inode(filp);
6645         struct trace_array *tr = inode->i_private;
6646         struct trace_buffer *trace_buf = &tr->trace_buffer;
6647         int cpu = tracing_get_cpu(inode);
6648         struct trace_seq *s;
6649         unsigned long cnt;
6650         unsigned long long t;
6651         unsigned long usec_rem;
6652
6653         s = kmalloc(sizeof(*s), GFP_KERNEL);
6654         if (!s)
6655                 return -ENOMEM;
6656
6657         trace_seq_init(s);
6658
6659         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6660         trace_seq_printf(s, "entries: %ld\n", cnt);
6661
6662         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6663         trace_seq_printf(s, "overrun: %ld\n", cnt);
6664
6665         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6666         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6667
6668         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6669         trace_seq_printf(s, "bytes: %ld\n", cnt);
6670
6671         if (trace_clocks[tr->clock_id].in_ns) {
6672                 /* local or global for trace_clock */
6673                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6674                 usec_rem = do_div(t, USEC_PER_SEC);
6675                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6676                                                                 t, usec_rem);
6677
6678                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6679                 usec_rem = do_div(t, USEC_PER_SEC);
6680                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6681         } else {
6682                 /* counter or tsc mode for trace_clock */
6683                 trace_seq_printf(s, "oldest event ts: %llu\n",
6684                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6685
6686                 trace_seq_printf(s, "now ts: %llu\n",
6687                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6688         }
6689
6690         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6691         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6692
6693         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6694         trace_seq_printf(s, "read events: %ld\n", cnt);
6695
6696         count = simple_read_from_buffer(ubuf, count, ppos,
6697                                         s->buffer, trace_seq_used(s));
6698
6699         kfree(s);
6700
6701         return count;
6702 }
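
/*
 * Example output (illustrative only, values made up): with a clock that
 * counts in nanoseconds, the per_cpu/cpuN/stats file assembled above
 * reads roughly:
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 65536
 *   oldest event ts:  1234.000010
 *   now ts:  1234.567890
 *   dropped events: 0
 *   read events: 128
 */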
6703
6704 static const struct file_operations tracing_stats_fops = {
6705         .open           = tracing_open_generic_tr,
6706         .read           = tracing_stats_read,
6707         .llseek         = generic_file_llseek,
6708         .release        = tracing_release_generic_tr,
6709 };
6710
6711 #ifdef CONFIG_DYNAMIC_FTRACE
6712
6713 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6714 {
6715         return 0;
6716 }
6717
6718 static ssize_t
6719 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6720                   size_t cnt, loff_t *ppos)
6721 {
6722         static char ftrace_dyn_info_buffer[1024];
6723         static DEFINE_MUTEX(dyn_info_mutex);
6724         unsigned long *p = filp->private_data;
6725         char *buf = ftrace_dyn_info_buffer;
6726         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6727         int r;
6728
6729         mutex_lock(&dyn_info_mutex);
6730         r = sprintf(buf, "%ld ", *p);
6731
6732         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6733         buf[r++] = '\n';
6734
6735         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6736
6737         mutex_unlock(&dyn_info_mutex);
6738
6739         return r;
6740 }
6741
6742 static const struct file_operations tracing_dyn_info_fops = {
6743         .open           = tracing_open_generic,
6744         .read           = tracing_read_dyn_info,
6745         .llseek         = generic_file_llseek,
6746 };
6747 #endif /* CONFIG_DYNAMIC_FTRACE */
6748
6749 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6750 static void
6751 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6752                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6753                 void *data)
6754 {
6755         tracing_snapshot_instance(tr);
6756 }
6757
6758 static void
6759 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6760                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6761                       void *data)
6762 {
6763         struct ftrace_func_mapper *mapper = data;
6764         long *count = NULL;
6765
6766         if (mapper)
6767                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6768
6769         if (count) {
6770
6771                 if (*count <= 0)
6772                         return;
6773
6774                 (*count)--;
6775         }
6776
6777         tracing_snapshot_instance(tr);
6778 }
6779
6780 static int
6781 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6782                       struct ftrace_probe_ops *ops, void *data)
6783 {
6784         struct ftrace_func_mapper *mapper = data;
6785         long *count = NULL;
6786
6787         seq_printf(m, "%ps:", (void *)ip);
6788
6789         seq_puts(m, "snapshot");
6790
6791         if (mapper)
6792                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6793
6794         if (count)
6795                 seq_printf(m, ":count=%ld\n", *count);
6796         else
6797                 seq_puts(m, ":unlimited\n");
6798
6799         return 0;
6800 }
6801
6802 static int
6803 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6804                      unsigned long ip, void *init_data, void **data)
6805 {
6806         struct ftrace_func_mapper *mapper = *data;
6807
6808         if (!mapper) {
6809                 mapper = allocate_ftrace_func_mapper();
6810                 if (!mapper)
6811                         return -ENOMEM;
6812                 *data = mapper;
6813         }
6814
6815         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6816 }
6817
6818 static void
6819 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6820                      unsigned long ip, void *data)
6821 {
6822         struct ftrace_func_mapper *mapper = data;
6823
6824         if (!ip) {
6825                 if (!mapper)
6826                         return;
6827                 free_ftrace_func_mapper(mapper, NULL);
6828                 return;
6829         }
6830
6831         ftrace_func_mapper_remove_ip(mapper, ip);
6832 }
6833
6834 static struct ftrace_probe_ops snapshot_probe_ops = {
6835         .func                   = ftrace_snapshot,
6836         .print                  = ftrace_snapshot_print,
6837 };
6838
6839 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6840         .func                   = ftrace_count_snapshot,
6841         .print                  = ftrace_snapshot_print,
6842         .init                   = ftrace_snapshot_init,
6843         .free                   = ftrace_snapshot_free,
6844 };
6845
6846 static int
6847 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6848                                char *glob, char *cmd, char *param, int enable)
6849 {
6850         struct ftrace_probe_ops *ops;
6851         void *count = (void *)-1;
6852         char *number;
6853         int ret;
6854
6855         /* hash funcs only work with set_ftrace_filter */
6856         if (!enable)
6857                 return -EINVAL;
6858
6859         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6860
6861         if (glob[0] == '!')
6862                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6863
6864         if (!param)
6865                 goto out_reg;
6866
6867         number = strsep(&param, ":");
6868
6869         if (!strlen(number))
6870                 goto out_reg;
6871
6872         /*
6873          * We use the callback data field (which is a pointer)
6874          * as our counter.
6875          */
6876         ret = kstrtoul(number, 0, (unsigned long *)&count);
6877         if (ret)
6878                 return ret;
6879
6880  out_reg:
6881         ret = register_ftrace_function_probe(glob, tr, ops, count);
6882
6883         if (ret >= 0)
6884                 alloc_snapshot(tr);
6885
6886         return ret < 0 ? ret : 0;
6887 }
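
/*
 * Usage sketch (assuming the usual tracefs mount point; see
 * Documentation/trace/ftrace.txt for the authoritative syntax).  The
 * callback above parses lines written to set_ftrace_filter such as:
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter      # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter    # only the first 5 hits
 *   echo '!schedule:snapshot' > set_ftrace_filter     # remove the probe again
 */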
6888
6889 static struct ftrace_func_command ftrace_snapshot_cmd = {
6890         .name                   = "snapshot",
6891         .func                   = ftrace_trace_snapshot_callback,
6892 };
6893
6894 static __init int register_snapshot_cmd(void)
6895 {
6896         return register_ftrace_command(&ftrace_snapshot_cmd);
6897 }
6898 #else
6899 static inline __init int register_snapshot_cmd(void) { return 0; }
6900 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6901
6902 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6903 {
6904         if (WARN_ON(!tr->dir))
6905                 return ERR_PTR(-ENODEV);
6906
6907         /* Top directory uses NULL as the parent */
6908         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6909                 return NULL;
6910
6911         /* All sub buffers have a descriptor */
6912         return tr->dir;
6913 }
6914
6915 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6916 {
6917         struct dentry *d_tracer;
6918
6919         if (tr->percpu_dir)
6920                 return tr->percpu_dir;
6921
6922         d_tracer = tracing_get_dentry(tr);
6923         if (IS_ERR(d_tracer))
6924                 return NULL;
6925
6926         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6927
6928         WARN_ONCE(!tr->percpu_dir,
6929                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6930
6931         return tr->percpu_dir;
6932 }
6933
6934 static struct dentry *
6935 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6936                       void *data, long cpu, const struct file_operations *fops)
6937 {
6938         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6939
6940         if (ret) /* See tracing_get_cpu() */
6941                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6942         return ret;
6943 }
6944
6945 static void
6946 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6947 {
6948         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6949         struct dentry *d_cpu;
6950         char cpu_dir[30]; /* 30 characters should be more than enough */
6951
6952         if (!d_percpu)
6953                 return;
6954
6955         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6956         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6957         if (!d_cpu) {
6958                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6959                 return;
6960         }
6961
6962         /* per cpu trace_pipe */
6963         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6964                                 tr, cpu, &tracing_pipe_fops);
6965
6966         /* per cpu trace */
6967         trace_create_cpu_file("trace", 0644, d_cpu,
6968                                 tr, cpu, &tracing_fops);
6969
6970         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6971                                 tr, cpu, &tracing_buffers_fops);
6972
6973         trace_create_cpu_file("stats", 0444, d_cpu,
6974                                 tr, cpu, &tracing_stats_fops);
6975
6976         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6977                                 tr, cpu, &tracing_entries_fops);
6978
6979 #ifdef CONFIG_TRACER_SNAPSHOT
6980         trace_create_cpu_file("snapshot", 0644, d_cpu,
6981                                 tr, cpu, &snapshot_fops);
6982
6983         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6984                                 tr, cpu, &snapshot_raw_fops);
6985 #endif
6986 }
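
/*
 * For reference (summary of the calls above): each tracing CPU gets a
 * tracefs directory of the form
 *
 *   per_cpu/cpu0/
 *     trace          trace_pipe     trace_pipe_raw
 *     stats          buffer_size_kb
 *     snapshot       snapshot_raw   (with CONFIG_TRACER_SNAPSHOT)
 */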
6987
6988 #ifdef CONFIG_FTRACE_SELFTEST
6989 /* Let selftest have access to static functions in this file */
6990 #include "trace_selftest.c"
6991 #endif
6992
6993 static ssize_t
6994 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6995                         loff_t *ppos)
6996 {
6997         struct trace_option_dentry *topt = filp->private_data;
6998         char *buf;
6999
7000         if (topt->flags->val & topt->opt->bit)
7001                 buf = "1\n";
7002         else
7003                 buf = "0\n";
7004
7005         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7006 }
7007
7008 static ssize_t
7009 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7010                          loff_t *ppos)
7011 {
7012         struct trace_option_dentry *topt = filp->private_data;
7013         unsigned long val;
7014         int ret;
7015
7016         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7017         if (ret)
7018                 return ret;
7019
7020         if (val != 0 && val != 1)
7021                 return -EINVAL;
7022
7023         if (!!(topt->flags->val & topt->opt->bit) != val) {
7024                 mutex_lock(&trace_types_lock);
7025                 ret = __set_tracer_option(topt->tr, topt->flags,
7026                                           topt->opt, !val);
7027                 mutex_unlock(&trace_types_lock);
7028                 if (ret)
7029                         return ret;
7030         }
7031
7032         *ppos += cnt;
7033
7034         return cnt;
7035 }
7036
7037
7038 static const struct file_operations trace_options_fops = {
7039         .open = tracing_open_generic,
7040         .read = trace_options_read,
7041         .write = trace_options_write,
7042         .llseek = generic_file_llseek,
7043 };
7044
7045 /*
7046  * In order to pass in both the trace_array descriptor as well as the index
7047  * to the flag that the trace option file represents, the trace_array
7048  * has a character array of trace_flags_index[], which holds the index
7049  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7050  * The address of the flag's element in this array is passed to the flag
7051  * option file read/write callbacks.
7052  *
7053  * In order to extract both the index and the trace_array descriptor,
7054  * get_tr_index() uses the following algorithm.
7055  *
7056  *   idx = *ptr;
7057  *
7058  * Because ptr points at index[idx] and index[idx] == idx (remember
7059  * index[1] == 1), dereferencing the pointer yields the index itself.
7060  *
7061  * Then, to get the trace_array descriptor, subtract that index from
7062  * ptr to get back to the start of the array:
7063  *
7064  *   ptr - idx == &index[0]
7065  *
7066  * Then a simple container_of() from that pointer gets us to the
7067  * trace_array descriptor.
7068  */
7069 static void get_tr_index(void *data, struct trace_array **ptr,
7070                          unsigned int *pindex)
7071 {
7072         *pindex = *(unsigned char *)data;
7073
7074         *ptr = container_of(data - *pindex, struct trace_array,
7075                             trace_flags_index);
7076 }
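
/*
 * Worked example (illustrative): suppose data points at
 * tr->trace_flags_index[3] of some trace_array.  Then:
 *
 *   *pindex = *(unsigned char *)data;    == 3, since index[3] == 3
 *   data - *pindex                       == &tr->trace_flags_index[0]
 *   container_of(...)                    == tr
 *
 * which is exactly the recovery described in the comment above.
 */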
7077
7078 static ssize_t
7079 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7080                         loff_t *ppos)
7081 {
7082         void *tr_index = filp->private_data;
7083         struct trace_array *tr;
7084         unsigned int index;
7085         char *buf;
7086
7087         get_tr_index(tr_index, &tr, &index);
7088
7089         if (tr->trace_flags & (1 << index))
7090                 buf = "1\n";
7091         else
7092                 buf = "0\n";
7093
7094         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7095 }
7096
7097 static ssize_t
7098 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7099                          loff_t *ppos)
7100 {
7101         void *tr_index = filp->private_data;
7102         struct trace_array *tr;
7103         unsigned int index;
7104         unsigned long val;
7105         int ret;
7106
7107         get_tr_index(tr_index, &tr, &index);
7108
7109         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7110         if (ret)
7111                 return ret;
7112
7113         if (val != 0 && val != 1)
7114                 return -EINVAL;
7115
7116         mutex_lock(&trace_types_lock);
7117         ret = set_tracer_flag(tr, 1 << index, val);
7118         mutex_unlock(&trace_types_lock);
7119
7120         if (ret < 0)
7121                 return ret;
7122
7123         *ppos += cnt;
7124
7125         return cnt;
7126 }
7127
7128 static const struct file_operations trace_options_core_fops = {
7129         .open = tracing_open_generic,
7130         .read = trace_options_core_read,
7131         .write = trace_options_core_write,
7132         .llseek = generic_file_llseek,
7133 };
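
/*
 * Usage sketch (assuming a tracefs mount at /sys/kernel/debug/tracing):
 * each file under options/ accepts only "0" or "1", as enforced by
 * trace_options_core_write()/trace_options_write() above, e.g.:
 *
 *   echo 1 > options/<flag-name>
 *   echo 0 > options/<flag-name>
 */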
7134
7135 struct dentry *trace_create_file(const char *name,
7136                                  umode_t mode,
7137                                  struct dentry *parent,
7138                                  void *data,
7139                                  const struct file_operations *fops)
7140 {
7141         struct dentry *ret;
7142
7143         ret = tracefs_create_file(name, mode, parent, data, fops);
7144         if (!ret)
7145                 pr_warn("Could not create tracefs '%s' entry\n", name);
7146
7147         return ret;
7148 }
7149
7150
7151 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7152 {
7153         struct dentry *d_tracer;
7154
7155         if (tr->options)
7156                 return tr->options;
7157
7158         d_tracer = tracing_get_dentry(tr);
7159         if (IS_ERR(d_tracer))
7160                 return NULL;
7161
7162         tr->options = tracefs_create_dir("options", d_tracer);
7163         if (!tr->options) {
7164                 pr_warn("Could not create tracefs directory 'options'\n");
7165                 return NULL;
7166         }
7167
7168         return tr->options;
7169 }
7170
7171 static void
7172 create_trace_option_file(struct trace_array *tr,
7173                          struct trace_option_dentry *topt,
7174                          struct tracer_flags *flags,
7175                          struct tracer_opt *opt)
7176 {
7177         struct dentry *t_options;
7178
7179         t_options = trace_options_init_dentry(tr);
7180         if (!t_options)
7181                 return;
7182
7183         topt->flags = flags;
7184         topt->opt = opt;
7185         topt->tr = tr;
7186
7187         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7188                                     &trace_options_fops);
7189
7190 }
7191
7192 static void
7193 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7194 {
7195         struct trace_option_dentry *topts;
7196         struct trace_options *tr_topts;
7197         struct tracer_flags *flags;
7198         struct tracer_opt *opts;
7199         int cnt;
7200         int i;
7201
7202         if (!tracer)
7203                 return;
7204
7205         flags = tracer->flags;
7206
7207         if (!flags || !flags->opts)
7208                 return;
7209
7210         /*
7211          * If this is an instance, only create flags for tracers
7212          * the instance may have.
7213          */
7214         if (!trace_ok_for_array(tracer, tr))
7215                 return;
7216
7217         for (i = 0; i < tr->nr_topts; i++) {
7218                 /* Make sure there are no duplicate flags. */
7219                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7220                         return;
7221         }
7222
7223         opts = flags->opts;
7224
7225         for (cnt = 0; opts[cnt].name; cnt++)
7226                 ;
7227
7228         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7229         if (!topts)
7230                 return;
7231
7232         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7233                             GFP_KERNEL);
7234         if (!tr_topts) {
7235                 kfree(topts);
7236                 return;
7237         }
7238
7239         tr->topts = tr_topts;
7240         tr->topts[tr->nr_topts].tracer = tracer;
7241         tr->topts[tr->nr_topts].topts = topts;
7242         tr->nr_topts++;
7243
7244         for (cnt = 0; opts[cnt].name; cnt++) {
7245                 create_trace_option_file(tr, &topts[cnt], flags,
7246                                          &opts[cnt]);
7247                 WARN_ONCE(topts[cnt].entry == NULL,
7248                           "Failed to create trace option: %s",
7249                           opts[cnt].name);
7250         }
7251 }
7252
7253 static struct dentry *
7254 create_trace_option_core_file(struct trace_array *tr,
7255                               const char *option, long index)
7256 {
7257         struct dentry *t_options;
7258
7259         t_options = trace_options_init_dentry(tr);
7260         if (!t_options)
7261                 return NULL;
7262
7263         return trace_create_file(option, 0644, t_options,
7264                                  (void *)&tr->trace_flags_index[index],
7265                                  &trace_options_core_fops);
7266 }
7267
7268 static void create_trace_options_dir(struct trace_array *tr)
7269 {
7270         struct dentry *t_options;
7271         bool top_level = tr == &global_trace;
7272         int i;
7273
7274         t_options = trace_options_init_dentry(tr);
7275         if (!t_options)
7276                 return;
7277
7278         for (i = 0; trace_options[i]; i++) {
7279                 if (top_level ||
7280                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7281                         create_trace_option_core_file(tr, trace_options[i], i);
7282         }
7283 }
7284
7285 static ssize_t
7286 rb_simple_read(struct file *filp, char __user *ubuf,
7287                size_t cnt, loff_t *ppos)
7288 {
7289         struct trace_array *tr = filp->private_data;
7290         char buf[64];
7291         int r;
7292
7293         r = tracer_tracing_is_on(tr);
7294         r = sprintf(buf, "%d\n", r);
7295
7296         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7297 }
7298
7299 static ssize_t
7300 rb_simple_write(struct file *filp, const char __user *ubuf,
7301                 size_t cnt, loff_t *ppos)
7302 {
7303         struct trace_array *tr = filp->private_data;
7304         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7305         unsigned long val;
7306         int ret;
7307
7308         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7309         if (ret)
7310                 return ret;
7311
7312         if (buffer) {
7313                 mutex_lock(&trace_types_lock);
7314                 if (val) {
7315                         tracer_tracing_on(tr);
7316                         if (tr->current_trace->start)
7317                                 tr->current_trace->start(tr);
7318                 } else {
7319                         tracer_tracing_off(tr);
7320                         if (tr->current_trace->stop)
7321                                 tr->current_trace->stop(tr);
7322                 }
7323                 mutex_unlock(&trace_types_lock);
7324         }
7325
7326         (*ppos)++;
7327
7328         return cnt;
7329 }
7330
7331 static const struct file_operations rb_simple_fops = {
7332         .open           = tracing_open_generic_tr,
7333         .read           = rb_simple_read,
7334         .write          = rb_simple_write,
7335         .release        = tracing_release_generic_tr,
7336         .llseek         = default_llseek,
7337 };
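
/*
 * The "tracing_on" file created from these fops (see init_tracer_tracefs()
 * below) is the switch referred to in the ftrace_dump() comment further
 * down, e.g. (tracefs mount point assumed):
 *
 *   echo 0 > tracing_on     # stop writing to the ring buffer
 *   echo 1 > tracing_on     # resume
 *   cat tracing_on          # read back the current state
 */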
7338
7339 struct dentry *trace_instance_dir;
7340
7341 static void
7342 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7343
7344 static int
7345 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7346 {
7347         enum ring_buffer_flags rb_flags;
7348
7349         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7350
7351         buf->tr = tr;
7352
7353         buf->buffer = ring_buffer_alloc(size, rb_flags);
7354         if (!buf->buffer)
7355                 return -ENOMEM;
7356
7357         buf->data = alloc_percpu(struct trace_array_cpu);
7358         if (!buf->data) {
7359                 ring_buffer_free(buf->buffer);
7360                 return -ENOMEM;
7361         }
7362
7363         /* Allocate the first page for all buffers */
7364         set_buffer_entries(&tr->trace_buffer,
7365                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7366
7367         return 0;
7368 }
7369
7370 static int allocate_trace_buffers(struct trace_array *tr, int size)
7371 {
7372         int ret;
7373
7374         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7375         if (ret)
7376                 return ret;
7377
7378 #ifdef CONFIG_TRACER_MAX_TRACE
7379         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7380                                     allocate_snapshot ? size : 1);
7381         if (WARN_ON(ret)) {
7382                 ring_buffer_free(tr->trace_buffer.buffer);
7383                 free_percpu(tr->trace_buffer.data);
7384                 return -ENOMEM;
7385         }
7386         tr->allocated_snapshot = allocate_snapshot;
7387
7388         /*
7389          * Only the top level trace array gets its snapshot allocated
7390          * from the kernel command line.
7391          */
7392         allocate_snapshot = false;
7393 #endif
7394         return 0;
7395 }
7396
7397 static void free_trace_buffer(struct trace_buffer *buf)
7398 {
7399         if (buf->buffer) {
7400                 ring_buffer_free(buf->buffer);
7401                 buf->buffer = NULL;
7402                 free_percpu(buf->data);
7403                 buf->data = NULL;
7404         }
7405 }
7406
7407 static void free_trace_buffers(struct trace_array *tr)
7408 {
7409         if (!tr)
7410                 return;
7411
7412         free_trace_buffer(&tr->trace_buffer);
7413
7414 #ifdef CONFIG_TRACER_MAX_TRACE
7415         free_trace_buffer(&tr->max_buffer);
7416 #endif
7417 }
7418
7419 static void init_trace_flags_index(struct trace_array *tr)
7420 {
7421         int i;
7422
7423         /* Used by the trace options files */
7424         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7425                 tr->trace_flags_index[i] = i;
7426 }
7427
7428 static void __update_tracer_options(struct trace_array *tr)
7429 {
7430         struct tracer *t;
7431
7432         for (t = trace_types; t; t = t->next)
7433                 add_tracer_options(tr, t);
7434 }
7435
7436 static void update_tracer_options(struct trace_array *tr)
7437 {
7438         mutex_lock(&trace_types_lock);
7439         __update_tracer_options(tr);
7440         mutex_unlock(&trace_types_lock);
7441 }
7442
7443 static int instance_mkdir(const char *name)
7444 {
7445         struct trace_array *tr;
7446         int ret;
7447
7448         mutex_lock(&trace_types_lock);
7449
7450         ret = -EEXIST;
7451         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7452                 if (tr->name && strcmp(tr->name, name) == 0)
7453                         goto out_unlock;
7454         }
7455
7456         ret = -ENOMEM;
7457         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7458         if (!tr)
7459                 goto out_unlock;
7460
7461         tr->name = kstrdup(name, GFP_KERNEL);
7462         if (!tr->name)
7463                 goto out_free_tr;
7464
7465         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7466                 goto out_free_tr;
7467
7468         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7469
7470         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7471
7472         raw_spin_lock_init(&tr->start_lock);
7473
7474         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7475
7476         tr->current_trace = &nop_trace;
7477
7478         INIT_LIST_HEAD(&tr->systems);
7479         INIT_LIST_HEAD(&tr->events);
7480
7481         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7482                 goto out_free_tr;
7483
7484         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7485         if (!tr->dir)
7486                 goto out_free_tr;
7487
7488         ret = event_trace_add_tracer(tr->dir, tr);
7489         if (ret) {
7490                 tracefs_remove_recursive(tr->dir);
7491                 goto out_free_tr;
7492         }
7493
7494         ftrace_init_trace_array(tr);
7495
7496         init_tracer_tracefs(tr, tr->dir);
7497         init_trace_flags_index(tr);
7498         __update_tracer_options(tr);
7499
7500         list_add(&tr->list, &ftrace_trace_arrays);
7501
7502         mutex_unlock(&trace_types_lock);
7503
7504         return 0;
7505
7506  out_free_tr:
7507         free_trace_buffers(tr);
7508         free_cpumask_var(tr->tracing_cpumask);
7509         kfree(tr->name);
7510         kfree(tr);
7511
7512  out_unlock:
7513         mutex_unlock(&trace_types_lock);
7514
7515         return ret;
7516
7517 }
7518
7519 static int instance_rmdir(const char *name)
7520 {
7521         struct trace_array *tr;
7522         int found = 0;
7523         int ret;
7524         int i;
7525
7526         mutex_lock(&trace_types_lock);
7527
7528         ret = -ENODEV;
7529         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7530                 if (tr->name && strcmp(tr->name, name) == 0) {
7531                         found = 1;
7532                         break;
7533                 }
7534         }
7535         if (!found)
7536                 goto out_unlock;
7537
7538         ret = -EBUSY;
7539         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7540                 goto out_unlock;
7541
7542         list_del(&tr->list);
7543
7544         /* Disable all the flags that were enabled coming in */
7545         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7546                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7547                         set_tracer_flag(tr, 1 << i, 0);
7548         }
7549
7550         tracing_set_nop(tr);
7551         event_trace_del_tracer(tr);
7552         ftrace_destroy_function_files(tr);
7553         tracefs_remove_recursive(tr->dir);
7554         free_trace_buffers(tr);
7555
7556         for (i = 0; i < tr->nr_topts; i++) {
7557                 kfree(tr->topts[i].topts);
7558         }
7559         kfree(tr->topts);
7560
7561         kfree(tr->name);
7562         kfree(tr);
7563
7564         ret = 0;
7565
7566  out_unlock:
7567         mutex_unlock(&trace_types_lock);
7568
7569         return ret;
7570 }
7571
7572 static __init void create_trace_instances(struct dentry *d_tracer)
7573 {
7574         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7575                                                          instance_mkdir,
7576                                                          instance_rmdir);
7577         if (WARN_ON(!trace_instance_dir))
7578                 return;
7579 }
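
/*
 * Usage sketch (assuming a tracefs mount at /sys/kernel/debug/tracing):
 * the instances directory registered above turns mkdir/rmdir into
 * instance_mkdir()/instance_rmdir() calls:
 *
 *   mkdir /sys/kernel/debug/tracing/instances/foo   # create trace_array "foo"
 *   rmdir /sys/kernel/debug/tracing/instances/foo   # tear it down again
 */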
7580
7581 static void
7582 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7583 {
7584         int cpu;
7585
7586         trace_create_file("available_tracers", 0444, d_tracer,
7587                         tr, &show_traces_fops);
7588
7589         trace_create_file("current_tracer", 0644, d_tracer,
7590                         tr, &set_tracer_fops);
7591
7592         trace_create_file("tracing_cpumask", 0644, d_tracer,
7593                           tr, &tracing_cpumask_fops);
7594
7595         trace_create_file("trace_options", 0644, d_tracer,
7596                           tr, &tracing_iter_fops);
7597
7598         trace_create_file("trace", 0644, d_tracer,
7599                           tr, &tracing_fops);
7600
7601         trace_create_file("trace_pipe", 0444, d_tracer,
7602                           tr, &tracing_pipe_fops);
7603
7604         trace_create_file("buffer_size_kb", 0644, d_tracer,
7605                           tr, &tracing_entries_fops);
7606
7607         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7608                           tr, &tracing_total_entries_fops);
7609
7610         trace_create_file("free_buffer", 0200, d_tracer,
7611                           tr, &tracing_free_buffer_fops);
7612
7613         trace_create_file("trace_marker", 0220, d_tracer,
7614                           tr, &tracing_mark_fops);
7615
7616         trace_create_file("trace_marker_raw", 0220, d_tracer,
7617                           tr, &tracing_mark_raw_fops);
7618
7619         trace_create_file("trace_clock", 0644, d_tracer, tr,
7620                           &trace_clock_fops);
7621
7622         trace_create_file("tracing_on", 0644, d_tracer,
7623                           tr, &rb_simple_fops);
7624
7625         create_trace_options_dir(tr);
7626
7627 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7628         trace_create_file("tracing_max_latency", 0644, d_tracer,
7629                         &tr->max_latency, &tracing_max_lat_fops);
7630 #endif
7631
7632         if (ftrace_create_function_files(tr, d_tracer))
7633                 WARN(1, "Could not allocate function filter files");
7634
7635 #ifdef CONFIG_TRACER_SNAPSHOT
7636         trace_create_file("snapshot", 0644, d_tracer,
7637                           tr, &snapshot_fops);
7638 #endif
7639
7640         for_each_tracing_cpu(cpu)
7641                 tracing_init_tracefs_percpu(tr, cpu);
7642
7643         ftrace_init_tracefs(tr, d_tracer);
7644 }
7645
7646 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7647 {
7648         struct vfsmount *mnt;
7649         struct file_system_type *type;
7650
7651         /*
7652          * To maintain backward compatibility for tools that mount
7653          * debugfs to get to the tracing facility, tracefs is automatically
7654          * mounted to the debugfs/tracing directory.
7655          */
7656         type = get_fs_type("tracefs");
7657         if (!type)
7658                 return NULL;
7659         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7660         put_filesystem(type);
7661         if (IS_ERR(mnt))
7662                 return NULL;
7663         mntget(mnt);
7664
7665         return mnt;
7666 }
7667
7668 /**
7669  * tracing_init_dentry - initialize top level trace array
7670  *
7671  * This is called when creating files or directories in the tracing
7672  * directory. It is called via fs_initcall() by any of the boot up code
7673  * and expects to return the dentry of the top level tracing directory.
7674  */
7675 struct dentry *tracing_init_dentry(void)
7676 {
7677         struct trace_array *tr = &global_trace;
7678
7679         /* The top level trace array uses NULL as parent */
7680         if (tr->dir)
7681                 return NULL;
7682
7683         if (WARN_ON(!tracefs_initialized()) ||
7684                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7685                  WARN_ON(!debugfs_initialized())))
7686                 return ERR_PTR(-ENODEV);
7687
7688         /*
7689          * As there may still be users that expect the tracing
7690          * files to exist in debugfs/tracing, we must automount
7691          * the tracefs file system there, so older tools still
7692          * work with the newer kernel.
7693          */
7694         tr->dir = debugfs_create_automount("tracing", NULL,
7695                                            trace_automount, NULL);
7696         if (!tr->dir) {
7697                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7698                 return ERR_PTR(-ENOMEM);
7699         }
7700
7701         return NULL;
7702 }
7703
7704 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7705 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7706
7707 static void __init trace_enum_init(void)
7708 {
7709         int len;
7710
7711         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7712         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7713 }
7714
7715 #ifdef CONFIG_MODULES
7716 static void trace_module_add_enums(struct module *mod)
7717 {
7718         if (!mod->num_trace_enums)
7719                 return;
7720
7721         /*
7722          * Modules with bad taint do not have events created, do
7723          * not bother with enums either.
7724          */
7725         if (trace_module_has_bad_taint(mod))
7726                 return;
7727
7728         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7729 }
7730
7731 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7732 static void trace_module_remove_enums(struct module *mod)
7733 {
7734         union trace_enum_map_item *map;
7735         union trace_enum_map_item **last = &trace_enum_maps;
7736
7737         if (!mod->num_trace_enums)
7738                 return;
7739
7740         mutex_lock(&trace_enum_mutex);
7741
7742         map = trace_enum_maps;
7743
7744         while (map) {
7745                 if (map->head.mod == mod)
7746                         break;
7747                 map = trace_enum_jmp_to_tail(map);
7748                 last = &map->tail.next;
7749                 map = map->tail.next;
7750         }
7751         if (!map)
7752                 goto out;
7753
7754         *last = trace_enum_jmp_to_tail(map)->tail.next;
7755         kfree(map);
7756  out:
7757         mutex_unlock(&trace_enum_mutex);
7758 }
7759 #else
7760 static inline void trace_module_remove_enums(struct module *mod) { }
7761 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7762
7763 static int trace_module_notify(struct notifier_block *self,
7764                                unsigned long val, void *data)
7765 {
7766         struct module *mod = data;
7767
7768         switch (val) {
7769         case MODULE_STATE_COMING:
7770                 trace_module_add_enums(mod);
7771                 break;
7772         case MODULE_STATE_GOING:
7773                 trace_module_remove_enums(mod);
7774                 break;
7775         }
7776
7777         return 0;
7778 }
7779
7780 static struct notifier_block trace_module_nb = {
7781         .notifier_call = trace_module_notify,
7782         .priority = 0,
7783 };
7784 #endif /* CONFIG_MODULES */
7785
7786 static __init int tracer_init_tracefs(void)
7787 {
7788         struct dentry *d_tracer;
7789
7790         trace_access_lock_init();
7791
7792         d_tracer = tracing_init_dentry();
7793         if (IS_ERR(d_tracer))
7794                 return 0;
7795
7796         init_tracer_tracefs(&global_trace, d_tracer);
7797         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7798
7799         trace_create_file("tracing_thresh", 0644, d_tracer,
7800                         &global_trace, &tracing_thresh_fops);
7801
7802         trace_create_file("README", 0444, d_tracer,
7803                         NULL, &tracing_readme_fops);
7804
7805         trace_create_file("saved_cmdlines", 0444, d_tracer,
7806                         NULL, &tracing_saved_cmdlines_fops);
7807
7808         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7809                           NULL, &tracing_saved_cmdlines_size_fops);
7810
7811         trace_enum_init();
7812
7813         trace_create_enum_file(d_tracer);
7814
7815 #ifdef CONFIG_MODULES
7816         register_module_notifier(&trace_module_nb);
7817 #endif
7818
7819 #ifdef CONFIG_DYNAMIC_FTRACE
7820         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7821                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7822 #endif
7823
7824         create_trace_instances(d_tracer);
7825
7826         update_tracer_options(&global_trace);
7827
7828         return 0;
7829 }
7830
7831 static int trace_panic_handler(struct notifier_block *this,
7832                                unsigned long event, void *unused)
7833 {
7834         if (ftrace_dump_on_oops)
7835                 ftrace_dump(ftrace_dump_on_oops);
7836         return NOTIFY_OK;
7837 }
7838
7839 static struct notifier_block trace_panic_notifier = {
7840         .notifier_call  = trace_panic_handler,
7841         .next           = NULL,
7842         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7843 };
7844
7845 static int trace_die_handler(struct notifier_block *self,
7846                              unsigned long val,
7847                              void *data)
7848 {
7849         switch (val) {
7850         case DIE_OOPS:
7851                 if (ftrace_dump_on_oops)
7852                         ftrace_dump(ftrace_dump_on_oops);
7853                 break;
7854         default:
7855                 break;
7856         }
7857         return NOTIFY_OK;
7858 }
7859
7860 static struct notifier_block trace_die_notifier = {
7861         .notifier_call = trace_die_handler,
7862         .priority = 200
7863 };
7864
7865 /*
7866  * printk is set to a max of 1024; we really don't need it that big.
7867  * Nothing should be printing 1000 characters anyway.
7868  */
7869 #define TRACE_MAX_PRINT         1000
7870
7871 /*
7872  * Define here KERN_TRACE so that we have one place to modify
7873  * it if we decide to change what log level the ftrace dump
7874  * should be at.
7875  */
7876 #define KERN_TRACE              KERN_EMERG
7877
7878 void
7879 trace_printk_seq(struct trace_seq *s)
7880 {
7881         /* Probably should print a warning here. */
7882         if (s->seq.len >= TRACE_MAX_PRINT)
7883                 s->seq.len = TRACE_MAX_PRINT;
7884
7885         /*
7886          * More paranoid code. Although the buffer size is set to
7887          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7888          * an extra layer of protection.
7889          */
7890         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7891                 s->seq.len = s->seq.size - 1;
7892
7893         /* should be zero terminated, but we are paranoid. */
7894         s->buffer[s->seq.len] = 0;
7895
7896         printk(KERN_TRACE "%s", s->buffer);
7897
7898         trace_seq_init(s);
7899 }
7900
7901 void trace_init_global_iter(struct trace_iterator *iter)
7902 {
7903         iter->tr = &global_trace;
7904         iter->trace = iter->tr->current_trace;
7905         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7906         iter->trace_buffer = &global_trace.trace_buffer;
7907
7908         if (iter->trace && iter->trace->open)
7909                 iter->trace->open(iter);
7910
7911         /* Annotate start of buffers if we had overruns */
7912         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7913                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7914
7915         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7916         if (trace_clocks[iter->tr->clock_id].in_ns)
7917                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7918 }
7919
7920 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7921 {
7922         /* use static because iter can be a bit big for the stack */
7923         static struct trace_iterator iter;
7924         static atomic_t dump_running;
7925         struct trace_array *tr = &global_trace;
7926         unsigned int old_userobj;
7927         unsigned long flags;
7928         int cnt = 0, cpu;
7929
7930         /* Only allow one dump user at a time. */
7931         if (atomic_inc_return(&dump_running) != 1) {
7932                 atomic_dec(&dump_running);
7933                 return;
7934         }
7935
7936         /*
7937          * Always turn off tracing when we dump.
7938          * We don't need to show trace output of what happens
7939          * between multiple crashes.
7940          *
7941          * If the user does a sysrq-z, then they can re-enable
7942          * tracing with echo 1 > tracing_on.
7943          */
7944         tracing_off();
7945
7946         local_irq_save(flags);
7947
7948         /* Simulate the iterator */
7949         trace_init_global_iter(&iter);
7950
7951         for_each_tracing_cpu(cpu) {
7952                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7953         }
7954
7955         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7956
7957         /* don't look at user memory in panic mode */
7958         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7959
7960         switch (oops_dump_mode) {
7961         case DUMP_ALL:
7962                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7963                 break;
7964         case DUMP_ORIG:
7965                 iter.cpu_file = raw_smp_processor_id();
7966                 break;
7967         case DUMP_NONE:
7968                 goto out_enable;
7969         default:
7970                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7971                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7972         }
7973
7974         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7975
7976         /* Did function tracer already get disabled? */
7977         if (ftrace_is_dead()) {
7978                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7979                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7980         }
7981
7982         /*
7983          * We need to stop all tracing on all CPUS to read
7984          * the next buffer. This is a bit expensive, but is
7985          * not done often. We read everything we can,
7986          * and then release the locks again.
7987          */
7988
7989         while (!trace_empty(&iter)) {
7990
7991                 if (!cnt)
7992                         printk(KERN_TRACE "---------------------------------\n");
7993
7994                 cnt++;
7995
7996                 /* reset all but tr, trace, and overruns */
7997                 memset(&iter.seq, 0,
7998                        sizeof(struct trace_iterator) -
7999                        offsetof(struct trace_iterator, seq));
8000                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8001                 iter.pos = -1;
8002
8003                 if (trace_find_next_entry_inc(&iter) != NULL) {
8004                         int ret;
8005
8006                         ret = print_trace_line(&iter);
8007                         if (ret != TRACE_TYPE_NO_CONSUME)
8008                                 trace_consume(&iter);
8009                 }
8010                 touch_nmi_watchdog();
8011
8012                 trace_printk_seq(&iter.seq);
8013         }
8014
8015         if (!cnt)
8016                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8017         else
8018                 printk(KERN_TRACE "---------------------------------\n");
8019
8020  out_enable:
8021         tr->trace_flags |= old_userobj;
8022
8023         for_each_tracing_cpu(cpu) {
8024                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8025         }
8026         atomic_dec(&dump_running);
8027         local_irq_restore(flags);
8028 }
8029 EXPORT_SYMBOL_GPL(ftrace_dump);
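
/*
 * Illustrative sketch (not part of this file): because ftrace_dump() is
 * exported just above, a module can dump the trace buffers from its own
 * fatal-error path.  The driver function below is hypothetical; only the
 * ftrace_dump()/DUMP_ALL/DUMP_ORIG usage reflects the API defined here.
 */
#if 0   /* example only, never built as part of trace.c */
#include <linux/kernel.h>

static void my_driver_fatal_error(void)
{
        /* Print what every CPU has recorded; tracing is switched off first. */
        ftrace_dump(DUMP_ALL);

        /* DUMP_ORIG would instead limit output to the CPU calling this. */
}
#endif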
8030
8031 __init static int tracer_alloc_buffers(void)
8032 {
8033         int ring_buf_size;
8034         int ret = -ENOMEM;
8035
8036         /*
8037          * Make sure we don't accidentally add more trace options
8038          * than we have bits for.
8039          */
8040         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8041
8042         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8043                 goto out;
8044
8045         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8046                 goto out_free_buffer_mask;
8047
8048         /* Only allocate trace_printk buffers if a trace_printk exists */
8049         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8050                 /* Must be called before global_trace.buffer is allocated */
8051                 trace_printk_init_buffers();
8052
8053         /* To save memory, keep the ring buffer size to its minimum */
8054         if (ring_buffer_expanded)
8055                 ring_buf_size = trace_buf_size;
8056         else
8057                 ring_buf_size = 1;
8058
8059         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8060         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8061
8062         raw_spin_lock_init(&global_trace.start_lock);
8063
8064         /*
8065          * The prepare callback allocates some memory for the ring buffer. We
8066          * don't free the buffer if the CPU goes down, so that the user does
8067          * not lose any trace that was in it; the memory is removed once the
8068          * "instance" is removed.  (See the hotplug sketch after this function.)
8069          */
8070         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8071                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8072                                       NULL);
8073         if (ret < 0)
8074                 goto out_free_cpumask;
8075         /* Used for event triggers */
8076         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8077         if (!temp_buffer)
8078                 goto out_rm_hp_state;
8079
8080         if (trace_create_savedcmd() < 0)
8081                 goto out_free_temp_buffer;
8082
8083         /* TODO: make the number of buffers hot pluggable with CPUS */
8084         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8085                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8086                 WARN_ON(1);
8087                 goto out_free_savedcmd;
8088         }
8089
8090         if (global_trace.buffer_disabled)
8091                 tracing_off();
8092
8093         if (trace_boot_clock) {
8094                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8095                 if (ret < 0)
8096                         pr_warn("Trace clock %s not defined, going back to default\n",
8097                                 trace_boot_clock);
8098         }
8099
8100         /*
8101          * register_tracer() might reference current_trace, so it
8102          * needs to be set before we register anything. This is
8103          * just a bootstrap of current_trace anyway.
8104          */
8105         global_trace.current_trace = &nop_trace;
8106
8107         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8108
8109         ftrace_init_global_array_ops(&global_trace);
8110
8111         init_trace_flags_index(&global_trace);
8112
8113         register_tracer(&nop_trace);
8114
8115         /* Function tracing may start here (via kernel command line) */
8116         init_function_trace();
8117
8118         /* All seems OK, enable tracing */
8119         tracing_disabled = 0;
8120
8121         atomic_notifier_chain_register(&panic_notifier_list,
8122                                        &trace_panic_notifier);
8123
8124         register_die_notifier(&trace_die_notifier);
8125
8126         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8127
8128         INIT_LIST_HEAD(&global_trace.systems);
8129         INIT_LIST_HEAD(&global_trace.events);
8130         list_add(&global_trace.list, &ftrace_trace_arrays);
8131
8132         apply_trace_boot_options();
8133
8134         register_snapshot_cmd();
8135
8136         return 0;
8137
8138 out_free_savedcmd:
8139         free_saved_cmdlines_buffer(savedcmd);
8140 out_free_temp_buffer:
8141         ring_buffer_free(temp_buffer);
8142 out_rm_hp_state:
8143         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8144 out_free_cpumask:
8145         free_cpumask_var(global_trace.tracing_cpumask);
8146 out_free_buffer_mask:
8147         free_cpumask_var(tracing_buffer_mask);
8148 out:
8149         return ret;
8150 }
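
/*
 * Illustrative sketch (not part of this file): CPUHP_TRACE_RB_PREPARE, set up
 * in tracer_alloc_buffers() above, is a multi-instance hotplug state whose
 * callback, trace_rb_cpu_prepare(), runs for every registered ring buffer
 * when a CPU comes up.  The names below (my_obj, my_cpu_online, my_init) are
 * hypothetical, and the sketch uses a dynamic online-stage slot
 * (CPUHP_AP_ONLINE_DYN) rather than a fixed prepare-stage enum entry.
 */
#if 0   /* example only, never built as part of trace.c */
#include <linux/cpuhotplug.h>
#include <linux/list.h>

struct my_obj {
        struct hlist_node node;         /* links this instance into the state */
        /* per-CPU resources would hang off this object */
};

static enum cpuhp_state my_state;

/* Called for every registered instance when a CPU reaches the online step. */
static int my_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct my_obj *obj = hlist_entry(node, struct my_obj, node);

        /* Set up per-CPU resources for @cpu here. */
        (void)obj;
        return 0;
}

static int my_init(struct my_obj *obj)
{
        int ret;

        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "my/obj:online",
                                      my_cpu_online, NULL);
        if (ret < 0)
                return ret;
        my_state = ret;         /* the dynamic slot that was allocated */

        /* Runs my_cpu_online() for this instance on already-online CPUs. */
        return cpuhp_state_add_instance(my_state, &obj->node);
}
#endif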
8151
8152 void __init early_trace_init(void)
8153 {
8154         if (tracepoint_printk) {
8155                 tracepoint_print_iter =
8156                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8157                 if (WARN_ON(!tracepoint_print_iter))
8158                         tracepoint_printk = 0;
8159                 else
8160                         static_key_enable(&tracepoint_printk_key.key);
8161         }
8162         tracer_alloc_buffers();
8163 }
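
/*
 * Illustrative sketch (not part of this file): tracepoint_printk_key, enabled
 * above via static_key_enable(), is a static key, so the branch it guards
 * costs only a NOP on the fast path when jump labels are supported.  The key
 * and functions below are hypothetical; only the jump-label API itself is
 * real.
 */
#if 0   /* example only, never built as part of trace.c */
#include <linux/jump_label.h>
#include <linux/printk.h>

static DEFINE_STATIC_KEY_FALSE(my_feature_key);

static void my_hot_path(void)
{
        /* Compiles down to a NOP until the key is switched on. */
        if (static_branch_unlikely(&my_feature_key))
                pr_info("feature slow path taken\n");
}

static void my_enable_feature(void)
{
        /* Equivalent to the static_key_enable(&key.key) call above. */
        static_branch_enable(&my_feature_key);
}
#endif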
8164
8165 void __init trace_init(void)
8166 {
8167         trace_event_init();
8168 }
8169
8170 __init static int clear_boot_tracer(void)
8171 {
8172         /*
8173          * The buffer holding the default bootup tracer name lives in an
8174          * init section. This function runs as a late initcall: if the
8175          * boot tracer was never registered by then, clear the pointer so
8176          * that a later registration cannot access the buffer that is
8177          * about to be freed.
8178          */
8179         if (!default_bootup_tracer)
8180                 return 0;
8181
8182         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8183                default_bootup_tracer);
8184         default_bootup_tracer = NULL;
8185
8186         return 0;
8187 }
8188
8189 fs_initcall(tracer_init_tracefs);
8190 late_initcall(clear_boot_tracer);
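
/*
 * Illustrative sketch (not part of this file): the two initcall levels above
 * order the boot sequence -- fs_initcall(tracer_init_tracefs) runs before
 * late_initcall(clear_boot_tracer), so the tracefs files already exist when
 * the bootup tracer check runs.  The hook below is hypothetical and merely
 * shows how another late-stage check would be declared.
 */
#if 0   /* example only, never built as part of trace.c */
#include <linux/init.h>
#include <linux/printk.h>

static int __init my_late_check(void)
{
        /* Runs after every fs_initcall()-level function has completed. */
        pr_info("late initcall reached\n");
        return 0;
}
late_initcall(my_late_check);
#endif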