kernel/trace/trace.c (karo-tx-linux.git)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although some concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurred.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs,
115  * or to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
124 /* Map of enums to their values, for "enum_map" file */
125 struct trace_enum_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_enum_map_item;
131
132 struct trace_enum_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * from "mod" or "enum_string"
136          */
137         union trace_enum_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_enum_mutex);
142
143 /*
144  * The trace_enum_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved enum_map items.
149  */
150 union trace_enum_map_item {
151         struct trace_enum_map           map;
152         struct trace_enum_map_head      head;
153         struct trace_enum_map_tail      tail;
154 };
155
156 static union trace_enum_map_item *trace_enum_maps;
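/*
 * Illustrative layout, a sketch based on the description above: an array
 * saving N maps looks roughly like
 *
 *   [ head: mod, length = N ][ map 0 ] ... [ map N-1 ][ tail: next, end = NULL ]
 *
 * so a walker starts one element past the head item and knows it has hit
 * the tail when it reaches the entry whose "end" pointer is NULL.
 */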
157 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
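/*
 * Illustrative boot usage, based on the parser above: adding
 *
 *   ftrace_dump_on_oops
 *
 * to the kernel command line selects DUMP_ALL, while
 *
 *   ftrace_dump_on_oops=orig_cpu
 *
 * selects DUMP_ORIG so that only the buffer of the CPU that oopsed is dumped.
 */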
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
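/*
 * Worked example (illustrative): ns2usecs(1499) returns 1 and
 * ns2usecs(1500) returns 2, i.e. the +500 makes the division round to
 * the nearest microsecond instead of truncating.
 */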
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         TRACE_ITER_EVENT_FORK
261
262 /*
263  * The global_trace is the descriptor that holds the tracing
264  * buffers for the live tracing. For each CPU, it contains
265  * a linked list of pages that will store trace entries. The
266  * page descriptor of the pages in the memory is used to hold
267  * the linked list by linking the lru item in the page descriptor
268  * to each of the pages in the buffer per CPU.
269  *
270  * For each active CPU there is a data field that holds the
271  * pages for the buffer for that CPU. Each CPU has the same number
272  * of pages allocated for its buffer.
273  */
274 static struct trace_array global_trace = {
275         .trace_flags = TRACE_DEFAULT_FLAGS,
276 };
277
278 LIST_HEAD(ftrace_trace_arrays);
279
280 int trace_array_get(struct trace_array *this_tr)
281 {
282         struct trace_array *tr;
283         int ret = -ENODEV;
284
285         mutex_lock(&trace_types_lock);
286         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
287                 if (tr == this_tr) {
288                         tr->ref++;
289                         ret = 0;
290                         break;
291                 }
292         }
293         mutex_unlock(&trace_types_lock);
294
295         return ret;
296 }
297
298 static void __trace_array_put(struct trace_array *this_tr)
299 {
300         WARN_ON(!this_tr->ref);
301         this_tr->ref--;
302 }
303
304 void trace_array_put(struct trace_array *this_tr)
305 {
306         mutex_lock(&trace_types_lock);
307         __trace_array_put(this_tr);
308         mutex_unlock(&trace_types_lock);
309 }
310
311 int call_filter_check_discard(struct trace_event_call *call, void *rec,
312                               struct ring_buffer *buffer,
313                               struct ring_buffer_event *event)
314 {
315         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
316             !filter_match_preds(call->filter, rec)) {
317                 __trace_event_discard_commit(buffer, event);
318                 return 1;
319         }
320
321         return 0;
322 }
323
324 void trace_free_pid_list(struct trace_pid_list *pid_list)
325 {
326         vfree(pid_list->pids);
327         kfree(pid_list);
328 }
329
330 /**
331  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
332  * @filtered_pids: The list of pids to check
333  * @search_pid: The PID to find in @filtered_pids
334  *
335  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
336  */
337 bool
338 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
339 {
340         /*
341          * If pid_max changed after filtered_pids was created, we
342          * by default ignore all pids greater than the previous pid_max.
343          */
344         if (search_pid >= filtered_pids->pid_max)
345                 return false;
346
347         return test_bit(search_pid, filtered_pids->pids);
348 }
349
350 /**
351  * trace_ignore_this_task - should a task be ignored for tracing
352  * @filtered_pids: The list of pids to check
353  * @task: The task that should be ignored if not filtered
354  *
355  * Checks if @task should be traced or not from @filtered_pids.
356  * Returns true if @task should *NOT* be traced.
357  * Returns false if @task should be traced.
358  */
359 bool
360 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
361 {
362         /*
363          * Return false, because if filtered_pids does not exist,
364          * all pids are good to trace.
365          */
366         if (!filtered_pids)
367                 return false;
368
369         return !trace_find_filtered_pid(filtered_pids, task->pid);
370 }
371
372 /**
373  * trace_filter_add_remove_task - Add or remove a task from a pid_list
374  * @pid_list: The list to modify
375  * @self: The current task for fork or NULL for exit
376  * @task: The task to add or remove
377  *
378  * If adding a task, if @self is defined, the task is only added if @self
379  * is also included in @pid_list. This happens on fork and tasks should
380  * only be added when the parent is listed. If @self is NULL, then the
381  * @task pid will be removed from the list, which would happen on exit
382  * of a task.
383  */
384 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
385                                   struct task_struct *self,
386                                   struct task_struct *task)
387 {
388         if (!pid_list)
389                 return;
390
391         /* For forks, we only add if the forking task is listed */
392         if (self) {
393                 if (!trace_find_filtered_pid(pid_list, self->pid))
394                         return;
395         }
396
397         /* Sorry, but we don't support pid_max changing after setting */
398         if (task->pid >= pid_list->pid_max)
399                 return;
400
401         /* "self" is set for forks, and NULL for exits */
402         if (self)
403                 set_bit(task->pid, pid_list->pids);
404         else
405                 clear_bit(task->pid, pid_list->pids);
406 }
407
408 /**
409  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
410  * @pid_list: The pid list to show
411  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
412  * @pos: The position of the file
413  *
414  * This is used by the seq_file "next" operation to iterate the pids
415  * listed in a trace_pid_list structure.
416  *
417  * Returns the pid+1 as we want to display pid of zero, but NULL would
418  * stop the iteration.
419  */
420 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
421 {
422         unsigned long pid = (unsigned long)v;
423
424         (*pos)++;
425
426         /* pid already is +1 of the actual previous bit */
427         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
428
429         /* Return pid + 1 to allow zero to be represented */
430         if (pid < pid_list->pid_max)
431                 return (void *)(pid + 1);
432
433         return NULL;
434 }
435
436 /**
437  * trace_pid_start - Used for seq_file to start reading pid lists
438  * @pid_list: The pid list to show
439  * @pos: The position of the file
440  *
441  * This is used by seq_file "start" operation to start the iteration
442  * of listing pids.
443  *
444  * Returns the pid+1 as we want to display pid of zero, but NULL would
445  * stop the iteration.
446  */
447 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
448 {
449         unsigned long pid;
450         loff_t l = 0;
451
452         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
453         if (pid >= pid_list->pid_max)
454                 return NULL;
455
456         /* Return pid + 1 so that zero can be the exit value */
457         for (pid++; pid && l < *pos;
458              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
459                 ;
460         return (void *)pid;
461 }
462
463 /**
464  * trace_pid_show - show the current pid in seq_file processing
465  * @m: The seq_file structure to write into
466  * @v: A void pointer of the pid (+1) value to display
467  *
468  * Can be directly used by seq_file operations to display the current
469  * pid value.
470  */
471 int trace_pid_show(struct seq_file *m, void *v)
472 {
473         unsigned long pid = (unsigned long)v - 1;
474
475         seq_printf(m, "%lu\n", pid);
476         return 0;
477 }
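/*
 * Illustrative walk-through of the +1 scheme above: with pids {0, 5} set
 * in the list, trace_pid_start() returns (void *)1 for pid 0, the next
 * trace_pid_next() returns (void *)6 for pid 5, and the call after that
 * returns NULL to end the iteration. trace_pid_show() subtracts the 1
 * again before printing.
 */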
478
479 /* 128 should be much more than enough */
480 #define PID_BUF_SIZE            127
481
482 int trace_pid_write(struct trace_pid_list *filtered_pids,
483                     struct trace_pid_list **new_pid_list,
484                     const char __user *ubuf, size_t cnt)
485 {
486         struct trace_pid_list *pid_list;
487         struct trace_parser parser;
488         unsigned long val;
489         int nr_pids = 0;
490         ssize_t read = 0;
491         ssize_t ret = 0;
492         loff_t pos;
493         pid_t pid;
494
495         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
496                 return -ENOMEM;
497
498         /*
499          * Always recreate a new array. The write is an all or nothing
500          * operation. Always create a new array when adding new pids by
501          * the user. If the operation fails, then the current list is
502          * not modified.
503          */
504         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
505         if (!pid_list)
506                 return -ENOMEM;
507
508         pid_list->pid_max = READ_ONCE(pid_max);
509
510         /* Only truncating will shrink pid_max */
511         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
512                 pid_list->pid_max = filtered_pids->pid_max;
513
514         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
515         if (!pid_list->pids) {
516                 kfree(pid_list);
517                 return -ENOMEM;
518         }
519
520         if (filtered_pids) {
521                 /* copy the current bits to the new max */
522                 for_each_set_bit(pid, filtered_pids->pids,
523                                  filtered_pids->pid_max) {
524                         set_bit(pid, pid_list->pids);
525                         nr_pids++;
526                 }
527         }
528
529         while (cnt > 0) {
530
531                 pos = 0;
532
533                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
534                 if (ret < 0 || !trace_parser_loaded(&parser))
535                         break;
536
537                 read += ret;
538                 ubuf += ret;
539                 cnt -= ret;
540
541                 parser.buffer[parser.idx] = 0;
542
543                 ret = -EINVAL;
544                 if (kstrtoul(parser.buffer, 0, &val))
545                         break;
546                 if (val >= pid_list->pid_max)
547                         break;
548
549                 pid = (pid_t)val;
550
551                 set_bit(pid, pid_list->pids);
552                 nr_pids++;
553
554                 trace_parser_clear(&parser);
555                 ret = 0;
556         }
557         trace_parser_put(&parser);
558
559         if (ret < 0) {
560                 trace_free_pid_list(pid_list);
561                 return ret;
562         }
563
564         if (!nr_pids) {
565                 /* Cleared the list of pids */
566                 trace_free_pid_list(pid_list);
567                 read = ret;
568                 pid_list = NULL;
569         }
570
571         *new_pid_list = pid_list;
572
573         return read;
574 }
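/*
 * Illustrative use (a sketch; the exact file depends on the caller):
 * writing a space-separated list such as "123 456" through a tracefs pid
 * filter file (e.g. set_event_pid) ends up here. The write replaces the
 * previous list wholesale, and a write that yields no pids clears the
 * filter by returning a NULL list.
 */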
575
576 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
577 {
578         u64 ts;
579
580         /* Early boot up does not have a buffer yet */
581         if (!buf->buffer)
582                 return trace_clock_local();
583
584         ts = ring_buffer_time_stamp(buf->buffer, cpu);
585         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
586
587         return ts;
588 }
589
590 u64 ftrace_now(int cpu)
591 {
592         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
593 }
594
595 /**
596  * tracing_is_enabled - Show if global_trace has been disabled
597  *
598  * Shows if the global trace has been enabled or not. It uses the
599  * mirror flag "buffer_disabled" to be used in fast paths such as for
600  * the irqsoff tracer. But it may be inaccurate due to races. If you
601  * need to know the accurate state, use tracing_is_on() which is a little
602  * slower, but accurate.
603  */
604 int tracing_is_enabled(void)
605 {
606         /*
607          * For quick access (irqsoff uses this in fast path), just
608          * return the mirror variable of the state of the ring buffer.
609          * It's a little racy, but we don't really care.
610          */
611         smp_rmb();
612         return !global_trace.buffer_disabled;
613 }
614
615 /*
616  * trace_buf_size is the size in bytes that is allocated
617  * for a buffer. Note, the number of bytes is always rounded
618  * to page size.
619  *
620  * This number is purposely set to the low value of 16384.
621  * If a dump on oops happens, it is much appreciated
622  * not to have to wait for all that output. In any case, this is
623  * configurable at both boot time and run time.
624  */
625 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
626
627 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
628
629 /* trace_types holds a link list of available tracers. */
630 static struct tracer            *trace_types __read_mostly;
631
632 /*
633  * trace_types_lock is used to protect the trace_types list.
634  */
635 DEFINE_MUTEX(trace_types_lock);
636
637 /*
638  * serialize the access of the ring buffer
639  *
640  * The ring buffer serializes readers, but that is only low-level protection.
641  * The validity of the events (as returned by ring_buffer_peek(), etc.)
642  * is not protected by the ring buffer.
643  *
644  * The content of events may become garbage if we allow other processes to
645  * consume these events concurrently:
646  *   A) the page of the consumed events may become a normal page
647  *      (not a reader page) in the ring buffer, and this page will be rewritten
648  *      by the events producer.
649  *   B) The page of the consumed events may become a page for splice_read,
650  *      and this page will be returned to the system.
651  *
652  * These primitives allow multiple processes to access different per-cpu
653  * ring buffers concurrently.
654  *
655  * These primitives don't distinguish read-only and read-consume access.
656  * Multiple read-only accesses are also serialized.
657  */
658
659 #ifdef CONFIG_SMP
660 static DECLARE_RWSEM(all_cpu_access_lock);
661 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
662
663 static inline void trace_access_lock(int cpu)
664 {
665         if (cpu == RING_BUFFER_ALL_CPUS) {
666                 /* gain it for accessing the whole ring buffer. */
667                 down_write(&all_cpu_access_lock);
668         } else {
669                 /* gain it for accessing a cpu ring buffer. */
670
671                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
672                 down_read(&all_cpu_access_lock);
673
674                 /* Secondly block other access to this @cpu ring buffer. */
675                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
676         }
677 }
678
679 static inline void trace_access_unlock(int cpu)
680 {
681         if (cpu == RING_BUFFER_ALL_CPUS) {
682                 up_write(&all_cpu_access_lock);
683         } else {
684                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
685                 up_read(&all_cpu_access_lock);
686         }
687 }
688
689 static inline void trace_access_lock_init(void)
690 {
691         int cpu;
692
693         for_each_possible_cpu(cpu)
694                 mutex_init(&per_cpu(cpu_access_lock, cpu));
695 }
696
697 #else
698
699 static DEFINE_MUTEX(access_lock);
700
701 static inline void trace_access_lock(int cpu)
702 {
703         (void)cpu;
704         mutex_lock(&access_lock);
705 }
706
707 static inline void trace_access_unlock(int cpu)
708 {
709         (void)cpu;
710         mutex_unlock(&access_lock);
711 }
712
713 static inline void trace_access_lock_init(void)
714 {
715 }
716
717 #endif
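/*
 * Illustrative usage pattern for the primitives above (a sketch): a
 * reader of one per-cpu buffer brackets its accesses with
 *
 *      trace_access_lock(cpu);
 *      ... peek at / consume events of that cpu ...
 *      trace_access_unlock(cpu);
 *
 * while a reader that touches every cpu passes RING_BUFFER_ALL_CPUS,
 * which on SMP takes the rwsem for writing and so excludes all per-cpu
 * readers for the duration.
 */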
718
719 #ifdef CONFIG_STACKTRACE
720 static void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                  unsigned long flags,
722                                  int skip, int pc, struct pt_regs *regs);
723 static inline void ftrace_trace_stack(struct trace_array *tr,
724                                       struct ring_buffer *buffer,
725                                       unsigned long flags,
726                                       int skip, int pc, struct pt_regs *regs);
727
728 #else
729 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
730                                         unsigned long flags,
731                                         int skip, int pc, struct pt_regs *regs)
732 {
733 }
734 static inline void ftrace_trace_stack(struct trace_array *tr,
735                                       struct ring_buffer *buffer,
736                                       unsigned long flags,
737                                       int skip, int pc, struct pt_regs *regs)
738 {
739 }
740
741 #endif
742
743 static __always_inline void
744 trace_event_setup(struct ring_buffer_event *event,
745                   int type, unsigned long flags, int pc)
746 {
747         struct trace_entry *ent = ring_buffer_event_data(event);
748
749         tracing_generic_entry_update(ent, flags, pc);
750         ent->type = type;
751 }
752
753 static __always_inline struct ring_buffer_event *
754 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
755                           int type,
756                           unsigned long len,
757                           unsigned long flags, int pc)
758 {
759         struct ring_buffer_event *event;
760
761         event = ring_buffer_lock_reserve(buffer, len);
762         if (event != NULL)
763                 trace_event_setup(event, type, flags, pc);
764
765         return event;
766 }
767
768 static void tracer_tracing_on(struct trace_array *tr)
769 {
770         if (tr->trace_buffer.buffer)
771                 ring_buffer_record_on(tr->trace_buffer.buffer);
772         /*
773          * This flag is looked at when buffers haven't been allocated
774          * yet, or by some tracers (like irqsoff), that just want to
775          * know if the ring buffer has been disabled, but it can handle
776          * races of where it gets disabled but we still do a record.
777          * As the check is in the fast path of the tracers, it is more
778          * important to be fast than accurate.
779          */
780         tr->buffer_disabled = 0;
781         /* Make the flag seen by readers */
782         smp_wmb();
783 }
784
785 /**
786  * tracing_on - enable tracing buffers
787  *
788  * This function enables tracing buffers that may have been
789  * disabled with tracing_off.
790  */
791 void tracing_on(void)
792 {
793         tracer_tracing_on(&global_trace);
794 }
795 EXPORT_SYMBOL_GPL(tracing_on);
796
797
798 static __always_inline void
799 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
800 {
801         __this_cpu_write(trace_cmdline_save, true);
802
803         /* If this is the temp buffer, we need to commit fully */
804         if (this_cpu_read(trace_buffered_event) == event) {
805                 /* Length is in event->array[0] */
806                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
807                 /* Release the temp buffer */
808                 this_cpu_dec(trace_buffered_event_cnt);
809         } else
810                 ring_buffer_unlock_commit(buffer, event);
811 }
812
813 /**
814  * __trace_puts - write a constant string into the trace buffer.
815  * @ip:    The address of the caller
816  * @str:   The constant string to write
817  * @size:  The size of the string.
818  */
819 int __trace_puts(unsigned long ip, const char *str, int size)
820 {
821         struct ring_buffer_event *event;
822         struct ring_buffer *buffer;
823         struct print_entry *entry;
824         unsigned long irq_flags;
825         int alloc;
826         int pc;
827
828         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
829                 return 0;
830
831         pc = preempt_count();
832
833         if (unlikely(tracing_selftest_running || tracing_disabled))
834                 return 0;
835
836         alloc = sizeof(*entry) + size + 2; /* possible \n added */
837
838         local_save_flags(irq_flags);
839         buffer = global_trace.trace_buffer.buffer;
840         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
841                                             irq_flags, pc);
842         if (!event)
843                 return 0;
844
845         entry = ring_buffer_event_data(event);
846         entry->ip = ip;
847
848         memcpy(&entry->buf, str, size);
849
850         /* Add a newline if necessary */
851         if (entry->buf[size - 1] != '\n') {
852                 entry->buf[size] = '\n';
853                 entry->buf[size + 1] = '\0';
854         } else
855                 entry->buf[size] = '\0';
856
857         __buffer_unlock_commit(buffer, event);
858         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
859
860         return size;
861 }
862 EXPORT_SYMBOL_GPL(__trace_puts);
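/*
 * Illustrative call (a sketch): __trace_puts(_THIS_IP_, "hello", 5)
 * writes "hello" (with a newline appended, since none is present) into
 * the trace buffer and returns 5. Callers normally reach this through
 * the trace_puts() wrapper rather than calling it directly.
 */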
863
864 /**
865  * __trace_bputs - write the pointer to a constant string into trace buffer
866  * @ip:    The address of the caller
867  * @str:   The constant string to write to the buffer to
868  */
869 int __trace_bputs(unsigned long ip, const char *str)
870 {
871         struct ring_buffer_event *event;
872         struct ring_buffer *buffer;
873         struct bputs_entry *entry;
874         unsigned long irq_flags;
875         int size = sizeof(struct bputs_entry);
876         int pc;
877
878         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
879                 return 0;
880
881         pc = preempt_count();
882
883         if (unlikely(tracing_selftest_running || tracing_disabled))
884                 return 0;
885
886         local_save_flags(irq_flags);
887         buffer = global_trace.trace_buffer.buffer;
888         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
889                                             irq_flags, pc);
890         if (!event)
891                 return 0;
892
893         entry = ring_buffer_event_data(event);
894         entry->ip                       = ip;
895         entry->str                      = str;
896
897         __buffer_unlock_commit(buffer, event);
898         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
899
900         return 1;
901 }
902 EXPORT_SYMBOL_GPL(__trace_bputs);
903
904 #ifdef CONFIG_TRACER_SNAPSHOT
905 /**
906  * tracing_snapshot - take a snapshot of the current buffer.
907  *
908  * This causes a swap between the snapshot buffer and the current live
909  * tracing buffer. You can use this to take snapshots of the live
910  * trace when some condition is triggered, but continue to trace.
911  *
912  * Note, make sure to allocate the snapshot with either
913  * a tracing_snapshot_alloc(), or by doing it manually
914  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
915  *
916  * If the snapshot buffer is not allocated, it will stop tracing.
917  * Basically making a permanent snapshot.
918  */
919 void tracing_snapshot(void)
920 {
921         struct trace_array *tr = &global_trace;
922         struct tracer *tracer = tr->current_trace;
923         unsigned long flags;
924
925         if (in_nmi()) {
926                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
927                 internal_trace_puts("*** snapshot is being ignored        ***\n");
928                 return;
929         }
930
931         if (!tr->allocated_snapshot) {
932                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
933                 internal_trace_puts("*** stopping trace here!   ***\n");
934                 tracing_off();
935                 return;
936         }
937
938         /* Note, snapshot can not be used when the tracer uses it */
939         if (tracer->use_max_tr) {
940                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
941                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
942                 return;
943         }
944
945         local_irq_save(flags);
946         update_max_tr(tr, current, smp_processor_id());
947         local_irq_restore(flags);
948 }
949 EXPORT_SYMBOL_GPL(tracing_snapshot);
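/*
 * Illustrative caller (a sketch; hw_error_detected is hypothetical): a
 * driver that wants to freeze the interesting trace data when it spots a
 * rare condition could do
 *
 *      if (unlikely(hw_error_detected))
 *              tracing_snapshot();
 *
 * after having allocated the snapshot buffer with tracing_snapshot_alloc()
 * or via the tracefs "snapshot" file, as described above.
 */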
950
951 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
952                                         struct trace_buffer *size_buf, int cpu_id);
953 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
954
955 static int alloc_snapshot(struct trace_array *tr)
956 {
957         int ret;
958
959         if (!tr->allocated_snapshot) {
960
961                 /* allocate spare buffer */
962                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
963                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
964                 if (ret < 0)
965                         return ret;
966
967                 tr->allocated_snapshot = true;
968         }
969
970         return 0;
971 }
972
973 static void free_snapshot(struct trace_array *tr)
974 {
975         /*
976          * We don't free the ring buffer. instead, resize it because
977          * We don't free the ring buffer; instead, we resize it because
978          * the max_tr ring buffer has some state (e.g. ring->clock) and
979          * we want to preserve it.
980         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
981         set_buffer_entries(&tr->max_buffer, 1);
982         tracing_reset_online_cpus(&tr->max_buffer);
983         tr->allocated_snapshot = false;
984 }
985
986 /**
987  * tracing_alloc_snapshot - allocate snapshot buffer.
988  *
989  * This only allocates the snapshot buffer if it isn't already
990  * allocated - it doesn't also take a snapshot.
991  *
992  * This is meant to be used in cases where the snapshot buffer needs
993  * to be set up for events that can't sleep but need to be able to
994  * trigger a snapshot.
995  */
996 int tracing_alloc_snapshot(void)
997 {
998         struct trace_array *tr = &global_trace;
999         int ret;
1000
1001         ret = alloc_snapshot(tr);
1002         WARN_ON(ret < 0);
1003
1004         return ret;
1005 }
1006 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1007
1008 /**
1009  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1010  *
1011  * This is similar to tracing_snapshot(), but it will allocate the
1012  * snapshot buffer if it isn't already allocated. Use this only
1013  * where it is safe to sleep, as the allocation may sleep.
1014  *
1015  * This causes a swap between the snapshot buffer and the current live
1016  * tracing buffer. You can use this to take snapshots of the live
1017  * trace when some condition is triggered, but continue to trace.
1018  */
1019 void tracing_snapshot_alloc(void)
1020 {
1021         int ret;
1022
1023         ret = tracing_alloc_snapshot();
1024         if (ret < 0)
1025                 return;
1026
1027         tracing_snapshot();
1028 }
1029 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1030 #else
1031 void tracing_snapshot(void)
1032 {
1033         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1034 }
1035 EXPORT_SYMBOL_GPL(tracing_snapshot);
1036 int tracing_alloc_snapshot(void)
1037 {
1038         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1039         return -ENODEV;
1040 }
1041 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1042 void tracing_snapshot_alloc(void)
1043 {
1044         /* Give warning */
1045         tracing_snapshot();
1046 }
1047 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1048 #endif /* CONFIG_TRACER_SNAPSHOT */
1049
1050 static void tracer_tracing_off(struct trace_array *tr)
1051 {
1052         if (tr->trace_buffer.buffer)
1053                 ring_buffer_record_off(tr->trace_buffer.buffer);
1054         /*
1055          * This flag is looked at when buffers haven't been allocated
1056          * yet, or by some tracers (like irqsoff), that just want to
1057          * know if the ring buffer has been disabled, but it can handle
1058          * races of where it gets disabled but we still do a record.
1059          * As the check is in the fast path of the tracers, it is more
1060          * important to be fast than accurate.
1061          */
1062         tr->buffer_disabled = 1;
1063         /* Make the flag seen by readers */
1064         smp_wmb();
1065 }
1066
1067 /**
1068  * tracing_off - turn off tracing buffers
1069  *
1070  * This function stops the tracing buffers from recording data.
1071  * It does not disable any overhead the tracers themselves may
1072  * be causing. This function simply causes all recording to
1073  * the ring buffers to fail.
1074  */
1075 void tracing_off(void)
1076 {
1077         tracer_tracing_off(&global_trace);
1078 }
1079 EXPORT_SYMBOL_GPL(tracing_off);
1080
1081 void disable_trace_on_warning(void)
1082 {
1083         if (__disable_trace_on_warning)
1084                 tracing_off();
1085 }
1086
1087 /**
1088  * tracer_tracing_is_on - show the real state of the ring buffer
1089  * @tr: the trace array whose ring buffer state is queried
1090  *
1091  * Shows whether the ring buffer of @tr is actually enabled or not.
1092  */
1093 int tracer_tracing_is_on(struct trace_array *tr)
1094 {
1095         if (tr->trace_buffer.buffer)
1096                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1097         return !tr->buffer_disabled;
1098 }
1099
1100 /**
1101  * tracing_is_on - show state of ring buffers enabled
1102  */
1103 int tracing_is_on(void)
1104 {
1105         return tracer_tracing_is_on(&global_trace);
1106 }
1107 EXPORT_SYMBOL_GPL(tracing_is_on);
1108
1109 static int __init set_buf_size(char *str)
1110 {
1111         unsigned long buf_size;
1112
1113         if (!str)
1114                 return 0;
1115         buf_size = memparse(str, &str);
1116         /* nr_entries can not be zero */
1117         if (buf_size == 0)
1118                 return 0;
1119         trace_buf_size = buf_size;
1120         return 1;
1121 }
1122 __setup("trace_buf_size=", set_buf_size);
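/*
 * Illustrative boot usage (a sketch): "trace_buf_size=10M" on the kernel
 * command line sets trace_buf_size to roughly ten megabytes, since
 * memparse() accepts the usual K/M/G suffixes; the value is rounded to
 * page size as noted above.
 */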
1123
1124 static int __init set_tracing_thresh(char *str)
1125 {
1126         unsigned long threshold;
1127         int ret;
1128
1129         if (!str)
1130                 return 0;
1131         ret = kstrtoul(str, 0, &threshold);
1132         if (ret < 0)
1133                 return 0;
1134         tracing_thresh = threshold * 1000;
1135         return 1;
1136 }
1137 __setup("tracing_thresh=", set_tracing_thresh);
1138
1139 unsigned long nsecs_to_usecs(unsigned long nsecs)
1140 {
1141         return nsecs / 1000;
1142 }
1143
1144 /*
1145  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1146  * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
1147  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1148  * of strings in the order that the enums were defined.
1149  */
1150 #undef C
1151 #define C(a, b) b
1152
1153 /* These must match the bit positions in trace_iterator_flags */
1154 static const char *trace_options[] = {
1155         TRACE_FLAGS
1156         NULL
1157 };
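/*
 * Illustrative expansion of the X-macro trick above (a sketch): if
 * TRACE_FLAGS contained C(PRINT_PARENT, "print-parent") and
 * C(SYM_OFFSET, "sym-offset"), then with "#define C(a, b) b" the array
 * becomes { "print-parent", "sym-offset", NULL }, in the same order as
 * the enum values generated from the same list.
 */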
1158
1159 static struct {
1160         u64 (*func)(void);
1161         const char *name;
1162         int in_ns;              /* is this clock in nanoseconds? */
1163 } trace_clocks[] = {
1164         { trace_clock_local,            "local",        1 },
1165         { trace_clock_global,           "global",       1 },
1166         { trace_clock_counter,          "counter",      0 },
1167         { trace_clock_jiffies,          "uptime",       0 },
1168         { trace_clock,                  "perf",         1 },
1169         { ktime_get_mono_fast_ns,       "mono",         1 },
1170         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1171         { ktime_get_boot_fast_ns,       "boot",         1 },
1172         ARCH_TRACE_CLOCKS
1173 };
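/*
 * Illustrative selection (a sketch, assuming the usual tracefs layout):
 * "echo mono > /sys/kernel/debug/tracing/trace_clock" switches the trace
 * timestamps to ktime_get_mono_fast_ns() from the table above.
 */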
1174
1175 /*
1176  * trace_parser_get_init - gets the buffer for trace parser
1177  */
1178 int trace_parser_get_init(struct trace_parser *parser, int size)
1179 {
1180         memset(parser, 0, sizeof(*parser));
1181
1182         parser->buffer = kmalloc(size, GFP_KERNEL);
1183         if (!parser->buffer)
1184                 return 1;
1185
1186         parser->size = size;
1187         return 0;
1188 }
1189
1190 /*
1191  * trace_parser_put - frees the buffer for trace parser
1192  */
1193 void trace_parser_put(struct trace_parser *parser)
1194 {
1195         kfree(parser->buffer);
1196 }
1197
1198 /*
1199  * trace_get_user - reads the user input string separated by space
1200  * (matched by isspace(ch))
1201  *
1202  * For each string found the 'struct trace_parser' is updated,
1203  * and the function returns.
1204  *
1205  * Returns number of bytes read.
1206  *
1207  * See kernel/trace/trace.h for 'struct trace_parser' details.
1208  */
1209 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1210         size_t cnt, loff_t *ppos)
1211 {
1212         char ch;
1213         size_t read = 0;
1214         ssize_t ret;
1215
1216         if (!*ppos)
1217                 trace_parser_clear(parser);
1218
1219         ret = get_user(ch, ubuf++);
1220         if (ret)
1221                 goto out;
1222
1223         read++;
1224         cnt--;
1225
1226         /*
1227          * The parser is not finished with the last write,
1228          * continue reading the user input without skipping spaces.
1229          */
1230         if (!parser->cont) {
1231                 /* skip white space */
1232                 while (cnt && isspace(ch)) {
1233                         ret = get_user(ch, ubuf++);
1234                         if (ret)
1235                                 goto out;
1236                         read++;
1237                         cnt--;
1238                 }
1239
1240                 /* only spaces were written */
1241                 if (isspace(ch)) {
1242                         *ppos += read;
1243                         ret = read;
1244                         goto out;
1245                 }
1246
1247                 parser->idx = 0;
1248         }
1249
1250         /* read the non-space input */
1251         while (cnt && !isspace(ch)) {
1252                 if (parser->idx < parser->size - 1)
1253                         parser->buffer[parser->idx++] = ch;
1254                 else {
1255                         ret = -EINVAL;
1256                         goto out;
1257                 }
1258                 ret = get_user(ch, ubuf++);
1259                 if (ret)
1260                         goto out;
1261                 read++;
1262                 cnt--;
1263         }
1264
1265         /* We either got finished input or we have to wait for another call. */
1266         if (isspace(ch)) {
1267                 parser->buffer[parser->idx] = 0;
1268                 parser->cont = false;
1269         } else if (parser->idx < parser->size - 1) {
1270                 parser->cont = true;
1271                 parser->buffer[parser->idx++] = ch;
1272         } else {
1273                 ret = -EINVAL;
1274                 goto out;
1275         }
1276
1277         *ppos += read;
1278         ret = read;
1279
1280 out:
1281         return ret;
1282 }
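/*
 * Illustrative behaviour (a sketch): for a write of "foo " this copies
 * "foo" into parser->buffer, NUL-terminates it, clears parser->cont and
 * returns 4 (the three characters plus the space that ended the token).
 * A token cut off by the end of the write sets parser->cont instead, so
 * the next call continues it without skipping leading spaces.
 */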
1283
1284 /* TODO add a seq_buf_to_buffer() */
1285 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1286 {
1287         int len;
1288
1289         if (trace_seq_used(s) <= s->seq.readpos)
1290                 return -EBUSY;
1291
1292         len = trace_seq_used(s) - s->seq.readpos;
1293         if (cnt > len)
1294                 cnt = len;
1295         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1296
1297         s->seq.readpos += cnt;
1298         return cnt;
1299 }
1300
1301 unsigned long __read_mostly     tracing_thresh;
1302
1303 #ifdef CONFIG_TRACER_MAX_TRACE
1304 /*
1305  * Copy the new maximum trace into the separate maximum-trace
1306  * structure. (this way the maximum trace is permanently saved,
1307  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1308  */
1309 static void
1310 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 {
1312         struct trace_buffer *trace_buf = &tr->trace_buffer;
1313         struct trace_buffer *max_buf = &tr->max_buffer;
1314         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1315         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1316
1317         max_buf->cpu = cpu;
1318         max_buf->time_start = data->preempt_timestamp;
1319
1320         max_data->saved_latency = tr->max_latency;
1321         max_data->critical_start = data->critical_start;
1322         max_data->critical_end = data->critical_end;
1323
1324         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1325         max_data->pid = tsk->pid;
1326         /*
1327          * If tsk == current, then use current_uid(), as that does not use
1328          * RCU. The irq tracer can be called out of RCU scope.
1329          */
1330         if (tsk == current)
1331                 max_data->uid = current_uid();
1332         else
1333                 max_data->uid = task_uid(tsk);
1334
1335         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1336         max_data->policy = tsk->policy;
1337         max_data->rt_priority = tsk->rt_priority;
1338
1339         /* record this tasks comm */
1340         tracing_record_cmdline(tsk);
1341 }
1342
1343 /**
1344  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1345  * @tr: tracer
1346  * @tsk: the task with the latency
1347  * @cpu: The cpu that initiated the trace.
1348  *
1349  * Flip the buffers between the @tr and the max_tr and record information
1350  * about which task was the cause of this latency.
1351  */
1352 void
1353 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1354 {
1355         struct ring_buffer *buf;
1356
1357         if (tr->stop_count)
1358                 return;
1359
1360         WARN_ON_ONCE(!irqs_disabled());
1361
1362         if (!tr->allocated_snapshot) {
1363                 /* Only the nop tracer should hit this when disabling */
1364                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1365                 return;
1366         }
1367
1368         arch_spin_lock(&tr->max_lock);
1369
1370         buf = tr->trace_buffer.buffer;
1371         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1372         tr->max_buffer.buffer = buf;
1373
1374         __update_max_tr(tr, tsk, cpu);
1375         arch_spin_unlock(&tr->max_lock);
1376 }
1377
1378 /**
1379  * update_max_tr_single - only copy one trace over, and reset the rest
1380  * @tr - tracer
1381  * @tsk - task with the latency
1382  * @cpu - the cpu of the buffer to copy.
1383  *
1384  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1385  */
1386 void
1387 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1388 {
1389         int ret;
1390
1391         if (tr->stop_count)
1392                 return;
1393
1394         WARN_ON_ONCE(!irqs_disabled());
1395         if (!tr->allocated_snapshot) {
1396                 /* Only the nop tracer should hit this when disabling */
1397                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1398                 return;
1399         }
1400
1401         arch_spin_lock(&tr->max_lock);
1402
1403         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1404
1405         if (ret == -EBUSY) {
1406                 /*
1407                  * We failed to swap the buffer due to a commit taking
1408                  * place on this CPU. We fail to record, but we reset
1409                  * the max trace buffer (no one writes directly to it)
1410                  * and flag that it failed.
1411                  */
1412                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1413                         "Failed to swap buffers due to commit in progress\n");
1414         }
1415
1416         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1417
1418         __update_max_tr(tr, tsk, cpu);
1419         arch_spin_unlock(&tr->max_lock);
1420 }
1421 #endif /* CONFIG_TRACER_MAX_TRACE */
1422
1423 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1424 {
1425         /* Iterators are static, they should be filled or empty */
1426         if (trace_buffer_iter(iter, iter->cpu_file))
1427                 return 0;
1428
1429         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1430                                 full);
1431 }
1432
1433 #ifdef CONFIG_FTRACE_STARTUP_TEST
1434 static int run_tracer_selftest(struct tracer *type)
1435 {
1436         struct trace_array *tr = &global_trace;
1437         struct tracer *saved_tracer = tr->current_trace;
1438         int ret;
1439
1440         if (!type->selftest || tracing_selftest_disabled)
1441                 return 0;
1442
1443         /*
1444          * Run a selftest on this tracer.
1445          * Here we reset the trace buffer, and set the current
1446          * tracer to be this tracer. The tracer can then run some
1447          * internal tracing to verify that everything is in order.
1448          * If we fail, we do not register this tracer.
1449          */
1450         tracing_reset_online_cpus(&tr->trace_buffer);
1451
1452         tr->current_trace = type;
1453
1454 #ifdef CONFIG_TRACER_MAX_TRACE
1455         if (type->use_max_tr) {
1456                 /* If we expanded the buffers, make sure the max is expanded too */
1457                 if (ring_buffer_expanded)
1458                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1459                                            RING_BUFFER_ALL_CPUS);
1460                 tr->allocated_snapshot = true;
1461         }
1462 #endif
1463
1464         /* the test is responsible for initializing and enabling */
1465         pr_info("Testing tracer %s: ", type->name);
1466         ret = type->selftest(type, tr);
1467         /* the test is responsible for resetting too */
1468         tr->current_trace = saved_tracer;
1469         if (ret) {
1470                 printk(KERN_CONT "FAILED!\n");
1471                 /* Add the warning after printing 'FAILED' */
1472                 WARN_ON(1);
1473                 return -1;
1474         }
1475         /* Only reset on passing, to avoid touching corrupted buffers */
1476         tracing_reset_online_cpus(&tr->trace_buffer);
1477
1478 #ifdef CONFIG_TRACER_MAX_TRACE
1479         if (type->use_max_tr) {
1480                 tr->allocated_snapshot = false;
1481
1482                 /* Shrink the max buffer again */
1483                 if (ring_buffer_expanded)
1484                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1485                                            RING_BUFFER_ALL_CPUS);
1486         }
1487 #endif
1488
1489         printk(KERN_CONT "PASSED\n");
1490         return 0;
1491 }
1492 #else
1493 static inline int run_tracer_selftest(struct tracer *type)
1494 {
1495         return 0;
1496 }
1497 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1498
1499 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1500
1501 static void __init apply_trace_boot_options(void);
1502
1503 /**
1504  * register_tracer - register a tracer with the ftrace system.
1505  * @type - the plugin for the tracer
1506  *
1507  * Register a new plugin tracer.
1508  */
1509 int __init register_tracer(struct tracer *type)
1510 {
1511         struct tracer *t;
1512         int ret = 0;
1513
1514         if (!type->name) {
1515                 pr_info("Tracer must have a name\n");
1516                 return -1;
1517         }
1518
1519         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1520                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1521                 return -1;
1522         }
1523
1524         mutex_lock(&trace_types_lock);
1525
1526         tracing_selftest_running = true;
1527
1528         for (t = trace_types; t; t = t->next) {
1529                 if (strcmp(type->name, t->name) == 0) {
1530                         /* already found */
1531                         pr_info("Tracer %s already registered\n",
1532                                 type->name);
1533                         ret = -1;
1534                         goto out;
1535                 }
1536         }
1537
1538         if (!type->set_flag)
1539                 type->set_flag = &dummy_set_flag;
1540         if (!type->flags) {
1541                 /* allocate a dummy tracer_flags */
1542                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1543                 if (!type->flags) {
1544                         ret = -ENOMEM;
1545                         goto out;
1546                 }
1547                 type->flags->val = 0;
1548                 type->flags->opts = dummy_tracer_opt;
1549         } else
1550                 if (!type->flags->opts)
1551                         type->flags->opts = dummy_tracer_opt;
1552
1553         /* store the tracer for __set_tracer_option */
1554         type->flags->trace = type;
1555
1556         ret = run_tracer_selftest(type);
1557         if (ret < 0)
1558                 goto out;
1559
1560         type->next = trace_types;
1561         trace_types = type;
1562         add_tracer_options(&global_trace, type);
1563
1564  out:
1565         tracing_selftest_running = false;
1566         mutex_unlock(&trace_types_lock);
1567
1568         if (ret || !default_bootup_tracer)
1569                 goto out_unlock;
1570
1571         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1572                 goto out_unlock;
1573
1574         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1575         /* Do we want this tracer to start on bootup? */
1576         tracing_set_tracer(&global_trace, type->name);
1577         default_bootup_tracer = NULL;
1578
1579         apply_trace_boot_options();
1580
1581         /* disable other selftests, since this will break them. */
1582         tracing_selftest_disabled = true;
1583 #ifdef CONFIG_FTRACE_STARTUP_TEST
1584         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1585                type->name);
1586 #endif
1587
1588  out_unlock:
1589         return ret;
1590 }
1591
1592 void tracing_reset(struct trace_buffer *buf, int cpu)
1593 {
1594         struct ring_buffer *buffer = buf->buffer;
1595
1596         if (!buffer)
1597                 return;
1598
1599         ring_buffer_record_disable(buffer);
1600
1601         /* Make sure all commits have finished */
1602         synchronize_sched();
1603         ring_buffer_reset_cpu(buffer, cpu);
1604
1605         ring_buffer_record_enable(buffer);
1606 }
1607
1608 void tracing_reset_online_cpus(struct trace_buffer *buf)
1609 {
1610         struct ring_buffer *buffer = buf->buffer;
1611         int cpu;
1612
1613         if (!buffer)
1614                 return;
1615
1616         ring_buffer_record_disable(buffer);
1617
1618         /* Make sure all commits have finished */
1619         synchronize_sched();
1620
1621         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1622
1623         for_each_online_cpu(cpu)
1624                 ring_buffer_reset_cpu(buffer, cpu);
1625
1626         ring_buffer_record_enable(buffer);
1627 }
1628
1629 /* Must have trace_types_lock held */
1630 void tracing_reset_all_online_cpus(void)
1631 {
1632         struct trace_array *tr;
1633
1634         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1635                 tracing_reset_online_cpus(&tr->trace_buffer);
1636 #ifdef CONFIG_TRACER_MAX_TRACE
1637                 tracing_reset_online_cpus(&tr->max_buffer);
1638 #endif
1639         }
1640 }
1641
1642 #define SAVED_CMDLINES_DEFAULT 128
1643 #define NO_CMDLINE_MAP UINT_MAX
1644 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1645 struct saved_cmdlines_buffer {
1646         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1647         unsigned *map_cmdline_to_pid;
1648         unsigned cmdline_num;
1649         int cmdline_idx;
1650         char *saved_cmdlines;
1651 };
1652 static struct saved_cmdlines_buffer *savedcmd;
1653
1654 /* temporarily disable recording */
1655 static atomic_t trace_record_cmdline_disabled __read_mostly;
1656
1657 static inline char *get_saved_cmdlines(int idx)
1658 {
1659         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1660 }
1661
1662 static inline void set_cmdline(int idx, const char *cmdline)
1663 {
1664         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1665 }
1666
1667 static int allocate_cmdlines_buffer(unsigned int val,
1668                                     struct saved_cmdlines_buffer *s)
1669 {
1670         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1671                                         GFP_KERNEL);
1672         if (!s->map_cmdline_to_pid)
1673                 return -ENOMEM;
1674
1675         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1676         if (!s->saved_cmdlines) {
1677                 kfree(s->map_cmdline_to_pid);
1678                 return -ENOMEM;
1679         }
1680
1681         s->cmdline_idx = 0;
1682         s->cmdline_num = val;
1683         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1684                sizeof(s->map_pid_to_cmdline));
1685         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1686                val * sizeof(*s->map_cmdline_to_pid));
1687
1688         return 0;
1689 }
1690
1691 static int trace_create_savedcmd(void)
1692 {
1693         int ret;
1694
1695         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1696         if (!savedcmd)
1697                 return -ENOMEM;
1698
1699         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1700         if (ret < 0) {
1701                 kfree(savedcmd);
1702                 savedcmd = NULL;
1703                 return -ENOMEM;
1704         }
1705
1706         return 0;
1707 }
1708
1709 int is_tracing_stopped(void)
1710 {
1711         return global_trace.stop_count;
1712 }
1713
1714 /**
1715  * tracing_start - quick start of the tracer
1716  *
1717  * If tracing is enabled but was stopped by tracing_stop,
1718  * this will start the tracer back up.
1719  */
1720 void tracing_start(void)
1721 {
1722         struct ring_buffer *buffer;
1723         unsigned long flags;
1724
1725         if (tracing_disabled)
1726                 return;
1727
1728         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1729         if (--global_trace.stop_count) {
1730                 if (global_trace.stop_count < 0) {
1731                         /* Someone screwed up their debugging */
1732                         WARN_ON_ONCE(1);
1733                         global_trace.stop_count = 0;
1734                 }
1735                 goto out;
1736         }
1737
1738         /* Prevent the buffers from switching */
1739         arch_spin_lock(&global_trace.max_lock);
1740
1741         buffer = global_trace.trace_buffer.buffer;
1742         if (buffer)
1743                 ring_buffer_record_enable(buffer);
1744
1745 #ifdef CONFIG_TRACER_MAX_TRACE
1746         buffer = global_trace.max_buffer.buffer;
1747         if (buffer)
1748                 ring_buffer_record_enable(buffer);
1749 #endif
1750
1751         arch_spin_unlock(&global_trace.max_lock);
1752
1753  out:
1754         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1755 }
1756
1757 static void tracing_start_tr(struct trace_array *tr)
1758 {
1759         struct ring_buffer *buffer;
1760         unsigned long flags;
1761
1762         if (tracing_disabled)
1763                 return;
1764
1765         /* If global, we need to also start the max tracer */
1766         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1767                 return tracing_start();
1768
1769         raw_spin_lock_irqsave(&tr->start_lock, flags);
1770
1771         if (--tr->stop_count) {
1772                 if (tr->stop_count < 0) {
1773                         /* Someone screwed up their debugging */
1774                         WARN_ON_ONCE(1);
1775                         tr->stop_count = 0;
1776                 }
1777                 goto out;
1778         }
1779
1780         buffer = tr->trace_buffer.buffer;
1781         if (buffer)
1782                 ring_buffer_record_enable(buffer);
1783
1784  out:
1785         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1786 }
1787
1788 /**
1789  * tracing_stop - quick stop of the tracer
1790  *
1791  * Lightweight way to stop tracing. Use in conjunction with
1792  * tracing_start.
1793  */
1794 void tracing_stop(void)
1795 {
1796         struct ring_buffer *buffer;
1797         unsigned long flags;
1798
1799         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1800         if (global_trace.stop_count++)
1801                 goto out;
1802
1803         /* Prevent the buffers from switching */
1804         arch_spin_lock(&global_trace.max_lock);
1805
1806         buffer = global_trace.trace_buffer.buffer;
1807         if (buffer)
1808                 ring_buffer_record_disable(buffer);
1809
1810 #ifdef CONFIG_TRACER_MAX_TRACE
1811         buffer = global_trace.max_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_disable(buffer);
1814 #endif
1815
1816         arch_spin_unlock(&global_trace.max_lock);
1817
1818  out:
1819         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1820 }
1821
1822 static void tracing_stop_tr(struct trace_array *tr)
1823 {
1824         struct ring_buffer *buffer;
1825         unsigned long flags;
1826
1827         /* If global, we need to also stop the max tracer */
1828         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1829                 return tracing_stop();
1830
1831         raw_spin_lock_irqsave(&tr->start_lock, flags);
1832         if (tr->stop_count++)
1833                 goto out;
1834
1835         buffer = tr->trace_buffer.buffer;
1836         if (buffer)
1837                 ring_buffer_record_disable(buffer);
1838
1839  out:
1840         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1841 }
1842
1843 void trace_stop_cmdline_recording(void);
1844
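     /*
      * Cache tsk->comm in the saved_cmdlines buffer so trace output can map
      * a PID back to a command name without touching the task struct.
      * Returns 1 on success, 0 if the PID is out of range or the cmdline
      * lock could not be taken.
      */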
1845 static int trace_save_cmdline(struct task_struct *tsk)
1846 {
1847         unsigned pid, idx;
1848
1849         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1850                 return 0;
1851
1852         /*
1853          * It's not the end of the world if we don't get
1854          * the lock, but we also don't want to spin
1855          * nor do we want to disable interrupts,
1856          * so if we miss here, then better luck next time.
1857          */
1858         if (!arch_spin_trylock(&trace_cmdline_lock))
1859                 return 0;
1860
1861         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1862         if (idx == NO_CMDLINE_MAP) {
1863                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1864
1865                 /*
1866                  * Check whether the cmdline buffer at idx has a pid
1867                  * mapped. We are going to overwrite that entry so we
1868                  * need to clear the map_pid_to_cmdline. Otherwise we
1869                  * would read the new comm for the old pid.
1870                  */
1871                 pid = savedcmd->map_cmdline_to_pid[idx];
1872                 if (pid != NO_CMDLINE_MAP)
1873                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1874
1875                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1876                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1877
1878                 savedcmd->cmdline_idx = idx;
1879         }
1880
1881         set_cmdline(idx, tsk->comm);
1882
1883         arch_spin_unlock(&trace_cmdline_lock);
1884
1885         return 1;
1886 }
1887
1888 static void __trace_find_cmdline(int pid, char comm[])
1889 {
1890         unsigned map;
1891
1892         if (!pid) {
1893                 strcpy(comm, "<idle>");
1894                 return;
1895         }
1896
1897         if (WARN_ON_ONCE(pid < 0)) {
1898                 strcpy(comm, "<XXX>");
1899                 return;
1900         }
1901
1902         if (pid > PID_MAX_DEFAULT) {
1903                 strcpy(comm, "<...>");
1904                 return;
1905         }
1906
1907         map = savedcmd->map_pid_to_cmdline[pid];
1908         if (map != NO_CMDLINE_MAP)
1909                 strcpy(comm, get_saved_cmdlines(map));
1910         else
1911                 strcpy(comm, "<...>");
1912 }
1913
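     /*
      * Copy the command name saved for @pid into @comm. "<idle>" is used
      * for PID 0 and "<...>" when no mapping exists.
      */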
1914 void trace_find_cmdline(int pid, char comm[])
1915 {
1916         preempt_disable();
1917         arch_spin_lock(&trace_cmdline_lock);
1918
1919         __trace_find_cmdline(pid, comm);
1920
1921         arch_spin_unlock(&trace_cmdline_lock);
1922         preempt_enable();
1923 }
1924
1925 void tracing_record_cmdline(struct task_struct *tsk)
1926 {
1927         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1928                 return;
1929
1930         if (!__this_cpu_read(trace_cmdline_save))
1931                 return;
1932
1933         if (trace_save_cmdline(tsk))
1934                 __this_cpu_write(trace_cmdline_save, false);
1935 }
1936
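     /*
      * Fill in the fields common to every trace entry: the current PID, the
      * preemption count, and flags describing the irq/softirq/NMI context
      * and pending need-resched state.
      */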
1937 void
1938 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1939                              int pc)
1940 {
1941         struct task_struct *tsk = current;
1942
1943         entry->preempt_count            = pc & 0xff;
1944         entry->pid                      = (tsk) ? tsk->pid : 0;
1945         entry->flags =
1946 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1947                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1948 #else
1949                 TRACE_FLAG_IRQS_NOSUPPORT |
1950 #endif
1951                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1952                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1953                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
1954                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1955                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1956 }
1957 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1958
1959 struct ring_buffer_event *
1960 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1961                           int type,
1962                           unsigned long len,
1963                           unsigned long flags, int pc)
1964 {
1965         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
1966 }
1967
1968 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
1969 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
1970 static int trace_buffered_event_ref;
1971
1972 /**
1973  * trace_buffered_event_enable - enable buffering events
1974  *
1975  * When events are being filtered, it is quicker to use a temporary
1976  * buffer to write the event data into if there's a likely chance
1977  * that it will not be committed. The discard of the ring buffer
1978  * is not as fast as committing, and is much slower than copying
1979  * a commit.
1980  *
1981  * When an event is to be filtered, per-CPU buffers are allocated to
1982  * write the event data into. If the event is filtered and discarded,
1983  * it is simply dropped; otherwise, the entire data is committed
1984  * in one shot.
1985  */
1986 void trace_buffered_event_enable(void)
1987 {
1988         struct ring_buffer_event *event;
1989         struct page *page;
1990         int cpu;
1991
1992         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
1993
1994         if (trace_buffered_event_ref++)
1995                 return;
1996
1997         for_each_tracing_cpu(cpu) {
1998                 page = alloc_pages_node(cpu_to_node(cpu),
1999                                         GFP_KERNEL | __GFP_NORETRY, 0);
2000                 if (!page)
2001                         goto failed;
2002
2003                 event = page_address(page);
2004                 memset(event, 0, sizeof(*event));
2005
2006                 per_cpu(trace_buffered_event, cpu) = event;
2007
2008                 preempt_disable();
2009                 if (cpu == smp_processor_id() &&
2010                     this_cpu_read(trace_buffered_event) !=
2011                     per_cpu(trace_buffered_event, cpu))
2012                         WARN_ON_ONCE(1);
2013                 preempt_enable();
2014         }
2015
2016         return;
2017  failed:
2018         trace_buffered_event_disable();
2019 }
2020
2021 static void enable_trace_buffered_event(void *data)
2022 {
2023         /* Probably not needed, but do it anyway */
2024         smp_rmb();
2025         this_cpu_dec(trace_buffered_event_cnt);
2026 }
2027
2028 static void disable_trace_buffered_event(void *data)
2029 {
2030         this_cpu_inc(trace_buffered_event_cnt);
2031 }
2032
2033 /**
2034  * trace_buffered_event_disable - disable buffering events
2035  *
2036  * When a filter is removed, it is faster to not use the buffered
2037  * events, and to commit directly into the ring buffer. Free up
2038  * the temp buffers when there are no more users. This requires
2039  * special synchronization with current events.
2040  */
2041 void trace_buffered_event_disable(void)
2042 {
2043         int cpu;
2044
2045         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2046
2047         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2048                 return;
2049
2050         if (--trace_buffered_event_ref)
2051                 return;
2052
2053         preempt_disable();
2054         /* For each CPU, set the buffer as used. */
2055         smp_call_function_many(tracing_buffer_mask,
2056                                disable_trace_buffered_event, NULL, 1);
2057         preempt_enable();
2058
2059         /* Wait for all current users to finish */
2060         synchronize_sched();
2061
2062         for_each_tracing_cpu(cpu) {
2063                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2064                 per_cpu(trace_buffered_event, cpu) = NULL;
2065         }
2066         /*
2067          * Make sure trace_buffered_event is NULL before clearing
2068          * trace_buffered_event_cnt.
2069          */
2070         smp_wmb();
2071
2072         preempt_disable();
2073         /* Do the work on each cpu */
2074         smp_call_function_many(tracing_buffer_mask,
2075                                enable_trace_buffered_event, NULL, 1);
2076         preempt_enable();
2077 }
2078
2079 static struct ring_buffer *temp_buffer;
2080
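     /*
      * Reserve space for a trace event. If the event file is soft-disabled
      * or filtered, the per-CPU buffered event is tried first; otherwise the
      * event is reserved directly from the ring buffer, falling back to
      * temp_buffer when tracing is off but a trigger still needs the data.
      */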
2081 struct ring_buffer_event *
2082 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2083                           struct trace_event_file *trace_file,
2084                           int type, unsigned long len,
2085                           unsigned long flags, int pc)
2086 {
2087         struct ring_buffer_event *entry;
2088         int val;
2089
2090         *current_rb = trace_file->tr->trace_buffer.buffer;
2091
2092         if ((trace_file->flags &
2093              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2094             (entry = this_cpu_read(trace_buffered_event))) {
2095                 /* Try to use the per cpu buffer first */
2096                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2097                 if (val == 1) {
2098                         trace_event_setup(entry, type, flags, pc);
2099                         entry->array[0] = len;
2100                         return entry;
2101                 }
2102                 this_cpu_dec(trace_buffered_event_cnt);
2103         }
2104
2105         entry = __trace_buffer_lock_reserve(*current_rb,
2106                                             type, len, flags, pc);
2107         /*
2108          * If tracing is off, but we have triggers enabled,
2109          * we still need to look at the event data. Use the temp_buffer
2110          * to store the trace event for the trigger to use. It's recursion
2111          * safe and will not be recorded anywhere.
2112          */
2113         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2114                 *current_rb = temp_buffer;
2115                 entry = __trace_buffer_lock_reserve(*current_rb,
2116                                                     type, len, flags, pc);
2117         }
2118         return entry;
2119 }
2120 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2121
2122 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2123 static DEFINE_MUTEX(tracepoint_printk_mutex);
2124
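     /*
      * Print a single trace event directly via printk(); used when the
      * tracepoint_printk sysctl pipes tracepoints to the console.
      */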
2125 static void output_printk(struct trace_event_buffer *fbuffer)
2126 {
2127         struct trace_event_call *event_call;
2128         struct trace_event *event;
2129         unsigned long flags;
2130         struct trace_iterator *iter = tracepoint_print_iter;
2131
2132         /* We should never get here if iter is NULL */
2133         if (WARN_ON_ONCE(!iter))
2134                 return;
2135
2136         event_call = fbuffer->trace_file->event_call;
2137         if (!event_call || !event_call->event.funcs ||
2138             !event_call->event.funcs->trace)
2139                 return;
2140
2141         event = &fbuffer->trace_file->event_call->event;
2142
2143         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2144         trace_seq_init(&iter->seq);
2145         iter->ent = fbuffer->entry;
2146         event_call->event.funcs->trace(iter, 0, event);
2147         trace_seq_putc(&iter->seq, 0);
2148         printk("%s", iter->seq.buffer);
2149
2150         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2151 }
2152
2153 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2154                              void __user *buffer, size_t *lenp,
2155                              loff_t *ppos)
2156 {
2157         int save_tracepoint_printk;
2158         int ret;
2159
2160         mutex_lock(&tracepoint_printk_mutex);
2161         save_tracepoint_printk = tracepoint_printk;
2162
2163         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2164
2165         /*
2166          * This will force exiting early, as tracepoint_printk
2167          * is always zero when tracepoint_print_iter is not allocated.
2168          */
2169         if (!tracepoint_print_iter)
2170                 tracepoint_printk = 0;
2171
2172         if (save_tracepoint_printk == tracepoint_printk)
2173                 goto out;
2174
2175         if (tracepoint_printk)
2176                 static_key_enable(&tracepoint_printk_key.key);
2177         else
2178                 static_key_disable(&tracepoint_printk_key.key);
2179
2180  out:
2181         mutex_unlock(&tracepoint_printk_mutex);
2182
2183         return ret;
2184 }
2185
2186 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2187 {
2188         if (static_key_false(&tracepoint_printk_key.key))
2189                 output_printk(fbuffer);
2190
2191         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2192                                     fbuffer->event, fbuffer->entry,
2193                                     fbuffer->flags, fbuffer->pc);
2194 }
2195 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2196
2197 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2198                                      struct ring_buffer *buffer,
2199                                      struct ring_buffer_event *event,
2200                                      unsigned long flags, int pc,
2201                                      struct pt_regs *regs)
2202 {
2203         __buffer_unlock_commit(buffer, event);
2204
2205         /*
2206          * If regs is not set, then skip the following callers:
2207          *   trace_buffer_unlock_commit_regs
2208          *   event_trigger_unlock_commit
2209          *   trace_event_buffer_commit
2210          *   trace_event_raw_event_sched_switch
2211          * Note, we can still get here via blktrace, wakeup tracer
2212          * and mmiotrace, but that's ok if they lose a function or
2213          * two. They are not that meaningful.
2214          */
2215         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2216         ftrace_trace_userstack(buffer, flags, pc);
2217 }
2218
2219 /*
2220  * Similar to trace_buffer_unlock_commit_regs() but does not dump the stack.
2221  */
2222 void
2223 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2224                                    struct ring_buffer_event *event)
2225 {
2226         __buffer_unlock_commit(buffer, event);
2227 }
2228
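     /*
      * Hand the raw data of a ring buffer event to a registered
      * trace_export's ->write() callback.
      */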
2229 static void
2230 trace_process_export(struct trace_export *export,
2231                struct ring_buffer_event *event)
2232 {
2233         struct trace_entry *entry;
2234         unsigned int size = 0;
2235
2236         entry = ring_buffer_event_data(event);
2237         size = ring_buffer_event_length(event);
2238         export->write(entry, size);
2239 }
2240
2241 static DEFINE_MUTEX(ftrace_export_lock);
2242
2243 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2244
2245 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2246
2247 static inline void ftrace_exports_enable(void)
2248 {
2249         static_branch_enable(&ftrace_exports_enabled);
2250 }
2251
2252 static inline void ftrace_exports_disable(void)
2253 {
2254         static_branch_disable(&ftrace_exports_enabled);
2255 }
2256
2257 void ftrace_exports(struct ring_buffer_event *event)
2258 {
2259         struct trace_export *export;
2260
2261         preempt_disable_notrace();
2262
2263         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2264         while (export) {
2265                 trace_process_export(export, event);
2266                 export = rcu_dereference_raw_notrace(export->next);
2267         }
2268
2269         preempt_enable_notrace();
2270 }
2271
2272 static inline void
2273 add_trace_export(struct trace_export **list, struct trace_export *export)
2274 {
2275         rcu_assign_pointer(export->next, *list);
2276         /*
2277          * We are inserting export into the list, but another
2278          * CPU might be walking that list. We need to make sure
2279          * the export->next pointer is valid before another CPU sees
2280          * the export pointer inserted into the list.
2281          */
2282         rcu_assign_pointer(*list, export);
2283 }
2284
2285 static inline int
2286 rm_trace_export(struct trace_export **list, struct trace_export *export)
2287 {
2288         struct trace_export **p;
2289
2290         for (p = list; *p != NULL; p = &(*p)->next)
2291                 if (*p == export)
2292                         break;
2293
2294         if (*p != export)
2295                 return -1;
2296
2297         rcu_assign_pointer(*p, (*p)->next);
2298
2299         return 0;
2300 }
2301
2302 static inline void
2303 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2304 {
2305         if (*list == NULL)
2306                 ftrace_exports_enable();
2307
2308         add_trace_export(list, export);
2309 }
2310
2311 static inline int
2312 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2313 {
2314         int ret;
2315
2316         ret = rm_trace_export(list, export);
2317         if (*list == NULL)
2318                 ftrace_exports_disable();
2319
2320         return ret;
2321 }
2322
2323 int register_ftrace_export(struct trace_export *export)
2324 {
2325         if (WARN_ON_ONCE(!export->write))
2326                 return -1;
2327
2328         mutex_lock(&ftrace_export_lock);
2329
2330         add_ftrace_export(&ftrace_exports_list, export);
2331
2332         mutex_unlock(&ftrace_export_lock);
2333
2334         return 0;
2335 }
2336 EXPORT_SYMBOL_GPL(register_ftrace_export);
2337
2338 int unregister_ftrace_export(struct trace_export *export)
2339 {
2340         int ret;
2341
2342         mutex_lock(&ftrace_export_lock);
2343
2344         ret = rm_ftrace_export(&ftrace_exports_list, export);
2345
2346         mutex_unlock(&ftrace_export_lock);
2347
2348         return ret;
2349 }
2350 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2351
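     /*
      * Record a TRACE_FN entry (ip and parent_ip) into the trace buffer,
      * honoring event filters and any registered ftrace exports.
      */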
2352 void
2353 trace_function(struct trace_array *tr,
2354                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2355                int pc)
2356 {
2357         struct trace_event_call *call = &event_function;
2358         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2359         struct ring_buffer_event *event;
2360         struct ftrace_entry *entry;
2361
2362         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2363                                             flags, pc);
2364         if (!event)
2365                 return;
2366         entry   = ring_buffer_event_data(event);
2367         entry->ip                       = ip;
2368         entry->parent_ip                = parent_ip;
2369
2370         if (!call_filter_check_discard(call, entry, buffer, event)) {
2371                 if (static_branch_unlikely(&ftrace_exports_enabled))
2372                         ftrace_exports(event);
2373                 __buffer_unlock_commit(buffer, event);
2374         }
2375 }
2376
2377 #ifdef CONFIG_STACKTRACE
2378
2379 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2380 struct ftrace_stack {
2381         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2382 };
2383
2384 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2385 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2386
2387 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2388                                  unsigned long flags,
2389                                  int skip, int pc, struct pt_regs *regs)
2390 {
2391         struct trace_event_call *call = &event_kernel_stack;
2392         struct ring_buffer_event *event;
2393         struct stack_entry *entry;
2394         struct stack_trace trace;
2395         int use_stack;
2396         int size = FTRACE_STACK_ENTRIES;
2397
2398         trace.nr_entries        = 0;
2399         trace.skip              = skip;
2400
2401         /*
2402          * Add two, for this function and the call to save_stack_trace().
2403          * If regs is set, then these functions will not be in the way.
2404          */
2405         if (!regs)
2406                 trace.skip += 2;
2407
2408         /*
2409          * Since events can happen in NMIs, there's no safe way to
2410          * use the per-CPU ftrace_stacks. We reserve it, and if an interrupt
2411          * or NMI comes in, it will just have to use the default
2412          * FTRACE_STACK_SIZE.
2413          */
2414         preempt_disable_notrace();
2415
2416         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2417         /*
2418          * We don't need any atomic variables, just a barrier.
2419          * If an interrupt comes in, we don't care, because it would
2420          * have exited and put the counter back to what we want.
2421          * We just need a barrier to keep gcc from moving things
2422          * around.
2423          */
2424         barrier();
2425         if (use_stack == 1) {
2426                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2427                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2428
2429                 if (regs)
2430                         save_stack_trace_regs(regs, &trace);
2431                 else
2432                         save_stack_trace(&trace);
2433
2434                 if (trace.nr_entries > size)
2435                         size = trace.nr_entries;
2436         } else
2437                 /* From now on, use_stack is a boolean */
2438                 use_stack = 0;
2439
2440         size *= sizeof(unsigned long);
2441
2442         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2443                                             sizeof(*entry) + size, flags, pc);
2444         if (!event)
2445                 goto out;
2446         entry = ring_buffer_event_data(event);
2447
2448         memset(&entry->caller, 0, size);
2449
2450         if (use_stack)
2451                 memcpy(&entry->caller, trace.entries,
2452                        trace.nr_entries * sizeof(unsigned long));
2453         else {
2454                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2455                 trace.entries           = entry->caller;
2456                 if (regs)
2457                         save_stack_trace_regs(regs, &trace);
2458                 else
2459                         save_stack_trace(&trace);
2460         }
2461
2462         entry->size = trace.nr_entries;
2463
2464         if (!call_filter_check_discard(call, entry, buffer, event))
2465                 __buffer_unlock_commit(buffer, event);
2466
2467  out:
2468         /* Again, don't let gcc optimize things here */
2469         barrier();
2470         __this_cpu_dec(ftrace_stack_reserve);
2471         preempt_enable_notrace();
2472
2473 }
2474
2475 static inline void ftrace_trace_stack(struct trace_array *tr,
2476                                       struct ring_buffer *buffer,
2477                                       unsigned long flags,
2478                                       int skip, int pc, struct pt_regs *regs)
2479 {
2480         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2481                 return;
2482
2483         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2484 }
2485
2486 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2487                    int pc)
2488 {
2489         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
2490 }
2491
2492 /**
2493  * trace_dump_stack - record a stack back trace in the trace buffer
2494  * @skip: Number of functions to skip (helper handlers)
2495  */
2496 void trace_dump_stack(int skip)
2497 {
2498         unsigned long flags;
2499
2500         if (tracing_disabled || tracing_selftest_running)
2501                 return;
2502
2503         local_save_flags(flags);
2504
2505         /*
2506          * Skip 3 more; that seems to get us to the caller of
2507          * this function.
2508          */
2509         skip += 3;
2510         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2511                              flags, skip, preempt_count(), NULL);
2512 }
2513
2514 static DEFINE_PER_CPU(int, user_stack_count);
2515
2516 void
2517 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2518 {
2519         struct trace_event_call *call = &event_user_stack;
2520         struct ring_buffer_event *event;
2521         struct userstack_entry *entry;
2522         struct stack_trace trace;
2523
2524         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2525                 return;
2526
2527         /*
2528          * NMIs cannot handle page faults, even with fixups.
2529          * Saving the user stack can (and often does) fault.
2530          */
2531         if (unlikely(in_nmi()))
2532                 return;
2533
2534         /*
2535          * prevent recursion, since the user stack tracing may
2536          * trigger other kernel events.
2537          */
2538         preempt_disable();
2539         if (__this_cpu_read(user_stack_count))
2540                 goto out;
2541
2542         __this_cpu_inc(user_stack_count);
2543
2544         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2545                                             sizeof(*entry), flags, pc);
2546         if (!event)
2547                 goto out_drop_count;
2548         entry   = ring_buffer_event_data(event);
2549
2550         entry->tgid             = current->tgid;
2551         memset(&entry->caller, 0, sizeof(entry->caller));
2552
2553         trace.nr_entries        = 0;
2554         trace.max_entries       = FTRACE_STACK_ENTRIES;
2555         trace.skip              = 0;
2556         trace.entries           = entry->caller;
2557
2558         save_stack_trace_user(&trace);
2559         if (!call_filter_check_discard(call, entry, buffer, event))
2560                 __buffer_unlock_commit(buffer, event);
2561
2562  out_drop_count:
2563         __this_cpu_dec(user_stack_count);
2564  out:
2565         preempt_enable();
2566 }
2567
2568 #ifdef UNUSED
2569 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2570 {
2571         ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2572 }
2573 #endif /* UNUSED */
2574
2575 #endif /* CONFIG_STACKTRACE */
2576
2577 /* created for use with alloc_percpu */
2578 struct trace_buffer_struct {
2579         int nesting;
2580         char buffer[4][TRACE_BUF_SIZE];
2581 };
2582
2583 static struct trace_buffer_struct *trace_percpu_buffer;
2584
2585 /*
2586  * This allows for lockless recording.  If we're nested too deeply, then
2587  * this returns NULL.
2588  */
2589 static char *get_trace_buf(void)
2590 {
2591         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2592
2593         if (!buffer || buffer->nesting >= 4)
2594                 return NULL;
2595
2596         return &buffer->buffer[buffer->nesting++][0];
2597 }
2598
2599 static void put_trace_buf(void)
2600 {
2601         this_cpu_dec(trace_percpu_buffer->nesting);
2602 }
2603
2604 static int alloc_percpu_trace_buffer(void)
2605 {
2606         struct trace_buffer_struct *buffers;
2607
2608         buffers = alloc_percpu(struct trace_buffer_struct);
2609         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2610                 return -ENOMEM;
2611
2612         trace_percpu_buffer = buffers;
2613         return 0;
2614 }
2615
2616 static int buffers_allocated;
2617
2618 void trace_printk_init_buffers(void)
2619 {
2620         if (buffers_allocated)
2621                 return;
2622
2623         if (alloc_percpu_trace_buffer())
2624                 return;
2625
2626         /* trace_printk() is for debug use only. Don't use it in production. */
2627
2628         pr_warn("\n");
2629         pr_warn("**********************************************************\n");
2630         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2631         pr_warn("**                                                      **\n");
2632         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2633         pr_warn("**                                                      **\n");
2634         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2635         pr_warn("** unsafe for production use.                           **\n");
2636         pr_warn("**                                                      **\n");
2637         pr_warn("** If you see this message and you are not debugging    **\n");
2638         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2639         pr_warn("**                                                      **\n");
2640         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2641         pr_warn("**********************************************************\n");
2642
2643         /* Expand the buffers to set size */
2644         tracing_update_buffers();
2645
2646         buffers_allocated = 1;
2647
2648         /*
2649          * trace_printk_init_buffers() can be called by modules.
2650          * If that happens, then we need to start cmdline recording
2651          * directly here. If the global_trace.buffer is already
2652          * allocated here, then this was called by module code.
2653          */
2654         if (global_trace.trace_buffer.buffer)
2655                 tracing_start_cmdline_record();
2656 }
2657
2658 void trace_printk_start_comm(void)
2659 {
2660         /* Start tracing comms if trace printk is set */
2661         if (!buffers_allocated)
2662                 return;
2663         tracing_start_cmdline_record();
2664 }
2665
2666 static void trace_printk_start_stop_comm(int enabled)
2667 {
2668         if (!buffers_allocated)
2669                 return;
2670
2671         if (enabled)
2672                 tracing_start_cmdline_record();
2673         else
2674                 tracing_stop_cmdline_record();
2675 }
2676
2677 /**
2678  * trace_vbprintk - write binary msg to tracing buffer
2679  *
2680  */
2681 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2682 {
2683         struct trace_event_call *call = &event_bprint;
2684         struct ring_buffer_event *event;
2685         struct ring_buffer *buffer;
2686         struct trace_array *tr = &global_trace;
2687         struct bprint_entry *entry;
2688         unsigned long flags;
2689         char *tbuffer;
2690         int len = 0, size, pc;
2691
2692         if (unlikely(tracing_selftest_running || tracing_disabled))
2693                 return 0;
2694
2695         /* Don't pollute graph traces with trace_vprintk internals */
2696         pause_graph_tracing();
2697
2698         pc = preempt_count();
2699         preempt_disable_notrace();
2700
2701         tbuffer = get_trace_buf();
2702         if (!tbuffer) {
2703                 len = 0;
2704                 goto out_nobuffer;
2705         }
2706
2707         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2708
2709         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2710                 goto out;
2711
2712         local_save_flags(flags);
2713         size = sizeof(*entry) + sizeof(u32) * len;
2714         buffer = tr->trace_buffer.buffer;
2715         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2716                                             flags, pc);
2717         if (!event)
2718                 goto out;
2719         entry = ring_buffer_event_data(event);
2720         entry->ip                       = ip;
2721         entry->fmt                      = fmt;
2722
2723         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2724         if (!call_filter_check_discard(call, entry, buffer, event)) {
2725                 __buffer_unlock_commit(buffer, event);
2726                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2727         }
2728
2729 out:
2730         put_trace_buf();
2731
2732 out_nobuffer:
2733         preempt_enable_notrace();
2734         unpause_graph_tracing();
2735
2736         return len;
2737 }
2738 EXPORT_SYMBOL_GPL(trace_vbprintk);
2739
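     /*
      * Common helper for trace_array_vprintk() and trace_array_printk_buf():
      * format the message into a per-CPU scratch buffer and copy it into the
      * ring buffer as a TRACE_PRINT entry.
      */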
2740 static int
2741 __trace_array_vprintk(struct ring_buffer *buffer,
2742                       unsigned long ip, const char *fmt, va_list args)
2743 {
2744         struct trace_event_call *call = &event_print;
2745         struct ring_buffer_event *event;
2746         int len = 0, size, pc;
2747         struct print_entry *entry;
2748         unsigned long flags;
2749         char *tbuffer;
2750
2751         if (tracing_disabled || tracing_selftest_running)
2752                 return 0;
2753
2754         /* Don't pollute graph traces with trace_vprintk internals */
2755         pause_graph_tracing();
2756
2757         pc = preempt_count();
2758         preempt_disable_notrace();
2759
2760
2761         tbuffer = get_trace_buf();
2762         if (!tbuffer) {
2763                 len = 0;
2764                 goto out_nobuffer;
2765         }
2766
2767         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2768
2769         local_save_flags(flags);
2770         size = sizeof(*entry) + len + 1;
2771         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2772                                             flags, pc);
2773         if (!event)
2774                 goto out;
2775         entry = ring_buffer_event_data(event);
2776         entry->ip = ip;
2777
2778         memcpy(&entry->buf, tbuffer, len + 1);
2779         if (!call_filter_check_discard(call, entry, buffer, event)) {
2780                 __buffer_unlock_commit(buffer, event);
2781                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2782         }
2783
2784 out:
2785         put_trace_buf();
2786
2787 out_nobuffer:
2788         preempt_enable_notrace();
2789         unpause_graph_tracing();
2790
2791         return len;
2792 }
2793
2794 int trace_array_vprintk(struct trace_array *tr,
2795                         unsigned long ip, const char *fmt, va_list args)
2796 {
2797         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2798 }
2799
2800 int trace_array_printk(struct trace_array *tr,
2801                        unsigned long ip, const char *fmt, ...)
2802 {
2803         int ret;
2804         va_list ap;
2805
2806         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2807                 return 0;
2808
2809         va_start(ap, fmt);
2810         ret = trace_array_vprintk(tr, ip, fmt, ap);
2811         va_end(ap);
2812         return ret;
2813 }
2814
2815 int trace_array_printk_buf(struct ring_buffer *buffer,
2816                            unsigned long ip, const char *fmt, ...)
2817 {
2818         int ret;
2819         va_list ap;
2820
2821         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2822                 return 0;
2823
2824         va_start(ap, fmt);
2825         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2826         va_end(ap);
2827         return ret;
2828 }
2829
2830 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2831 {
2832         return trace_array_vprintk(&global_trace, ip, fmt, args);
2833 }
2834 EXPORT_SYMBOL_GPL(trace_vprintk);
2835
2836 static void trace_iterator_increment(struct trace_iterator *iter)
2837 {
2838         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2839
2840         iter->idx++;
2841         if (buf_iter)
2842                 ring_buffer_read(buf_iter, NULL);
2843 }
2844
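     /*
      * Peek at the next entry on @cpu without consuming it, recording its
      * size in iter->ent_size.
      */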
2845 static struct trace_entry *
2846 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2847                 unsigned long *lost_events)
2848 {
2849         struct ring_buffer_event *event;
2850         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2851
2852         if (buf_iter)
2853                 event = ring_buffer_iter_peek(buf_iter, ts);
2854         else
2855                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2856                                          lost_events);
2857
2858         if (event) {
2859                 iter->ent_size = ring_buffer_event_length(event);
2860                 return ring_buffer_event_data(event);
2861         }
2862         iter->ent_size = 0;
2863         return NULL;
2864 }
2865
2866 static struct trace_entry *
2867 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2868                   unsigned long *missing_events, u64 *ent_ts)
2869 {
2870         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2871         struct trace_entry *ent, *next = NULL;
2872         unsigned long lost_events = 0, next_lost = 0;
2873         int cpu_file = iter->cpu_file;
2874         u64 next_ts = 0, ts;
2875         int next_cpu = -1;
2876         int next_size = 0;
2877         int cpu;
2878
2879         /*
2880          * If we are in a per-CPU trace file, don't bother iterating over
2881          * all CPUs; peek at that CPU directly.
2882          */
2883         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2884                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2885                         return NULL;
2886                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2887                 if (ent_cpu)
2888                         *ent_cpu = cpu_file;
2889
2890                 return ent;
2891         }
2892
2893         for_each_tracing_cpu(cpu) {
2894
2895                 if (ring_buffer_empty_cpu(buffer, cpu))
2896                         continue;
2897
2898                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2899
2900                 /*
2901                  * Pick the entry with the smallest timestamp:
2902                  */
2903                 if (ent && (!next || ts < next_ts)) {
2904                         next = ent;
2905                         next_cpu = cpu;
2906                         next_ts = ts;
2907                         next_lost = lost_events;
2908                         next_size = iter->ent_size;
2909                 }
2910         }
2911
2912         iter->ent_size = next_size;
2913
2914         if (ent_cpu)
2915                 *ent_cpu = next_cpu;
2916
2917         if (ent_ts)
2918                 *ent_ts = next_ts;
2919
2920         if (missing_events)
2921                 *missing_events = next_lost;
2922
2923         return next;
2924 }
2925
2926 /* Find the next real entry, without updating the iterator itself */
2927 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2928                                           int *ent_cpu, u64 *ent_ts)
2929 {
2930         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2931 }
2932
2933 /* Find the next real entry, and increment the iterator to the next entry */
2934 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2935 {
2936         iter->ent = __find_next_entry(iter, &iter->cpu,
2937                                       &iter->lost_events, &iter->ts);
2938
2939         if (iter->ent)
2940                 trace_iterator_increment(iter);
2941
2942         return iter->ent ? iter : NULL;
2943 }
2944
2945 static void trace_consume(struct trace_iterator *iter)
2946 {
2947         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2948                             &iter->lost_events);
2949 }
2950
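     /*
      * seq_file ->next() callback: advance the trace iterator to the entry
      * at position *pos, or return NULL when the buffer is exhausted.
      */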
2951 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2952 {
2953         struct trace_iterator *iter = m->private;
2954         int i = (int)*pos;
2955         void *ent;
2956
2957         WARN_ON_ONCE(iter->leftover);
2958
2959         (*pos)++;
2960
2961         /* can't go backwards */
2962         if (iter->idx > i)
2963                 return NULL;
2964
2965         if (iter->idx < 0)
2966                 ent = trace_find_next_entry_inc(iter);
2967         else
2968                 ent = iter;
2969
2970         while (ent && iter->idx < i)
2971                 ent = trace_find_next_entry_inc(iter);
2972
2973         iter->pos = *pos;
2974
2975         return ent;
2976 }
2977
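     /*
      * Reset the per-CPU iterator to the start of the buffer, counting (and
      * skipping) any entries recorded before the buffer's time_start.
      */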
2978 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2979 {
2980         struct ring_buffer_event *event;
2981         struct ring_buffer_iter *buf_iter;
2982         unsigned long entries = 0;
2983         u64 ts;
2984
2985         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2986
2987         buf_iter = trace_buffer_iter(iter, cpu);
2988         if (!buf_iter)
2989                 return;
2990
2991         ring_buffer_iter_reset(buf_iter);
2992
2993         /*
2994          * With the max latency tracers, a reset may never have taken
2995          * place on a CPU. This is evident when the timestamp is before
2996          * the start of the buffer.
2997          */
2998         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2999                 if (ts >= iter->trace_buffer->time_start)
3000                         break;
3001                 entries++;
3002                 ring_buffer_read(buf_iter, NULL);
3003         }
3004
3005         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3006 }
3007
3008 /*
3009  * The current tracer is copied to avoid taking a global lock
3010  * all around.
3011  */
3012 static void *s_start(struct seq_file *m, loff_t *pos)
3013 {
3014         struct trace_iterator *iter = m->private;
3015         struct trace_array *tr = iter->tr;
3016         int cpu_file = iter->cpu_file;
3017         void *p = NULL;
3018         loff_t l = 0;
3019         int cpu;
3020
3021         /*
3022          * copy the tracer to avoid using a global lock all around.
3023          * iter->trace is a copy of current_trace, the pointer to the
3024          * name may be used instead of a strcmp(), as iter->trace->name
3025          * will point to the same string as current_trace->name.
3026          */
3027         mutex_lock(&trace_types_lock);
3028         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3029                 *iter->trace = *tr->current_trace;
3030         mutex_unlock(&trace_types_lock);
3031
3032 #ifdef CONFIG_TRACER_MAX_TRACE
3033         if (iter->snapshot && iter->trace->use_max_tr)
3034                 return ERR_PTR(-EBUSY);
3035 #endif
3036
3037         if (!iter->snapshot)
3038                 atomic_inc(&trace_record_cmdline_disabled);
3039
3040         if (*pos != iter->pos) {
3041                 iter->ent = NULL;
3042                 iter->cpu = 0;
3043                 iter->idx = -1;
3044
3045                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3046                         for_each_tracing_cpu(cpu)
3047                                 tracing_iter_reset(iter, cpu);
3048                 } else
3049                         tracing_iter_reset(iter, cpu_file);
3050
3051                 iter->leftover = 0;
3052                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3053                         ;
3054
3055         } else {
3056                 /*
3057                  * If we overflowed the seq_file before, then we want
3058                  * to just reuse the trace_seq buffer again.
3059                  */
3060                 if (iter->leftover)
3061                         p = iter;
3062                 else {
3063                         l = *pos - 1;
3064                         p = s_next(m, p, &l);
3065                 }
3066         }
3067
3068         trace_event_read_lock();
3069         trace_access_lock(cpu_file);
3070         return p;
3071 }
3072
3073 static void s_stop(struct seq_file *m, void *p)
3074 {
3075         struct trace_iterator *iter = m->private;
3076
3077 #ifdef CONFIG_TRACER_MAX_TRACE
3078         if (iter->snapshot && iter->trace->use_max_tr)
3079                 return;
3080 #endif
3081
3082         if (!iter->snapshot)
3083                 atomic_dec(&trace_record_cmdline_disabled);
3084
3085         trace_access_unlock(iter->cpu_file);
3086         trace_event_read_unlock();
3087 }
3088
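     /*
      * Sum, across all tracing CPUs, the entries still in the buffer
      * (@entries) and the total written including overruns (@total).
      */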
3089 static void
3090 get_total_entries(struct trace_buffer *buf,
3091                   unsigned long *total, unsigned long *entries)
3092 {
3093         unsigned long count;
3094         int cpu;
3095
3096         *total = 0;
3097         *entries = 0;
3098
3099         for_each_tracing_cpu(cpu) {
3100                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3101                 /*
3102                  * If this buffer has skipped entries, then we hold all
3103                  * entries for the trace and we need to ignore the
3104                  * ones before the time stamp.
3105                  */
3106                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3107                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3108                         /* total is the same as the entries */
3109                         *total += count;
3110                 } else
3111                         *total += count +
3112                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3113                 *entries += count;
3114         }
3115 }
3116
3117 static void print_lat_help_header(struct seq_file *m)
3118 {
3119         seq_puts(m, "#                  _------=> CPU#            \n"
3120                     "#                 / _-----=> irqs-off        \n"
3121                     "#                | / _----=> need-resched    \n"
3122                     "#                || / _---=> hardirq/softirq \n"
3123                     "#                ||| / _--=> preempt-depth   \n"
3124                     "#                |||| /     delay            \n"
3125                     "#  cmd     pid   ||||| time  |   caller      \n"
3126                     "#     \\   /      |||||  \\    |   /         \n");
3127 }
3128
3129 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3130 {
3131         unsigned long total;
3132         unsigned long entries;
3133
3134         get_total_entries(buf, &total, &entries);
3135         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3136                    entries, total, num_online_cpus());
3137         seq_puts(m, "#\n");
3138 }
3139
3140 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3141 {
3142         print_event_info(buf, m);
3143         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3144                     "#              | |       |          |         |\n");
3145 }
3146
3147 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3148 {
3149         print_event_info(buf, m);
3150         seq_puts(m, "#                              _-----=> irqs-off\n"
3151                     "#                             / _----=> need-resched\n"
3152                     "#                            | / _---=> hardirq/softirq\n"
3153                     "#                            || / _--=> preempt-depth\n"
3154                     "#                            ||| /     delay\n"
3155                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3156                     "#              | |       |   ||||       |         |\n");
3157 }
3158
3159 void
3160 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3161 {
3162         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3163         struct trace_buffer *buf = iter->trace_buffer;
3164         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3165         struct tracer *type = iter->trace;
3166         unsigned long entries;
3167         unsigned long total;
3168         const char *name = "preemption";
3169
3170         name = type->name;
3171
3172         get_total_entries(buf, &total, &entries);
3173
3174         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3175                    name, UTS_RELEASE);
3176         seq_puts(m, "# -----------------------------------"
3177                  "---------------------------------\n");
3178         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3179                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3180                    nsecs_to_usecs(data->saved_latency),
3181                    entries,
3182                    total,
3183                    buf->cpu,
3184 #if defined(CONFIG_PREEMPT_NONE)
3185                    "server",
3186 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3187                    "desktop",
3188 #elif defined(CONFIG_PREEMPT)
3189                    "preempt",
3190 #else
3191                    "unknown",
3192 #endif
3193                    /* These are reserved for later use */
3194                    0, 0, 0, 0);
3195 #ifdef CONFIG_SMP
3196         seq_printf(m, " #P:%d)\n", num_online_cpus());
3197 #else
3198         seq_puts(m, ")\n");
3199 #endif
3200         seq_puts(m, "#    -----------------\n");
3201         seq_printf(m, "#    | task: %.16s-%d "
3202                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3203                    data->comm, data->pid,
3204                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3205                    data->policy, data->rt_priority);
3206         seq_puts(m, "#    -----------------\n");
3207
3208         if (data->critical_start) {
3209                 seq_puts(m, "#  => started at: ");
3210                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3211                 trace_print_seq(m, &iter->seq);
3212                 seq_puts(m, "\n#  => ended at:   ");
3213                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3214                 trace_print_seq(m, &iter->seq);
3215                 seq_puts(m, "\n#\n");
3216         }
3217
3218         seq_puts(m, "#\n");
3219 }
3220
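     /*
      * Emit a "CPU buffer started" annotation the first time entries from a
      * given CPU appear in the output (when the annotate option is set).
      */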
3221 static void test_cpu_buff_start(struct trace_iterator *iter)
3222 {
3223         struct trace_seq *s = &iter->seq;
3224         struct trace_array *tr = iter->tr;
3225
3226         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3227                 return;
3228
3229         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3230                 return;
3231
3232         if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
3233                 return;
3234
3235         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3236                 return;
3237
3238         if (iter->started)
3239                 cpumask_set_cpu(iter->cpu, iter->started);
3240
3241         /* Don't print started cpu buffer for the first entry of the trace */
3242         if (iter->idx > 1)
3243                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3244                                 iter->cpu);
3245 }
3246
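     /*
      * Default human-readable formatting of a single trace entry.
      */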
3247 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3248 {
3249         struct trace_array *tr = iter->tr;
3250         struct trace_seq *s = &iter->seq;
3251         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3252         struct trace_entry *entry;
3253         struct trace_event *event;
3254
3255         entry = iter->ent;
3256
3257         test_cpu_buff_start(iter);
3258
3259         event = ftrace_find_event(entry->type);
3260
3261         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3262                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3263                         trace_print_lat_context(iter);
3264                 else
3265                         trace_print_context(iter);
3266         }
3267
3268         if (trace_seq_has_overflowed(s))
3269                 return TRACE_TYPE_PARTIAL_LINE;
3270
3271         if (event)
3272                 return event->funcs->trace(iter, sym_flags, event);
3273
3274         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3275
3276         return trace_handle_return(s);
3277 }
3278
3279 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3280 {
3281         struct trace_array *tr = iter->tr;
3282         struct trace_seq *s = &iter->seq;
3283         struct trace_entry *entry;
3284         struct trace_event *event;
3285
3286         entry = iter->ent;
3287
3288         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3289                 trace_seq_printf(s, "%d %d %llu ",
3290                                  entry->pid, iter->cpu, iter->ts);
3291
3292         if (trace_seq_has_overflowed(s))
3293                 return TRACE_TYPE_PARTIAL_LINE;
3294
3295         event = ftrace_find_event(entry->type);
3296         if (event)
3297                 return event->funcs->raw(iter, 0, event);
3298
3299         trace_seq_printf(s, "%d ?\n", entry->type);
3300
3301         return trace_handle_return(s);
3302 }
3303
3304 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3305 {
3306         struct trace_array *tr = iter->tr;
3307         struct trace_seq *s = &iter->seq;
3308         unsigned char newline = '\n';
3309         struct trace_entry *entry;
3310         struct trace_event *event;
3311
3312         entry = iter->ent;
3313
3314         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3315                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3316                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3317                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3318                 if (trace_seq_has_overflowed(s))
3319                         return TRACE_TYPE_PARTIAL_LINE;
3320         }
3321
3322         event = ftrace_find_event(entry->type);
3323         if (event) {
3324                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3325                 if (ret != TRACE_TYPE_HANDLED)
3326                         return ret;
3327         }
3328
3329         SEQ_PUT_FIELD(s, newline);
3330
3331         return trace_handle_return(s);
3332 }
3333
3334 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3335 {
3336         struct trace_array *tr = iter->tr;
3337         struct trace_seq *s = &iter->seq;
3338         struct trace_entry *entry;
3339         struct trace_event *event;
3340
3341         entry = iter->ent;
3342
3343         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3344                 SEQ_PUT_FIELD(s, entry->pid);
3345                 SEQ_PUT_FIELD(s, iter->cpu);
3346                 SEQ_PUT_FIELD(s, iter->ts);
3347                 if (trace_seq_has_overflowed(s))
3348                         return TRACE_TYPE_PARTIAL_LINE;
3349         }
3350
3351         event = ftrace_find_event(entry->type);
3352         return event ? event->funcs->binary(iter, 0, event) :
3353                 TRACE_TYPE_HANDLED;
3354 }
3355
3356 int trace_empty(struct trace_iterator *iter)
3357 {
3358         struct ring_buffer_iter *buf_iter;
3359         int cpu;
3360
3361         /* If we are looking at one CPU buffer, only check that one */
3362         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3363                 cpu = iter->cpu_file;
3364                 buf_iter = trace_buffer_iter(iter, cpu);
3365                 if (buf_iter) {
3366                         if (!ring_buffer_iter_empty(buf_iter))
3367                                 return 0;
3368                 } else {
3369                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3370                                 return 0;
3371                 }
3372                 return 1;
3373         }
3374
3375         for_each_tracing_cpu(cpu) {
3376                 buf_iter = trace_buffer_iter(iter, cpu);
3377                 if (buf_iter) {
3378                         if (!ring_buffer_iter_empty(buf_iter))
3379                                 return 0;
3380                 } else {
3381                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3382                                 return 0;
3383                 }
3384         }
3385
3386         return 1;
3387 }
3388
3389 /*  Called with trace_event_read_lock() held. */
3390 enum print_line_t print_trace_line(struct trace_iterator *iter)
3391 {
3392         struct trace_array *tr = iter->tr;
3393         unsigned long trace_flags = tr->trace_flags;
3394         enum print_line_t ret;
3395
3396         if (iter->lost_events) {
3397                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3398                                  iter->cpu, iter->lost_events);
3399                 if (trace_seq_has_overflowed(&iter->seq))
3400                         return TRACE_TYPE_PARTIAL_LINE;
3401         }
3402
3403         if (iter->trace && iter->trace->print_line) {
3404                 ret = iter->trace->print_line(iter);
3405                 if (ret != TRACE_TYPE_UNHANDLED)
3406                         return ret;
3407         }
3408
3409         if (iter->ent->type == TRACE_BPUTS &&
3410                         trace_flags & TRACE_ITER_PRINTK &&
3411                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3412                 return trace_print_bputs_msg_only(iter);
3413
3414         if (iter->ent->type == TRACE_BPRINT &&
3415                         trace_flags & TRACE_ITER_PRINTK &&
3416                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3417                 return trace_print_bprintk_msg_only(iter);
3418
3419         if (iter->ent->type == TRACE_PRINT &&
3420                         trace_flags & TRACE_ITER_PRINTK &&
3421                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3422                 return trace_print_printk_msg_only(iter);
3423
3424         if (trace_flags & TRACE_ITER_BIN)
3425                 return print_bin_fmt(iter);
3426
3427         if (trace_flags & TRACE_ITER_HEX)
3428                 return print_hex_fmt(iter);
3429
3430         if (trace_flags & TRACE_ITER_RAW)
3431                 return print_raw_fmt(iter);
3432
3433         return print_trace_fmt(iter);
3434 }
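/*
 * Illustrative note on the dispatch above: after the msg-only printk
 * cases, the "bin", "hex" and "raw" trace_options are checked in that
 * order, so e.g. "echo hex > trace_options" routes entries through
 * print_hex_fmt() even if "raw" is also set; with none of them set,
 * print_trace_fmt() produces the default human-readable output.
 */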
3435
3436 void trace_latency_header(struct seq_file *m)
3437 {
3438         struct trace_iterator *iter = m->private;
3439         struct trace_array *tr = iter->tr;
3440
3441         /* print nothing if the buffers are empty */
3442         if (trace_empty(iter))
3443                 return;
3444
3445         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3446                 print_trace_header(m, iter);
3447
3448         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3449                 print_lat_help_header(m);
3450 }
3451
3452 void trace_default_header(struct seq_file *m)
3453 {
3454         struct trace_iterator *iter = m->private;
3455         struct trace_array *tr = iter->tr;
3456         unsigned long trace_flags = tr->trace_flags;
3457
3458         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3459                 return;
3460
3461         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3462                 /* print nothing if the buffers are empty */
3463                 if (trace_empty(iter))
3464                         return;
3465                 print_trace_header(m, iter);
3466                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3467                         print_lat_help_header(m);
3468         } else {
3469                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3470                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3471                                 print_func_help_header_irq(iter->trace_buffer, m);
3472                         else
3473                                 print_func_help_header(iter->trace_buffer, m);
3474                 }
3475         }
3476 }
3477
3478 static void test_ftrace_alive(struct seq_file *m)
3479 {
3480         if (!ftrace_is_dead())
3481                 return;
3482         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3483                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3484 }
3485
3486 #ifdef CONFIG_TRACER_MAX_TRACE
3487 static void show_snapshot_main_help(struct seq_file *m)
3488 {
3489         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3490                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3491                     "#                      Takes a snapshot of the main buffer.\n"
3492                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3493                     "#                      (Doesn't have to be '2'; works with any number that\n"
3494                     "#                       is not a '0' or '1')\n");
3495 }
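/*
 * Illustrative shell usage of the commands described above, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *   cd /sys/kernel/tracing
 *   echo 1 > snapshot     # allocate (if needed) and take a snapshot
 *   cat snapshot          # read the snapshotted data
 *   echo 2 > snapshot     # clear the snapshot, keep the buffer
 *   echo 0 > snapshot     # free the snapshot buffer
 */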
3496
3497 static void show_snapshot_percpu_help(struct seq_file *m)
3498 {
3499         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3500 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3501         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3502                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3503 #else
3504         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3505                     "#                     Must use main snapshot file to allocate.\n");
3506 #endif
3507         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3508                     "#                      (Doesn't have to be '2'; works with any number that\n"
3509                     "#                       is not a '0' or '1')\n");
3510 }
3511
3512 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3513 {
3514         if (iter->tr->allocated_snapshot)
3515                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3516         else
3517                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3518
3519         seq_puts(m, "# Snapshot commands:\n");
3520         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3521                 show_snapshot_main_help(m);
3522         else
3523                 show_snapshot_percpu_help(m);
3524 }
3525 #else
3526 /* Should never be called */
3527 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3528 #endif
3529
3530 static int s_show(struct seq_file *m, void *v)
3531 {
3532         struct trace_iterator *iter = v;
3533         int ret;
3534
3535         if (iter->ent == NULL) {
3536                 if (iter->tr) {
3537                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3538                         seq_puts(m, "#\n");
3539                         test_ftrace_alive(m);
3540                 }
3541                 if (iter->snapshot && trace_empty(iter))
3542                         print_snapshot_help(m, iter);
3543                 else if (iter->trace && iter->trace->print_header)
3544                         iter->trace->print_header(m);
3545                 else
3546                         trace_default_header(m);
3547
3548         } else if (iter->leftover) {
3549                 /*
3550                  * If we filled the seq_file buffer earlier, we
3551                  * want to just show it now.
3552                  */
3553                 ret = trace_print_seq(m, &iter->seq);
3554
3555                 /* ret should this time be zero, but you never know */
3556                 iter->leftover = ret;
3557
3558         } else {
3559                 print_trace_line(iter);
3560                 ret = trace_print_seq(m, &iter->seq);
3561                 /*
3562                  * If we overflow the seq_file buffer, it will ask
3563                  * us for this data again on the next read.
3564                  * Save the result so the leftover path above picks it up.
3565                  *  ret is 0 if seq_file write succeeded.
3566                  *        -1 otherwise.
3567                  */
3568                 iter->leftover = ret;
3569         }
3570
3571         return 0;
3572 }
3573
3574 /*
3575  * Should be used after trace_array_get(); trace_types_lock
3576  * ensures that i_cdev was already initialized.
3577  */
3578 static inline int tracing_get_cpu(struct inode *inode)
3579 {
3580         if (inode->i_cdev) /* See trace_create_cpu_file() */
3581                 return (long)inode->i_cdev - 1;
3582         return RING_BUFFER_ALL_CPUS;
3583 }
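/*
 * Per-CPU trace files store "cpu + 1" in i_cdev when they are created
 * (see trace_create_cpu_file()), which is why the decrement above
 * recovers the CPU number; a NULL i_cdev therefore means the file is
 * not per-CPU and all CPUs are selected.
 */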
3584
3585 static const struct seq_operations tracer_seq_ops = {
3586         .start          = s_start,
3587         .next           = s_next,
3588         .stop           = s_stop,
3589         .show           = s_show,
3590 };
3591
3592 static struct trace_iterator *
3593 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3594 {
3595         struct trace_array *tr = inode->i_private;
3596         struct trace_iterator *iter;
3597         int cpu;
3598
3599         if (tracing_disabled)
3600                 return ERR_PTR(-ENODEV);
3601
3602         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3603         if (!iter)
3604                 return ERR_PTR(-ENOMEM);
3605
3606         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3607                                     GFP_KERNEL);
3608         if (!iter->buffer_iter)
3609                 goto release;
3610
3611         /*
3612          * We make a copy of the current tracer to avoid concurrent
3613          * changes on it while we are reading.
3614          */
3615         mutex_lock(&trace_types_lock);
3616         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3617         if (!iter->trace)
3618                 goto fail;
3619
3620         *iter->trace = *tr->current_trace;
3621
3622         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3623                 goto fail;
3624
3625         iter->tr = tr;
3626
3627 #ifdef CONFIG_TRACER_MAX_TRACE
3628         /* Currently only the top directory has a snapshot */
3629         if (tr->current_trace->print_max || snapshot)
3630                 iter->trace_buffer = &tr->max_buffer;
3631         else
3632 #endif
3633                 iter->trace_buffer = &tr->trace_buffer;
3634         iter->snapshot = snapshot;
3635         iter->pos = -1;
3636         iter->cpu_file = tracing_get_cpu(inode);
3637         mutex_init(&iter->mutex);
3638
3639         /* Notify the tracer early; before we stop tracing. */
3640         if (iter->trace && iter->trace->open)
3641                 iter->trace->open(iter);
3642
3643         /* Annotate start of buffers if we had overruns */
3644         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3645                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3646
3647         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3648         if (trace_clocks[tr->clock_id].in_ns)
3649                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3650
3651         /* stop the trace while dumping if we are not opening "snapshot" */
3652         if (!iter->snapshot)
3653                 tracing_stop_tr(tr);
3654
3655         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3656                 for_each_tracing_cpu(cpu) {
3657                         iter->buffer_iter[cpu] =
3658                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3659                 }
3660                 ring_buffer_read_prepare_sync();
3661                 for_each_tracing_cpu(cpu) {
3662                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3663                         tracing_iter_reset(iter, cpu);
3664                 }
3665         } else {
3666                 cpu = iter->cpu_file;
3667                 iter->buffer_iter[cpu] =
3668                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3669                 ring_buffer_read_prepare_sync();
3670                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3671                 tracing_iter_reset(iter, cpu);
3672         }
3673
3674         mutex_unlock(&trace_types_lock);
3675
3676         return iter;
3677
3678  fail:
3679         mutex_unlock(&trace_types_lock);
3680         kfree(iter->trace);
3681         kfree(iter->buffer_iter);
3682 release:
3683         seq_release_private(inode, file);
3684         return ERR_PTR(-ENOMEM);
3685 }
3686
3687 int tracing_open_generic(struct inode *inode, struct file *filp)
3688 {
3689         if (tracing_disabled)
3690                 return -ENODEV;
3691
3692         filp->private_data = inode->i_private;
3693         return 0;
3694 }
3695
3696 bool tracing_is_disabled(void)
3697 {
3698         return tracing_disabled ? true : false;
3699 }
3700
3701 /*
3702  * Open and update trace_array ref count.
3703  * Must have the current trace_array passed to it.
3704  */
3705 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3706 {
3707         struct trace_array *tr = inode->i_private;
3708
3709         if (tracing_disabled)
3710                 return -ENODEV;
3711
3712         if (trace_array_get(tr) < 0)
3713                 return -ENODEV;
3714
3715         filp->private_data = inode->i_private;
3716
3717         return 0;
3718 }
3719
3720 static int tracing_release(struct inode *inode, struct file *file)
3721 {
3722         struct trace_array *tr = inode->i_private;
3723         struct seq_file *m = file->private_data;
3724         struct trace_iterator *iter;
3725         int cpu;
3726
3727         if (!(file->f_mode & FMODE_READ)) {
3728                 trace_array_put(tr);
3729                 return 0;
3730         }
3731
3732         /* Writes do not use seq_file */
3733         iter = m->private;
3734         mutex_lock(&trace_types_lock);
3735
3736         for_each_tracing_cpu(cpu) {
3737                 if (iter->buffer_iter[cpu])
3738                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3739         }
3740
3741         if (iter->trace && iter->trace->close)
3742                 iter->trace->close(iter);
3743
3744         if (!iter->snapshot)
3745                 /* reenable tracing if it was previously enabled */
3746                 tracing_start_tr(tr);
3747
3748         __trace_array_put(tr);
3749
3750         mutex_unlock(&trace_types_lock);
3751
3752         mutex_destroy(&iter->mutex);
3753         free_cpumask_var(iter->started);
3754         kfree(iter->trace);
3755         kfree(iter->buffer_iter);
3756         seq_release_private(inode, file);
3757
3758         return 0;
3759 }
3760
3761 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3762 {
3763         struct trace_array *tr = inode->i_private;
3764
3765         trace_array_put(tr);
3766         return 0;
3767 }
3768
3769 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3770 {
3771         struct trace_array *tr = inode->i_private;
3772
3773         trace_array_put(tr);
3774
3775         return single_release(inode, file);
3776 }
3777
3778 static int tracing_open(struct inode *inode, struct file *file)
3779 {
3780         struct trace_array *tr = inode->i_private;
3781         struct trace_iterator *iter;
3782         int ret = 0;
3783
3784         if (trace_array_get(tr) < 0)
3785                 return -ENODEV;
3786
3787         /* If this file was open for write, then erase contents */
3788         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3789                 int cpu = tracing_get_cpu(inode);
3790
3791                 if (cpu == RING_BUFFER_ALL_CPUS)
3792                         tracing_reset_online_cpus(&tr->trace_buffer);
3793                 else
3794                         tracing_reset(&tr->trace_buffer, cpu);
3795         }
3796
3797         if (file->f_mode & FMODE_READ) {
3798                 iter = __tracing_open(inode, file, false);
3799                 if (IS_ERR(iter))
3800                         ret = PTR_ERR(iter);
3801                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3802                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3803         }
3804
3805         if (ret < 0)
3806                 trace_array_put(tr);
3807
3808         return ret;
3809 }
3810
3811 /*
3812  * Some tracers are not suitable for instance buffers.
3813  * A tracer is always available for the global array (toplevel)
3814  * or if it explicitly states that it is.
3815  */
3816 static bool
3817 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3818 {
3819         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3820 }
3821
3822 /* Find the next tracer that this trace array may use */
3823 static struct tracer *
3824 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3825 {
3826         while (t && !trace_ok_for_array(t, tr))
3827                 t = t->next;
3828
3829         return t;
3830 }
3831
3832 static void *
3833 t_next(struct seq_file *m, void *v, loff_t *pos)
3834 {
3835         struct trace_array *tr = m->private;
3836         struct tracer *t = v;
3837
3838         (*pos)++;
3839
3840         if (t)
3841                 t = get_tracer_for_array(tr, t->next);
3842
3843         return t;
3844 }
3845
3846 static void *t_start(struct seq_file *m, loff_t *pos)
3847 {
3848         struct trace_array *tr = m->private;
3849         struct tracer *t;
3850         loff_t l = 0;
3851
3852         mutex_lock(&trace_types_lock);
3853
3854         t = get_tracer_for_array(tr, trace_types);
3855         for (; t && l < *pos; t = t_next(m, t, &l))
3856                 ;
3857
3858         return t;
3859 }
3860
3861 static void t_stop(struct seq_file *m, void *p)
3862 {
3863         mutex_unlock(&trace_types_lock);
3864 }
3865
3866 static int t_show(struct seq_file *m, void *v)
3867 {
3868         struct tracer *t = v;
3869
3870         if (!t)
3871                 return 0;
3872
3873         seq_puts(m, t->name);
3874         if (t->next)
3875                 seq_putc(m, ' ');
3876         else
3877                 seq_putc(m, '\n');
3878
3879         return 0;
3880 }
3881
3882 static const struct seq_operations show_traces_seq_ops = {
3883         .start          = t_start,
3884         .next           = t_next,
3885         .stop           = t_stop,
3886         .show           = t_show,
3887 };
3888
3889 static int show_traces_open(struct inode *inode, struct file *file)
3890 {
3891         struct trace_array *tr = inode->i_private;
3892         struct seq_file *m;
3893         int ret;
3894
3895         if (tracing_disabled)
3896                 return -ENODEV;
3897
3898         ret = seq_open(file, &show_traces_seq_ops);
3899         if (ret)
3900                 return ret;
3901
3902         m = file->private_data;
3903         m->private = tr;
3904
3905         return 0;
3906 }
3907
3908 static ssize_t
3909 tracing_write_stub(struct file *filp, const char __user *ubuf,
3910                    size_t count, loff_t *ppos)
3911 {
3912         return count;
3913 }
3914
3915 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3916 {
3917         int ret;
3918
3919         if (file->f_mode & FMODE_READ)
3920                 ret = seq_lseek(file, offset, whence);
3921         else
3922                 file->f_pos = ret = 0;
3923
3924         return ret;
3925 }
3926
3927 static const struct file_operations tracing_fops = {
3928         .open           = tracing_open,
3929         .read           = seq_read,
3930         .write          = tracing_write_stub,
3931         .llseek         = tracing_lseek,
3932         .release        = tracing_release,
3933 };
3934
3935 static const struct file_operations show_traces_fops = {
3936         .open           = show_traces_open,
3937         .read           = seq_read,
3938         .release        = seq_release,
3939         .llseek         = seq_lseek,
3940 };
3941
3942 /*
3943  * The tracer itself will not take this lock, but still we want
3944  * to provide a consistent cpumask to user-space:
3945  */
3946 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3947
3948 /*
3949  * Temporary storage for the character representation of the
3950  * CPU bitmask (and one more byte for the newline):
3951  */
3952 static char mask_str[NR_CPUS + 1];
3953
3954 static ssize_t
3955 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3956                      size_t count, loff_t *ppos)
3957 {
3958         struct trace_array *tr = file_inode(filp)->i_private;
3959         int len;
3960
3961         mutex_lock(&tracing_cpumask_update_lock);
3962
3963         len = snprintf(mask_str, sizeof(mask_str), "%*pb\n",
3964                        cpumask_pr_args(tr->tracing_cpumask));
3965         if (len >= count) {
3966                 count = -EINVAL;
3967                 goto out_err;
3968         }
3969         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
3970
3971 out_err:
3972         mutex_unlock(&tracing_cpumask_update_lock);
3973
3974         return count;
3975 }
3976
3977 static ssize_t
3978 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3979                       size_t count, loff_t *ppos)
3980 {
3981         struct trace_array *tr = file_inode(filp)->i_private;
3982         cpumask_var_t tracing_cpumask_new;
3983         int err, cpu;
3984
3985         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3986                 return -ENOMEM;
3987
3988         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3989         if (err)
3990                 goto err_unlock;
3991
3992         mutex_lock(&tracing_cpumask_update_lock);
3993
3994         local_irq_disable();
3995         arch_spin_lock(&tr->max_lock);
3996         for_each_tracing_cpu(cpu) {
3997                 /*
3998                  * Increase/decrease the disabled counter if we are
3999                  * about to flip a bit in the cpumask:
4000                  */
4001                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4002                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4003                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4004                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4005                 }
4006                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4007                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4008                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4009                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4010                 }
4011         }
4012         arch_spin_unlock(&tr->max_lock);
4013         local_irq_enable();
4014
4015         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4016
4017         mutex_unlock(&tracing_cpumask_update_lock);
4018         free_cpumask_var(tracing_cpumask_new);
4019
4020         return count;
4021
4022 err_unlock:
4023         free_cpumask_var(tracing_cpumask_new);
4024
4025         return err;
4026 }
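/*
 * Illustrative usage from the shell (tracefs assumed mounted at
 * /sys/kernel/tracing): the file takes a hex CPU mask, e.g.
 *
 *   echo 3 > tracing_cpumask    # trace only CPUs 0 and 1
 *   cat tracing_cpumask         # show the current mask
 *
 * CPUs whose bit is cleared have their per-CPU ring buffer recording
 * disabled by the loop above rather than being removed from the buffer.
 */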
4027
4028 static const struct file_operations tracing_cpumask_fops = {
4029         .open           = tracing_open_generic_tr,
4030         .read           = tracing_cpumask_read,
4031         .write          = tracing_cpumask_write,
4032         .release        = tracing_release_generic_tr,
4033         .llseek         = generic_file_llseek,
4034 };
4035
4036 static int tracing_trace_options_show(struct seq_file *m, void *v)
4037 {
4038         struct tracer_opt *trace_opts;
4039         struct trace_array *tr = m->private;
4040         u32 tracer_flags;
4041         int i;
4042
4043         mutex_lock(&trace_types_lock);
4044         tracer_flags = tr->current_trace->flags->val;
4045         trace_opts = tr->current_trace->flags->opts;
4046
4047         for (i = 0; trace_options[i]; i++) {
4048                 if (tr->trace_flags & (1 << i))
4049                         seq_printf(m, "%s\n", trace_options[i]);
4050                 else
4051                         seq_printf(m, "no%s\n", trace_options[i]);
4052         }
4053
4054         for (i = 0; trace_opts[i].name; i++) {
4055                 if (tracer_flags & trace_opts[i].bit)
4056                         seq_printf(m, "%s\n", trace_opts[i].name);
4057                 else
4058                         seq_printf(m, "no%s\n", trace_opts[i].name);
4059         }
4060         mutex_unlock(&trace_types_lock);
4061
4062         return 0;
4063 }
4064
4065 static int __set_tracer_option(struct trace_array *tr,
4066                                struct tracer_flags *tracer_flags,
4067                                struct tracer_opt *opts, int neg)
4068 {
4069         struct tracer *trace = tracer_flags->trace;
4070         int ret;
4071
4072         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4073         if (ret)
4074                 return ret;
4075
4076         if (neg)
4077                 tracer_flags->val &= ~opts->bit;
4078         else
4079                 tracer_flags->val |= opts->bit;
4080         return 0;
4081 }
4082
4083 /* Try to assign a tracer specific option */
4084 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4085 {
4086         struct tracer *trace = tr->current_trace;
4087         struct tracer_flags *tracer_flags = trace->flags;
4088         struct tracer_opt *opts = NULL;
4089         int i;
4090
4091         for (i = 0; tracer_flags->opts[i].name; i++) {
4092                 opts = &tracer_flags->opts[i];
4093
4094                 if (strcmp(cmp, opts->name) == 0)
4095                         return __set_tracer_option(tr, trace->flags, opts, neg);
4096         }
4097
4098         return -EINVAL;
4099 }
4100
4101 /* Some tracers require overwrite to stay enabled */
4102 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4103 {
4104         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4105                 return -1;
4106
4107         return 0;
4108 }
4109
4110 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4111 {
4112         /* do nothing if flag is already set */
4113         if (!!(tr->trace_flags & mask) == !!enabled)
4114                 return 0;
4115
4116         /* Give the tracer a chance to approve the change */
4117         if (tr->current_trace->flag_changed)
4118                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4119                         return -EINVAL;
4120
4121         if (enabled)
4122                 tr->trace_flags |= mask;
4123         else
4124                 tr->trace_flags &= ~mask;
4125
4126         if (mask == TRACE_ITER_RECORD_CMD)
4127                 trace_event_enable_cmd_record(enabled);
4128
4129         if (mask == TRACE_ITER_EVENT_FORK)
4130                 trace_event_follow_fork(tr, enabled);
4131
4132         if (mask == TRACE_ITER_OVERWRITE) {
4133                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4134 #ifdef CONFIG_TRACER_MAX_TRACE
4135                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4136 #endif
4137         }
4138
4139         if (mask == TRACE_ITER_PRINTK) {
4140                 trace_printk_start_stop_comm(enabled);
4141                 trace_printk_control(enabled);
4142         }
4143
4144         return 0;
4145 }
4146
4147 static int trace_set_options(struct trace_array *tr, char *option)
4148 {
4149         char *cmp;
4150         int neg = 0;
4151         int ret = -ENODEV;
4152         int i;
4153         size_t orig_len = strlen(option);
4154
4155         cmp = strstrip(option);
4156
4157         if (strncmp(cmp, "no", 2) == 0) {
4158                 neg = 1;
4159                 cmp += 2;
4160         }
4161
4162         mutex_lock(&trace_types_lock);
4163
4164         for (i = 0; trace_options[i]; i++) {
4165                 if (strcmp(cmp, trace_options[i]) == 0) {
4166                         ret = set_tracer_flag(tr, 1 << i, !neg);
4167                         break;
4168                 }
4169         }
4170
4171         /* If no option could be set, test the specific tracer options */
4172         if (!trace_options[i])
4173                 ret = set_tracer_option(tr, cmp, neg);
4174
4175         mutex_unlock(&trace_types_lock);
4176
4177         /*
4178          * If the first trailing whitespace is replaced with '\0' by strstrip,
4179          * turn it back into a space.
4180          */
4181         if (orig_len > strlen(option))
4182                 option[strlen(option)] = ' ';
4183
4184         return ret;
4185 }
4186
4187 static void __init apply_trace_boot_options(void)
4188 {
4189         char *buf = trace_boot_options_buf;
4190         char *option;
4191
4192         while (true) {
4193                 option = strsep(&buf, ",");
4194
4195                 if (!option)
4196                         break;
4197
4198                 if (*option)
4199                         trace_set_options(&global_trace, option);
4200
4201                 /* Put back the comma to allow this to be called again */
4202                 if (buf)
4203                         *(buf - 1) = ',';
4204         }
4205 }
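/*
 * Illustrative example: booting with "trace_options=sym-offset,noirq-info"
 * leaves that string in trace_boot_options_buf; the loop above feeds each
 * comma-separated token ("sym-offset", then "noirq-info") to
 * trace_set_options() and then restores the comma so the buffer can be
 * parsed again later.
 */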
4206
4207 static ssize_t
4208 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4209                         size_t cnt, loff_t *ppos)
4210 {
4211         struct seq_file *m = filp->private_data;
4212         struct trace_array *tr = m->private;
4213         char buf[64];
4214         int ret;
4215
4216         if (cnt >= sizeof(buf))
4217                 return -EINVAL;
4218
4219         if (copy_from_user(buf, ubuf, cnt))
4220                 return -EFAULT;
4221
4222         buf[cnt] = 0;
4223
4224         ret = trace_set_options(tr, buf);
4225         if (ret < 0)
4226                 return ret;
4227
4228         *ppos += cnt;
4229
4230         return cnt;
4231 }
4232
4233 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4234 {
4235         struct trace_array *tr = inode->i_private;
4236         int ret;
4237
4238         if (tracing_disabled)
4239                 return -ENODEV;
4240
4241         if (trace_array_get(tr) < 0)
4242                 return -ENODEV;
4243
4244         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4245         if (ret < 0)
4246                 trace_array_put(tr);
4247
4248         return ret;
4249 }
4250
4251 static const struct file_operations tracing_iter_fops = {
4252         .open           = tracing_trace_options_open,
4253         .read           = seq_read,
4254         .llseek         = seq_lseek,
4255         .release        = tracing_single_release_tr,
4256         .write          = tracing_trace_options_write,
4257 };
4258
4259 static const char readme_msg[] =
4260         "tracing mini-HOWTO:\n\n"
4261         "# echo 0 > tracing_on : quick way to disable tracing\n"
4262         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4263         " Important files:\n"
4264         "  trace\t\t\t- The static contents of the buffer\n"
4265         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4266         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4267         "  current_tracer\t- function and latency tracers\n"
4268         "  available_tracers\t- list of configured tracers for current_tracer\n"
4269         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4270         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4271         "  trace_clock\t\t- change the clock used to order events\n"
4272         "       local:   Per cpu clock but may not be synced across CPUs\n"
4273         "      global:   Synced across CPUs but slows tracing down.\n"
4274         "     counter:   Not a clock, but just an increment\n"
4275         "      uptime:   Jiffy counter from time of boot\n"
4276         "        perf:   Same clock that perf events use\n"
4277 #ifdef CONFIG_X86_64
4278         "     x86-tsc:   TSC cycle counter\n"
4279 #endif
4280         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4281         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
4282         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4283         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4284         "\t\t\t  Remove sub-buffer with rmdir\n"
4285         "  trace_options\t\t- Set format or modify how tracing happens\n"
4286         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4287         "\t\t\t  option name\n"
4288         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4289 #ifdef CONFIG_DYNAMIC_FTRACE
4290         "\n  available_filter_functions - list of functions that can be filtered on\n"
4291         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4292         "\t\t\t  functions\n"
4293         "\t     accepts: func_full_name or glob-matching-pattern\n"
4294         "\t     modules: Can select a group via module\n"
4295         "\t      Format: :mod:<module-name>\n"
4296         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4297         "\t    triggers: a command to perform when function is hit\n"
4298         "\t      Format: <function>:<trigger>[:count]\n"
4299         "\t     trigger: traceon, traceoff\n"
4300         "\t\t      enable_event:<system>:<event>\n"
4301         "\t\t      disable_event:<system>:<event>\n"
4302 #ifdef CONFIG_STACKTRACE
4303         "\t\t      stacktrace\n"
4304 #endif
4305 #ifdef CONFIG_TRACER_SNAPSHOT
4306         "\t\t      snapshot\n"
4307 #endif
4308         "\t\t      dump\n"
4309         "\t\t      cpudump\n"
4310         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4311         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4312         "\t     The first one will disable tracing every time do_fault is hit\n"
4313         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4314         "\t       The first time do_trap is hit and it disables tracing, the\n"
4315         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4316         "\t       the counter will not decrement. It only decrements when the\n"
4317         "\t       trigger did work\n"
4318         "\t     To remove trigger without count:\n"
4319         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4320         "\t     To remove trigger with a count:\n"
4321         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4322         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4323         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4324         "\t    modules: Can select a group via module command :mod:\n"
4325         "\t    Does not accept triggers\n"
4326 #endif /* CONFIG_DYNAMIC_FTRACE */
4327 #ifdef CONFIG_FUNCTION_TRACER
4328         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4329         "\t\t    (function)\n"
4330 #endif
4331 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4332         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4333         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4334         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4335 #endif
4336 #ifdef CONFIG_TRACER_SNAPSHOT
4337         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4338         "\t\t\t  snapshot buffer. Read the contents for more\n"
4339         "\t\t\t  information\n"
4340 #endif
4341 #ifdef CONFIG_STACK_TRACER
4342         "  stack_trace\t\t- Shows the max stack trace when active\n"
4343         "  stack_max_size\t- Shows current max stack size that was traced\n"
4344         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4345         "\t\t\t  new trace)\n"
4346 #ifdef CONFIG_DYNAMIC_FTRACE
4347         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4348         "\t\t\t  traces\n"
4349 #endif
4350 #endif /* CONFIG_STACK_TRACER */
4351 #ifdef CONFIG_KPROBE_EVENT
4352         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4353         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4354 #endif
4355 #ifdef CONFIG_UPROBE_EVENT
4356         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4357         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4358 #endif
4359 #if defined(CONFIG_KPROBE_EVENT) || defined(CONFIG_UPROBE_EVENT)
4360         "\t  accepts: event-definitions (one definition per line)\n"
4361         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4362         "\t           -:[<group>/]<event>\n"
4363 #ifdef CONFIG_KPROBE_EVENT
4364         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4365 #endif
4366 #ifdef CONFIG_UPROBE_EVENT
4367         "\t    place: <path>:<offset>\n"
4368 #endif
4369         "\t     args: <name>=fetcharg[:type]\n"
4370         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4371         "\t           $stack<index>, $stack, $retval, $comm\n"
4372         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4373         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4374 #endif
4375         "  events/\t\t- Directory containing all trace event subsystems:\n"
4376         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4377         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4378         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4379         "\t\t\t  events\n"
4380         "      filter\t\t- If set, only events passing filter are traced\n"
4381         "  events/<system>/<event>/\t- Directory containing control files for\n"
4382         "\t\t\t  <event>:\n"
4383         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4384         "      filter\t\t- If set, only events passing filter are traced\n"
4385         "      trigger\t\t- If set, a command to perform when event is hit\n"
4386         "\t    Format: <trigger>[:count][if <filter>]\n"
4387         "\t   trigger: traceon, traceoff\n"
4388         "\t            enable_event:<system>:<event>\n"
4389         "\t            disable_event:<system>:<event>\n"
4390 #ifdef CONFIG_HIST_TRIGGERS
4391         "\t            enable_hist:<system>:<event>\n"
4392         "\t            disable_hist:<system>:<event>\n"
4393 #endif
4394 #ifdef CONFIG_STACKTRACE
4395         "\t\t    stacktrace\n"
4396 #endif
4397 #ifdef CONFIG_TRACER_SNAPSHOT
4398         "\t\t    snapshot\n"
4399 #endif
4400 #ifdef CONFIG_HIST_TRIGGERS
4401         "\t\t    hist (see below)\n"
4402 #endif
4403         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4404         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4405         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4406         "\t                  events/block/block_unplug/trigger\n"
4407         "\t   The first disables tracing every time block_unplug is hit.\n"
4408         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4409         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4410         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4411         "\t   Like function triggers, the counter is only decremented if it\n"
4412         "\t    enabled or disabled tracing.\n"
4413         "\t   To remove a trigger without a count:\n"
4414         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4415         "\t   To remove a trigger with a count:\n"
4416         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4417         "\t   Filters can be ignored when removing a trigger.\n"
4418 #ifdef CONFIG_HIST_TRIGGERS
4419         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4420         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4421         "\t            [:values=<field1[,field2,...]>]\n"
4422         "\t            [:sort=<field1[,field2,...]>]\n"
4423         "\t            [:size=#entries]\n"
4424         "\t            [:pause][:continue][:clear]\n"
4425         "\t            [:name=histname1]\n"
4426         "\t            [if <filter>]\n\n"
4427         "\t    When a matching event is hit, an entry is added to a hash\n"
4428         "\t    table using the key(s) and value(s) named, and the value of a\n"
4429         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4430         "\t    correspond to fields in the event's format description.  Keys\n"
4431         "\t    can be any field, or the special string 'stacktrace'.\n"
4432         "\t    Compound keys consisting of up to two fields can be specified\n"
4433         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4434         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4435         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4436         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4437         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4438         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4439         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4440         "\t    its histogram data will be shared with other triggers of the\n"
4441         "\t    same name, and trigger hits will update this common data.\n\n"
4442         "\t    Reading the 'hist' file for the event will dump the hash\n"
4443         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4444         "\t    triggers attached to an event, there will be a table for each\n"
4445         "\t    trigger in the output.  The table displayed for a named\n"
4446         "\t    trigger will be the same as any other instance having the\n"
4447         "\t    same name.  The default format used to display a given field\n"
4448         "\t    can be modified by appending any of the following modifiers\n"
4449         "\t    to the field name, as applicable:\n\n"
4450         "\t            .hex        display a number as a hex value\n"
4451         "\t            .sym        display an address as a symbol\n"
4452         "\t            .sym-offset display an address as a symbol and offset\n"
4453         "\t            .execname   display a common_pid as a program name\n"
4454         "\t            .syscall    display a syscall id as a syscall name\n"
4455         "\t            .log2       display log2 value rather than raw number\n\n"
4456         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4457         "\t    trigger or to start a hist trigger but not log any events\n"
4458         "\t    until told to do so.  'continue' can be used to start or\n"
4459         "\t    restart a paused hist trigger.\n\n"
4460         "\t    The 'clear' parameter will clear the contents of a running\n"
4461         "\t    hist trigger and leave its current paused/active state\n"
4462         "\t    unchanged.\n\n"
4463         "\t    The enable_hist and disable_hist triggers can be used to\n"
4464         "\t    have one event conditionally start and stop another event's\n"
4465         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4466         "\t    the enable_event and disable_event triggers.\n"
4467 #endif
4468 ;
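/*
 * Illustrative hist trigger built from the syntax documented above,
 * assuming CONFIG_HIST_TRIGGERS and that the kmem:kmalloc event exists:
 *
 *   echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *        > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 */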
4469
4470 static ssize_t
4471 tracing_readme_read(struct file *filp, char __user *ubuf,
4472                        size_t cnt, loff_t *ppos)
4473 {
4474         return simple_read_from_buffer(ubuf, cnt, ppos,
4475                                         readme_msg, strlen(readme_msg));
4476 }
4477
4478 static const struct file_operations tracing_readme_fops = {
4479         .open           = tracing_open_generic,
4480         .read           = tracing_readme_read,
4481         .llseek         = generic_file_llseek,
4482 };
4483
4484 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4485 {
4486         unsigned int *ptr = v;
4487
4488         if (*pos || m->count)
4489                 ptr++;
4490
4491         (*pos)++;
4492
4493         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4494              ptr++) {
4495                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4496                         continue;
4497
4498                 return ptr;
4499         }
4500
4501         return NULL;
4502 }
4503
4504 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4505 {
4506         void *v;
4507         loff_t l = 0;
4508
4509         preempt_disable();
4510         arch_spin_lock(&trace_cmdline_lock);
4511
4512         v = &savedcmd->map_cmdline_to_pid[0];
4513         while (l <= *pos) {
4514                 v = saved_cmdlines_next(m, v, &l);
4515                 if (!v)
4516                         return NULL;
4517         }
4518
4519         return v;
4520 }
4521
4522 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4523 {
4524         arch_spin_unlock(&trace_cmdline_lock);
4525         preempt_enable();
4526 }
4527
4528 static int saved_cmdlines_show(struct seq_file *m, void *v)
4529 {
4530         char buf[TASK_COMM_LEN];
4531         unsigned int *pid = v;
4532
4533         __trace_find_cmdline(*pid, buf);
4534         seq_printf(m, "%d %s\n", *pid, buf);
4535         return 0;
4536 }
4537
4538 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4539         .start          = saved_cmdlines_start,
4540         .next           = saved_cmdlines_next,
4541         .stop           = saved_cmdlines_stop,
4542         .show           = saved_cmdlines_show,
4543 };
4544
4545 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4546 {
4547         if (tracing_disabled)
4548                 return -ENODEV;
4549
4550         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4551 }
4552
4553 static const struct file_operations tracing_saved_cmdlines_fops = {
4554         .open           = tracing_saved_cmdlines_open,
4555         .read           = seq_read,
4556         .llseek         = seq_lseek,
4557         .release        = seq_release,
4558 };
4559
4560 static ssize_t
4561 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4562                                  size_t cnt, loff_t *ppos)
4563 {
4564         char buf[64];
4565         int r;
4566
4567         arch_spin_lock(&trace_cmdline_lock);
4568         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4569         arch_spin_unlock(&trace_cmdline_lock);
4570
4571         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4572 }
4573
4574 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4575 {
4576         kfree(s->saved_cmdlines);
4577         kfree(s->map_cmdline_to_pid);
4578         kfree(s);
4579 }
4580
4581 static int tracing_resize_saved_cmdlines(unsigned int val)
4582 {
4583         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4584
4585         s = kmalloc(sizeof(*s), GFP_KERNEL);
4586         if (!s)
4587                 return -ENOMEM;
4588
4589         if (allocate_cmdlines_buffer(val, s) < 0) {
4590                 kfree(s);
4591                 return -ENOMEM;
4592         }
4593
4594         arch_spin_lock(&trace_cmdline_lock);
4595         savedcmd_temp = savedcmd;
4596         savedcmd = s;
4597         arch_spin_unlock(&trace_cmdline_lock);
4598         free_saved_cmdlines_buffer(savedcmd_temp);
4599
4600         return 0;
4601 }
4602
4603 static ssize_t
4604 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4605                                   size_t cnt, loff_t *ppos)
4606 {
4607         unsigned long val;
4608         int ret;
4609
4610         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4611         if (ret)
4612                 return ret;
4613
4614         /* must have at least 1 entry and at most PID_MAX_DEFAULT */
4615         if (!val || val > PID_MAX_DEFAULT)
4616                 return -EINVAL;
4617
4618         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4619         if (ret < 0)
4620                 return ret;
4621
4622         *ppos += cnt;
4623
4624         return cnt;
4625 }
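/*
 * Illustrative usage (tracefs assumed mounted at /sys/kernel/tracing):
 *
 *   echo 4096 > saved_cmdlines_size   # grow the comm/pid cache
 *   cat saved_cmdlines_size           # now reports 4096
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL, and the
 * old buffer is freed only after the new one has been swapped in.
 */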
4626
4627 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4628         .open           = tracing_open_generic,
4629         .read           = tracing_saved_cmdlines_size_read,
4630         .write          = tracing_saved_cmdlines_size_write,
4631 };
4632
4633 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
4634 static union trace_enum_map_item *
4635 update_enum_map(union trace_enum_map_item *ptr)
4636 {
4637         if (!ptr->map.enum_string) {
4638                 if (ptr->tail.next) {
4639                         ptr = ptr->tail.next;
4640                         /* Set ptr to the next real item (skip head) */
4641                         ptr++;
4642                 } else
4643                         return NULL;
4644         }
4645         return ptr;
4646 }
4647
4648 static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
4649 {
4650         union trace_enum_map_item *ptr = v;
4651
4652         /*
4653          * Paranoid! If ptr points to end, we don't want to increment past it.
4654          * This really should never happen.
4655          */
4656         ptr = update_enum_map(ptr);
4657         if (WARN_ON_ONCE(!ptr))
4658                 return NULL;
4659
4660         ptr++;
4661
4662         (*pos)++;
4663
4664         ptr = update_enum_map(ptr);
4665
4666         return ptr;
4667 }
4668
4669 static void *enum_map_start(struct seq_file *m, loff_t *pos)
4670 {
4671         union trace_enum_map_item *v;
4672         loff_t l = 0;
4673
4674         mutex_lock(&trace_enum_mutex);
4675
4676         v = trace_enum_maps;
4677         if (v)
4678                 v++;
4679
4680         while (v && l < *pos) {
4681                 v = enum_map_next(m, v, &l);
4682         }
4683
4684         return v;
4685 }
4686
4687 static void enum_map_stop(struct seq_file *m, void *v)
4688 {
4689         mutex_unlock(&trace_enum_mutex);
4690 }
4691
4692 static int enum_map_show(struct seq_file *m, void *v)
4693 {
4694         union trace_enum_map_item *ptr = v;
4695
4696         seq_printf(m, "%s %ld (%s)\n",
4697                    ptr->map.enum_string, ptr->map.enum_value,
4698                    ptr->map.system);
4699
4700         return 0;
4701 }
4702
4703 static const struct seq_operations tracing_enum_map_seq_ops = {
4704         .start          = enum_map_start,
4705         .next           = enum_map_next,
4706         .stop           = enum_map_stop,
4707         .show           = enum_map_show,
4708 };
4709
4710 static int tracing_enum_map_open(struct inode *inode, struct file *filp)
4711 {
4712         if (tracing_disabled)
4713                 return -ENODEV;
4714
4715         return seq_open(filp, &tracing_enum_map_seq_ops);
4716 }
4717
4718 static const struct file_operations tracing_enum_map_fops = {
4719         .open           = tracing_enum_map_open,
4720         .read           = seq_read,
4721         .llseek         = seq_lseek,
4722         .release        = seq_release,
4723 };
4724
4725 static inline union trace_enum_map_item *
4726 trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
4727 {
4728         /* Return tail of array given the head */
4729         return ptr + ptr->head.length + 1;
4730 }
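/*
 * Illustrative layout of one map_array allocation for len == 3:
 *
 *   [ head: mod, length=3 ][ map 0 ][ map 1 ][ map 2 ][ tail: next ]
 *
 * trace_enum_jmp_to_tail() steps from the head over the "length" map
 * entries to the tail, whose ->tail.next chains to the head of the
 * next allocation (or is NULL at the end of the list).
 */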
4731
4732 static void
4733 trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
4734                            int len)
4735 {
4736         struct trace_enum_map **stop;
4737         struct trace_enum_map **map;
4738         union trace_enum_map_item *map_array;
4739         union trace_enum_map_item *ptr;
4740
4741         stop = start + len;
4742
4743         /*
4744          * The map array contains the maps plus a head and a tail item:
4745          * the head holds the module and the length of the array, and the
4746          * tail holds a pointer to the next map array in the list.
4747          */
4748         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4749         if (!map_array) {
4750                 pr_warn("Unable to allocate trace enum mapping\n");
4751                 return;
4752         }
4753
4754         mutex_lock(&trace_enum_mutex);
4755
4756         if (!trace_enum_maps)
4757                 trace_enum_maps = map_array;
4758         else {
4759                 ptr = trace_enum_maps;
4760                 for (;;) {
4761                         ptr = trace_enum_jmp_to_tail(ptr);
4762                         if (!ptr->tail.next)
4763                                 break;
4764                         ptr = ptr->tail.next;
4765
4766                 }
4767                 ptr->tail.next = map_array;
4768         }
4769         map_array->head.mod = mod;
4770         map_array->head.length = len;
4771         map_array++;
4772
4773         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4774                 map_array->map = **map;
4775                 map_array++;
4776         }
4777         memset(map_array, 0, sizeof(*map_array));
4778
4779         mutex_unlock(&trace_enum_mutex);
4780 }
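
/*
 * Layout sketch (added for illustration, assuming len == 3): the array
 * allocated above holds len + 2 union entries, laid out as
 *
 *   map_array[0]  head  (.mod, .length = 3)
 *   map_array[1]  map   (first trace_enum_map)
 *   map_array[2]  map
 *   map_array[3]  map
 *   map_array[4]  tail  (zeroed here; .next is filled in when the next
 *                        module's array gets chained on)
 *
 * and trace_enum_jmp_to_tail() lands on map_array[4] = ptr + length + 1.
 */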
4781
4782 static void trace_create_enum_file(struct dentry *d_tracer)
4783 {
4784         trace_create_file("enum_map", 0444, d_tracer,
4785                           NULL, &tracing_enum_map_fops);
4786 }
4787
4788 #else /* CONFIG_TRACE_ENUM_MAP_FILE */
4789 static inline void trace_create_enum_file(struct dentry *d_tracer) { }
4790 static inline void trace_insert_enum_map_file(struct module *mod,
4791                               struct trace_enum_map **start, int len) { }
4792 #endif /* !CONFIG_TRACE_ENUM_MAP_FILE */
4793
4794 static void trace_insert_enum_map(struct module *mod,
4795                                   struct trace_enum_map **start, int len)
4796 {
4797         struct trace_enum_map **map;
4798
4799         if (len <= 0)
4800                 return;
4801
4802         map = start;
4803
4804         trace_event_enum_update(map, len);
4805
4806         trace_insert_enum_map_file(mod, start, len);
4807 }
4808
4809 static ssize_t
4810 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4811                        size_t cnt, loff_t *ppos)
4812 {
4813         struct trace_array *tr = filp->private_data;
4814         char buf[MAX_TRACER_SIZE+2];
4815         int r;
4816
4817         mutex_lock(&trace_types_lock);
4818         r = sprintf(buf, "%s\n", tr->current_trace->name);
4819         mutex_unlock(&trace_types_lock);
4820
4821         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4822 }
4823
4824 int tracer_init(struct tracer *t, struct trace_array *tr)
4825 {
4826         tracing_reset_online_cpus(&tr->trace_buffer);
4827         return t->init(tr);
4828 }
4829
4830 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4831 {
4832         int cpu;
4833
4834         for_each_tracing_cpu(cpu)
4835                 per_cpu_ptr(buf->data, cpu)->entries = val;
4836 }
4837
4838 #ifdef CONFIG_TRACER_MAX_TRACE
4839 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4840 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4841                                         struct trace_buffer *size_buf, int cpu_id)
4842 {
4843         int cpu, ret = 0;
4844
4845         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4846                 for_each_tracing_cpu(cpu) {
4847                         ret = ring_buffer_resize(trace_buf->buffer,
4848                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4849                         if (ret < 0)
4850                                 break;
4851                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4852                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4853                 }
4854         } else {
4855                 ret = ring_buffer_resize(trace_buf->buffer,
4856                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4857                 if (ret == 0)
4858                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4859                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4860         }
4861
4862         return ret;
4863 }
4864 #endif /* CONFIG_TRACER_MAX_TRACE */
4865
4866 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4867                                         unsigned long size, int cpu)
4868 {
4869         int ret;
4870
4871         /*
4872          * If kernel or user changes the size of the ring buffer
4873          * we use the size that was given, and we can forget about
4874          * expanding it later.
4875          */
4876         ring_buffer_expanded = true;
4877
4878         /* May be called before buffers are initialized */
4879         if (!tr->trace_buffer.buffer)
4880                 return 0;
4881
4882         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4883         if (ret < 0)
4884                 return ret;
4885
4886 #ifdef CONFIG_TRACER_MAX_TRACE
4887         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
4888             !tr->current_trace->use_max_tr)
4889                 goto out;
4890
4891         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
4892         if (ret < 0) {
4893                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
4894                                                      &tr->trace_buffer, cpu);
4895                 if (r < 0) {
4896                         /*
4897                          * AARGH! We are left with a max buffer of a
4898                          * different size!
4899                          * The max buffer is our "snapshot" buffer.
4900                          * When a tracer needs a snapshot (one of the
4901                          * latency tracers), it swaps the max buffer
4902                          * with the saved snapshot. We succeeded in
4903                          * updating the size of the main buffer, but failed
4904                          * to update the size of the max buffer. Then, when
4905                          * we tried to reset the main buffer back to its
4906                          * original size, that failed too. This is very
4907                          * unlikely to happen, but if it does, warn and
4908                          * kill all tracing.
4909                          */
4910                         WARN_ON(1);
4911                         tracing_disabled = 1;
4912                 }
4913                 return ret;
4914         }
4915
4916         if (cpu == RING_BUFFER_ALL_CPUS)
4917                 set_buffer_entries(&tr->max_buffer, size);
4918         else
4919                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
4920
4921  out:
4922 #endif /* CONFIG_TRACER_MAX_TRACE */
4923
4924         if (cpu == RING_BUFFER_ALL_CPUS)
4925                 set_buffer_entries(&tr->trace_buffer, size);
4926         else
4927                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4928
4929         return ret;
4930 }
4931
4932 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4933                                           unsigned long size, int cpu_id)
4934 {
4935         int ret = size;
4936
4937         mutex_lock(&trace_types_lock);
4938
4939         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4940                 /* make sure this cpu is enabled in the mask */
4941                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4942                         ret = -EINVAL;
4943                         goto out;
4944                 }
4945         }
4946
4947         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4948         if (ret < 0)
4949                 ret = -ENOMEM;
4950
4951 out:
4952         mutex_unlock(&trace_types_lock);
4953
4954         return ret;
4955 }
4956
4957
4958 /**
4959  * tracing_update_buffers - used by tracing facility to expand ring buffers
4960  *
4961  * To save memory when tracing is never used on a system that has it
4962  * configured in, the ring buffers are initially set to a minimum size.
4963  * Once a user starts to use the tracing facility, they need to grow
4964  * to their default size.
4965  *
4966  * This function is to be called when a tracer is about to be used.
4967  */
4968 int tracing_update_buffers(void)
4969 {
4970         int ret = 0;
4971
4972         mutex_lock(&trace_types_lock);
4973         if (!ring_buffer_expanded)
4974                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4975                                                 RING_BUFFER_ALL_CPUS);
4976         mutex_unlock(&trace_types_lock);
4977
4978         return ret;
4979 }
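
/*
 * Illustrative note (not from the original source): until this expansion
 * happens, the deferred sizing is visible from userspace because
 * tracing_entries_read() below reports both the current and the expanded
 * size, e.g. (the numbers are only examples):
 *
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   7 (expanded: 1408)
 */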
4980
4981 struct trace_option_dentry;
4982
4983 static void
4984 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4985
4986 /*
4987  * Used to clear out the tracer before deletion of an instance.
4988  * Must have trace_types_lock held.
4989  */
4990 static void tracing_set_nop(struct trace_array *tr)
4991 {
4992         if (tr->current_trace == &nop_trace)
4993                 return;
4994
4995         tr->current_trace->enabled--;
4996
4997         if (tr->current_trace->reset)
4998                 tr->current_trace->reset(tr);
4999
5000         tr->current_trace = &nop_trace;
5001 }
5002
5003 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5004 {
5005         /* Only enable if the directory has been created already. */
5006         if (!tr->dir)
5007                 return;
5008
5009         create_trace_option_files(tr, t);
5010 }
5011
5012 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5013 {
5014         struct tracer *t;
5015 #ifdef CONFIG_TRACER_MAX_TRACE
5016         bool had_max_tr;
5017 #endif
5018         int ret = 0;
5019
5020         mutex_lock(&trace_types_lock);
5021
5022         if (!ring_buffer_expanded) {
5023                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5024                                                 RING_BUFFER_ALL_CPUS);
5025                 if (ret < 0)
5026                         goto out;
5027                 ret = 0;
5028         }
5029
5030         for (t = trace_types; t; t = t->next) {
5031                 if (strcmp(t->name, buf) == 0)
5032                         break;
5033         }
5034         if (!t) {
5035                 ret = -EINVAL;
5036                 goto out;
5037         }
5038         if (t == tr->current_trace)
5039                 goto out;
5040
5041         /* Some tracers are only allowed for the top level buffer */
5042         if (!trace_ok_for_array(t, tr)) {
5043                 ret = -EINVAL;
5044                 goto out;
5045         }
5046
5047         /* If trace pipe files are being read, we can't change the tracer */
5048         if (tr->current_trace->ref) {
5049                 ret = -EBUSY;
5050                 goto out;
5051         }
5052
5053         trace_branch_disable();
5054
5055         tr->current_trace->enabled--;
5056
5057         if (tr->current_trace->reset)
5058                 tr->current_trace->reset(tr);
5059
5060         /* Current trace needs to be nop_trace before synchronize_sched */
5061         tr->current_trace = &nop_trace;
5062
5063 #ifdef CONFIG_TRACER_MAX_TRACE
5064         had_max_tr = tr->allocated_snapshot;
5065
5066         if (had_max_tr && !t->use_max_tr) {
5067                 /*
5068                  * We need to make sure that update_max_tr sees that
5069                  * current_trace changed to nop_trace to keep it from
5070                  * swapping the buffers after we resize them.
5071                  * update_max_tr is called with interrupts disabled,
5072                  * so a synchronize_sched() is sufficient.
5073                  */
5074                 synchronize_sched();
5075                 free_snapshot(tr);
5076         }
5077 #endif
5078
5079 #ifdef CONFIG_TRACER_MAX_TRACE
5080         if (t->use_max_tr && !had_max_tr) {
5081                 ret = alloc_snapshot(tr);
5082                 if (ret < 0)
5083                         goto out;
5084         }
5085 #endif
5086
5087         if (t->init) {
5088                 ret = tracer_init(t, tr);
5089                 if (ret)
5090                         goto out;
5091         }
5092
5093         tr->current_trace = t;
5094         tr->current_trace->enabled++;
5095         trace_branch_enable(tr);
5096  out:
5097         mutex_unlock(&trace_types_lock);
5098
5099         return ret;
5100 }
5101
5102 static ssize_t
5103 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5104                         size_t cnt, loff_t *ppos)
5105 {
5106         struct trace_array *tr = filp->private_data;
5107         char buf[MAX_TRACER_SIZE+1];
5108         int i;
5109         size_t ret;
5110         int err;
5111
5112         ret = cnt;
5113
5114         if (cnt > MAX_TRACER_SIZE)
5115                 cnt = MAX_TRACER_SIZE;
5116
5117         if (copy_from_user(buf, ubuf, cnt))
5118                 return -EFAULT;
5119
5120         buf[cnt] = 0;
5121
5122         /* strip trailing whitespace */
5123         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5124                 buf[i] = 0;
5125
5126         err = tracing_set_tracer(tr, buf);
5127         if (err)
5128                 return err;
5129
5130         *ppos += ret;
5131
5132         return ret;
5133 }
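
/*
 * Usage sketch (not from the original source): tracing_set_trace_read()
 * and tracing_set_trace_write() above back the "current_tracer" tracefs
 * file. Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # echo nop > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/current_tracer
 *   nop
 *
 * Writing a name that is not in trace_types makes tracing_set_tracer()
 * return -EINVAL.
 */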
5134
5135 static ssize_t
5136 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5137                    size_t cnt, loff_t *ppos)
5138 {
5139         char buf[64];
5140         int r;
5141
5142         r = snprintf(buf, sizeof(buf), "%ld\n",
5143                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5144         if (r > sizeof(buf))
5145                 r = sizeof(buf);
5146         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5147 }
5148
5149 static ssize_t
5150 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5151                     size_t cnt, loff_t *ppos)
5152 {
5153         unsigned long val;
5154         int ret;
5155
5156         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5157         if (ret)
5158                 return ret;
5159
5160         *ptr = val * 1000;
5161
5162         return cnt;
5163 }
5164
5165 static ssize_t
5166 tracing_thresh_read(struct file *filp, char __user *ubuf,
5167                     size_t cnt, loff_t *ppos)
5168 {
5169         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5170 }
5171
5172 static ssize_t
5173 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5174                      size_t cnt, loff_t *ppos)
5175 {
5176         struct trace_array *tr = filp->private_data;
5177         int ret;
5178
5179         mutex_lock(&trace_types_lock);
5180         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5181         if (ret < 0)
5182                 goto out;
5183
5184         if (tr->current_trace->update_thresh) {
5185                 ret = tr->current_trace->update_thresh(tr);
5186                 if (ret < 0)
5187                         goto out;
5188         }
5189
5190         ret = cnt;
5191 out:
5192         mutex_unlock(&trace_types_lock);
5193
5194         return ret;
5195 }
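
/*
 * Usage sketch (not from the original source): these handlers back the
 * "tracing_thresh" file. The stored value is in nanoseconds, but the file
 * is read and written in microseconds (tracing_nsecs_write() multiplies
 * by 1000, tracing_nsecs_read() converts back). For example:
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh
 *   # cat /sys/kernel/tracing/tracing_thresh
 *   100
 */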
5196
5197 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5198
5199 static ssize_t
5200 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5201                      size_t cnt, loff_t *ppos)
5202 {
5203         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5204 }
5205
5206 static ssize_t
5207 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5208                       size_t cnt, loff_t *ppos)
5209 {
5210         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5211 }
5212
5213 #endif
5214
5215 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5216 {
5217         struct trace_array *tr = inode->i_private;
5218         struct trace_iterator *iter;
5219         int ret = 0;
5220
5221         if (tracing_disabled)
5222                 return -ENODEV;
5223
5224         if (trace_array_get(tr) < 0)
5225                 return -ENODEV;
5226
5227         mutex_lock(&trace_types_lock);
5228
5229         /* create a buffer to store the information to pass to userspace */
5230         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5231         if (!iter) {
5232                 ret = -ENOMEM;
5233                 __trace_array_put(tr);
5234                 goto out;
5235         }
5236
5237         trace_seq_init(&iter->seq);
5238         iter->trace = tr->current_trace;
5239
5240         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5241                 ret = -ENOMEM;
5242                 goto fail;
5243         }
5244
5245         /* trace pipe does not show start of buffer */
5246         cpumask_setall(iter->started);
5247
5248         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5249                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5250
5251         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5252         if (trace_clocks[tr->clock_id].in_ns)
5253                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5254
5255         iter->tr = tr;
5256         iter->trace_buffer = &tr->trace_buffer;
5257         iter->cpu_file = tracing_get_cpu(inode);
5258         mutex_init(&iter->mutex);
5259         filp->private_data = iter;
5260
5261         if (iter->trace->pipe_open)
5262                 iter->trace->pipe_open(iter);
5263
5264         nonseekable_open(inode, filp);
5265
5266         tr->current_trace->ref++;
5267 out:
5268         mutex_unlock(&trace_types_lock);
5269         return ret;
5270
5271 fail:
5272         kfree(iter->trace);
5273         kfree(iter);
5274         __trace_array_put(tr);
5275         mutex_unlock(&trace_types_lock);
5276         return ret;
5277 }
5278
5279 static int tracing_release_pipe(struct inode *inode, struct file *file)
5280 {
5281         struct trace_iterator *iter = file->private_data;
5282         struct trace_array *tr = inode->i_private;
5283
5284         mutex_lock(&trace_types_lock);
5285
5286         tr->current_trace->ref--;
5287
5288         if (iter->trace->pipe_close)
5289                 iter->trace->pipe_close(iter);
5290
5291         mutex_unlock(&trace_types_lock);
5292
5293         free_cpumask_var(iter->started);
5294         mutex_destroy(&iter->mutex);
5295         kfree(iter);
5296
5297         trace_array_put(tr);
5298
5299         return 0;
5300 }
5301
5302 static unsigned int
5303 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5304 {
5305         struct trace_array *tr = iter->tr;
5306
5307         /* Iterators are static, they should be filled or empty */
5308         if (trace_buffer_iter(iter, iter->cpu_file))
5309                 return POLLIN | POLLRDNORM;
5310
5311         if (tr->trace_flags & TRACE_ITER_BLOCK)
5312                 /*
5313                  * Always select as readable when in blocking mode
5314                  */
5315                 return POLLIN | POLLRDNORM;
5316         else
5317                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5318                                              filp, poll_table);
5319 }
5320
5321 static unsigned int
5322 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5323 {
5324         struct trace_iterator *iter = filp->private_data;
5325
5326         return trace_poll(iter, filp, poll_table);
5327 }
5328
5329 /* Must be called with iter->mutex held. */
5330 static int tracing_wait_pipe(struct file *filp)
5331 {
5332         struct trace_iterator *iter = filp->private_data;
5333         int ret;
5334
5335         while (trace_empty(iter)) {
5336
5337                 if ((filp->f_flags & O_NONBLOCK)) {
5338                         return -EAGAIN;
5339                 }
5340
5341                 /*
5342                  * We block until there is something to read, or until
5343                  * tracing is disabled after we have already read something.
5344                  * We keep blocking if tracing is disabled but nothing has
5345                  * been read yet: a user can cat this file, then enable
5346                  * tracing, and get EOF only once tracing is disabled again.
5347                  *
5348                  * iter->pos will be 0 if we haven't read anything.
5349                  */
5350                 if (!tracing_is_on() && iter->pos)
5351                         break;
5352
5353                 mutex_unlock(&iter->mutex);
5354
5355                 ret = wait_on_pipe(iter, false);
5356
5357                 mutex_lock(&iter->mutex);
5358
5359                 if (ret)
5360                         return ret;
5361         }
5362
5363         return 1;
5364 }
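
/*
 * Behavioural sketch (added for illustration): the loop above is what
 * makes a sequence like the following work, assuming tracefs is mounted
 * at /sys/kernel/tracing and a tracer or events are enabled:
 *
 *   # cat /sys/kernel/tracing/trace_pipe &      # blocks while empty
 *   # echo 1 > /sys/kernel/tracing/tracing_on   # reader starts streaming
 *   # echo 0 > /sys/kernel/tracing/tracing_on   # reader gets EOF, but only
 *                                               # if it had read something
 */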
5365
5366 /*
5367  * Consumer reader.
5368  */
5369 static ssize_t
5370 tracing_read_pipe(struct file *filp, char __user *ubuf,
5371                   size_t cnt, loff_t *ppos)
5372 {
5373         struct trace_iterator *iter = filp->private_data;
5374         ssize_t sret;
5375
5376         /*
5377          * Avoid more than one consumer on a single file descriptor.
5378          * This is just a matter of trace coherency, the ring buffer itself
5379          * is protected.
5380          */
5381         mutex_lock(&iter->mutex);
5382
5383         /* return any leftover data */
5384         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5385         if (sret != -EBUSY)
5386                 goto out;
5387
5388         trace_seq_init(&iter->seq);
5389
5390         if (iter->trace->read) {
5391                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5392                 if (sret)
5393                         goto out;
5394         }
5395
5396 waitagain:
5397         sret = tracing_wait_pipe(filp);
5398         if (sret <= 0)
5399                 goto out;
5400
5401         /* stop when tracing is finished */
5402         if (trace_empty(iter)) {
5403                 sret = 0;
5404                 goto out;
5405         }
5406
5407         if (cnt >= PAGE_SIZE)
5408                 cnt = PAGE_SIZE - 1;
5409
5410         /* reset all but tr, trace, and overruns */
5411         memset(&iter->seq, 0,
5412                sizeof(struct trace_iterator) -
5413                offsetof(struct trace_iterator, seq));
5414         cpumask_clear(iter->started);
5415         iter->pos = -1;
5416
5417         trace_event_read_lock();
5418         trace_access_lock(iter->cpu_file);
5419         while (trace_find_next_entry_inc(iter) != NULL) {
5420                 enum print_line_t ret;
5421                 int save_len = iter->seq.seq.len;
5422
5423                 ret = print_trace_line(iter);
5424                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5425                         /* don't print partial lines */
5426                         iter->seq.seq.len = save_len;
5427                         break;
5428                 }
5429                 if (ret != TRACE_TYPE_NO_CONSUME)
5430                         trace_consume(iter);
5431
5432                 if (trace_seq_used(&iter->seq) >= cnt)
5433                         break;
5434
5435                 /*
5436                  * The full flag being set means we reached the end of the
5437                  * trace_seq buffer and should have left via the partial-line
5438                  * check above; one of the trace_seq_* functions was misused.
5439                  */
5440                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5441                           iter->ent->type);
5442         }
5443         trace_access_unlock(iter->cpu_file);
5444         trace_event_read_unlock();
5445
5446         /* Now copy what we have to the user */
5447         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5448         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5449                 trace_seq_init(&iter->seq);
5450
5451         /*
5452          * If there was nothing to send to user, in spite of consuming trace
5453          * entries, go back to wait for more entries.
5454          */
5455         if (sret == -EBUSY)
5456                 goto waitagain;
5457
5458 out:
5459         mutex_unlock(&iter->mutex);
5460
5461         return sret;
5462 }
5463
5464 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5465                                      unsigned int idx)
5466 {
5467         __free_page(spd->pages[idx]);
5468 }
5469
5470 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5471         .can_merge              = 0,
5472         .confirm                = generic_pipe_buf_confirm,
5473         .release                = generic_pipe_buf_release,
5474         .steal                  = generic_pipe_buf_steal,
5475         .get                    = generic_pipe_buf_get,
5476 };
5477
5478 static size_t
5479 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5480 {
5481         size_t count;
5482         int save_len;
5483         int ret;
5484
5485         /* Seq buffer is page-sized, exactly what we need. */
5486         for (;;) {
5487                 save_len = iter->seq.seq.len;
5488                 ret = print_trace_line(iter);
5489
5490                 if (trace_seq_has_overflowed(&iter->seq)) {
5491                         iter->seq.seq.len = save_len;
5492                         break;
5493                 }
5494
5495                 /*
5496                  * This should not be hit, because it should only
5497                  * be set if the iter->seq overflowed. But check it
5498                  * anyway to be safe.
5499                  */
5500                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5501                         iter->seq.seq.len = save_len;
5502                         break;
5503                 }
5504
5505                 count = trace_seq_used(&iter->seq) - save_len;
5506                 if (rem < count) {
5507                         rem = 0;
5508                         iter->seq.seq.len = save_len;
5509                         break;
5510                 }
5511
5512                 if (ret != TRACE_TYPE_NO_CONSUME)
5513                         trace_consume(iter);
5514                 rem -= count;
5515                 if (!trace_find_next_entry_inc(iter))   {
5516                         rem = 0;
5517                         iter->ent = NULL;
5518                         break;
5519                 }
5520         }
5521
5522         return rem;
5523 }
5524
5525 static ssize_t tracing_splice_read_pipe(struct file *filp,
5526                                         loff_t *ppos,
5527                                         struct pipe_inode_info *pipe,
5528                                         size_t len,
5529                                         unsigned int flags)
5530 {
5531         struct page *pages_def[PIPE_DEF_BUFFERS];
5532         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5533         struct trace_iterator *iter = filp->private_data;
5534         struct splice_pipe_desc spd = {
5535                 .pages          = pages_def,
5536                 .partial        = partial_def,
5537                 .nr_pages       = 0, /* This gets updated below. */
5538                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5539                 .flags          = flags,
5540                 .ops            = &tracing_pipe_buf_ops,
5541                 .spd_release    = tracing_spd_release_pipe,
5542         };
5543         ssize_t ret;
5544         size_t rem;
5545         unsigned int i;
5546
5547         if (splice_grow_spd(pipe, &spd))
5548                 return -ENOMEM;
5549
5550         mutex_lock(&iter->mutex);
5551
5552         if (iter->trace->splice_read) {
5553                 ret = iter->trace->splice_read(iter, filp,
5554                                                ppos, pipe, len, flags);
5555                 if (ret)
5556                         goto out_err;
5557         }
5558
5559         ret = tracing_wait_pipe(filp);
5560         if (ret <= 0)
5561                 goto out_err;
5562
5563         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5564                 ret = -EFAULT;
5565                 goto out_err;
5566         }
5567
5568         trace_event_read_lock();
5569         trace_access_lock(iter->cpu_file);
5570
5571         /* Fill as many pages as possible. */
5572         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5573                 spd.pages[i] = alloc_page(GFP_KERNEL);
5574                 if (!spd.pages[i])
5575                         break;
5576
5577                 rem = tracing_fill_pipe_page(rem, iter);
5578
5579                 /* Copy the data into the page, so we can start over. */
5580                 ret = trace_seq_to_buffer(&iter->seq,
5581                                           page_address(spd.pages[i]),
5582                                           trace_seq_used(&iter->seq));
5583                 if (ret < 0) {
5584                         __free_page(spd.pages[i]);
5585                         break;
5586                 }
5587                 spd.partial[i].offset = 0;
5588                 spd.partial[i].len = trace_seq_used(&iter->seq);
5589
5590                 trace_seq_init(&iter->seq);
5591         }
5592
5593         trace_access_unlock(iter->cpu_file);
5594         trace_event_read_unlock();
5595         mutex_unlock(&iter->mutex);
5596
5597         spd.nr_pages = i;
5598
5599         if (i)
5600                 ret = splice_to_pipe(pipe, &spd);
5601         else
5602                 ret = 0;
5603 out:
5604         splice_shrink_spd(&spd);
5605         return ret;
5606
5607 out_err:
5608         mutex_unlock(&iter->mutex);
5609         goto out;
5610 }
5611
5612 static ssize_t
5613 tracing_entries_read(struct file *filp, char __user *ubuf,
5614                      size_t cnt, loff_t *ppos)
5615 {
5616         struct inode *inode = file_inode(filp);
5617         struct trace_array *tr = inode->i_private;
5618         int cpu = tracing_get_cpu(inode);
5619         char buf[64];
5620         int r = 0;
5621         ssize_t ret;
5622
5623         mutex_lock(&trace_types_lock);
5624
5625         if (cpu == RING_BUFFER_ALL_CPUS) {
5626                 int cpu, buf_size_same;
5627                 unsigned long size;
5628
5629                 size = 0;
5630                 buf_size_same = 1;
5631                 /* check if all cpu sizes are same */
5632                 for_each_tracing_cpu(cpu) {
5633                         /* fill in the size from first enabled cpu */
5634                         if (size == 0)
5635                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5636                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5637                                 buf_size_same = 0;
5638                                 break;
5639                         }
5640                 }
5641
5642                 if (buf_size_same) {
5643                         if (!ring_buffer_expanded)
5644                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5645                                             size >> 10,
5646                                             trace_buf_size >> 10);
5647                         else
5648                                 r = sprintf(buf, "%lu\n", size >> 10);
5649                 } else
5650                         r = sprintf(buf, "X\n");
5651         } else
5652                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5653
5654         mutex_unlock(&trace_types_lock);
5655
5656         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5657         return ret;
5658 }
5659
5660 static ssize_t
5661 tracing_entries_write(struct file *filp, const char __user *ubuf,
5662                       size_t cnt, loff_t *ppos)
5663 {
5664         struct inode *inode = file_inode(filp);
5665         struct trace_array *tr = inode->i_private;
5666         unsigned long val;
5667         int ret;
5668
5669         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5670         if (ret)
5671                 return ret;
5672
5673         /* must have at least 1 entry */
5674         if (!val)
5675                 return -EINVAL;
5676
5677         /* value is in KB */
5678         val <<= 10;
5679         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5680         if (ret < 0)
5681                 return ret;
5682
5683         *ppos += cnt;
5684
5685         return cnt;
5686 }
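
/*
 * Usage sketch (not from the original source): tracing_entries_read() and
 * tracing_entries_write() back "buffer_size_kb", both at the top level and
 * under per_cpu/cpuN/. The value is in KiB per CPU and writes go through
 * tracing_resize_ring_buffer(). For example:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # cat /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 *   4096
 *
 * Reading the top-level file prints "X" when the per-CPU sizes differ.
 */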
5687
5688 static ssize_t
5689 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5690                                 size_t cnt, loff_t *ppos)
5691 {
5692         struct trace_array *tr = filp->private_data;
5693         char buf[64];
5694         int r, cpu;
5695         unsigned long size = 0, expanded_size = 0;
5696
5697         mutex_lock(&trace_types_lock);
5698         for_each_tracing_cpu(cpu) {
5699                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5700                 if (!ring_buffer_expanded)
5701                         expanded_size += trace_buf_size >> 10;
5702         }
5703         if (ring_buffer_expanded)
5704                 r = sprintf(buf, "%lu\n", size);
5705         else
5706                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5707         mutex_unlock(&trace_types_lock);
5708
5709         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5710 }
5711
5712 static ssize_t
5713 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5714                           size_t cnt, loff_t *ppos)
5715 {
5716         /*
5717          * There is no need to read what the user has written; this function
5718          * only exists so that using "echo" on this file does not error out.
5719          */
5720
5721         *ppos += cnt;
5722
5723         return cnt;
5724 }
5725
5726 static int
5727 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5728 {
5729         struct trace_array *tr = inode->i_private;
5730
5731         /* disable tracing? */
5732         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5733                 tracer_tracing_off(tr);
5734         /* resize the ring buffer to 0 */
5735         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5736
5737         trace_array_put(tr);
5738
5739         return 0;
5740 }
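
/*
 * Usage sketch (added for illustration): "free_buffer" is write-only and
 * does its work on release: the ring buffer is resized to zero (and, if
 * the stop-on-free option is set, tracing is turned off first) as soon as
 * the file descriptor is closed, e.g.:
 *
 *   # echo > /sys/kernel/tracing/free_buffer
 */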
5741
5742 static ssize_t
5743 tracing_mark_write(struct file *filp, const char __user *ubuf,
5744                                         size_t cnt, loff_t *fpos)
5745 {
5746         struct trace_array *tr = filp->private_data;
5747         struct ring_buffer_event *event;
5748         struct ring_buffer *buffer;
5749         struct print_entry *entry;
5750         unsigned long irq_flags;
5751         const char faulted[] = "<faulted>";
5752         ssize_t written;
5753         int size;
5754         int len;
5755
5756 /* Used in tracing_mark_raw_write() as well */
5757 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5758
5759         if (tracing_disabled)
5760                 return -EINVAL;
5761
5762         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5763                 return -EINVAL;
5764
5765         if (cnt > TRACE_BUF_SIZE)
5766                 cnt = TRACE_BUF_SIZE;
5767
5768         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5769
5770         local_save_flags(irq_flags);
5771         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5772
5773         /* If less than "<faulted>", then make sure we can still add that */
5774         if (cnt < FAULTED_SIZE)
5775                 size += FAULTED_SIZE - cnt;
5776
5777         buffer = tr->trace_buffer.buffer;
5778         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5779                                             irq_flags, preempt_count());
5780         if (unlikely(!event))
5781                 /* Ring buffer disabled, return as if not open for write */
5782                 return -EBADF;
5783
5784         entry = ring_buffer_event_data(event);
5785         entry->ip = _THIS_IP_;
5786
5787         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5788         if (len) {
5789                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5790                 cnt = FAULTED_SIZE;
5791                 written = -EFAULT;
5792         } else
5793                 written = cnt;
5794         len = cnt;
5795
5796         if (entry->buf[cnt - 1] != '\n') {
5797                 entry->buf[cnt] = '\n';
5798                 entry->buf[cnt + 1] = '\0';
5799         } else
5800                 entry->buf[cnt] = '\0';
5801
5802         __buffer_unlock_commit(buffer, event);
5803
5804         if (written > 0)
5805                 *fpos += written;
5806
5807         return written;
5808 }
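
/*
 * Usage sketch (not from the original source): tracing_mark_write() backs
 * the "trace_marker" file, which lets userspace inject a text line into
 * the trace as a TRACE_PRINT event:
 *
 *   # echo "hit the interesting spot" > /sys/kernel/tracing/trace_marker
 *
 * The string then appears inline with the rest of the trace output.
 */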
5809
5810 /* Limit it for now to 3K (including tag) */
5811 #define RAW_DATA_MAX_SIZE (1024*3)
5812
5813 static ssize_t
5814 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5815                                         size_t cnt, loff_t *fpos)
5816 {
5817         struct trace_array *tr = filp->private_data;
5818         struct ring_buffer_event *event;
5819         struct ring_buffer *buffer;
5820         struct raw_data_entry *entry;
5821         const char faulted[] = "<faulted>";
5822         unsigned long irq_flags;
5823         ssize_t written;
5824         int size;
5825         int len;
5826
5827 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5828
5829         if (tracing_disabled)
5830                 return -EINVAL;
5831
5832         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5833                 return -EINVAL;
5834
5835         /* The marker must at least have a tag id */
5836         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5837                 return -EINVAL;
5838
5839         if (cnt > TRACE_BUF_SIZE)
5840                 cnt = TRACE_BUF_SIZE;
5841
5842         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5843
5844         local_save_flags(irq_flags);
5845         size = sizeof(*entry) + cnt;
5846         if (cnt < FAULT_SIZE_ID)
5847                 size += FAULT_SIZE_ID - cnt;
5848
5849         buffer = tr->trace_buffer.buffer;
5850         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5851                                             irq_flags, preempt_count());
5852         if (!event)
5853                 /* Ring buffer disabled, return as if not open for write */
5854                 return -EBADF;
5855
5856         entry = ring_buffer_event_data(event);
5857
5858         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5859         if (len) {
5860                 entry->id = -1;
5861                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5862                 written = -EFAULT;
5863         } else
5864                 written = cnt;
5865
5866         __buffer_unlock_commit(buffer, event);
5867
5868         if (written > 0)
5869                 *fpos += written;
5870
5871         return written;
5872 }
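
/*
 * Usage sketch (added for illustration, not part of the kernel source):
 * "trace_marker_raw" expects a binary payload whose first bytes are a
 * user-chosen tag id, per the sizeof(unsigned int) check above. A minimal
 * userspace writer could look like the following; the id value and the
 * mount path are only examples:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int mark_raw(const void *data, size_t len)
 *	{
 *		char buf[64];
 *		unsigned int id = 0x1234;	// example tag id
 *		ssize_t ret;
 *		int fd;
 *
 *		if (len > sizeof(buf) - sizeof(id))
 *			return -1;
 *		memcpy(buf, &id, sizeof(id));		// tag id first
 *		memcpy(buf + sizeof(id), data, len);	// then the payload
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		ret = write(fd, buf, sizeof(id) + len);	// one write() call
 *		close(fd);
 *		return ret < 0 ? -1 : 0;
 *	}
 */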
5873
5874 static int tracing_clock_show(struct seq_file *m, void *v)
5875 {
5876         struct trace_array *tr = m->private;
5877         int i;
5878
5879         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5880                 seq_printf(m,
5881                         "%s%s%s%s", i ? " " : "",
5882                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5883                         i == tr->clock_id ? "]" : "");
5884         seq_putc(m, '\n');
5885
5886         return 0;
5887 }
5888
5889 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
5890 {
5891         int i;
5892
5893         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
5894                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
5895                         break;
5896         }
5897         if (i == ARRAY_SIZE(trace_clocks))
5898                 return -EINVAL;
5899
5900         mutex_lock(&trace_types_lock);
5901
5902         tr->clock_id = i;
5903
5904         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
5905
5906         /*
5907          * New clock may not be consistent with the previous clock.
5908          * Reset the buffer so that it doesn't have incomparable timestamps.
5909          */
5910         tracing_reset_online_cpus(&tr->trace_buffer);
5911
5912 #ifdef CONFIG_TRACER_MAX_TRACE
5913         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
5914                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
5915         tracing_reset_online_cpus(&tr->max_buffer);
5916 #endif
5917
5918         mutex_unlock(&trace_types_lock);
5919
5920         return 0;
5921 }
5922
5923 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
5924                                    size_t cnt, loff_t *fpos)
5925 {
5926         struct seq_file *m = filp->private_data;
5927         struct trace_array *tr = m->private;
5928         char buf[64];
5929         const char *clockstr;
5930         int ret;
5931
5932         if (cnt >= sizeof(buf))
5933                 return -EINVAL;
5934
5935         if (copy_from_user(buf, ubuf, cnt))
5936                 return -EFAULT;
5937
5938         buf[cnt] = 0;
5939
5940         clockstr = strstrip(buf);
5941
5942         ret = tracing_set_clock(tr, clockstr);
5943         if (ret)
5944                 return ret;
5945
5946         *fpos += cnt;
5947
5948         return cnt;
5949 }
5950
5951 static int tracing_clock_open(struct inode *inode, struct file *file)
5952 {
5953         struct trace_array *tr = inode->i_private;
5954         int ret;
5955
5956         if (tracing_disabled)
5957                 return -ENODEV;
5958
5959         if (trace_array_get(tr))
5960                 return -ENODEV;
5961
5962         ret = single_open(file, tracing_clock_show, inode->i_private);
5963         if (ret < 0)
5964                 trace_array_put(tr);
5965
5966         return ret;
5967 }
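
/*
 * Usage sketch (not from the original source): tracing_clock_show() and
 * tracing_clock_write() back the "trace_clock" file. A read lists the
 * entries of trace_clocks[] with the current clock in brackets; writing a
 * name selects that clock and, per the comment above, resets the buffers.
 * The exact list depends on the kernel, but it looks roughly like:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono mono_raw
 *   # echo global > /sys/kernel/tracing/trace_clock
 */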
5968
5969 struct ftrace_buffer_info {
5970         struct trace_iterator   iter;
5971         void                    *spare;
5972         unsigned int            read;
5973 };
5974
5975 #ifdef CONFIG_TRACER_SNAPSHOT
5976 static int tracing_snapshot_open(struct inode *inode, struct file *file)
5977 {
5978         struct trace_array *tr = inode->i_private;
5979         struct trace_iterator *iter;
5980         struct seq_file *m;
5981         int ret = 0;
5982
5983         if (trace_array_get(tr) < 0)
5984                 return -ENODEV;
5985
5986         if (file->f_mode & FMODE_READ) {
5987                 iter = __tracing_open(inode, file, true);
5988                 if (IS_ERR(iter))
5989                         ret = PTR_ERR(iter);
5990         } else {
5991                 /* Writes still need the seq_file to hold the private data */
5992                 ret = -ENOMEM;
5993                 m = kzalloc(sizeof(*m), GFP_KERNEL);
5994                 if (!m)
5995                         goto out;
5996                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5997                 if (!iter) {
5998                         kfree(m);
5999                         goto out;
6000                 }
6001                 ret = 0;
6002
6003                 iter->tr = tr;
6004                 iter->trace_buffer = &tr->max_buffer;
6005                 iter->cpu_file = tracing_get_cpu(inode);
6006                 m->private = iter;
6007                 file->private_data = m;
6008         }
6009 out:
6010         if (ret < 0)
6011                 trace_array_put(tr);
6012
6013         return ret;
6014 }
6015
6016 static ssize_t
6017 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6018                        loff_t *ppos)
6019 {
6020         struct seq_file *m = filp->private_data;
6021         struct trace_iterator *iter = m->private;
6022         struct trace_array *tr = iter->tr;
6023         unsigned long val;
6024         int ret;
6025
6026         ret = tracing_update_buffers();
6027         if (ret < 0)
6028                 return ret;
6029
6030         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6031         if (ret)
6032                 return ret;
6033
6034         mutex_lock(&trace_types_lock);
6035
6036         if (tr->current_trace->use_max_tr) {
6037                 ret = -EBUSY;
6038                 goto out;
6039         }
6040
6041         switch (val) {
6042         case 0:
6043                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6044                         ret = -EINVAL;
6045                         break;
6046                 }
6047                 if (tr->allocated_snapshot)
6048                         free_snapshot(tr);
6049                 break;
6050         case 1:
6051 /* Only allow per-cpu swap if the ring buffer supports it */
6052 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6053                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6054                         ret = -EINVAL;
6055                         break;
6056                 }
6057 #endif
6058                 if (!tr->allocated_snapshot) {
6059                         ret = alloc_snapshot(tr);
6060                         if (ret < 0)
6061                                 break;
6062                 }
6063                 local_irq_disable();
6064                 /* Now, we're going to swap */
6065                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6066                         update_max_tr(tr, current, smp_processor_id());
6067                 else
6068                         update_max_tr_single(tr, current, iter->cpu_file);
6069                 local_irq_enable();
6070                 break;
6071         default:
6072                 if (tr->allocated_snapshot) {
6073                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6074                                 tracing_reset_online_cpus(&tr->max_buffer);
6075                         else
6076                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6077                 }
6078                 break;
6079         }
6080
6081         if (ret >= 0) {
6082                 *ppos += cnt;
6083                 ret = cnt;
6084         }
6085 out:
6086         mutex_unlock(&trace_types_lock);
6087         return ret;
6088 }
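
/*
 * Usage sketch (added for illustration): the switch above implements the
 * semantics of the "snapshot" file: writing 1 allocates the snapshot
 * buffer if needed and swaps it in, 0 frees it, and any other value
 * clears its contents without freeing. For example:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot     # take a snapshot
 *   # cat /sys/kernel/tracing/snapshot          # read the saved trace
 *   # echo 0 > /sys/kernel/tracing/snapshot     # free the snapshot buffer
 */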
6089
6090 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6091 {
6092         struct seq_file *m = file->private_data;
6093         int ret;
6094
6095         ret = tracing_release(inode, file);
6096
6097         if (file->f_mode & FMODE_READ)
6098                 return ret;
6099
6100         /* If write only, the seq_file is just a stub */
6101         if (m)
6102                 kfree(m->private);
6103         kfree(m);
6104
6105         return 0;
6106 }
6107
6108 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6109 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6110                                     size_t count, loff_t *ppos);
6111 static int tracing_buffers_release(struct inode *inode, struct file *file);
6112 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6113                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6114
6115 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6116 {
6117         struct ftrace_buffer_info *info;
6118         int ret;
6119
6120         ret = tracing_buffers_open(inode, filp);
6121         if (ret < 0)
6122                 return ret;
6123
6124         info = filp->private_data;
6125
6126         if (info->iter.trace->use_max_tr) {
6127                 tracing_buffers_release(inode, filp);
6128                 return -EBUSY;
6129         }
6130
6131         info->iter.snapshot = true;
6132         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6133
6134         return ret;
6135 }
6136
6137 #endif /* CONFIG_TRACER_SNAPSHOT */
6138
6139
6140 static const struct file_operations tracing_thresh_fops = {
6141         .open           = tracing_open_generic,
6142         .read           = tracing_thresh_read,
6143         .write          = tracing_thresh_write,
6144         .llseek         = generic_file_llseek,
6145 };
6146
6147 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6148 static const struct file_operations tracing_max_lat_fops = {
6149         .open           = tracing_open_generic,
6150         .read           = tracing_max_lat_read,
6151         .write          = tracing_max_lat_write,
6152         .llseek         = generic_file_llseek,
6153 };
6154 #endif
6155
6156 static const struct file_operations set_tracer_fops = {
6157         .open           = tracing_open_generic,
6158         .read           = tracing_set_trace_read,
6159         .write          = tracing_set_trace_write,
6160         .llseek         = generic_file_llseek,
6161 };
6162
6163 static const struct file_operations tracing_pipe_fops = {
6164         .open           = tracing_open_pipe,
6165         .poll           = tracing_poll_pipe,
6166         .read           = tracing_read_pipe,
6167         .splice_read    = tracing_splice_read_pipe,
6168         .release        = tracing_release_pipe,
6169         .llseek         = no_llseek,
6170 };
6171
6172 static const struct file_operations tracing_entries_fops = {
6173         .open           = tracing_open_generic_tr,
6174         .read           = tracing_entries_read,
6175         .write          = tracing_entries_write,
6176         .llseek         = generic_file_llseek,
6177         .release        = tracing_release_generic_tr,
6178 };
6179
6180 static const struct file_operations tracing_total_entries_fops = {
6181         .open           = tracing_open_generic_tr,
6182         .read           = tracing_total_entries_read,
6183         .llseek         = generic_file_llseek,
6184         .release        = tracing_release_generic_tr,
6185 };
6186
6187 static const struct file_operations tracing_free_buffer_fops = {
6188         .open           = tracing_open_generic_tr,
6189         .write          = tracing_free_buffer_write,
6190         .release        = tracing_free_buffer_release,
6191 };
6192
6193 static const struct file_operations tracing_mark_fops = {
6194         .open           = tracing_open_generic_tr,
6195         .write          = tracing_mark_write,
6196         .llseek         = generic_file_llseek,
6197         .release        = tracing_release_generic_tr,
6198 };
6199
6200 static const struct file_operations tracing_mark_raw_fops = {
6201         .open           = tracing_open_generic_tr,
6202         .write          = tracing_mark_raw_write,
6203         .llseek         = generic_file_llseek,
6204         .release        = tracing_release_generic_tr,
6205 };
6206
6207 static const struct file_operations trace_clock_fops = {
6208         .open           = tracing_clock_open,
6209         .read           = seq_read,
6210         .llseek         = seq_lseek,
6211         .release        = tracing_single_release_tr,
6212         .write          = tracing_clock_write,
6213 };
6214
6215 #ifdef CONFIG_TRACER_SNAPSHOT
6216 static const struct file_operations snapshot_fops = {
6217         .open           = tracing_snapshot_open,
6218         .read           = seq_read,
6219         .write          = tracing_snapshot_write,
6220         .llseek         = tracing_lseek,
6221         .release        = tracing_snapshot_release,
6222 };
6223
6224 static const struct file_operations snapshot_raw_fops = {
6225         .open           = snapshot_raw_open,
6226         .read           = tracing_buffers_read,
6227         .release        = tracing_buffers_release,
6228         .splice_read    = tracing_buffers_splice_read,
6229         .llseek         = no_llseek,
6230 };
6231
6232 #endif /* CONFIG_TRACER_SNAPSHOT */
6233
6234 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6235 {
6236         struct trace_array *tr = inode->i_private;
6237         struct ftrace_buffer_info *info;
6238         int ret;
6239
6240         if (tracing_disabled)
6241                 return -ENODEV;
6242
6243         if (trace_array_get(tr) < 0)
6244                 return -ENODEV;
6245
6246         info = kzalloc(sizeof(*info), GFP_KERNEL);
6247         if (!info) {
6248                 trace_array_put(tr);
6249                 return -ENOMEM;
6250         }
6251
6252         mutex_lock(&trace_types_lock);
6253
6254         info->iter.tr           = tr;
6255         info->iter.cpu_file     = tracing_get_cpu(inode);
6256         info->iter.trace        = tr->current_trace;
6257         info->iter.trace_buffer = &tr->trace_buffer;
6258         info->spare             = NULL;
6259         /* Force reading ring buffer for first read */
6260         info->read              = (unsigned int)-1;
6261
6262         filp->private_data = info;
6263
6264         tr->current_trace->ref++;
6265
6266         mutex_unlock(&trace_types_lock);
6267
6268         ret = nonseekable_open(inode, filp);
6269         if (ret < 0)
6270                 trace_array_put(tr);
6271
6272         return ret;
6273 }
6274
6275 static unsigned int
6276 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6277 {
6278         struct ftrace_buffer_info *info = filp->private_data;
6279         struct trace_iterator *iter = &info->iter;
6280
6281         return trace_poll(iter, filp, poll_table);
6282 }
6283
6284 static ssize_t
6285 tracing_buffers_read(struct file *filp, char __user *ubuf,
6286                      size_t count, loff_t *ppos)
6287 {
6288         struct ftrace_buffer_info *info = filp->private_data;
6289         struct trace_iterator *iter = &info->iter;
6290         ssize_t ret;
6291         ssize_t size;
6292
6293         if (!count)
6294                 return 0;
6295
6296 #ifdef CONFIG_TRACER_MAX_TRACE
6297         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6298                 return -EBUSY;
6299 #endif
6300
6301         if (!info->spare)
6302                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6303                                                           iter->cpu_file);
6304         if (!info->spare)
6305                 return -ENOMEM;
6306
6307         /* Do we have previous read data to read? */
6308         if (info->read < PAGE_SIZE)
6309                 goto read;
6310
6311  again:
6312         trace_access_lock(iter->cpu_file);
6313         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6314                                     &info->spare,
6315                                     count,
6316                                     iter->cpu_file, 0);
6317         trace_access_unlock(iter->cpu_file);
6318
6319         if (ret < 0) {
6320                 if (trace_empty(iter)) {
6321                         if ((filp->f_flags & O_NONBLOCK))
6322                                 return -EAGAIN;
6323
6324                         ret = wait_on_pipe(iter, false);
6325                         if (ret)
6326                                 return ret;
6327
6328                         goto again;
6329                 }
6330                 return 0;
6331         }
6332
6333         info->read = 0;
6334  read:
6335         size = PAGE_SIZE - info->read;
6336         if (size > count)
6337                 size = count;
6338
6339         ret = copy_to_user(ubuf, info->spare + info->read, size);
6340         if (ret == size)
6341                 return -EFAULT;
6342
6343         size -= ret;
6344
6345         *ppos += size;
6346         info->read += size;
6347
6348         return size;
6349 }
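
/*
 * Illustrative note (not from the original source): this read path backs
 * the per_cpu/cpuN/trace_pipe_raw files, which hand out raw ring-buffer
 * pages rather than formatted text, so reads are page-granular. A crude
 * capture of one CPU's raw data might look like (paths and block size are
 * examples):
 *
 *   # dd if=/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw \
 *        of=/tmp/cpu0.raw bs=4096
 *
 * Tools like trace-cmd normally consume this file with splice() instead,
 * via tracing_buffers_splice_read() below.
 */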
6350
6351 static int tracing_buffers_release(struct inode *inode, struct file *file)
6352 {
6353         struct ftrace_buffer_info *info = file->private_data;
6354         struct trace_iterator *iter = &info->iter;
6355
6356         mutex_lock(&trace_types_lock);
6357
6358         iter->tr->current_trace->ref--;
6359
6360         __trace_array_put(iter->tr);
6361
6362         if (info->spare)
6363                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
6364         kfree(info);
6365
6366         mutex_unlock(&trace_types_lock);
6367
6368         return 0;
6369 }
6370
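/*
 * buffer_ref ties a ring-buffer read page to the pipe buffers created by
 * splice: whoever drops the last reference (a pipe buffer or the spd
 * cleanup below) returns the page to the ring buffer and frees the wrapper.
 */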
6371 struct buffer_ref {
6372         struct ring_buffer      *buffer;
6373         void                    *page;
6374         int                     ref;
6375 };
6376
6377 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6378                                     struct pipe_buffer *buf)
6379 {
6380         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6381
6382         if (--ref->ref)
6383                 return;
6384
6385         ring_buffer_free_read_page(ref->buffer, ref->page);
6386         kfree(ref);
6387         buf->private = 0;
6388 }
6389
6390 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6391                                 struct pipe_buffer *buf)
6392 {
6393         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6394
6395         ref->ref++;
6396 }
6397
6398 /* Pipe buffer operations for a buffer. */
6399 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6400         .can_merge              = 0,
6401         .confirm                = generic_pipe_buf_confirm,
6402         .release                = buffer_pipe_buf_release,
6403         .steal                  = generic_pipe_buf_steal,
6404         .get                    = buffer_pipe_buf_get,
6405 };
6406
6407 /*
6408  * Callback from splice_to_pipe(), if we need to release some pages
6409  * at the end of the spd in case we errored out while filling the pipe.
6410  */
6411 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6412 {
6413         struct buffer_ref *ref =
6414                 (struct buffer_ref *)spd->partial[i].private;
6415
6416         if (--ref->ref)
6417                 return;
6418
6419         ring_buffer_free_read_page(ref->buffer, ref->page);
6420         kfree(ref);
6421         spd->partial[i].private = 0;
6422 }
6423
6424 static ssize_t
6425 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6426                             struct pipe_inode_info *pipe, size_t len,
6427                             unsigned int flags)
6428 {
6429         struct ftrace_buffer_info *info = file->private_data;
6430         struct trace_iterator *iter = &info->iter;
6431         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6432         struct page *pages_def[PIPE_DEF_BUFFERS];
6433         struct splice_pipe_desc spd = {
6434                 .pages          = pages_def,
6435                 .partial        = partial_def,
6436                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6437                 .flags          = flags,
6438                 .ops            = &buffer_pipe_buf_ops,
6439                 .spd_release    = buffer_spd_release,
6440         };
6441         struct buffer_ref *ref;
6442         int entries, size, i;
6443         ssize_t ret = 0;
6444
6445 #ifdef CONFIG_TRACER_MAX_TRACE
6446         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6447                 return -EBUSY;
6448 #endif
6449
6450         if (*ppos & (PAGE_SIZE - 1))
6451                 return -EINVAL;
6452
6453         if (len & (PAGE_SIZE - 1)) {
6454                 if (len < PAGE_SIZE)
6455                         return -EINVAL;
6456                 len &= PAGE_MASK;
6457         }
6458
6459         if (splice_grow_spd(pipe, &spd))
6460                 return -ENOMEM;
6461
6462  again:
6463         trace_access_lock(iter->cpu_file);
6464         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6465
6466         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6467                 struct page *page;
6468                 int r;
6469
6470                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6471                 if (!ref) {
6472                         ret = -ENOMEM;
6473                         break;
6474                 }
6475
6476                 ref->ref = 1;
6477                 ref->buffer = iter->trace_buffer->buffer;
6478                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6479                 if (!ref->page) {
6480                         ret = -ENOMEM;
6481                         kfree(ref);
6482                         break;
6483                 }
6484
6485                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6486                                           len, iter->cpu_file, 1);
6487                 if (r < 0) {
6488                         ring_buffer_free_read_page(ref->buffer, ref->page);
6489                         kfree(ref);
6490                         break;
6491                 }
6492
6493                 /*
6494                  * Zero out any leftover data; this page is going
6495                  * to user space.
6496                  */
6497                 size = ring_buffer_page_len(ref->page);
6498                 if (size < PAGE_SIZE)
6499                         memset(ref->page + size, 0, PAGE_SIZE - size);
6500
6501                 page = virt_to_page(ref->page);
6502
6503                 spd.pages[i] = page;
6504                 spd.partial[i].len = PAGE_SIZE;
6505                 spd.partial[i].offset = 0;
6506                 spd.partial[i].private = (unsigned long)ref;
6507                 spd.nr_pages++;
6508                 *ppos += PAGE_SIZE;
6509
6510                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6511         }
6512
6513         trace_access_unlock(iter->cpu_file);
6514         spd.nr_pages = i;
6515
6516         /* did we read anything? */
6517         if (!spd.nr_pages) {
6518                 if (ret)
6519                         goto out;
6520
6521                 ret = -EAGAIN;
6522                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6523                         goto out;
6524
6525                 ret = wait_on_pipe(iter, true);
6526                 if (ret)
6527                         goto out;
6528
6529                 goto again;
6530         }
6531
6532         ret = splice_to_pipe(pipe, &spd);
6533 out:
6534         splice_shrink_spd(&spd);
6535
6536         return ret;
6537 }
6538
6539 static const struct file_operations tracing_buffers_fops = {
6540         .open           = tracing_buffers_open,
6541         .read           = tracing_buffers_read,
6542         .poll           = tracing_buffers_poll,
6543         .release        = tracing_buffers_release,
6544         .splice_read    = tracing_buffers_splice_read,
6545         .llseek         = no_llseek,
6546 };
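/*
 * tracing_buffers_fops backs the per-cpu "trace_pipe_raw" files created in
 * tracing_init_tracefs_percpu(): they expose raw ring-buffer pages, copied
 * out on read() or handed over zero-copy via splice_read().
 */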
6547
6548 static ssize_t
6549 tracing_stats_read(struct file *filp, char __user *ubuf,
6550                    size_t count, loff_t *ppos)
6551 {
6552         struct inode *inode = file_inode(filp);
6553         struct trace_array *tr = inode->i_private;
6554         struct trace_buffer *trace_buf = &tr->trace_buffer;
6555         int cpu = tracing_get_cpu(inode);
6556         struct trace_seq *s;
6557         unsigned long cnt;
6558         unsigned long long t;
6559         unsigned long usec_rem;
6560
6561         s = kmalloc(sizeof(*s), GFP_KERNEL);
6562         if (!s)
6563                 return -ENOMEM;
6564
6565         trace_seq_init(s);
6566
6567         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6568         trace_seq_printf(s, "entries: %ld\n", cnt);
6569
6570         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6571         trace_seq_printf(s, "overrun: %ld\n", cnt);
6572
6573         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6574         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6575
6576         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6577         trace_seq_printf(s, "bytes: %ld\n", cnt);
6578
6579         if (trace_clocks[tr->clock_id].in_ns) {
6580                 /* local or global for trace_clock */
6581                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6582                 usec_rem = do_div(t, USEC_PER_SEC);
6583                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6584                                                                 t, usec_rem);
6585
6586                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6587                 usec_rem = do_div(t, USEC_PER_SEC);
6588                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6589         } else {
6590                 /* counter or tsc mode for trace_clock */
6591                 trace_seq_printf(s, "oldest event ts: %llu\n",
6592                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6593
6594                 trace_seq_printf(s, "now ts: %llu\n",
6595                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6596         }
6597
6598         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6599         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6600
6601         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6602         trace_seq_printf(s, "read events: %ld\n", cnt);
6603
6604         count = simple_read_from_buffer(ubuf, count, ppos,
6605                                         s->buffer, trace_seq_used(s));
6606
6607         kfree(s);
6608
6609         return count;
6610 }
6611
6612 static const struct file_operations tracing_stats_fops = {
6613         .open           = tracing_open_generic_tr,
6614         .read           = tracing_stats_read,
6615         .llseek         = generic_file_llseek,
6616         .release        = tracing_release_generic_tr,
6617 };
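/*
 * tracing_stats_fops backs the per-cpu "stats" files; reading one dumps the
 * "entries", "overrun", "commit overrun", etc. counters gathered above.
 */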
6618
6619 #ifdef CONFIG_DYNAMIC_FTRACE
6620
6621 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
6622 {
6623         return 0;
6624 }
6625
6626 static ssize_t
6627 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6628                   size_t cnt, loff_t *ppos)
6629 {
6630         static char ftrace_dyn_info_buffer[1024];
6631         static DEFINE_MUTEX(dyn_info_mutex);
6632         unsigned long *p = filp->private_data;
6633         char *buf = ftrace_dyn_info_buffer;
6634         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
6635         int r;
6636
6637         mutex_lock(&dyn_info_mutex);
6638         r = sprintf(buf, "%ld ", *p);
6639
6640         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
6641         buf[r++] = '\n';
6642
6643         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6644
6645         mutex_unlock(&dyn_info_mutex);
6646
6647         return r;
6648 }
6649
6650 static const struct file_operations tracing_dyn_info_fops = {
6651         .open           = tracing_open_generic,
6652         .read           = tracing_read_dyn_info,
6653         .llseek         = generic_file_llseek,
6654 };
6655 #endif /* CONFIG_DYNAMIC_FTRACE */
6656
6657 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6658 static void
6659 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6660 {
6661         tracing_snapshot();
6662 }
6663
6664 static void
6665 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
6666 {
6667         unsigned long *count = (long *)data;
6668
6669         if (!*count)
6670                 return;
6671
6672         if (*count != -1)
6673                 (*count)--;
6674
6675         tracing_snapshot();
6676 }
6677
6678 static int
6679 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6680                       struct ftrace_probe_ops *ops, void *data)
6681 {
6682         long count = (long)data;
6683
6684         seq_printf(m, "%ps:", (void *)ip);
6685
6686         seq_puts(m, "snapshot");
6687
6688         if (count == -1)
6689                 seq_puts(m, ":unlimited\n");
6690         else
6691                 seq_printf(m, ":count=%ld\n", count);
6692
6693         return 0;
6694 }
6695
6696 static struct ftrace_probe_ops snapshot_probe_ops = {
6697         .func                   = ftrace_snapshot,
6698         .print                  = ftrace_snapshot_print,
6699 };
6700
6701 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6702         .func                   = ftrace_count_snapshot,
6703         .print                  = ftrace_snapshot_print,
6704 };
6705
6706 static int
6707 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
6708                                char *glob, char *cmd, char *param, int enable)
6709 {
6710         struct ftrace_probe_ops *ops;
6711         void *count = (void *)-1;
6712         char *number;
6713         int ret;
6714
6715         /* hash funcs only work with set_ftrace_filter */
6716         if (!enable)
6717                 return -EINVAL;
6718
6719         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6720
6721         if (glob[0] == '!') {
6722                 unregister_ftrace_function_probe_func(glob+1, ops);
6723                 return 0;
6724         }
6725
6726         if (!param)
6727                 goto out_reg;
6728
6729         number = strsep(&param, ":");
6730
6731         if (!strlen(number))
6732                 goto out_reg;
6733
6734         /*
6735          * We use the callback data field (which is a pointer)
6736          * as our counter.
6737          */
6738         ret = kstrtoul(number, 0, (unsigned long *)&count);
6739         if (ret)
6740                 return ret;
6741
6742  out_reg:
6743         ret = register_ftrace_function_probe(glob, ops, count);
6744
6745         if (ret >= 0)
6746                 alloc_snapshot(&global_trace);
6747
6748         return ret < 0 ? ret : 0;
6749 }
6750
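/*
 * The "snapshot" command for set_ftrace_filter.  Illustrative usage, with
 * a made-up function name:
 *
 *   echo 'some_func:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *   echo 'some_func:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *   echo '!some_func:snapshot'  > set_ftrace_filter   # remove the probe
 */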
6751 static struct ftrace_func_command ftrace_snapshot_cmd = {
6752         .name                   = "snapshot",
6753         .func                   = ftrace_trace_snapshot_callback,
6754 };
6755
6756 static __init int register_snapshot_cmd(void)
6757 {
6758         return register_ftrace_command(&ftrace_snapshot_cmd);
6759 }
6760 #else
6761 static inline __init int register_snapshot_cmd(void) { return 0; }
6762 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6763
6764 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6765 {
6766         if (WARN_ON(!tr->dir))
6767                 return ERR_PTR(-ENODEV);
6768
6769         /* Top directory uses NULL as the parent */
6770         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6771                 return NULL;
6772
6773         /* All sub buffers have a descriptor */
6774         return tr->dir;
6775 }
6776
6777 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6778 {
6779         struct dentry *d_tracer;
6780
6781         if (tr->percpu_dir)
6782                 return tr->percpu_dir;
6783
6784         d_tracer = tracing_get_dentry(tr);
6785         if (IS_ERR(d_tracer))
6786                 return NULL;
6787
6788         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6789
6790         WARN_ONCE(!tr->percpu_dir,
6791                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6792
6793         return tr->percpu_dir;
6794 }
6795
6796 static struct dentry *
6797 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6798                       void *data, long cpu, const struct file_operations *fops)
6799 {
6800         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6801
6802         if (ret) /* See tracing_get_cpu() */
6803                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6804         return ret;
6805 }
6806
6807 static void
6808 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6809 {
6810         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6811         struct dentry *d_cpu;
6812         char cpu_dir[30]; /* 30 characters should be more than enough */
6813
6814         if (!d_percpu)
6815                 return;
6816
6817         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6818         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6819         if (!d_cpu) {
6820                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6821                 return;
6822         }
6823
6824         /* per cpu trace_pipe */
6825         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6826                                 tr, cpu, &tracing_pipe_fops);
6827
6828         /* per cpu trace */
6829         trace_create_cpu_file("trace", 0644, d_cpu,
6830                                 tr, cpu, &tracing_fops);
6831
6832         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6833                                 tr, cpu, &tracing_buffers_fops);
6834
6835         trace_create_cpu_file("stats", 0444, d_cpu,
6836                                 tr, cpu, &tracing_stats_fops);
6837
6838         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6839                                 tr, cpu, &tracing_entries_fops);
6840
6841 #ifdef CONFIG_TRACER_SNAPSHOT
6842         trace_create_cpu_file("snapshot", 0644, d_cpu,
6843                                 tr, cpu, &snapshot_fops);
6844
6845         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6846                                 tr, cpu, &snapshot_raw_fops);
6847 #endif
6848 }
6849
6850 #ifdef CONFIG_FTRACE_SELFTEST
6851 /* Let selftest have access to static functions in this file */
6852 #include "trace_selftest.c"
6853 #endif
6854
6855 static ssize_t
6856 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
6857                         loff_t *ppos)
6858 {
6859         struct trace_option_dentry *topt = filp->private_data;
6860         char *buf;
6861
6862         if (topt->flags->val & topt->opt->bit)
6863                 buf = "1\n";
6864         else
6865                 buf = "0\n";
6866
6867         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6868 }
6869
6870 static ssize_t
6871 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
6872                          loff_t *ppos)
6873 {
6874         struct trace_option_dentry *topt = filp->private_data;
6875         unsigned long val;
6876         int ret;
6877
6878         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6879         if (ret)
6880                 return ret;
6881
6882         if (val != 0 && val != 1)
6883                 return -EINVAL;
6884
6885         if (!!(topt->flags->val & topt->opt->bit) != val) {
6886                 mutex_lock(&trace_types_lock);
6887                 ret = __set_tracer_option(topt->tr, topt->flags,
6888                                           topt->opt, !val);
6889                 mutex_unlock(&trace_types_lock);
6890                 if (ret)
6891                         return ret;
6892         }
6893
6894         *ppos += cnt;
6895
6896         return cnt;
6897 }
6898
6899
6900 static const struct file_operations trace_options_fops = {
6901         .open = tracing_open_generic,
6902         .read = trace_options_read,
6903         .write = trace_options_write,
6904         .llseek = generic_file_llseek,
6905 };
6906
6907 /*
6908  * In order to pass in both the trace_array descriptor as well as the index
6909  * to the flag that the trace option file represents, the trace_array
6910  * has a character array of trace_flags_index[], which holds the index
6911  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
6912  * The address of this character array is passed to the flag option file
6913  * read/write callbacks.
6914  *
6915  * In order to extract both the index and the trace_array descriptor,
6916  * get_tr_index() uses the following algorithm.
6917  *
6918  *   idx = *ptr;
6919  *
6920  * As the pointer itself contains the address of the index (remember
6921  * index[1] == 1).
6922  *
6923  * Then to get the trace_array descriptor, by subtracting that index
6924  * from the ptr, we get to the start of the index itself.
6925  *
6926  *   ptr - idx == &index[0]
6927  *
6928  * Then a simple container_of() from that pointer gets us to the
6929  * trace_array descriptor.
6930  */
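/*
 * Concrete example (illustrative): for data == &tr->trace_flags_index[3],
 * *pindex becomes 3, data - 3 is &tr->trace_flags_index[0], and
 * container_of() recovers the enclosing trace_array.
 */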
6931 static void get_tr_index(void *data, struct trace_array **ptr,
6932                          unsigned int *pindex)
6933 {
6934         *pindex = *(unsigned char *)data;
6935
6936         *ptr = container_of(data - *pindex, struct trace_array,
6937                             trace_flags_index);
6938 }
6939
6940 static ssize_t
6941 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
6942                         loff_t *ppos)
6943 {
6944         void *tr_index = filp->private_data;
6945         struct trace_array *tr;
6946         unsigned int index;
6947         char *buf;
6948
6949         get_tr_index(tr_index, &tr, &index);
6950
6951         if (tr->trace_flags & (1 << index))
6952                 buf = "1\n";
6953         else
6954                 buf = "0\n";
6955
6956         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
6957 }
6958
6959 static ssize_t
6960 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
6961                          loff_t *ppos)
6962 {
6963         void *tr_index = filp->private_data;
6964         struct trace_array *tr;
6965         unsigned int index;
6966         unsigned long val;
6967         int ret;
6968
6969         get_tr_index(tr_index, &tr, &index);
6970
6971         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6972         if (ret)
6973                 return ret;
6974
6975         if (val != 0 && val != 1)
6976                 return -EINVAL;
6977
6978         mutex_lock(&trace_types_lock);
6979         ret = set_tracer_flag(tr, 1 << index, val);
6980         mutex_unlock(&trace_types_lock);
6981
6982         if (ret < 0)
6983                 return ret;
6984
6985         *ppos += cnt;
6986
6987         return cnt;
6988 }
6989
6990 static const struct file_operations trace_options_core_fops = {
6991         .open = tracing_open_generic,
6992         .read = trace_options_core_read,
6993         .write = trace_options_core_write,
6994         .llseek = generic_file_llseek,
6995 };
6996
6997 struct dentry *trace_create_file(const char *name,
6998                                  umode_t mode,
6999                                  struct dentry *parent,
7000                                  void *data,
7001                                  const struct file_operations *fops)
7002 {
7003         struct dentry *ret;
7004
7005         ret = tracefs_create_file(name, mode, parent, data, fops);
7006         if (!ret)
7007                 pr_warn("Could not create tracefs '%s' entry\n", name);
7008
7009         return ret;
7010 }
7011
7012
7013 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7014 {
7015         struct dentry *d_tracer;
7016
7017         if (tr->options)
7018                 return tr->options;
7019
7020         d_tracer = tracing_get_dentry(tr);
7021         if (IS_ERR(d_tracer))
7022                 return NULL;
7023
7024         tr->options = tracefs_create_dir("options", d_tracer);
7025         if (!tr->options) {
7026                 pr_warn("Could not create tracefs directory 'options'\n");
7027                 return NULL;
7028         }
7029
7030         return tr->options;
7031 }
7032
7033 static void
7034 create_trace_option_file(struct trace_array *tr,
7035                          struct trace_option_dentry *topt,
7036                          struct tracer_flags *flags,
7037                          struct tracer_opt *opt)
7038 {
7039         struct dentry *t_options;
7040
7041         t_options = trace_options_init_dentry(tr);
7042         if (!t_options)
7043                 return;
7044
7045         topt->flags = flags;
7046         topt->opt = opt;
7047         topt->tr = tr;
7048
7049         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7050                                     &trace_options_fops);
7051
7052 }
7053
7054 static void
7055 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7056 {
7057         struct trace_option_dentry *topts;
7058         struct trace_options *tr_topts;
7059         struct tracer_flags *flags;
7060         struct tracer_opt *opts;
7061         int cnt;
7062         int i;
7063
7064         if (!tracer)
7065                 return;
7066
7067         flags = tracer->flags;
7068
7069         if (!flags || !flags->opts)
7070                 return;
7071
7072         /*
7073          * If this is an instance, only create flags for tracers
7074          * the instance may have.
7075          */
7076         if (!trace_ok_for_array(tracer, tr))
7077                 return;
7078
7079         for (i = 0; i < tr->nr_topts; i++) {
7080                 /* Make sure there are no duplicate flags. */
7081                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7082                         return;
7083         }
7084
7085         opts = flags->opts;
7086
7087         for (cnt = 0; opts[cnt].name; cnt++)
7088                 ;
7089
7090         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7091         if (!topts)
7092                 return;
7093
7094         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7095                             GFP_KERNEL);
7096         if (!tr_topts) {
7097                 kfree(topts);
7098                 return;
7099         }
7100
7101         tr->topts = tr_topts;
7102         tr->topts[tr->nr_topts].tracer = tracer;
7103         tr->topts[tr->nr_topts].topts = topts;
7104         tr->nr_topts++;
7105
7106         for (cnt = 0; opts[cnt].name; cnt++) {
7107                 create_trace_option_file(tr, &topts[cnt], flags,
7108                                          &opts[cnt]);
7109                 WARN_ONCE(topts[cnt].entry == NULL,
7110                           "Failed to create trace option: %s",
7111                           opts[cnt].name);
7112         }
7113 }
7114
7115 static struct dentry *
7116 create_trace_option_core_file(struct trace_array *tr,
7117                               const char *option, long index)
7118 {
7119         struct dentry *t_options;
7120
7121         t_options = trace_options_init_dentry(tr);
7122         if (!t_options)
7123                 return NULL;
7124
7125         return trace_create_file(option, 0644, t_options,
7126                                  (void *)&tr->trace_flags_index[index],
7127                                  &trace_options_core_fops);
7128 }
7129
7130 static void create_trace_options_dir(struct trace_array *tr)
7131 {
7132         struct dentry *t_options;
7133         bool top_level = tr == &global_trace;
7134         int i;
7135
7136         t_options = trace_options_init_dentry(tr);
7137         if (!t_options)
7138                 return;
7139
7140         for (i = 0; trace_options[i]; i++) {
7141                 if (top_level ||
7142                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7143                         create_trace_option_core_file(tr, trace_options[i], i);
7144         }
7145 }
7146
7147 static ssize_t
7148 rb_simple_read(struct file *filp, char __user *ubuf,
7149                size_t cnt, loff_t *ppos)
7150 {
7151         struct trace_array *tr = filp->private_data;
7152         char buf[64];
7153         int r;
7154
7155         r = tracer_tracing_is_on(tr);
7156         r = sprintf(buf, "%d\n", r);
7157
7158         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7159 }
7160
7161 static ssize_t
7162 rb_simple_write(struct file *filp, const char __user *ubuf,
7163                 size_t cnt, loff_t *ppos)
7164 {
7165         struct trace_array *tr = filp->private_data;
7166         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7167         unsigned long val;
7168         int ret;
7169
7170         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7171         if (ret)
7172                 return ret;
7173
7174         if (buffer) {
7175                 mutex_lock(&trace_types_lock);
7176                 if (val) {
7177                         tracer_tracing_on(tr);
7178                         if (tr->current_trace->start)
7179                                 tr->current_trace->start(tr);
7180                 } else {
7181                         tracer_tracing_off(tr);
7182                         if (tr->current_trace->stop)
7183                                 tr->current_trace->stop(tr);
7184                 }
7185                 mutex_unlock(&trace_types_lock);
7186         }
7187
7188         (*ppos)++;
7189
7190         return cnt;
7191 }
7192
7193 static const struct file_operations rb_simple_fops = {
7194         .open           = tracing_open_generic_tr,
7195         .read           = rb_simple_read,
7196         .write          = rb_simple_write,
7197         .release        = tracing_release_generic_tr,
7198         .llseek         = default_llseek,
7199 };
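/*
 * rb_simple_fops backs the per-instance "tracing_on" file: writing 0 or 1
 * stops or resumes recording into the ring buffer without freeing it.
 */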
7200
7201 struct dentry *trace_instance_dir;
7202
7203 static void
7204 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7205
7206 static int
7207 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7208 {
7209         enum ring_buffer_flags rb_flags;
7210
7211         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7212
7213         buf->tr = tr;
7214
7215         buf->buffer = ring_buffer_alloc(size, rb_flags);
7216         if (!buf->buffer)
7217                 return -ENOMEM;
7218
7219         buf->data = alloc_percpu(struct trace_array_cpu);
7220         if (!buf->data) {
7221                 ring_buffer_free(buf->buffer);
7222                 return -ENOMEM;
7223         }
7224
7225         /* Allocate the first page for all buffers */
7226         set_buffer_entries(&tr->trace_buffer,
7227                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7228
7229         return 0;
7230 }
7231
7232 static int allocate_trace_buffers(struct trace_array *tr, int size)
7233 {
7234         int ret;
7235
7236         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7237         if (ret)
7238                 return ret;
7239
7240 #ifdef CONFIG_TRACER_MAX_TRACE
7241         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7242                                     allocate_snapshot ? size : 1);
7243         if (WARN_ON(ret)) {
7244                 ring_buffer_free(tr->trace_buffer.buffer);
7245                 free_percpu(tr->trace_buffer.data);
7246                 return -ENOMEM;
7247         }
7248         tr->allocated_snapshot = allocate_snapshot;
7249
7250         /*
7251          * Only the top level trace array gets its snapshot allocated
7252          * from the kernel command line.
7253          */
7254         allocate_snapshot = false;
7255 #endif
7256         return 0;
7257 }
7258
7259 static void free_trace_buffer(struct trace_buffer *buf)
7260 {
7261         if (buf->buffer) {
7262                 ring_buffer_free(buf->buffer);
7263                 buf->buffer = NULL;
7264                 free_percpu(buf->data);
7265                 buf->data = NULL;
7266         }
7267 }
7268
7269 static void free_trace_buffers(struct trace_array *tr)
7270 {
7271         if (!tr)
7272                 return;
7273
7274         free_trace_buffer(&tr->trace_buffer);
7275
7276 #ifdef CONFIG_TRACER_MAX_TRACE
7277         free_trace_buffer(&tr->max_buffer);
7278 #endif
7279 }
7280
7281 static void init_trace_flags_index(struct trace_array *tr)
7282 {
7283         int i;
7284
7285         /* Used by the trace options files */
7286         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7287                 tr->trace_flags_index[i] = i;
7288 }
7289
7290 static void __update_tracer_options(struct trace_array *tr)
7291 {
7292         struct tracer *t;
7293
7294         for (t = trace_types; t; t = t->next)
7295                 add_tracer_options(tr, t);
7296 }
7297
7298 static void update_tracer_options(struct trace_array *tr)
7299 {
7300         mutex_lock(&trace_types_lock);
7301         __update_tracer_options(tr);
7302         mutex_unlock(&trace_types_lock);
7303 }
7304
7305 static int instance_mkdir(const char *name)
7306 {
7307         struct trace_array *tr;
7308         int ret;
7309
7310         mutex_lock(&trace_types_lock);
7311
7312         ret = -EEXIST;
7313         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7314                 if (tr->name && strcmp(tr->name, name) == 0)
7315                         goto out_unlock;
7316         }
7317
7318         ret = -ENOMEM;
7319         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7320         if (!tr)
7321                 goto out_unlock;
7322
7323         tr->name = kstrdup(name, GFP_KERNEL);
7324         if (!tr->name)
7325                 goto out_free_tr;
7326
7327         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7328                 goto out_free_tr;
7329
7330         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7331
7332         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7333
7334         raw_spin_lock_init(&tr->start_lock);
7335
7336         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7337
7338         tr->current_trace = &nop_trace;
7339
7340         INIT_LIST_HEAD(&tr->systems);
7341         INIT_LIST_HEAD(&tr->events);
7342
7343         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7344                 goto out_free_tr;
7345
7346         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7347         if (!tr->dir)
7348                 goto out_free_tr;
7349
7350         ret = event_trace_add_tracer(tr->dir, tr);
7351         if (ret) {
7352                 tracefs_remove_recursive(tr->dir);
7353                 goto out_free_tr;
7354         }
7355
7356         init_tracer_tracefs(tr, tr->dir);
7357         init_trace_flags_index(tr);
7358         __update_tracer_options(tr);
7359
7360         list_add(&tr->list, &ftrace_trace_arrays);
7361
7362         mutex_unlock(&trace_types_lock);
7363
7364         return 0;
7365
7366  out_free_tr:
7367         free_trace_buffers(tr);
7368         free_cpumask_var(tr->tracing_cpumask);
7369         kfree(tr->name);
7370         kfree(tr);
7371
7372  out_unlock:
7373         mutex_unlock(&trace_types_lock);
7374
7375         return ret;
7376
7377 }
7378
7379 static int instance_rmdir(const char *name)
7380 {
7381         struct trace_array *tr;
7382         int found = 0;
7383         int ret;
7384         int i;
7385
7386         mutex_lock(&trace_types_lock);
7387
7388         ret = -ENODEV;
7389         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7390                 if (tr->name && strcmp(tr->name, name) == 0) {
7391                         found = 1;
7392                         break;
7393                 }
7394         }
7395         if (!found)
7396                 goto out_unlock;
7397
7398         ret = -EBUSY;
7399         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7400                 goto out_unlock;
7401
7402         list_del(&tr->list);
7403
7404         /* Disable all the flags that were enabled coming in */
7405         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7406                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7407                         set_tracer_flag(tr, 1 << i, 0);
7408         }
7409
7410         tracing_set_nop(tr);
7411         event_trace_del_tracer(tr);
7412         ftrace_destroy_function_files(tr);
7413         tracefs_remove_recursive(tr->dir);
7414         free_trace_buffers(tr);
7415
7416         for (i = 0; i < tr->nr_topts; i++) {
7417                 kfree(tr->topts[i].topts);
7418         }
7419         kfree(tr->topts);
7420
7421         kfree(tr->name);
7422         kfree(tr);
7423
7424         ret = 0;
7425
7426  out_unlock:
7427         mutex_unlock(&trace_types_lock);
7428
7429         return ret;
7430 }
7431
7432 static __init void create_trace_instances(struct dentry *d_tracer)
7433 {
7434         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7435                                                          instance_mkdir,
7436                                                          instance_rmdir);
7437         if (WARN_ON(!trace_instance_dir))
7438                 return;
7439 }
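/*
 * Once the "instances" directory exists, a plain mkdir/rmdir in tracefs
 * creates or tears down a trace_array, e.g. (illustrative mount point):
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 */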
7440
7441 static void
7442 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7443 {
7444         int cpu;
7445
7446         trace_create_file("available_tracers", 0444, d_tracer,
7447                         tr, &show_traces_fops);
7448
7449         trace_create_file("current_tracer", 0644, d_tracer,
7450                         tr, &set_tracer_fops);
7451
7452         trace_create_file("tracing_cpumask", 0644, d_tracer,
7453                           tr, &tracing_cpumask_fops);
7454
7455         trace_create_file("trace_options", 0644, d_tracer,
7456                           tr, &tracing_iter_fops);
7457
7458         trace_create_file("trace", 0644, d_tracer,
7459                           tr, &tracing_fops);
7460
7461         trace_create_file("trace_pipe", 0444, d_tracer,
7462                           tr, &tracing_pipe_fops);
7463
7464         trace_create_file("buffer_size_kb", 0644, d_tracer,
7465                           tr, &tracing_entries_fops);
7466
7467         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7468                           tr, &tracing_total_entries_fops);
7469
7470         trace_create_file("free_buffer", 0200, d_tracer,
7471                           tr, &tracing_free_buffer_fops);
7472
7473         trace_create_file("trace_marker", 0220, d_tracer,
7474                           tr, &tracing_mark_fops);
7475
7476         trace_create_file("trace_marker_raw", 0220, d_tracer,
7477                           tr, &tracing_mark_raw_fops);
7478
7479         trace_create_file("trace_clock", 0644, d_tracer, tr,
7480                           &trace_clock_fops);
7481
7482         trace_create_file("tracing_on", 0644, d_tracer,
7483                           tr, &rb_simple_fops);
7484
7485         create_trace_options_dir(tr);
7486
7487 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7488         trace_create_file("tracing_max_latency", 0644, d_tracer,
7489                         &tr->max_latency, &tracing_max_lat_fops);
7490 #endif
7491
7492         if (ftrace_create_function_files(tr, d_tracer))
7493                 WARN(1, "Could not allocate function filter files");
7494
7495 #ifdef CONFIG_TRACER_SNAPSHOT
7496         trace_create_file("snapshot", 0644, d_tracer,
7497                           tr, &snapshot_fops);
7498 #endif
7499
7500         for_each_tracing_cpu(cpu)
7501                 tracing_init_tracefs_percpu(tr, cpu);
7502
7503         ftrace_init_tracefs(tr, d_tracer);
7504 }
7505
7506 static struct vfsmount *trace_automount(void *ignore)
7507 {
7508         struct vfsmount *mnt;
7509         struct file_system_type *type;
7510
7511         /*
7512          * To maintain backward compatibility for tools that mount
7513          * debugfs to get to the tracing facility, tracefs is automatically
7514          * mounted to the debugfs/tracing directory.
7515          */
7516         type = get_fs_type("tracefs");
7517         if (!type)
7518                 return NULL;
7519         mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
7520         put_filesystem(type);
7521         if (IS_ERR(mnt))
7522                 return NULL;
7523         mntget(mnt);
7524
7525         return mnt;
7526 }
7527
7528 /**
7529  * tracing_init_dentry - initialize top level trace array
7530  *
7531  * This is called when creating files or directories in the tracing
7532  * directory. It is called via fs_initcall() by any of the boot up code
7533  * and expects to return the dentry of the top level tracing directory.
7534  */
7535 struct dentry *tracing_init_dentry(void)
7536 {
7537         struct trace_array *tr = &global_trace;
7538
7539         /* The top level trace array uses NULL as the parent */
7540         if (tr->dir)
7541                 return NULL;
7542
7543         if (WARN_ON(!tracefs_initialized()) ||
7544                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7545                  WARN_ON(!debugfs_initialized())))
7546                 return ERR_PTR(-ENODEV);
7547
7548         /*
7549          * As there may still be users that expect the tracing
7550          * files to exist in debugfs/tracing, we must automount
7551          * the tracefs file system there, so older tools still
7552          * work with the newer kernel.
7553          */
7554         tr->dir = debugfs_create_automount("tracing", NULL,
7555                                            trace_automount, NULL);
7556         if (!tr->dir) {
7557                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7558                 return ERR_PTR(-ENOMEM);
7559         }
7560
7561         return NULL;
7562 }
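/*
 * Note the return convention above: NULL means "use the tracefs root as the
 * parent dentry" (the normal top-level case), while an ERR_PTR signals that
 * tracefs/debugfs is not usable yet.
 */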
7563
7564 extern struct trace_enum_map *__start_ftrace_enum_maps[];
7565 extern struct trace_enum_map *__stop_ftrace_enum_maps[];
7566
7567 static void __init trace_enum_init(void)
7568 {
7569         int len;
7570
7571         len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
7572         trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
7573 }
7574
7575 #ifdef CONFIG_MODULES
7576 static void trace_module_add_enums(struct module *mod)
7577 {
7578         if (!mod->num_trace_enums)
7579                 return;
7580
7581         /*
7582          * Modules with bad taint do not have events created, do
7583          * not bother with enums either.
7584          */
7585         if (trace_module_has_bad_taint(mod))
7586                 return;
7587
7588         trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
7589 }
7590
7591 #ifdef CONFIG_TRACE_ENUM_MAP_FILE
7592 static void trace_module_remove_enums(struct module *mod)
7593 {
7594         union trace_enum_map_item *map;
7595         union trace_enum_map_item **last = &trace_enum_maps;
7596
7597         if (!mod->num_trace_enums)
7598                 return;
7599
7600         mutex_lock(&trace_enum_mutex);
7601
7602         map = trace_enum_maps;
7603
7604         while (map) {
7605                 if (map->head.mod == mod)
7606                         break;
7607                 map = trace_enum_jmp_to_tail(map);
7608                 last = &map->tail.next;
7609                 map = map->tail.next;
7610         }
7611         if (!map)
7612                 goto out;
7613
7614         *last = trace_enum_jmp_to_tail(map)->tail.next;
7615         kfree(map);
7616  out:
7617         mutex_unlock(&trace_enum_mutex);
7618 }
7619 #else
7620 static inline void trace_module_remove_enums(struct module *mod) { }
7621 #endif /* CONFIG_TRACE_ENUM_MAP_FILE */
7622
7623 static int trace_module_notify(struct notifier_block *self,
7624                                unsigned long val, void *data)
7625 {
7626         struct module *mod = data;
7627
7628         switch (val) {
7629         case MODULE_STATE_COMING:
7630                 trace_module_add_enums(mod);
7631                 break;
7632         case MODULE_STATE_GOING:
7633                 trace_module_remove_enums(mod);
7634                 break;
7635         }
7636
7637         return 0;
7638 }
7639
7640 static struct notifier_block trace_module_nb = {
7641         .notifier_call = trace_module_notify,
7642         .priority = 0,
7643 };
7644 #endif /* CONFIG_MODULES */
7645
7646 static __init int tracer_init_tracefs(void)
7647 {
7648         struct dentry *d_tracer;
7649
7650         trace_access_lock_init();
7651
7652         d_tracer = tracing_init_dentry();
7653         if (IS_ERR(d_tracer))
7654                 return 0;
7655
7656         init_tracer_tracefs(&global_trace, d_tracer);
7657         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7658
7659         trace_create_file("tracing_thresh", 0644, d_tracer,
7660                         &global_trace, &tracing_thresh_fops);
7661
7662         trace_create_file("README", 0444, d_tracer,
7663                         NULL, &tracing_readme_fops);
7664
7665         trace_create_file("saved_cmdlines", 0444, d_tracer,
7666                         NULL, &tracing_saved_cmdlines_fops);
7667
7668         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7669                           NULL, &tracing_saved_cmdlines_size_fops);
7670
7671         trace_enum_init();
7672
7673         trace_create_enum_file(d_tracer);
7674
7675 #ifdef CONFIG_MODULES
7676         register_module_notifier(&trace_module_nb);
7677 #endif
7678
7679 #ifdef CONFIG_DYNAMIC_FTRACE
7680         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7681                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7682 #endif
7683
7684         create_trace_instances(d_tracer);
7685
7686         update_tracer_options(&global_trace);
7687
7688         return 0;
7689 }
7690
7691 static int trace_panic_handler(struct notifier_block *this,
7692                                unsigned long event, void *unused)
7693 {
7694         if (ftrace_dump_on_oops)
7695                 ftrace_dump(ftrace_dump_on_oops);
7696         return NOTIFY_OK;
7697 }
7698
7699 static struct notifier_block trace_panic_notifier = {
7700         .notifier_call  = trace_panic_handler,
7701         .next           = NULL,
7702         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7703 };
7704
7705 static int trace_die_handler(struct notifier_block *self,
7706                              unsigned long val,
7707                              void *data)
7708 {
7709         switch (val) {
7710         case DIE_OOPS:
7711                 if (ftrace_dump_on_oops)
7712                         ftrace_dump(ftrace_dump_on_oops);
7713                 break;
7714         default:
7715                 break;
7716         }
7717         return NOTIFY_OK;
7718 }
7719
7720 static struct notifier_block trace_die_notifier = {
7721         .notifier_call = trace_die_handler,
7722         .priority = 200
7723 };
7724
7725 /*
7726  * printk is capped at 1024 characters; we really don't need it that big.
7727  * Nothing should be printing 1000 characters anyway.
7728  */
7729 #define TRACE_MAX_PRINT         1000
7730
7731 /*
7732  * Define here KERN_TRACE so that we have one place to modify
7733  * it if we decide to change what log level the ftrace dump
7734  * should be at.
7735  */
7736 #define KERN_TRACE              KERN_EMERG
7737
7738 void
7739 trace_printk_seq(struct trace_seq *s)
7740 {
7741         /* Probably should print a warning here. */
7742         if (s->seq.len >= TRACE_MAX_PRINT)
7743                 s->seq.len = TRACE_MAX_PRINT;
7744
7745         /*
7746          * More paranoid code. Although the buffer size is set to
7747          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7748          * an extra layer of protection.
7749          */
7750         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7751                 s->seq.len = s->seq.size - 1;
7752
7753         /* should be zero ended, but we are paranoid. */
7754         s->buffer[s->seq.len] = 0;
7755
7756         printk(KERN_TRACE "%s", s->buffer);
7757
7758         trace_seq_init(s);
7759 }
7760
7761 void trace_init_global_iter(struct trace_iterator *iter)
7762 {
7763         iter->tr = &global_trace;
7764         iter->trace = iter->tr->current_trace;
7765         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7766         iter->trace_buffer = &global_trace.trace_buffer;
7767
7768         if (iter->trace && iter->trace->open)
7769                 iter->trace->open(iter);
7770
7771         /* Annotate start of buffers if we had overruns */
7772         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7773                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7774
7775         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7776         if (trace_clocks[iter->tr->clock_id].in_ns)
7777                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7778 }
7779
7780 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7781 {
7782         /* use static because iter can be a bit big for the stack */
7783         static struct trace_iterator iter;
7784         static atomic_t dump_running;
7785         struct trace_array *tr = &global_trace;
7786         unsigned int old_userobj;
7787         unsigned long flags;
7788         int cnt = 0, cpu;
7789
7790         /* Only allow one dump user at a time. */
7791         if (atomic_inc_return(&dump_running) != 1) {
7792                 atomic_dec(&dump_running);
7793                 return;
7794         }
7795
7796         /*
7797          * Always turn off tracing when we dump.
7798          * We don't need to show trace output of what happens
7799          * between multiple crashes.
7800          *
7801          * If the user does a sysrq-z, then they can re-enable
7802          * tracing with echo 1 > tracing_on.
7803          */
7804         tracing_off();
7805
7806         local_irq_save(flags);
7807
7808         /* Simulate the iterator */
7809         trace_init_global_iter(&iter);
7810
7811         for_each_tracing_cpu(cpu) {
7812                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7813         }
7814
7815         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7816
7817         /* don't look at user memory in panic mode */
7818         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7819
7820         switch (oops_dump_mode) {
7821         case DUMP_ALL:
7822                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7823                 break;
7824         case DUMP_ORIG:
7825                 iter.cpu_file = raw_smp_processor_id();
7826                 break;
7827         case DUMP_NONE:
7828                 goto out_enable;
7829         default:
7830                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7831                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7832         }
7833
7834         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7835
7836         /* Did function tracer already get disabled? */
7837         if (ftrace_is_dead()) {
7838                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7839                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7840         }
7841
7842         /*
7843          * We need to stop all tracing on all CPUs to read
7844          * the next buffer. This is a bit expensive, but is
7845          * not done often. We read everything we can,
7846          * and then release the locks again.
7847          */
7848
7849         while (!trace_empty(&iter)) {
7850
7851                 if (!cnt)
7852                         printk(KERN_TRACE "---------------------------------\n");
7853
7854                 cnt++;
7855
7856                 /* reset all but tr, trace, and overruns */
7857                 memset(&iter.seq, 0,
7858                        sizeof(struct trace_iterator) -
7859                        offsetof(struct trace_iterator, seq));
7860                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
7861                 iter.pos = -1;
7862
7863                 if (trace_find_next_entry_inc(&iter) != NULL) {
7864                         int ret;
7865
7866                         ret = print_trace_line(&iter);
7867                         if (ret != TRACE_TYPE_NO_CONSUME)
7868                                 trace_consume(&iter);
7869                 }
7870                 touch_nmi_watchdog();
7871
7872                 trace_printk_seq(&iter.seq);
7873         }
7874
7875         if (!cnt)
7876                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
7877         else
7878                 printk(KERN_TRACE "---------------------------------\n");
7879
7880  out_enable:
7881         tr->trace_flags |= old_userobj;
7882
7883         for_each_tracing_cpu(cpu) {
7884                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7885         }
7886         atomic_dec(&dump_running);
7887         local_irq_restore(flags);
7888 }
7889 EXPORT_SYMBOL_GPL(ftrace_dump);
7890
7891 __init static int tracer_alloc_buffers(void)
7892 {
7893         int ring_buf_size;
7894         int ret = -ENOMEM;
7895
7896         /*
7897          * Make sure we don't accidentally add more trace options
7898          * than we have bits for.
7899          */
7900         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
7901
7902         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
7903                 goto out;
7904
7905         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
7906                 goto out_free_buffer_mask;
7907
7908         /* Only allocate trace_printk buffers if a trace_printk exists */
7909         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
7910                 /* Must be called before global_trace.buffer is allocated */
7911                 trace_printk_init_buffers();
7912
7913         /* To save memory, keep the ring buffer size to its minimum */
7914         if (ring_buffer_expanded)
7915                 ring_buf_size = trace_buf_size;
7916         else
7917                 ring_buf_size = 1;
7918
7919         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
7920         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
7921
7922         raw_spin_lock_init(&global_trace.start_lock);
7923
7924         /*
7925          * The prepare callback allocates some memory for the ring buffer. We
7926          * don't free the buffer if the CPU goes down. If we were to free
7927          * the buffer, then the user would lose any trace that was in the
7928          * buffer. The memory will be removed once the "instance" is removed.
7929          */
7930         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
7931                                       "trace/RB:preapre", trace_rb_cpu_prepare,
7932                                       NULL);
7933         if (ret < 0)
7934                 goto out_free_cpumask;
7935         /* Used for event triggers */
7936         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
7937         if (!temp_buffer)
7938                 goto out_rm_hp_state;
7939
7940         if (trace_create_savedcmd() < 0)
7941                 goto out_free_temp_buffer;
7942
7943         /* TODO: make the number of buffers hot pluggable with CPUS */
7944         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
7945                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
7946                 WARN_ON(1);
7947                 goto out_free_savedcmd;
7948         }
7949
7950         if (global_trace.buffer_disabled)
7951                 tracing_off();
7952
7953         if (trace_boot_clock) {
7954                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
7955                 if (ret < 0)
7956                         pr_warn("Trace clock %s not defined, going back to default\n",
7957                                 trace_boot_clock);
7958         }
7959
7960         /*
7961          * register_tracer() might reference current_trace, so it
7962          * needs to be set before we register anything. This is
7963          * just a bootstrap of current_trace anyway.
7964          */
7965         global_trace.current_trace = &nop_trace;
7966
7967         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7968
7969         ftrace_init_global_array_ops(&global_trace);
7970
7971         init_trace_flags_index(&global_trace);
7972
7973         register_tracer(&nop_trace);
7974
7975         /* All seems OK, enable tracing */
7976         tracing_disabled = 0;
7977
7978         atomic_notifier_chain_register(&panic_notifier_list,
7979                                        &trace_panic_notifier);
7980
7981         register_die_notifier(&trace_die_notifier);
7982
7983         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
7984
7985         INIT_LIST_HEAD(&global_trace.systems);
7986         INIT_LIST_HEAD(&global_trace.events);
7987         list_add(&global_trace.list, &ftrace_trace_arrays);
7988
7989         apply_trace_boot_options();
7990
7991         register_snapshot_cmd();
7992
7993         return 0;
7994
7995 out_free_savedcmd:
7996         free_saved_cmdlines_buffer(savedcmd);
7997 out_free_temp_buffer:
7998         ring_buffer_free(temp_buffer);
7999 out_rm_hp_state:
8000         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8001 out_free_cpumask:
8002         free_cpumask_var(global_trace.tracing_cpumask);
8003 out_free_buffer_mask:
8004         free_cpumask_var(tracing_buffer_mask);
8005 out:
8006         return ret;
8007 }
8008
8009 void __init trace_init(void)
8010 {
8011         if (tracepoint_printk) {
8012                 tracepoint_print_iter =
8013                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8014                 if (WARN_ON(!tracepoint_print_iter))
8015                         tracepoint_printk = 0;
8016                 else
8017                         static_key_enable(&tracepoint_printk_key.key);
8018         }
8019         tracer_alloc_buffers();
8020         trace_event_init();
8021 }
8022
8023 __init static int clear_boot_tracer(void)
8024 {
8025         /*
8026          * The default bootup tracer name points into an init section.
8027          * This function is called as a late_initcall. If the boot
8028          * tracer was never registered, clear the pointer to prevent
8029          * later registration from accessing the buffer that is
8030          * about to be freed.
8031          */
8032         if (!default_bootup_tracer)
8033                 return 0;
8034
8035         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8036                default_bootup_tracer);
8037         default_bootup_tracer = NULL;
8038
8039         return 0;
8040 }
8041
8042 fs_initcall(tracer_init_tracefs);
8043 late_initcall(clear_boot_tracer);