1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/pagemap.h>
24 #include <linux/hardirq.h>
25 #include <linux/linkage.h>
26 #include <linux/uaccess.h>
27 #include <linux/kprobes.h>
28 #include <linux/ftrace.h>
29 #include <linux/module.h>
30 #include <linux/percpu.h>
31 #include <linux/splice.h>
32 #include <linux/kdebug.h>
33 #include <linux/string.h>
34 #include <linux/rwsem.h>
35 #include <linux/slab.h>
36 #include <linux/ctype.h>
37 #include <linux/init.h>
38 #include <linux/poll.h>
39 #include <linux/nmi.h>
40 #include <linux/fs.h>
41 #include <linux/sched/rt.h>
42
43 #include "trace.h"
44 #include "trace_output.h"
45
46 /*
47  * On boot up, the ring buffer is set to the minimum size, so that
48  * we do not waste memory on systems that are not using tracing.
49  */
50 bool ring_buffer_expanded;
51
52 /*
53  * We need to change this state when a selftest is running.
54  * A selftest will look into the ring buffer to count the
55  * entries inserted during the selftest, although concurrent
56  * insertions into the ring buffer, such as trace_printk, could occur
57  * at the same time, giving false positive or negative results.
58  */
59 static bool __read_mostly tracing_selftest_running;
60
61 /*
62  * If a tracer is running, we do not want to run SELFTEST.
63  */
64 bool __read_mostly tracing_selftest_disabled;
65
66 /* For tracers that don't implement custom flags */
67 static struct tracer_opt dummy_tracer_opt[] = {
68         { }
69 };
70
71 static struct tracer_flags dummy_tracer_flags = {
72         .val = 0,
73         .opts = dummy_tracer_opt
74 };
75
76 static int
77 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
78 {
79         return 0;
80 }
81
82 /*
83  * To prevent the comm cache from being overwritten when no
84  * tracing is active, only save the comm when a trace event
85  * occurred.
86  */
87 static DEFINE_PER_CPU(bool, trace_cmdline_save);
88
89 /*
90  * Kill all tracing for good (never come back).
91  * It is initialized to 1 but will be set to zero if the initialization
92  * of the tracer is successful. That is the only place that sets
93  * it back to zero.
94  */
95 static int tracing_disabled = 1;
96
97 DEFINE_PER_CPU(int, ftrace_cpu_disabled);
98
99 cpumask_var_t __read_mostly     tracing_buffer_mask;
100
101 /*
102  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
103  *
104  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
105  * is set, then ftrace_dump is called. This will output the contents
106  * of the ftrace buffers to the console.  This is very useful for
107  * capturing traces that lead to crashes and outputting them to a
108  * serial console.
109  *
110  * It is off by default, but you can enable it either by specifying
111  * "ftrace_dump_on_oops" on the kernel command line, or by setting
112  * /proc/sys/kernel/ftrace_dump_on_oops.
113  * Set it to 1 to dump the buffers of all CPUs.
114  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
115  */
116
117 enum ftrace_dump_mode ftrace_dump_on_oops;
118
119 /* When set, tracing will stop when a WARN*() is hit */
120 int __disable_trace_on_warning;
121
122 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
123
124 #define MAX_TRACER_SIZE         100
125 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
126 static char *default_bootup_tracer;
127
128 static bool allocate_snapshot;
129
130 static int __init set_cmdline_ftrace(char *str)
131 {
132         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
133         default_bootup_tracer = bootup_tracer_buf;
134         /* We are using ftrace early, expand it */
135         ring_buffer_expanded = true;
136         return 1;
137 }
138 __setup("ftrace=", set_cmdline_ftrace);
139
140 static int __init set_ftrace_dump_on_oops(char *str)
141 {
142         if (*str++ != '=' || !*str) {
143                 ftrace_dump_on_oops = DUMP_ALL;
144                 return 1;
145         }
146
147         if (!strcmp("orig_cpu", str)) {
148                 ftrace_dump_on_oops = DUMP_ORIG;
149                 return 1;
150         }
151
152         return 0;
153 }
154 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
155
156 static int __init stop_trace_on_warning(char *str)
157 {
158         __disable_trace_on_warning = 1;
159         return 1;
160 }
161 __setup("traceoff_on_warning=", stop_trace_on_warning);
162
163 static int __init boot_alloc_snapshot(char *str)
164 {
165         allocate_snapshot = true;
166         /* We also need the main ring buffer expanded */
167         ring_buffer_expanded = true;
168         return 1;
169 }
170 __setup("alloc_snapshot", boot_alloc_snapshot);
171
172
173 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
174 static char *trace_boot_options __initdata;
175
176 static int __init set_trace_boot_options(char *str)
177 {
178         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
179         trace_boot_options = trace_boot_options_buf;
180         return 0;
181 }
182 __setup("trace_options=", set_trace_boot_options);
183
184 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
185 static char *trace_boot_clock __initdata;
186
187 static int __init set_trace_boot_clock(char *str)
188 {
189         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
190         trace_boot_clock = trace_boot_clock_buf;
191         return 0;
192 }
193 __setup("trace_clock=", set_trace_boot_clock);
194
195
196 unsigned long long ns2usecs(cycle_t nsec)
197 {
198         nsec += 500;
199         do_div(nsec, 1000);
200         return nsec;
201 }
202
203 /*
204  * The global_trace is the descriptor that holds the tracing
205  * buffers for the live tracing. For each CPU, it contains
206  * a linked list of pages that will store trace entries. The
207  * page descriptors of the pages in memory are used to hold
208  * the linked list by linking the lru item in each page descriptor
209  * to each of the pages in the per-CPU buffer.
210  *
211  * For each active CPU there is a data field that holds the
212  * pages for the buffer for that CPU. Each CPU has the same number
213  * of pages allocated for its buffer.
214  */
215 static struct trace_array       global_trace;
216
217 LIST_HEAD(ftrace_trace_arrays);
218
219 int trace_array_get(struct trace_array *this_tr)
220 {
221         struct trace_array *tr;
222         int ret = -ENODEV;
223
224         mutex_lock(&trace_types_lock);
225         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
226                 if (tr == this_tr) {
227                         tr->ref++;
228                         ret = 0;
229                         break;
230                 }
231         }
232         mutex_unlock(&trace_types_lock);
233
234         return ret;
235 }
236
237 static void __trace_array_put(struct trace_array *this_tr)
238 {
239         WARN_ON(!this_tr->ref);
240         this_tr->ref--;
241 }
242
243 void trace_array_put(struct trace_array *this_tr)
244 {
245         mutex_lock(&trace_types_lock);
246         __trace_array_put(this_tr);
247         mutex_unlock(&trace_types_lock);
248 }
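/*
 * Minimal usage sketch (hypothetical caller): a reference should be taken
 * with trace_array_get() before using a trace_array that might disappear
 * concurrently, and dropped with trace_array_put() when done.
 */
#if 0
static int example_use_trace_array(struct trace_array *tr)
{
        if (trace_array_get(tr) < 0)
                return -ENODEV; /* the instance is already gone */

        /* ... safely use @tr here ... */

        trace_array_put(tr);
        return 0;
}
#endif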
249
250 int filter_check_discard(struct ftrace_event_file *file, void *rec,
251                          struct ring_buffer *buffer,
252                          struct ring_buffer_event *event)
253 {
254         if (unlikely(file->flags & FTRACE_EVENT_FL_FILTERED) &&
255             !filter_match_preds(file->filter, rec)) {
256                 ring_buffer_discard_commit(buffer, event);
257                 return 1;
258         }
259
260         return 0;
261 }
262 EXPORT_SYMBOL_GPL(filter_check_discard);
263
264 int call_filter_check_discard(struct ftrace_event_call *call, void *rec,
265                               struct ring_buffer *buffer,
266                               struct ring_buffer_event *event)
267 {
268         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
269             !filter_match_preds(call->filter, rec)) {
270                 ring_buffer_discard_commit(buffer, event);
271                 return 1;
272         }
273
274         return 0;
275 }
276 EXPORT_SYMBOL_GPL(call_filter_check_discard);
277
278 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
279 {
280         u64 ts;
281
282         /* Early boot up does not have a buffer yet */
283         if (!buf->buffer)
284                 return trace_clock_local();
285
286         ts = ring_buffer_time_stamp(buf->buffer, cpu);
287         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
288
289         return ts;
290 }
291
292 cycle_t ftrace_now(int cpu)
293 {
294         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
295 }
296
297 /**
298  * tracing_is_enabled - Show if global_trace is enabled
299  *
300  * Shows if the global trace has been enabled or not. It uses the
301  * mirror flag "buffer_disabled" so it can be used in fast paths such
302  * as by the irqsoff tracer. But it may be inaccurate due to races. If you
303  * need to know the accurate state, use tracing_is_on() which is a little
304  * slower, but accurate.
305  */
306 int tracing_is_enabled(void)
307 {
308         /*
309          * For quick access (irqsoff uses this in fast path), just
310          * return the mirror variable of the state of the ring buffer.
311          * It's a little racy, but we don't really care.
312          */
313         smp_rmb();
314         return !global_trace.buffer_disabled;
315 }
316
317 /*
318  * trace_buf_size is the size in bytes that is allocated
319  * for a buffer. Note, the number of bytes is always rounded
320  * to page size.
321  *
322  * This number is purposely set to a low value of 16384.
323  * If a dump on oops happens, it is much appreciated
324  * not to have to wait for all that output. In any case, this is
325  * configurable at both boot time and run time.
326  */
327 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
328
329 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
330
331 /* trace_types holds a linked list of available tracers. */
332 static struct tracer            *trace_types __read_mostly;
333
334 /*
335  * trace_types_lock is used to protect the trace_types list.
336  */
337 DEFINE_MUTEX(trace_types_lock);
338
339 /*
340  * Serialize access to the ring buffer.
341  *
342  * The ring buffer serializes readers, but that is only low-level protection.
343  * The validity of the events (which are returned by ring_buffer_peek(), etc.)
344  * is not protected by the ring buffer.
345  *
346  * The content of events may become garbage if we allow another process to
347  * consume these events concurrently:
348  *   A) the page of the consumed events may become a normal page
349  *      (not a reader page) in the ring buffer, and this page will be rewritten
350  *      by the event producer.
351  *   B) The page of the consumed events may become a page for splice_read,
352  *      and this page will be returned to the system.
353  *
354  * These primitives allow multiple processes to access different per-cpu
355  * ring buffers concurrently.
356  *
357  * These primitives don't distinguish read-only from read-consume access.
358  * Multiple read-only accesses are also serialized.
359  */
360
361 #ifdef CONFIG_SMP
362 static DECLARE_RWSEM(all_cpu_access_lock);
363 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
364
365 static inline void trace_access_lock(int cpu)
366 {
367         if (cpu == RING_BUFFER_ALL_CPUS) {
368                 /* gain it for accessing the whole ring buffer. */
369                 down_write(&all_cpu_access_lock);
370         } else {
371                 /* gain it for accessing a cpu ring buffer. */
372
373                 /* First, block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
374                 down_read(&all_cpu_access_lock);
375
376                 /* Second, block other access to this @cpu's ring buffer. */
377                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
378         }
379 }
380
381 static inline void trace_access_unlock(int cpu)
382 {
383         if (cpu == RING_BUFFER_ALL_CPUS) {
384                 up_write(&all_cpu_access_lock);
385         } else {
386                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
387                 up_read(&all_cpu_access_lock);
388         }
389 }
390
391 static inline void trace_access_lock_init(void)
392 {
393         int cpu;
394
395         for_each_possible_cpu(cpu)
396                 mutex_init(&per_cpu(cpu_access_lock, cpu));
397 }
398
399 #else
400
401 static DEFINE_MUTEX(access_lock);
402
403 static inline void trace_access_lock(int cpu)
404 {
405         (void)cpu;
406         mutex_lock(&access_lock);
407 }
408
409 static inline void trace_access_unlock(int cpu)
410 {
411         (void)cpu;
412         mutex_unlock(&access_lock);
413 }
414
415 static inline void trace_access_lock_init(void)
416 {
417 }
418
419 #endif
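/*
 * Usage sketch (hypothetical reader): a consumer of one cpu's buffer
 * brackets the access with trace_access_lock(cpu)/trace_access_unlock(cpu);
 * passing RING_BUFFER_ALL_CPUS instead excludes all per-cpu readers at once.
 */
#if 0
static void example_read_cpu_buffer(int cpu)
{
        trace_access_lock(cpu);
        /* ... peek at or consume events of @cpu here ... */
        trace_access_unlock(cpu);
}
#endif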
420
421 /* trace_flags holds trace_options default values */
422 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
423         TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
424         TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |
425         TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | TRACE_ITER_FUNCTION;
426
427 static void tracer_tracing_on(struct trace_array *tr)
428 {
429         if (tr->trace_buffer.buffer)
430                 ring_buffer_record_on(tr->trace_buffer.buffer);
431         /*
432          * This flag is looked at when buffers haven't been allocated
433          * yet, or by some tracers (like irqsoff) that just want to
434          * know if the ring buffer has been disabled, but can handle
435          * races where it gets disabled while we still do a record.
436          * As the check is in the fast path of the tracers, it is more
437          * important to be fast than accurate.
438          */
439         tr->buffer_disabled = 0;
440         /* Make the flag seen by readers */
441         smp_wmb();
442 }
443
444 /**
445  * tracing_on - enable tracing buffers
446  *
447  * This function enables tracing buffers that may have been
448  * disabled with tracing_off.
449  */
450 void tracing_on(void)
451 {
452         tracer_tracing_on(&global_trace);
453 }
454 EXPORT_SYMBOL_GPL(tracing_on);
455
456 /**
457  * __trace_puts - write a constant string into the trace buffer.
458  * @ip:    The address of the caller
459  * @str:   The constant string to write
460  * @size:  The size of the string.
461  */
462 int __trace_puts(unsigned long ip, const char *str, int size)
463 {
464         struct ring_buffer_event *event;
465         struct ring_buffer *buffer;
466         struct print_entry *entry;
467         unsigned long irq_flags;
468         int alloc;
469
470         if (unlikely(tracing_selftest_running || tracing_disabled))
471                 return 0;
472
473         alloc = sizeof(*entry) + size + 2; /* possible \n added */
474
475         local_save_flags(irq_flags);
476         buffer = global_trace.trace_buffer.buffer;
477         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
478                                           irq_flags, preempt_count());
479         if (!event)
480                 return 0;
481
482         entry = ring_buffer_event_data(event);
483         entry->ip = ip;
484
485         memcpy(&entry->buf, str, size);
486
487         /* Add a newline if necessary */
488         if (entry->buf[size - 1] != '\n') {
489                 entry->buf[size] = '\n';
490                 entry->buf[size + 1] = '\0';
491         } else
492                 entry->buf[size] = '\0';
493
494         __buffer_unlock_commit(buffer, event);
495
496         return size;
497 }
498 EXPORT_SYMBOL_GPL(__trace_puts);
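/*
 * Usage sketch: callers normally go through the trace_puts() macro, which
 * supplies _THIS_IP_ and the string length. A direct call is roughly
 * equivalent to the following (hypothetical) helper:
 */
#if 0
static void example_trace_puts(void)
{
        static const char msg[] = "hello from the tracer\n";

        __trace_puts(_THIS_IP_, msg, strlen(msg));
}
#endif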
499
500 /**
501  * __trace_bputs - write the pointer to a constant string into trace buffer
502  * @ip:    The address of the caller
503  * @str:   The constant string whose address will be written into the buffer
504  */
505 int __trace_bputs(unsigned long ip, const char *str)
506 {
507         struct ring_buffer_event *event;
508         struct ring_buffer *buffer;
509         struct bputs_entry *entry;
510         unsigned long irq_flags;
511         int size = sizeof(struct bputs_entry);
512
513         if (unlikely(tracing_selftest_running || tracing_disabled))
514                 return 0;
515
516         local_save_flags(irq_flags);
517         buffer = global_trace.trace_buffer.buffer;
518         event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
519                                           irq_flags, preempt_count());
520         if (!event)
521                 return 0;
522
523         entry = ring_buffer_event_data(event);
524         entry->ip                       = ip;
525         entry->str                      = str;
526
527         __buffer_unlock_commit(buffer, event);
528
529         return 1;
530 }
531 EXPORT_SYMBOL_GPL(__trace_bputs);
532
533 #ifdef CONFIG_TRACER_SNAPSHOT
534 /**
535  * tracing_snapshot - take a snapshot of the current buffer.
536  *
537  * This causes a swap between the snapshot buffer and the current live
538  * tracing buffer. You can use this to take snapshots of the live
539  * trace when some condition is triggered, but continue to trace.
540  *
541  * Note, make sure to allocate the snapshot either with
542  * tracing_snapshot_alloc(), or manually with:
543  * echo 1 > /sys/kernel/debug/tracing/snapshot
544  *
545  * If the snapshot buffer is not allocated, this will stop tracing,
546  * basically making a permanent snapshot.
547  */
548 void tracing_snapshot(void)
549 {
550         struct trace_array *tr = &global_trace;
551         struct tracer *tracer = tr->current_trace;
552         unsigned long flags;
553
554         if (in_nmi()) {
555                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
556                 internal_trace_puts("*** snapshot is being ignored        ***\n");
557                 return;
558         }
559
560         if (!tr->allocated_snapshot) {
561                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
562                 internal_trace_puts("*** stopping trace here!   ***\n");
563                 tracing_off();
564                 return;
565         }
566
567         /* Note, the snapshot cannot be used while the tracer itself is using it */
568         if (tracer->use_max_tr) {
569                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
570                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
571                 return;
572         }
573
574         local_irq_save(flags);
575         update_max_tr(tr, current, smp_processor_id());
576         local_irq_restore(flags);
577 }
578 EXPORT_SYMBOL_GPL(tracing_snapshot);
579
580 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
581                                         struct trace_buffer *size_buf, int cpu_id);
582 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
583
584 static int alloc_snapshot(struct trace_array *tr)
585 {
586         int ret;
587
588         if (!tr->allocated_snapshot) {
589
590                 /* allocate spare buffer */
591                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
592                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
593                 if (ret < 0)
594                         return ret;
595
596                 tr->allocated_snapshot = true;
597         }
598
599         return 0;
600 }
601
602 static void free_snapshot(struct trace_array *tr)
603 {
604         /*
605          * We don't free the ring buffer; instead, we resize it, because
606          * the max_tr ring buffer has some state (e.g. ring->clock) and
607          * we want to preserve it.
608          */
609         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
610         set_buffer_entries(&tr->max_buffer, 1);
611         tracing_reset_online_cpus(&tr->max_buffer);
612         tr->allocated_snapshot = false;
613 }
614
615 /**
616  * tracing_alloc_snapshot - allocate snapshot buffer.
617  *
618  * This only allocates the snapshot buffer if it isn't already
619  * allocated - it doesn't also take a snapshot.
620  *
621  * This is meant to be used in cases where the snapshot buffer needs
622  * to be set up for events that can't sleep but need to be able to
623  * trigger a snapshot.
624  */
625 int tracing_alloc_snapshot(void)
626 {
627         struct trace_array *tr = &global_trace;
628         int ret;
629
630         ret = alloc_snapshot(tr);
631         WARN_ON(ret < 0);
632
633         return ret;
634 }
635 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
636
637 /**
638  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
639  *
640  * This is similar to tracing_snapshot(), but it will allocate the
641  * snapshot buffer if it isn't already allocated. Use this only
642  * where it is safe to sleep, as the allocation may sleep.
643  *
644  * This causes a swap between the snapshot buffer and the current live
645  * tracing buffer. You can use this to take snapshots of the live
646  * trace when some condition is triggered, but continue to trace.
647  */
648 void tracing_snapshot_alloc(void)
649 {
650         int ret;
651
652         ret = tracing_alloc_snapshot();
653         if (ret < 0)
654                 return;
655
656         tracing_snapshot();
657 }
658 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
659 #else
660 void tracing_snapshot(void)
661 {
662         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
663 }
664 EXPORT_SYMBOL_GPL(tracing_snapshot);
665 int tracing_alloc_snapshot(void)
666 {
667         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
668         return -ENODEV;
669 }
670 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
671 void tracing_snapshot_alloc(void)
672 {
673         /* Give warning */
674         tracing_snapshot();
675 }
676 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
677 #endif /* CONFIG_TRACER_SNAPSHOT */
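/*
 * Usage sketch (hypothetical trigger point): allocate the snapshot buffer
 * once from sleepable context, then call tracing_snapshot() from the
 * (possibly atomic, but not NMI) condition of interest.
 */
#if 0
static void example_snapshot_on_condition(bool condition)
{
        /* done once, where sleeping is allowed */
        tracing_alloc_snapshot();

        if (condition)
                tracing_snapshot();
}
#endif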
678
679 static void tracer_tracing_off(struct trace_array *tr)
680 {
681         if (tr->trace_buffer.buffer)
682                 ring_buffer_record_off(tr->trace_buffer.buffer);
683         /*
684          * This flag is looked at when buffers haven't been allocated
685          * yet, or by some tracers (like irqsoff) that just want to
686          * know if the ring buffer has been disabled, but can handle
687          * races where it gets disabled while we still do a record.
688          * As the check is in the fast path of the tracers, it is more
689          * important to be fast than accurate.
690          */
691         tr->buffer_disabled = 1;
692         /* Make the flag seen by readers */
693         smp_wmb();
694 }
695
696 /**
697  * tracing_off - turn off tracing buffers
698  *
699  * This function stops the tracing buffers from recording data.
700  * It does not disable any overhead the tracers themselves may
701  * be causing. This function simply causes all recording to
702  * the ring buffers to fail.
703  */
704 void tracing_off(void)
705 {
706         tracer_tracing_off(&global_trace);
707 }
708 EXPORT_SYMBOL_GPL(tracing_off);
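/*
 * Usage sketch: a common debugging pattern is to stop the ring buffers
 * right after the event of interest, so that the data leading up to it
 * is preserved, and to re-enable them once it has been examined.
 */
#if 0
static void example_freeze_trace(bool something_bad_happened)
{
        if (something_bad_happened)
                tracing_off();  /* keep what has been recorded so far */

        /* ... read /sys/kernel/debug/tracing/trace ... */

        tracing_on();           /* resume recording */
}
#endif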
709
710 void disable_trace_on_warning(void)
711 {
712         if (__disable_trace_on_warning)
713                 tracing_off();
714 }
715
716 /**
717  * tracer_tracing_is_on - show the real state of the ring buffer
718  * @tr: the trace array to check
719  *
720  * Shows the real state of the ring buffer: whether it is enabled or not.
721  */
722 static int tracer_tracing_is_on(struct trace_array *tr)
723 {
724         if (tr->trace_buffer.buffer)
725                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
726         return !tr->buffer_disabled;
727 }
728
729 /**
730  * tracing_is_on - show state of ring buffers enabled
731  */
732 int tracing_is_on(void)
733 {
734         return tracer_tracing_is_on(&global_trace);
735 }
736 EXPORT_SYMBOL_GPL(tracing_is_on);
737
738 static int __init set_buf_size(char *str)
739 {
740         unsigned long buf_size;
741
742         if (!str)
743                 return 0;
744         buf_size = memparse(str, &str);
745         /* nr_entries cannot be zero */
746         if (buf_size == 0)
747                 return 0;
748         trace_buf_size = buf_size;
749         return 1;
750 }
751 __setup("trace_buf_size=", set_buf_size);
752
753 static int __init set_tracing_thresh(char *str)
754 {
755         unsigned long threshold;
756         int ret;
757
758         if (!str)
759                 return 0;
760         ret = kstrtoul(str, 0, &threshold);
761         if (ret < 0)
762                 return 0;
763         tracing_thresh = threshold * 1000;
764         return 1;
765 }
766 __setup("tracing_thresh=", set_tracing_thresh);
767
768 unsigned long nsecs_to_usecs(unsigned long nsecs)
769 {
770         return nsecs / 1000;
771 }
772
773 /* These must match the bit positions in trace_iterator_flags */
774 static const char *trace_options[] = {
775         "print-parent",
776         "sym-offset",
777         "sym-addr",
778         "verbose",
779         "raw",
780         "hex",
781         "bin",
782         "block",
783         "stacktrace",
784         "trace_printk",
785         "ftrace_preempt",
786         "branch",
787         "annotate",
788         "userstacktrace",
789         "sym-userobj",
790         "printk-msg-only",
791         "context-info",
792         "latency-format",
793         "sleep-time",
794         "graph-time",
795         "record-cmd",
796         "overwrite",
797         "disable_on_free",
798         "irq-info",
799         "markers",
800         "function-trace",
801         NULL
802 };
803
804 static struct {
805         u64 (*func)(void);
806         const char *name;
807         int in_ns;              /* is this clock in nanoseconds? */
808 } trace_clocks[] = {
809         { trace_clock_local,    "local",        1 },
810         { trace_clock_global,   "global",       1 },
811         { trace_clock_counter,  "counter",      0 },
812         { trace_clock_jiffies,  "uptime",       1 },
813         { trace_clock,          "perf",         1 },
814         ARCH_TRACE_CLOCKS
815 };
816
817 /*
818  * trace_parser_get_init - gets the buffer for trace parser
819  */
820 int trace_parser_get_init(struct trace_parser *parser, int size)
821 {
822         memset(parser, 0, sizeof(*parser));
823
824         parser->buffer = kmalloc(size, GFP_KERNEL);
825         if (!parser->buffer)
826                 return 1;
827
828         parser->size = size;
829         return 0;
830 }
831
832 /*
833  * trace_parser_put - frees the buffer for trace parser
834  */
835 void trace_parser_put(struct trace_parser *parser)
836 {
837         kfree(parser->buffer);
838 }
839
840 /*
841  * trace_get_user - reads the user input string separated by space
842  * (matched by isspace(ch))
843  *
844  * For each string found the 'struct trace_parser' is updated,
845  * and the function returns.
846  *
847  * Returns number of bytes read.
848  *
849  * See kernel/trace/trace.h for 'struct trace_parser' details.
850  */
851 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
852         size_t cnt, loff_t *ppos)
853 {
854         char ch;
855         size_t read = 0;
856         ssize_t ret;
857
858         if (!*ppos)
859                 trace_parser_clear(parser);
860
861         ret = get_user(ch, ubuf++);
862         if (ret)
863                 goto out;
864
865         read++;
866         cnt--;
867
868         /*
869          * If the parser has not finished with the last write,
870          * continue reading the user input without skipping spaces.
871          */
872         if (!parser->cont) {
873                 /* skip white space */
874                 while (cnt && isspace(ch)) {
875                         ret = get_user(ch, ubuf++);
876                         if (ret)
877                                 goto out;
878                         read++;
879                         cnt--;
880                 }
881
882                 /* only spaces were written */
883                 if (isspace(ch)) {
884                         *ppos += read;
885                         ret = read;
886                         goto out;
887                 }
888
889                 parser->idx = 0;
890         }
891
892         /* read the non-space input */
893         while (cnt && !isspace(ch)) {
894                 if (parser->idx < parser->size - 1)
895                         parser->buffer[parser->idx++] = ch;
896                 else {
897                         ret = -EINVAL;
898                         goto out;
899                 }
900                 ret = get_user(ch, ubuf++);
901                 if (ret)
902                         goto out;
903                 read++;
904                 cnt--;
905         }
906
907         /* We either got finished input or we have to wait for another call. */
908         if (isspace(ch)) {
909                 parser->buffer[parser->idx] = 0;
910                 parser->cont = false;
911         } else if (parser->idx < parser->size - 1) {
912                 parser->cont = true;
913                 parser->buffer[parser->idx++] = ch;
914         } else {
915                 ret = -EINVAL;
916                 goto out;
917         }
918
919         *ppos += read;
920         ret = read;
921
922 out:
923         return ret;
924 }
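/*
 * Usage sketch (hypothetical debugfs ->write() handler): user input is fed
 * through trace_get_user(), which hands back one whitespace-delimited token
 * in parser.buffer per call.
 */
#if 0
static ssize_t example_token_write(struct file *filp, const char __user *ubuf,
                                   size_t cnt, loff_t *ppos)
{
        struct trace_parser parser;
        ssize_t read;

        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        read = trace_get_user(&parser, ubuf, cnt, ppos);
        if (read > 0 && !parser.cont)
                pr_debug("got token: %s\n", parser.buffer);

        trace_parser_put(&parser);
        return read;
}
#endif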
925
926 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
927 {
928         int len;
929
930         if (s->len <= s->readpos)
931                 return -EBUSY;
932
933         len = s->len - s->readpos;
934         if (cnt > len)
935                 cnt = len;
936         memcpy(buf, s->buffer + s->readpos, cnt);
937
938         s->readpos += cnt;
939         return cnt;
940 }
941
942 unsigned long __read_mostly     tracing_thresh;
943
944 #ifdef CONFIG_TRACER_MAX_TRACE
945 /*
946  * Copy the new maximum trace into the separate maximum-trace
947  * structure. (this way the maximum trace is permanently saved,
948  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
949  */
950 static void
951 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
952 {
953         struct trace_buffer *trace_buf = &tr->trace_buffer;
954         struct trace_buffer *max_buf = &tr->max_buffer;
955         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
956         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
957
958         max_buf->cpu = cpu;
959         max_buf->time_start = data->preempt_timestamp;
960
961         max_data->saved_latency = tr->max_latency;
962         max_data->critical_start = data->critical_start;
963         max_data->critical_end = data->critical_end;
964
965         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
966         max_data->pid = tsk->pid;
967         /*
968          * If tsk == current, then use current_uid(), as that does not use
969          * RCU. The irq tracer can be called out of RCU scope.
970          */
971         if (tsk == current)
972                 max_data->uid = current_uid();
973         else
974                 max_data->uid = task_uid(tsk);
975
976         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
977         max_data->policy = tsk->policy;
978         max_data->rt_priority = tsk->rt_priority;
979
980         /* record this task's comm */
981         tracing_record_cmdline(tsk);
982 }
983
984 /**
985  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
986  * @tr: tracer
987  * @tsk: the task with the latency
988  * @cpu: The cpu that initiated the trace.
989  *
990  * Flip the buffers between the @tr and the max_tr and record information
991  * about which task was the cause of this latency.
992  */
993 void
994 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
995 {
996         struct ring_buffer *buf;
997
998         if (tr->stop_count)
999                 return;
1000
1001         WARN_ON_ONCE(!irqs_disabled());
1002
1003         if (!tr->allocated_snapshot) {
1004                 /* Only the nop tracer should hit this when disabling */
1005                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1006                 return;
1007         }
1008
1009         arch_spin_lock(&tr->max_lock);
1010
1011         buf = tr->trace_buffer.buffer;
1012         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1013         tr->max_buffer.buffer = buf;
1014
1015         __update_max_tr(tr, tsk, cpu);
1016         arch_spin_unlock(&tr->max_lock);
1017 }
1018
1019 /**
1020  * update_max_tr_single - only copy one trace over, and reset the rest
1021  * @tr: tracer
1022  * @tsk: task with the latency
1023  * @cpu: the cpu of the buffer to copy.
1024  *
1025  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1026  */
1027 void
1028 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1029 {
1030         int ret;
1031
1032         if (tr->stop_count)
1033                 return;
1034
1035         WARN_ON_ONCE(!irqs_disabled());
1036         if (!tr->allocated_snapshot) {
1037                 /* Only the nop tracer should hit this when disabling */
1038                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1039                 return;
1040         }
1041
1042         arch_spin_lock(&tr->max_lock);
1043
1044         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1045
1046         if (ret == -EBUSY) {
1047                 /*
1048                  * We failed to swap the buffer due to a commit taking
1049                  * place on this CPU. We fail to record, but we reset
1050                  * the max trace buffer (no one writes directly to it)
1051                  * and flag that it failed.
1052                  */
1053                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1054                         "Failed to swap buffers due to commit in progress\n");
1055         }
1056
1057         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1058
1059         __update_max_tr(tr, tsk, cpu);
1060         arch_spin_unlock(&tr->max_lock);
1061 }
1062 #endif /* CONFIG_TRACER_MAX_TRACE */
1063
1064 static int wait_on_pipe(struct trace_iterator *iter)
1065 {
1066         /* Iterators are static, they should be filled or empty */
1067         if (trace_buffer_iter(iter, iter->cpu_file))
1068                 return 0;
1069
1070         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file);
1071 }
1072
1073 #ifdef CONFIG_FTRACE_STARTUP_TEST
1074 static int run_tracer_selftest(struct tracer *type)
1075 {
1076         struct trace_array *tr = &global_trace;
1077         struct tracer *saved_tracer = tr->current_trace;
1078         int ret;
1079
1080         if (!type->selftest || tracing_selftest_disabled)
1081                 return 0;
1082
1083         /*
1084          * Run a selftest on this tracer.
1085          * Here we reset the trace buffer, and set the current
1086          * tracer to be this tracer. The tracer can then run some
1087          * internal tracing to verify that everything is in order.
1088          * If we fail, we do not register this tracer.
1089          */
1090         tracing_reset_online_cpus(&tr->trace_buffer);
1091
1092         tr->current_trace = type;
1093
1094 #ifdef CONFIG_TRACER_MAX_TRACE
1095         if (type->use_max_tr) {
1096                 /* If we expanded the buffers, make sure the max is expanded too */
1097                 if (ring_buffer_expanded)
1098                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1099                                            RING_BUFFER_ALL_CPUS);
1100                 tr->allocated_snapshot = true;
1101         }
1102 #endif
1103
1104         /* the test is responsible for initializing and enabling */
1105         pr_info("Testing tracer %s: ", type->name);
1106         ret = type->selftest(type, tr);
1107         /* the test is responsible for resetting too */
1108         tr->current_trace = saved_tracer;
1109         if (ret) {
1110                 printk(KERN_CONT "FAILED!\n");
1111                 /* Add the warning after printing 'FAILED' */
1112                 WARN_ON(1);
1113                 return -1;
1114         }
1115         /* Only reset on passing, to avoid touching corrupted buffers */
1116         tracing_reset_online_cpus(&tr->trace_buffer);
1117
1118 #ifdef CONFIG_TRACER_MAX_TRACE
1119         if (type->use_max_tr) {
1120                 tr->allocated_snapshot = false;
1121
1122                 /* Shrink the max buffer again */
1123                 if (ring_buffer_expanded)
1124                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1125                                            RING_BUFFER_ALL_CPUS);
1126         }
1127 #endif
1128
1129         printk(KERN_CONT "PASSED\n");
1130         return 0;
1131 }
1132 #else
1133 static inline int run_tracer_selftest(struct tracer *type)
1134 {
1135         return 0;
1136 }
1137 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1138
1139 /**
1140  * register_tracer - register a tracer with the ftrace system.
1141  * @type: the plugin for the tracer
1142  *
1143  * Register a new plugin tracer.
1144  */
1145 int register_tracer(struct tracer *type)
1146 {
1147         struct tracer *t;
1148         int ret = 0;
1149
1150         if (!type->name) {
1151                 pr_info("Tracer must have a name\n");
1152                 return -1;
1153         }
1154
1155         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1156                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1157                 return -1;
1158         }
1159
1160         mutex_lock(&trace_types_lock);
1161
1162         tracing_selftest_running = true;
1163
1164         for (t = trace_types; t; t = t->next) {
1165                 if (strcmp(type->name, t->name) == 0) {
1166                         /* already found */
1167                         pr_info("Tracer %s already registered\n",
1168                                 type->name);
1169                         ret = -1;
1170                         goto out;
1171                 }
1172         }
1173
1174         if (!type->set_flag)
1175                 type->set_flag = &dummy_set_flag;
1176         if (!type->flags)
1177                 type->flags = &dummy_tracer_flags;
1178         else
1179                 if (!type->flags->opts)
1180                         type->flags->opts = dummy_tracer_opt;
1181
1182         ret = run_tracer_selftest(type);
1183         if (ret < 0)
1184                 goto out;
1185
1186         type->next = trace_types;
1187         trace_types = type;
1188
1189  out:
1190         tracing_selftest_running = false;
1191         mutex_unlock(&trace_types_lock);
1192
1193         if (ret || !default_bootup_tracer)
1194                 goto out_unlock;
1195
1196         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1197                 goto out_unlock;
1198
1199         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1200         /* Do we want this tracer to start on bootup? */
1201         tracing_set_tracer(&global_trace, type->name);
1202         default_bootup_tracer = NULL;
1203         /* disable other selftests, since this will break them. */
1204         tracing_selftest_disabled = true;
1205 #ifdef CONFIG_FTRACE_STARTUP_TEST
1206         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1207                type->name);
1208 #endif
1209
1210  out_unlock:
1211         return ret;
1212 }
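/*
 * Usage sketch (hypothetical plugin, normally living in its own file): a
 * minimal tracer only needs a name plus init/reset callbacks before being
 * handed to register_tracer(), typically from an initcall.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
        return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
        .name   = "example",
        .init   = example_tracer_init,
        .reset  = example_tracer_reset,
};

static __init int init_example_tracer(void)
{
        return register_tracer(&example_tracer);
}
core_initcall(init_example_tracer);
#endif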
1213
1214 void tracing_reset(struct trace_buffer *buf, int cpu)
1215 {
1216         struct ring_buffer *buffer = buf->buffer;
1217
1218         if (!buffer)
1219                 return;
1220
1221         ring_buffer_record_disable(buffer);
1222
1223         /* Make sure all commits have finished */
1224         synchronize_sched();
1225         ring_buffer_reset_cpu(buffer, cpu);
1226
1227         ring_buffer_record_enable(buffer);
1228 }
1229
1230 void tracing_reset_online_cpus(struct trace_buffer *buf)
1231 {
1232         struct ring_buffer *buffer = buf->buffer;
1233         int cpu;
1234
1235         if (!buffer)
1236                 return;
1237
1238         ring_buffer_record_disable(buffer);
1239
1240         /* Make sure all commits have finished */
1241         synchronize_sched();
1242
1243         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1244
1245         for_each_online_cpu(cpu)
1246                 ring_buffer_reset_cpu(buffer, cpu);
1247
1248         ring_buffer_record_enable(buffer);
1249 }
1250
1251 /* Must have trace_types_lock held */
1252 void tracing_reset_all_online_cpus(void)
1253 {
1254         struct trace_array *tr;
1255
1256         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1257                 tracing_reset_online_cpus(&tr->trace_buffer);
1258 #ifdef CONFIG_TRACER_MAX_TRACE
1259                 tracing_reset_online_cpus(&tr->max_buffer);
1260 #endif
1261         }
1262 }
1263
1264 #define SAVED_CMDLINES_DEFAULT 128
1265 #define NO_CMDLINE_MAP UINT_MAX
1266 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1267 struct saved_cmdlines_buffer {
1268         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1269         unsigned *map_cmdline_to_pid;
1270         unsigned cmdline_num;
1271         int cmdline_idx;
1272         char *saved_cmdlines;
1273 };
1274 static struct saved_cmdlines_buffer *savedcmd;
1275
1276 /* temporarily disable recording */
1277 static atomic_t trace_record_cmdline_disabled __read_mostly;
1278
1279 static inline char *get_saved_cmdlines(int idx)
1280 {
1281         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1282 }
1283
1284 static inline void set_cmdline(int idx, const char *cmdline)
1285 {
1286         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1287 }
1288
1289 static int allocate_cmdlines_buffer(unsigned int val,
1290                                     struct saved_cmdlines_buffer *s)
1291 {
1292         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1293                                         GFP_KERNEL);
1294         if (!s->map_cmdline_to_pid)
1295                 return -ENOMEM;
1296
1297         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1298         if (!s->saved_cmdlines) {
1299                 kfree(s->map_cmdline_to_pid);
1300                 return -ENOMEM;
1301         }
1302
1303         s->cmdline_idx = 0;
1304         s->cmdline_num = val;
1305         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1306                sizeof(s->map_pid_to_cmdline));
1307         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1308                val * sizeof(*s->map_cmdline_to_pid));
1309
1310         return 0;
1311 }
1312
1313 static int trace_create_savedcmd(void)
1314 {
1315         int ret;
1316
1317         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1318         if (!savedcmd)
1319                 return -ENOMEM;
1320
1321         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1322         if (ret < 0) {
1323                 kfree(savedcmd);
1324                 savedcmd = NULL;
1325                 return -ENOMEM;
1326         }
1327
1328         return 0;
1329 }
1330
1331 int is_tracing_stopped(void)
1332 {
1333         return global_trace.stop_count;
1334 }
1335
1336 /**
1337  * tracing_start - quick start of the tracer
1338  *
1339  * If tracing is enabled but was stopped by tracing_stop,
1340  * this will start the tracer back up.
1341  */
1342 void tracing_start(void)
1343 {
1344         struct ring_buffer *buffer;
1345         unsigned long flags;
1346
1347         if (tracing_disabled)
1348                 return;
1349
1350         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1351         if (--global_trace.stop_count) {
1352                 if (global_trace.stop_count < 0) {
1353                         /* Someone screwed up their debugging */
1354                         WARN_ON_ONCE(1);
1355                         global_trace.stop_count = 0;
1356                 }
1357                 goto out;
1358         }
1359
1360         /* Prevent the buffers from switching */
1361         arch_spin_lock(&global_trace.max_lock);
1362
1363         buffer = global_trace.trace_buffer.buffer;
1364         if (buffer)
1365                 ring_buffer_record_enable(buffer);
1366
1367 #ifdef CONFIG_TRACER_MAX_TRACE
1368         buffer = global_trace.max_buffer.buffer;
1369         if (buffer)
1370                 ring_buffer_record_enable(buffer);
1371 #endif
1372
1373         arch_spin_unlock(&global_trace.max_lock);
1374
1375  out:
1376         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1377 }
1378
1379 static void tracing_start_tr(struct trace_array *tr)
1380 {
1381         struct ring_buffer *buffer;
1382         unsigned long flags;
1383
1384         if (tracing_disabled)
1385                 return;
1386
1387         /* If global, we need to also start the max tracer */
1388         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1389                 return tracing_start();
1390
1391         raw_spin_lock_irqsave(&tr->start_lock, flags);
1392
1393         if (--tr->stop_count) {
1394                 if (tr->stop_count < 0) {
1395                         /* Someone screwed up their debugging */
1396                         WARN_ON_ONCE(1);
1397                         tr->stop_count = 0;
1398                 }
1399                 goto out;
1400         }
1401
1402         buffer = tr->trace_buffer.buffer;
1403         if (buffer)
1404                 ring_buffer_record_enable(buffer);
1405
1406  out:
1407         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1408 }
1409
1410 /**
1411  * tracing_stop - quick stop of the tracer
1412  *
1413  * Light weight way to stop tracing. Use in conjunction with
1414  * tracing_start.
1415  */
1416 void tracing_stop(void)
1417 {
1418         struct ring_buffer *buffer;
1419         unsigned long flags;
1420
1421         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1422         if (global_trace.stop_count++)
1423                 goto out;
1424
1425         /* Prevent the buffers from switching */
1426         arch_spin_lock(&global_trace.max_lock);
1427
1428         buffer = global_trace.trace_buffer.buffer;
1429         if (buffer)
1430                 ring_buffer_record_disable(buffer);
1431
1432 #ifdef CONFIG_TRACER_MAX_TRACE
1433         buffer = global_trace.max_buffer.buffer;
1434         if (buffer)
1435                 ring_buffer_record_disable(buffer);
1436 #endif
1437
1438         arch_spin_unlock(&global_trace.max_lock);
1439
1440  out:
1441         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1442 }
1443
1444 static void tracing_stop_tr(struct trace_array *tr)
1445 {
1446         struct ring_buffer *buffer;
1447         unsigned long flags;
1448
1449         /* If global, we need to also stop the max tracer */
1450         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1451                 return tracing_stop();
1452
1453         raw_spin_lock_irqsave(&tr->start_lock, flags);
1454         if (tr->stop_count++)
1455                 goto out;
1456
1457         buffer = tr->trace_buffer.buffer;
1458         if (buffer)
1459                 ring_buffer_record_disable(buffer);
1460
1461  out:
1462         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1463 }
1464
1465 void trace_stop_cmdline_recording(void);
1466
1467 static int trace_save_cmdline(struct task_struct *tsk)
1468 {
1469         unsigned pid, idx;
1470
1471         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1472                 return 0;
1473
1474         /*
1475          * It's not the end of the world if we don't get
1476          * the lock, but we also don't want to spin
1477          * nor do we want to disable interrupts,
1478          * so if we miss here, then better luck next time.
1479          */
1480         if (!arch_spin_trylock(&trace_cmdline_lock))
1481                 return 0;
1482
1483         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1484         if (idx == NO_CMDLINE_MAP) {
1485                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1486
1487                 /*
1488                  * Check whether the cmdline buffer at idx has a pid
1489                  * mapped. We are going to overwrite that entry so we
1490                  * need to clear the map_pid_to_cmdline. Otherwise we
1491                  * would read the new comm for the old pid.
1492                  */
1493                 pid = savedcmd->map_cmdline_to_pid[idx];
1494                 if (pid != NO_CMDLINE_MAP)
1495                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1496
1497                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1498                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1499
1500                 savedcmd->cmdline_idx = idx;
1501         }
1502
1503         set_cmdline(idx, tsk->comm);
1504
1505         arch_spin_unlock(&trace_cmdline_lock);
1506
1507         return 1;
1508 }
1509
1510 static void __trace_find_cmdline(int pid, char comm[])
1511 {
1512         unsigned map;
1513
1514         if (!pid) {
1515                 strcpy(comm, "<idle>");
1516                 return;
1517         }
1518
1519         if (WARN_ON_ONCE(pid < 0)) {
1520                 strcpy(comm, "<XXX>");
1521                 return;
1522         }
1523
1524         if (pid > PID_MAX_DEFAULT) {
1525                 strcpy(comm, "<...>");
1526                 return;
1527         }
1528
1529         map = savedcmd->map_pid_to_cmdline[pid];
1530         if (map != NO_CMDLINE_MAP)
1531                 strcpy(comm, get_saved_cmdlines(map));
1532         else
1533                 strcpy(comm, "<...>");
1534 }
1535
1536 void trace_find_cmdline(int pid, char comm[])
1537 {
1538         preempt_disable();
1539         arch_spin_lock(&trace_cmdline_lock);
1540
1541         __trace_find_cmdline(pid, comm);
1542
1543         arch_spin_unlock(&trace_cmdline_lock);
1544         preempt_enable();
1545 }
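/*
 * Usage sketch (hypothetical output path): code that only has a pid, such
 * as when printing a trace entry, recovers the recorded comm like this;
 * unknown pids come back as "<...>".
 */
#if 0
static void example_print_comm(int pid)
{
        char comm[TASK_COMM_LEN];

        trace_find_cmdline(pid, comm);
        pr_debug("pid %d comm %s\n", pid, comm);
}
#endif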
1546
1547 void tracing_record_cmdline(struct task_struct *tsk)
1548 {
1549         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1550                 return;
1551
1552         if (!__this_cpu_read(trace_cmdline_save))
1553                 return;
1554
1555         if (trace_save_cmdline(tsk))
1556                 __this_cpu_write(trace_cmdline_save, false);
1557 }
1558
1559 void
1560 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
1561                              int pc)
1562 {
1563         struct task_struct *tsk = current;
1564
1565         entry->preempt_count            = pc & 0xff;
1566         entry->pid                      = (tsk) ? tsk->pid : 0;
1567         entry->flags =
1568 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1569                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1570 #else
1571                 TRACE_FLAG_IRQS_NOSUPPORT |
1572 #endif
1573                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
1574                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1575                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
1576                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1577 }
1578 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1579
1580 struct ring_buffer_event *
1581 trace_buffer_lock_reserve(struct ring_buffer *buffer,
1582                           int type,
1583                           unsigned long len,
1584                           unsigned long flags, int pc)
1585 {
1586         struct ring_buffer_event *event;
1587
1588         event = ring_buffer_lock_reserve(buffer, len);
1589         if (event != NULL) {
1590                 struct trace_entry *ent = ring_buffer_event_data(event);
1591
1592                 tracing_generic_entry_update(ent, flags, pc);
1593                 ent->type = type;
1594         }
1595
1596         return event;
1597 }
1598
1599 void
1600 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
1601 {
1602         __this_cpu_write(trace_cmdline_save, true);
1603         ring_buffer_unlock_commit(buffer, event);
1604 }
1605
1606 static inline void
1607 __trace_buffer_unlock_commit(struct ring_buffer *buffer,
1608                              struct ring_buffer_event *event,
1609                              unsigned long flags, int pc)
1610 {
1611         __buffer_unlock_commit(buffer, event);
1612
1613         ftrace_trace_stack(buffer, flags, 6, pc);
1614         ftrace_trace_userstack(buffer, flags, pc);
1615 }
1616
1617 void trace_buffer_unlock_commit(struct ring_buffer *buffer,
1618                                 struct ring_buffer_event *event,
1619                                 unsigned long flags, int pc)
1620 {
1621         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1622 }
1623 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1624
1625 static struct ring_buffer *temp_buffer;
1626
1627 struct ring_buffer_event *
1628 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1629                           struct ftrace_event_file *ftrace_file,
1630                           int type, unsigned long len,
1631                           unsigned long flags, int pc)
1632 {
1633         struct ring_buffer_event *entry;
1634
1635         *current_rb = ftrace_file->tr->trace_buffer.buffer;
1636         entry = trace_buffer_lock_reserve(*current_rb,
1637                                          type, len, flags, pc);
1638         /*
1639          * If tracing is off, but we have triggers enabled,
1640          * we still need to look at the event data. Use the temp_buffer
1641          * to store the trace event for the trigger to use. It's recursion
1642          * safe and will not be recorded anywhere.
1643          */
1644         if (!entry && ftrace_file->flags & FTRACE_EVENT_FL_TRIGGER_COND) {
1645                 *current_rb = temp_buffer;
1646                 entry = trace_buffer_lock_reserve(*current_rb,
1647                                                   type, len, flags, pc);
1648         }
1649         return entry;
1650 }
1651 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
1652
1653 struct ring_buffer_event *
1654 trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
1655                                   int type, unsigned long len,
1656                                   unsigned long flags, int pc)
1657 {
1658         *current_rb = global_trace.trace_buffer.buffer;
1659         return trace_buffer_lock_reserve(*current_rb,
1660                                          type, len, flags, pc);
1661 }
1662 EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1663
1664 void trace_current_buffer_unlock_commit(struct ring_buffer *buffer,
1665                                         struct ring_buffer_event *event,
1666                                         unsigned long flags, int pc)
1667 {
1668         __trace_buffer_unlock_commit(buffer, event, flags, pc);
1669 }
1670 EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
1671
1672 void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer,
1673                                      struct ring_buffer_event *event,
1674                                      unsigned long flags, int pc,
1675                                      struct pt_regs *regs)
1676 {
1677         __buffer_unlock_commit(buffer, event);
1678
1679         ftrace_trace_stack_regs(buffer, flags, 0, pc, regs);
1680         ftrace_trace_userstack(buffer, flags, pc);
1681 }
1682 EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1683
1684 void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
1685                                          struct ring_buffer_event *event)
1686 {
1687         ring_buffer_discard_commit(buffer, event);
1688 }
1689 EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1690
1691 void
1692 trace_function(struct trace_array *tr,
1693                unsigned long ip, unsigned long parent_ip, unsigned long flags,
1694                int pc)
1695 {
1696         struct ftrace_event_call *call = &event_function;
1697         struct ring_buffer *buffer = tr->trace_buffer.buffer;
1698         struct ring_buffer_event *event;
1699         struct ftrace_entry *entry;
1700
1701         /* If we are reading the ring buffer, don't trace */
1702         if (unlikely(__this_cpu_read(ftrace_cpu_disabled)))
1703                 return;
1704
1705         event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1706                                           flags, pc);
1707         if (!event)
1708                 return;
1709         entry   = ring_buffer_event_data(event);
1710         entry->ip                       = ip;
1711         entry->parent_ip                = parent_ip;
1712
1713         if (!call_filter_check_discard(call, entry, buffer, event))
1714                 __buffer_unlock_commit(buffer, event);
1715 }
1716
1717 #ifdef CONFIG_STACKTRACE
1718
1719 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
1720 struct ftrace_stack {
1721         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
1722 };
1723
1724 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
1725 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
1726
1727 static void __ftrace_trace_stack(struct ring_buffer *buffer,
1728                                  unsigned long flags,
1729                                  int skip, int pc, struct pt_regs *regs)
1730 {
1731         struct ftrace_event_call *call = &event_kernel_stack;
1732         struct ring_buffer_event *event;
1733         struct stack_entry *entry;
1734         struct stack_trace trace;
1735         int use_stack;
1736         int size = FTRACE_STACK_ENTRIES;
1737
1738         trace.nr_entries        = 0;
1739         trace.skip              = skip;
1740
1741         /*
1742          * Since events can happen in NMIs there's no safe way to
1743          * use the per cpu ftrace_stacks. We reserve it and if an interrupt
1744          * or NMI comes in, it will just have to use the default
1745          * FTRACE_STACK_ENTRIES.
1746          */
1747         preempt_disable_notrace();
1748
1749         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1750         /*
1751          * We don't need any atomic variables, just a barrier.
1752          * If an interrupt comes in, we don't care, because it would
1753          * have exited and put the counter back to what we want.
1754          * We just need a barrier to keep gcc from moving things
1755          * around.
1756          */
1757         barrier();
1758         if (use_stack == 1) {
1759                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
1760                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
1761
1762                 if (regs)
1763                         save_stack_trace_regs(regs, &trace);
1764                 else
1765                         save_stack_trace(&trace);
1766
1767                 if (trace.nr_entries > size)
1768                         size = trace.nr_entries;
1769         } else
1770                 /* From now on, use_stack is a boolean */
1771                 use_stack = 0;
1772
1773         size *= sizeof(unsigned long);
1774
1775         event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1776                                           sizeof(*entry) + size, flags, pc);
1777         if (!event)
1778                 goto out;
1779         entry = ring_buffer_event_data(event);
1780
1781         memset(&entry->caller, 0, size);
1782
1783         if (use_stack)
1784                 memcpy(&entry->caller, trace.entries,
1785                        trace.nr_entries * sizeof(unsigned long));
1786         else {
1787                 trace.max_entries       = FTRACE_STACK_ENTRIES;
1788                 trace.entries           = entry->caller;
1789                 if (regs)
1790                         save_stack_trace_regs(regs, &trace);
1791                 else
1792                         save_stack_trace(&trace);
1793         }
1794
1795         entry->size = trace.nr_entries;
1796
1797         if (!call_filter_check_discard(call, entry, buffer, event))
1798                 __buffer_unlock_commit(buffer, event);
1799
1800  out:
1801         /* Again, don't let gcc optimize things here */
1802         barrier();
1803         __this_cpu_dec(ftrace_stack_reserve);
1804         preempt_enable_notrace();
1805
1806 }
1807
1808 void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
1809                              int skip, int pc, struct pt_regs *regs)
1810 {
1811         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1812                 return;
1813
1814         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
1815 }
1816
1817 void ftrace_trace_stack(struct ring_buffer *buffer, unsigned long flags,
1818                         int skip, int pc)
1819 {
1820         if (!(trace_flags & TRACE_ITER_STACKTRACE))
1821                 return;
1822
1823         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
1824 }
1825
1826 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
1827                    int pc)
1828 {
1829         __ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
1830 }
1831
1832 /**
1833  * trace_dump_stack - record a stack back trace in the trace buffer
1834  * @skip: Number of functions to skip (helper handlers)
1835  */
1836 void trace_dump_stack(int skip)
1837 {
1838         unsigned long flags;
1839
1840         if (tracing_disabled || tracing_selftest_running)
1841                 return;
1842
1843         local_save_flags(flags);
1844
1845         /*
1846          * Skip 3 more; that seems to get us to the caller
1847          * of this function.
1848          */
1849         skip += 3;
1850         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
1851                              flags, skip, preempt_count(), NULL);
1852 }
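
/*
 * Example (sketch): a call site being debugged can record its own
 * backtrace into the ring buffer with
 *
 *        trace_dump_stack(0);
 *
 * and read it back later through the "trace" file.
 */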
1853
1854 static DEFINE_PER_CPU(int, user_stack_count);
1855
1856 void
1857 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
1858 {
1859         struct ftrace_event_call *call = &event_user_stack;
1860         struct ring_buffer_event *event;
1861         struct userstack_entry *entry;
1862         struct stack_trace trace;
1863
1864         if (!(trace_flags & TRACE_ITER_USERSTACKTRACE))
1865                 return;
1866
1867         /*
1868          * NMIs cannot handle page faults, even with fixups.
1869          * Saving the user stack can (and often does) fault.
1870          */
1871         if (unlikely(in_nmi()))
1872                 return;
1873
1874         /*
1875          * prevent recursion, since the user stack tracing may
1876          * trigger other kernel events.
1877          */
1878         preempt_disable();
1879         if (__this_cpu_read(user_stack_count))
1880                 goto out;
1881
1882         __this_cpu_inc(user_stack_count);
1883
1884         event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
1885                                           sizeof(*entry), flags, pc);
1886         if (!event)
1887                 goto out_drop_count;
1888         entry   = ring_buffer_event_data(event);
1889
1890         entry->tgid             = current->tgid;
1891         memset(&entry->caller, 0, sizeof(entry->caller));
1892
1893         trace.nr_entries        = 0;
1894         trace.max_entries       = FTRACE_STACK_ENTRIES;
1895         trace.skip              = 0;
1896         trace.entries           = entry->caller;
1897
1898         save_stack_trace_user(&trace);
1899         if (!call_filter_check_discard(call, entry, buffer, event))
1900                 __buffer_unlock_commit(buffer, event);
1901
1902  out_drop_count:
1903         __this_cpu_dec(user_stack_count);
1904  out:
1905         preempt_enable();
1906 }
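
/*
 * Note (sketch): recording of user stacks is gated by the
 * "userstacktrace" option, e.g.
 *
 *        # echo 1 > /sys/kernel/debug/tracing/options/userstacktrace
 *
 * after which events also log the user-space call chain via the
 * function above.
 */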
1907
1908 #ifdef UNUSED
1909 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
1910 {
1911         ftrace_trace_userstack(tr, flags, preempt_count());
1912 }
1913 #endif /* UNUSED */
1914
1915 #endif /* CONFIG_STACKTRACE */
1916
1917 /* created for use with alloc_percpu */
1918 struct trace_buffer_struct {
1919         char buffer[TRACE_BUF_SIZE];
1920 };
1921
1922 static struct trace_buffer_struct *trace_percpu_buffer;
1923 static struct trace_buffer_struct *trace_percpu_sirq_buffer;
1924 static struct trace_buffer_struct *trace_percpu_irq_buffer;
1925 static struct trace_buffer_struct *trace_percpu_nmi_buffer;
1926
1927 /*
1928  * The buffer used depends on the context. There is a per cpu
1929  * buffer for normal context, softirq context, hard irq context and
1930  * for NMI context. This allows for lockless recording.
1931  *
1932  * Note, if the buffers failed to be allocated, then this returns NULL.
1933  */
1934 static char *get_trace_buf(void)
1935 {
1936         struct trace_buffer_struct *percpu_buffer;
1937
1938         /*
1939          * If we have allocated per cpu buffers, then we do not
1940          * need to do any locking.
1941          */
1942         if (in_nmi())
1943                 percpu_buffer = trace_percpu_nmi_buffer;
1944         else if (in_irq())
1945                 percpu_buffer = trace_percpu_irq_buffer;
1946         else if (in_softirq())
1947                 percpu_buffer = trace_percpu_sirq_buffer;
1948         else
1949                 percpu_buffer = trace_percpu_buffer;
1950
1951         if (!percpu_buffer)
1952                 return NULL;
1953
1954         return this_cpu_ptr(&percpu_buffer->buffer[0]);
1955 }
1956
1957 static int alloc_percpu_trace_buffer(void)
1958 {
1959         struct trace_buffer_struct *buffers;
1960         struct trace_buffer_struct *sirq_buffers;
1961         struct trace_buffer_struct *irq_buffers;
1962         struct trace_buffer_struct *nmi_buffers;
1963
1964         buffers = alloc_percpu(struct trace_buffer_struct);
1965         if (!buffers)
1966                 goto err_warn;
1967
1968         sirq_buffers = alloc_percpu(struct trace_buffer_struct);
1969         if (!sirq_buffers)
1970                 goto err_sirq;
1971
1972         irq_buffers = alloc_percpu(struct trace_buffer_struct);
1973         if (!irq_buffers)
1974                 goto err_irq;
1975
1976         nmi_buffers = alloc_percpu(struct trace_buffer_struct);
1977         if (!nmi_buffers)
1978                 goto err_nmi;
1979
1980         trace_percpu_buffer = buffers;
1981         trace_percpu_sirq_buffer = sirq_buffers;
1982         trace_percpu_irq_buffer = irq_buffers;
1983         trace_percpu_nmi_buffer = nmi_buffers;
1984
1985         return 0;
1986
1987  err_nmi:
1988         free_percpu(irq_buffers);
1989  err_irq:
1990         free_percpu(sirq_buffers);
1991  err_sirq:
1992         free_percpu(buffers);
1993  err_warn:
1994         WARN(1, "Could not allocate percpu trace_printk buffer");
1995         return -ENOMEM;
1996 }
1997
1998 static int buffers_allocated;
1999
2000 void trace_printk_init_buffers(void)
2001 {
2002         if (buffers_allocated)
2003                 return;
2004
2005         if (alloc_percpu_trace_buffer())
2006                 return;
2007
2008         /* trace_printk() is for debug use only. Don't use it in production. */
2009
2010         pr_warning("\n**********************************************************\n");
2011         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2012         pr_warning("**                                                      **\n");
2013         pr_warning("** trace_printk() being used. Allocating extra memory.  **\n");
2014         pr_warning("**                                                      **\n");
2015         pr_warning("** This means that this is a DEBUG kernel and it is     **\n");
2016         pr_warning("** unsafe for production use.                           **\n");
2017         pr_warning("**                                                      **\n");
2018         pr_warning("** If you see this message and you are not debugging    **\n");
2019         pr_warning("** the kernel, report this immediately to your vendor!  **\n");
2020         pr_warning("**                                                      **\n");
2021         pr_warning("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2022         pr_warning("**********************************************************\n");
2023
2024         /* Expand the buffers to their set size */
2025         tracing_update_buffers();
2026
2027         buffers_allocated = 1;
2028
2029         /*
2030          * trace_printk_init_buffers() can be called by modules.
2031          * If that happens, then we need to start cmdline recording
2032          * directly here. If the global_trace.trace_buffer.buffer is
2033          * already allocated here, then this was called by module code.
2034          */
2035         if (global_trace.trace_buffer.buffer)
2036                 tracing_start_cmdline_record();
2037 }
2038
2039 void trace_printk_start_comm(void)
2040 {
2041         /* Start tracing comms if trace printk is set */
2042         if (!buffers_allocated)
2043                 return;
2044         tracing_start_cmdline_record();
2045 }
2046
2047 static void trace_printk_start_stop_comm(int enabled)
2048 {
2049         if (!buffers_allocated)
2050                 return;
2051
2052         if (enabled)
2053                 tracing_start_cmdline_record();
2054         else
2055                 tracing_stop_cmdline_record();
2056 }
2057
2058 /**
2059  * trace_vbprintk - write binary msg to tracing buffer
2060  *
2061  */
2062 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2063 {
2064         struct ftrace_event_call *call = &event_bprint;
2065         struct ring_buffer_event *event;
2066         struct ring_buffer *buffer;
2067         struct trace_array *tr = &global_trace;
2068         struct bprint_entry *entry;
2069         unsigned long flags;
2070         char *tbuffer;
2071         int len = 0, size, pc;
2072
2073         if (unlikely(tracing_selftest_running || tracing_disabled))
2074                 return 0;
2075
2076         /* Don't pollute graph traces with trace_vprintk internals */
2077         pause_graph_tracing();
2078
2079         pc = preempt_count();
2080         preempt_disable_notrace();
2081
2082         tbuffer = get_trace_buf();
2083         if (!tbuffer) {
2084                 len = 0;
2085                 goto out;
2086         }
2087
2088         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2089
2090         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2091                 goto out;
2092
2093         local_save_flags(flags);
2094         size = sizeof(*entry) + sizeof(u32) * len;
2095         buffer = tr->trace_buffer.buffer;
2096         event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2097                                           flags, pc);
2098         if (!event)
2099                 goto out;
2100         entry = ring_buffer_event_data(event);
2101         entry->ip                       = ip;
2102         entry->fmt                      = fmt;
2103
2104         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2105         if (!call_filter_check_discard(call, entry, buffer, event)) {
2106                 __buffer_unlock_commit(buffer, event);
2107                 ftrace_trace_stack(buffer, flags, 6, pc);
2108         }
2109
2110 out:
2111         preempt_enable_notrace();
2112         unpause_graph_tracing();
2113
2114         return len;
2115 }
2116 EXPORT_SYMBOL_GPL(trace_vbprintk);
2117
2118 static int
2119 __trace_array_vprintk(struct ring_buffer *buffer,
2120                       unsigned long ip, const char *fmt, va_list args)
2121 {
2122         struct ftrace_event_call *call = &event_print;
2123         struct ring_buffer_event *event;
2124         int len = 0, size, pc;
2125         struct print_entry *entry;
2126         unsigned long flags;
2127         char *tbuffer;
2128
2129         if (tracing_disabled || tracing_selftest_running)
2130                 return 0;
2131
2132         /* Don't pollute graph traces with trace_vprintk internals */
2133         pause_graph_tracing();
2134
2135         pc = preempt_count();
2136         preempt_disable_notrace();
2137
2138
2139         tbuffer = get_trace_buf();
2140         if (!tbuffer) {
2141                 len = 0;
2142                 goto out;
2143         }
2144
2145         len = vsnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2146         if (len > TRACE_BUF_SIZE)
2147                 goto out;
2148
2149         local_save_flags(flags);
2150         size = sizeof(*entry) + len + 1;
2151         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2152                                           flags, pc);
2153         if (!event)
2154                 goto out;
2155         entry = ring_buffer_event_data(event);
2156         entry->ip = ip;
2157
2158         memcpy(&entry->buf, tbuffer, len);
2159         entry->buf[len] = '\0';
2160         if (!call_filter_check_discard(call, entry, buffer, event)) {
2161                 __buffer_unlock_commit(buffer, event);
2162                 ftrace_trace_stack(buffer, flags, 6, pc);
2163         }
2164  out:
2165         preempt_enable_notrace();
2166         unpause_graph_tracing();
2167
2168         return len;
2169 }
2170
2171 int trace_array_vprintk(struct trace_array *tr,
2172                         unsigned long ip, const char *fmt, va_list args)
2173 {
2174         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2175 }
2176
2177 int trace_array_printk(struct trace_array *tr,
2178                        unsigned long ip, const char *fmt, ...)
2179 {
2180         int ret;
2181         va_list ap;
2182
2183         if (!(trace_flags & TRACE_ITER_PRINTK))
2184                 return 0;
2185
2186         va_start(ap, fmt);
2187         ret = trace_array_vprintk(tr, ip, fmt, ap);
2188         va_end(ap);
2189         return ret;
2190 }
2191
2192 int trace_array_printk_buf(struct ring_buffer *buffer,
2193                            unsigned long ip, const char *fmt, ...)
2194 {
2195         int ret;
2196         va_list ap;
2197
2198         if (!(trace_flags & TRACE_ITER_PRINTK))
2199                 return 0;
2200
2201         va_start(ap, fmt);
2202         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2203         va_end(ap);
2204         return ret;
2205 }
2206
2207 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2208 {
2209         return trace_array_vprintk(&global_trace, ip, fmt, args);
2210 }
2211 EXPORT_SYMBOL_GPL(trace_vprintk);
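
/*
 * Example (sketch): the vprintk helpers above back trace_printk(), so a
 * debugging site can drop formatted messages straight into the ring
 * buffer:
 *
 *        trace_printk("processed %d packets\n", count);
 *
 * The bprint path stores the format pointer plus the raw arguments for
 * decoding at read time, while the print path falls back to storing the
 * fully formatted string.
 */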
2212
2213 static void trace_iterator_increment(struct trace_iterator *iter)
2214 {
2215         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2216
2217         iter->idx++;
2218         if (buf_iter)
2219                 ring_buffer_read(buf_iter, NULL);
2220 }
2221
2222 static struct trace_entry *
2223 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2224                 unsigned long *lost_events)
2225 {
2226         struct ring_buffer_event *event;
2227         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2228
2229         if (buf_iter)
2230                 event = ring_buffer_iter_peek(buf_iter, ts);
2231         else
2232                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2233                                          lost_events);
2234
2235         if (event) {
2236                 iter->ent_size = ring_buffer_event_length(event);
2237                 return ring_buffer_event_data(event);
2238         }
2239         iter->ent_size = 0;
2240         return NULL;
2241 }
2242
2243 static struct trace_entry *
2244 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2245                   unsigned long *missing_events, u64 *ent_ts)
2246 {
2247         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2248         struct trace_entry *ent, *next = NULL;
2249         unsigned long lost_events = 0, next_lost = 0;
2250         int cpu_file = iter->cpu_file;
2251         u64 next_ts = 0, ts;
2252         int next_cpu = -1;
2253         int next_size = 0;
2254         int cpu;
2255
2256         /*
2257          * If we are in a per_cpu trace file, don't bother iterating over
2258          * all cpus; just peek at that cpu directly.
2259          */
2260         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2261                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2262                         return NULL;
2263                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2264                 if (ent_cpu)
2265                         *ent_cpu = cpu_file;
2266
2267                 return ent;
2268         }
2269
2270         for_each_tracing_cpu(cpu) {
2271
2272                 if (ring_buffer_empty_cpu(buffer, cpu))
2273                         continue;
2274
2275                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
2276
2277                 /*
2278                  * Pick the entry with the smallest timestamp:
2279                  */
2280                 if (ent && (!next || ts < next_ts)) {
2281                         next = ent;
2282                         next_cpu = cpu;
2283                         next_ts = ts;
2284                         next_lost = lost_events;
2285                         next_size = iter->ent_size;
2286                 }
2287         }
2288
2289         iter->ent_size = next_size;
2290
2291         if (ent_cpu)
2292                 *ent_cpu = next_cpu;
2293
2294         if (ent_ts)
2295                 *ent_ts = next_ts;
2296
2297         if (missing_events)
2298                 *missing_events = next_lost;
2299
2300         return next;
2301 }
2302
2303 /* Find the next real entry, without updating the iterator itself */
2304 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
2305                                           int *ent_cpu, u64 *ent_ts)
2306 {
2307         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
2308 }
2309
2310 /* Find the next real entry, and increment the iterator to the next entry */
2311 void *trace_find_next_entry_inc(struct trace_iterator *iter)
2312 {
2313         iter->ent = __find_next_entry(iter, &iter->cpu,
2314                                       &iter->lost_events, &iter->ts);
2315
2316         if (iter->ent)
2317                 trace_iterator_increment(iter);
2318
2319         return iter->ent ? iter : NULL;
2320 }
2321
2322 static void trace_consume(struct trace_iterator *iter)
2323 {
2324         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
2325                             &iter->lost_events);
2326 }
2327
2328 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
2329 {
2330         struct trace_iterator *iter = m->private;
2331         int i = (int)*pos;
2332         void *ent;
2333
2334         WARN_ON_ONCE(iter->leftover);
2335
2336         (*pos)++;
2337
2338         /* can't go backwards */
2339         if (iter->idx > i)
2340                 return NULL;
2341
2342         if (iter->idx < 0)
2343                 ent = trace_find_next_entry_inc(iter);
2344         else
2345                 ent = iter;
2346
2347         while (ent && iter->idx < i)
2348                 ent = trace_find_next_entry_inc(iter);
2349
2350         iter->pos = *pos;
2351
2352         return ent;
2353 }
2354
2355 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
2356 {
2357         struct ring_buffer_event *event;
2358         struct ring_buffer_iter *buf_iter;
2359         unsigned long entries = 0;
2360         u64 ts;
2361
2362         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
2363
2364         buf_iter = trace_buffer_iter(iter, cpu);
2365         if (!buf_iter)
2366                 return;
2367
2368         ring_buffer_iter_reset(buf_iter);
2369
2370         /*
2371          * We could have the case with the max latency tracers
2372          * that a reset never took place on a cpu. This is evidenced
2373          * by the timestamp being before the start of the buffer.
2374          */
2375         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
2376                 if (ts >= iter->trace_buffer->time_start)
2377                         break;
2378                 entries++;
2379                 ring_buffer_read(buf_iter, NULL);
2380         }
2381
2382         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
2383 }
2384
2385 /*
2386  * The current tracer is copied to avoid taking a global
2387  * lock all around.
2388  */
2389 static void *s_start(struct seq_file *m, loff_t *pos)
2390 {
2391         struct trace_iterator *iter = m->private;
2392         struct trace_array *tr = iter->tr;
2393         int cpu_file = iter->cpu_file;
2394         void *p = NULL;
2395         loff_t l = 0;
2396         int cpu;
2397
2398         /*
2399          * copy the tracer to avoid using a global lock all around.
2400          * iter->trace is a copy of current_trace, the pointer to the
2401          * name may be used instead of a strcmp(), as iter->trace->name
2402          * will point to the same string as current_trace->name.
2403          */
2404         mutex_lock(&trace_types_lock);
2405         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
2406                 *iter->trace = *tr->current_trace;
2407         mutex_unlock(&trace_types_lock);
2408
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410         if (iter->snapshot && iter->trace->use_max_tr)
2411                 return ERR_PTR(-EBUSY);
2412 #endif
2413
2414         if (!iter->snapshot)
2415                 atomic_inc(&trace_record_cmdline_disabled);
2416
2417         if (*pos != iter->pos) {
2418                 iter->ent = NULL;
2419                 iter->cpu = 0;
2420                 iter->idx = -1;
2421
2422                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
2423                         for_each_tracing_cpu(cpu)
2424                                 tracing_iter_reset(iter, cpu);
2425                 } else
2426                         tracing_iter_reset(iter, cpu_file);
2427
2428                 iter->leftover = 0;
2429                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
2430                         ;
2431
2432         } else {
2433                 /*
2434                  * If we overflowed the seq_file before, then we want
2435                  * to just reuse the trace_seq buffer again.
2436                  */
2437                 if (iter->leftover)
2438                         p = iter;
2439                 else {
2440                         l = *pos - 1;
2441                         p = s_next(m, p, &l);
2442                 }
2443         }
2444
2445         trace_event_read_lock();
2446         trace_access_lock(cpu_file);
2447         return p;
2448 }
2449
2450 static void s_stop(struct seq_file *m, void *p)
2451 {
2452         struct trace_iterator *iter = m->private;
2453
2454 #ifdef CONFIG_TRACER_MAX_TRACE
2455         if (iter->snapshot && iter->trace->use_max_tr)
2456                 return;
2457 #endif
2458
2459         if (!iter->snapshot)
2460                 atomic_dec(&trace_record_cmdline_disabled);
2461
2462         trace_access_unlock(iter->cpu_file);
2463         trace_event_read_unlock();
2464 }
2465
2466 static void
2467 get_total_entries(struct trace_buffer *buf,
2468                   unsigned long *total, unsigned long *entries)
2469 {
2470         unsigned long count;
2471         int cpu;
2472
2473         *total = 0;
2474         *entries = 0;
2475
2476         for_each_tracing_cpu(cpu) {
2477                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
2478                 /*
2479                  * If this buffer has skipped entries, then we hold all
2480                  * entries for the trace and we need to ignore the
2481                  * ones before the time stamp.
2482                  */
2483                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
2484                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
2485                         /* total is the same as the entries */
2486                         *total += count;
2487                 } else
2488                         *total += count +
2489                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
2490                 *entries += count;
2491         }
2492 }
2493
2494 static void print_lat_help_header(struct seq_file *m)
2495 {
2496         seq_puts(m, "#                  _------=> CPU#            \n");
2497         seq_puts(m, "#                 / _-----=> irqs-off        \n");
2498         seq_puts(m, "#                | / _----=> need-resched    \n");
2499         seq_puts(m, "#                || / _---=> hardirq/softirq \n");
2500         seq_puts(m, "#                ||| / _--=> preempt-depth   \n");
2501         seq_puts(m, "#                |||| /     delay             \n");
2502         seq_puts(m, "#  cmd     pid   ||||| time  |   caller      \n");
2503         seq_puts(m, "#     \\   /      |||||  \\    |   /           \n");
2504 }
2505
2506 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
2507 {
2508         unsigned long total;
2509         unsigned long entries;
2510
2511         get_total_entries(buf, &total, &entries);
2512         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
2513                    entries, total, num_online_cpus());
2514         seq_puts(m, "#\n");
2515 }
2516
2517 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
2518 {
2519         print_event_info(buf, m);
2520         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n");
2521         seq_puts(m, "#              | |       |          |         |\n");
2522 }
2523
2524 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
2525 {
2526         print_event_info(buf, m);
2527         seq_puts(m, "#                              _-----=> irqs-off\n");
2528         seq_puts(m, "#                             / _----=> need-resched\n");
2529         seq_puts(m, "#                            | / _---=> hardirq/softirq\n");
2530         seq_puts(m, "#                            || / _--=> preempt-depth\n");
2531         seq_puts(m, "#                            ||| /     delay\n");
2532         seq_puts(m, "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n");
2533         seq_puts(m, "#              | |       |   ||||       |         |\n");
2534 }
2535
2536 void
2537 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
2538 {
2539         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2540         struct trace_buffer *buf = iter->trace_buffer;
2541         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
2542         struct tracer *type = iter->trace;
2543         unsigned long entries;
2544         unsigned long total;
2545         const char *name = "preemption";
2546
2547         name = type->name;
2548
2549         get_total_entries(buf, &total, &entries);
2550
2551         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
2552                    name, UTS_RELEASE);
2553         seq_puts(m, "# -----------------------------------"
2554                  "---------------------------------\n");
2555         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
2556                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
2557                    nsecs_to_usecs(data->saved_latency),
2558                    entries,
2559                    total,
2560                    buf->cpu,
2561 #if defined(CONFIG_PREEMPT_NONE)
2562                    "server",
2563 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
2564                    "desktop",
2565 #elif defined(CONFIG_PREEMPT)
2566                    "preempt",
2567 #else
2568                    "unknown",
2569 #endif
2570                    /* These are reserved for later use */
2571                    0, 0, 0, 0);
2572 #ifdef CONFIG_SMP
2573         seq_printf(m, " #P:%d)\n", num_online_cpus());
2574 #else
2575         seq_puts(m, ")\n");
2576 #endif
2577         seq_puts(m, "#    -----------------\n");
2578         seq_printf(m, "#    | task: %.16s-%d "
2579                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
2580                    data->comm, data->pid,
2581                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
2582                    data->policy, data->rt_priority);
2583         seq_puts(m, "#    -----------------\n");
2584
2585         if (data->critical_start) {
2586                 seq_puts(m, "#  => started at: ");
2587                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
2588                 trace_print_seq(m, &iter->seq);
2589                 seq_puts(m, "\n#  => ended at:   ");
2590                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
2591                 trace_print_seq(m, &iter->seq);
2592                 seq_puts(m, "\n#\n");
2593         }
2594
2595         seq_puts(m, "#\n");
2596 }
2597
2598 static void test_cpu_buff_start(struct trace_iterator *iter)
2599 {
2600         struct trace_seq *s = &iter->seq;
2601
2602         if (!(trace_flags & TRACE_ITER_ANNOTATE))
2603                 return;
2604
2605         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
2606                 return;
2607
2608         if (cpumask_test_cpu(iter->cpu, iter->started))
2609                 return;
2610
2611         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2612                 return;
2613
2614         cpumask_set_cpu(iter->cpu, iter->started);
2615
2616         /* Don't print started cpu buffer for the first entry of the trace */
2617         if (iter->idx > 1)
2618                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
2619                                 iter->cpu);
2620 }
2621
2622 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2623 {
2624         struct trace_seq *s = &iter->seq;
2625         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
2626         struct trace_entry *entry;
2627         struct trace_event *event;
2628
2629         entry = iter->ent;
2630
2631         test_cpu_buff_start(iter);
2632
2633         event = ftrace_find_event(entry->type);
2634
2635         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2636                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2637                         if (!trace_print_lat_context(iter))
2638                                 goto partial;
2639                 } else {
2640                         if (!trace_print_context(iter))
2641                                 goto partial;
2642                 }
2643         }
2644
2645         if (event)
2646                 return event->funcs->trace(iter, sym_flags, event);
2647
2648         if (!trace_seq_printf(s, "Unknown type %d\n", entry->type))
2649                 goto partial;
2650
2651         return TRACE_TYPE_HANDLED;
2652 partial:
2653         return TRACE_TYPE_PARTIAL_LINE;
2654 }
2655
2656 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
2657 {
2658         struct trace_seq *s = &iter->seq;
2659         struct trace_entry *entry;
2660         struct trace_event *event;
2661
2662         entry = iter->ent;
2663
2664         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2665                 if (!trace_seq_printf(s, "%d %d %llu ",
2666                                       entry->pid, iter->cpu, iter->ts))
2667                         goto partial;
2668         }
2669
2670         event = ftrace_find_event(entry->type);
2671         if (event)
2672                 return event->funcs->raw(iter, 0, event);
2673
2674         if (!trace_seq_printf(s, "%d ?\n", entry->type))
2675                 goto partial;
2676
2677         return TRACE_TYPE_HANDLED;
2678 partial:
2679         return TRACE_TYPE_PARTIAL_LINE;
2680 }
2681
2682 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2683 {
2684         struct trace_seq *s = &iter->seq;
2685         unsigned char newline = '\n';
2686         struct trace_entry *entry;
2687         struct trace_event *event;
2688
2689         entry = iter->ent;
2690
2691         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2692                 SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
2693                 SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
2694                 SEQ_PUT_HEX_FIELD_RET(s, iter->ts);
2695         }
2696
2697         event = ftrace_find_event(entry->type);
2698         if (event) {
2699                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
2700                 if (ret != TRACE_TYPE_HANDLED)
2701                         return ret;
2702         }
2703
2704         SEQ_PUT_FIELD_RET(s, newline);
2705
2706         return TRACE_TYPE_HANDLED;
2707 }
2708
2709 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
2710 {
2711         struct trace_seq *s = &iter->seq;
2712         struct trace_entry *entry;
2713         struct trace_event *event;
2714
2715         entry = iter->ent;
2716
2717         if (trace_flags & TRACE_ITER_CONTEXT_INFO) {
2718                 SEQ_PUT_FIELD_RET(s, entry->pid);
2719                 SEQ_PUT_FIELD_RET(s, iter->cpu);
2720                 SEQ_PUT_FIELD_RET(s, iter->ts);
2721         }
2722
2723         event = ftrace_find_event(entry->type);
2724         return event ? event->funcs->binary(iter, 0, event) :
2725                 TRACE_TYPE_HANDLED;
2726 }
2727
2728 int trace_empty(struct trace_iterator *iter)
2729 {
2730         struct ring_buffer_iter *buf_iter;
2731         int cpu;
2732
2733         /* If we are looking at one CPU buffer, only check that one */
2734         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2735                 cpu = iter->cpu_file;
2736                 buf_iter = trace_buffer_iter(iter, cpu);
2737                 if (buf_iter) {
2738                         if (!ring_buffer_iter_empty(buf_iter))
2739                                 return 0;
2740                 } else {
2741                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2742                                 return 0;
2743                 }
2744                 return 1;
2745         }
2746
2747         for_each_tracing_cpu(cpu) {
2748                 buf_iter = trace_buffer_iter(iter, cpu);
2749                 if (buf_iter) {
2750                         if (!ring_buffer_iter_empty(buf_iter))
2751                                 return 0;
2752                 } else {
2753                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2754                                 return 0;
2755                 }
2756         }
2757
2758         return 1;
2759 }
2760
2761 /*  Called with trace_event_read_lock() held. */
2762 enum print_line_t print_trace_line(struct trace_iterator *iter)
2763 {
2764         enum print_line_t ret;
2765
2766         if (iter->lost_events &&
2767             !trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
2768                                  iter->cpu, iter->lost_events))
2769                 return TRACE_TYPE_PARTIAL_LINE;
2770
2771         if (iter->trace && iter->trace->print_line) {
2772                 ret = iter->trace->print_line(iter);
2773                 if (ret != TRACE_TYPE_UNHANDLED)
2774                         return ret;
2775         }
2776
2777         if (iter->ent->type == TRACE_BPUTS &&
2778                         trace_flags & TRACE_ITER_PRINTK &&
2779                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2780                 return trace_print_bputs_msg_only(iter);
2781
2782         if (iter->ent->type == TRACE_BPRINT &&
2783                         trace_flags & TRACE_ITER_PRINTK &&
2784                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2785                 return trace_print_bprintk_msg_only(iter);
2786
2787         if (iter->ent->type == TRACE_PRINT &&
2788                         trace_flags & TRACE_ITER_PRINTK &&
2789                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2790                 return trace_print_printk_msg_only(iter);
2791
2792         if (trace_flags & TRACE_ITER_BIN)
2793                 return print_bin_fmt(iter);
2794
2795         if (trace_flags & TRACE_ITER_HEX)
2796                 return print_hex_fmt(iter);
2797
2798         if (trace_flags & TRACE_ITER_RAW)
2799                 return print_raw_fmt(iter);
2800
2801         return print_trace_fmt(iter);
2802 }
2803
2804 void trace_latency_header(struct seq_file *m)
2805 {
2806         struct trace_iterator *iter = m->private;
2807
2808         /* print nothing if the buffers are empty */
2809         if (trace_empty(iter))
2810                 return;
2811
2812         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
2813                 print_trace_header(m, iter);
2814
2815         if (!(trace_flags & TRACE_ITER_VERBOSE))
2816                 print_lat_help_header(m);
2817 }
2818
2819 void trace_default_header(struct seq_file *m)
2820 {
2821         struct trace_iterator *iter = m->private;
2822
2823         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
2824                 return;
2825
2826         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
2827                 /* print nothing if the buffers are empty */
2828                 if (trace_empty(iter))
2829                         return;
2830                 print_trace_header(m, iter);
2831                 if (!(trace_flags & TRACE_ITER_VERBOSE))
2832                         print_lat_help_header(m);
2833         } else {
2834                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
2835                         if (trace_flags & TRACE_ITER_IRQ_INFO)
2836                                 print_func_help_header_irq(iter->trace_buffer, m);
2837                         else
2838                                 print_func_help_header(iter->trace_buffer, m);
2839                 }
2840         }
2841 }
2842
2843 static void test_ftrace_alive(struct seq_file *m)
2844 {
2845         if (!ftrace_is_dead())
2846                 return;
2847         seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
2848         seq_printf(m, "#          MAY BE MISSING FUNCTION EVENTS\n");
2849 }
2850
2851 #ifdef CONFIG_TRACER_MAX_TRACE
2852 static void show_snapshot_main_help(struct seq_file *m)
2853 {
2854         seq_printf(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n");
2855         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2856         seq_printf(m, "#                      Takes a snapshot of the main buffer.\n");
2857         seq_printf(m, "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n");
2858         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2859         seq_printf(m, "#                       is not a '0' or '1')\n");
2860 }
2861
2862 static void show_snapshot_percpu_help(struct seq_file *m)
2863 {
2864         seq_printf(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2865 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2866         seq_printf(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n");
2867         seq_printf(m, "#                      Takes a snapshot of the main buffer for this cpu.\n");
2868 #else
2869         seq_printf(m, "# echo 1 > snapshot : Not supported with this kernel.\n");
2870         seq_printf(m, "#                     Must use main snapshot file to allocate.\n");
2871 #endif
2872         seq_printf(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n");
2873         seq_printf(m, "#                      (Doesn't have to be '2'; works with any number that\n");
2874         seq_printf(m, "#                       is not a '0' or '1')\n");
2875 }
2876
2877 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
2878 {
2879         if (iter->tr->allocated_snapshot)
2880                 seq_printf(m, "#\n# * Snapshot is allocated *\n#\n");
2881         else
2882                 seq_printf(m, "#\n# * Snapshot is freed *\n#\n");
2883
2884         seq_printf(m, "# Snapshot commands:\n");
2885         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
2886                 show_snapshot_main_help(m);
2887         else
2888                 show_snapshot_percpu_help(m);
2889 }
2890 #else
2891 /* Should never be called */
2892 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
2893 #endif
2894
2895 static int s_show(struct seq_file *m, void *v)
2896 {
2897         struct trace_iterator *iter = v;
2898         int ret;
2899
2900         if (iter->ent == NULL) {
2901                 if (iter->tr) {
2902                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
2903                         seq_puts(m, "#\n");
2904                         test_ftrace_alive(m);
2905                 }
2906                 if (iter->snapshot && trace_empty(iter))
2907                         print_snapshot_help(m, iter);
2908                 else if (iter->trace && iter->trace->print_header)
2909                         iter->trace->print_header(m);
2910                 else
2911                         trace_default_header(m);
2912
2913         } else if (iter->leftover) {
2914                 /*
2915                  * If we filled the seq_file buffer earlier, we
2916                  * want to just show it now.
2917                  */
2918                 ret = trace_print_seq(m, &iter->seq);
2919
2920                 /* ret should this time be zero, but you never know */
2921                 iter->leftover = ret;
2922
2923         } else {
2924                 print_trace_line(iter);
2925                 ret = trace_print_seq(m, &iter->seq);
2926                 /*
2927                  * If we overflow the seq_file buffer, then it will
2928                  * ask us for this data again at start up.
2929                  * Use that instead.
2930                  *  ret is 0 if seq_file write succeeded.
2931                  *        -1 otherwise.
2932                  */
2933                 iter->leftover = ret;
2934         }
2935
2936         return 0;
2937 }
2938
2939 /*
2940  * Should be used after trace_array_get(); trace_types_lock
2941  * ensures that i_cdev was already initialized.
2942  */
2943 static inline int tracing_get_cpu(struct inode *inode)
2944 {
2945         if (inode->i_cdev) /* See trace_create_cpu_file() */
2946                 return (long)inode->i_cdev - 1;
2947         return RING_BUFFER_ALL_CPUS;
2948 }
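
/*
 * Note (sketch): the per-cpu files store "cpu + 1" in i_cdev when they
 * are created, so the decoding above is
 *
 *        i_cdev == NULL             ->  RING_BUFFER_ALL_CPUS
 *        i_cdev == (void *)(c + 1)  ->  cpu c
 */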
2949
2950 static const struct seq_operations tracer_seq_ops = {
2951         .start          = s_start,
2952         .next           = s_next,
2953         .stop           = s_stop,
2954         .show           = s_show,
2955 };
2956
2957 static struct trace_iterator *
2958 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
2959 {
2960         struct trace_array *tr = inode->i_private;
2961         struct trace_iterator *iter;
2962         int cpu;
2963
2964         if (tracing_disabled)
2965                 return ERR_PTR(-ENODEV);
2966
2967         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
2968         if (!iter)
2969                 return ERR_PTR(-ENOMEM);
2970
2971         iter->buffer_iter = kzalloc(sizeof(*iter->buffer_iter) * num_possible_cpus(),
2972                                     GFP_KERNEL);
2973         if (!iter->buffer_iter)
2974                 goto release;
2975
2976         /*
2977          * We make a copy of the current tracer to avoid concurrent
2978          * changes on it while we are reading.
2979          */
2980         mutex_lock(&trace_types_lock);
2981         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
2982         if (!iter->trace)
2983                 goto fail;
2984
2985         *iter->trace = *tr->current_trace;
2986
2987         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
2988                 goto fail;
2989
2990         iter->tr = tr;
2991
2992 #ifdef CONFIG_TRACER_MAX_TRACE
2993         /* Currently only the top directory has a snapshot */
2994         if (tr->current_trace->print_max || snapshot)
2995                 iter->trace_buffer = &tr->max_buffer;
2996         else
2997 #endif
2998                 iter->trace_buffer = &tr->trace_buffer;
2999         iter->snapshot = snapshot;
3000         iter->pos = -1;
3001         iter->cpu_file = tracing_get_cpu(inode);
3002         mutex_init(&iter->mutex);
3003
3004         /* Notify the tracer early; before we stop tracing. */
3005         if (iter->trace && iter->trace->open)
3006                 iter->trace->open(iter);
3007
3008         /* Annotate start of buffers if we had overruns */
3009         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3010                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3011
3012         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3013         if (trace_clocks[tr->clock_id].in_ns)
3014                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3015
3016         /* stop the trace while dumping if we are not opening "snapshot" */
3017         if (!iter->snapshot)
3018                 tracing_stop_tr(tr);
3019
3020         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3021                 for_each_tracing_cpu(cpu) {
3022                         iter->buffer_iter[cpu] =
3023                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3024                 }
3025                 ring_buffer_read_prepare_sync();
3026                 for_each_tracing_cpu(cpu) {
3027                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3028                         tracing_iter_reset(iter, cpu);
3029                 }
3030         } else {
3031                 cpu = iter->cpu_file;
3032                 iter->buffer_iter[cpu] =
3033                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3034                 ring_buffer_read_prepare_sync();
3035                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3036                 tracing_iter_reset(iter, cpu);
3037         }
3038
3039         mutex_unlock(&trace_types_lock);
3040
3041         return iter;
3042
3043  fail:
3044         mutex_unlock(&trace_types_lock);
3045         kfree(iter->trace);
3046         kfree(iter->buffer_iter);
3047 release:
3048         seq_release_private(inode, file);
3049         return ERR_PTR(-ENOMEM);
3050 }
3051
3052 int tracing_open_generic(struct inode *inode, struct file *filp)
3053 {
3054         if (tracing_disabled)
3055                 return -ENODEV;
3056
3057         filp->private_data = inode->i_private;
3058         return 0;
3059 }
3060
3061 bool tracing_is_disabled(void)
3062 {
3063         return (tracing_disabled) ? true : false;
3064 }
3065
3066 /*
3067  * Open and update trace_array ref count.
3068  * Must have the current trace_array passed to it.
3069  */
3070 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3071 {
3072         struct trace_array *tr = inode->i_private;
3073
3074         if (tracing_disabled)
3075                 return -ENODEV;
3076
3077         if (trace_array_get(tr) < 0)
3078                 return -ENODEV;
3079
3080         filp->private_data = inode->i_private;
3081
3082         return 0;
3083 }
3084
3085 static int tracing_release(struct inode *inode, struct file *file)
3086 {
3087         struct trace_array *tr = inode->i_private;
3088         struct seq_file *m = file->private_data;
3089         struct trace_iterator *iter;
3090         int cpu;
3091
3092         if (!(file->f_mode & FMODE_READ)) {
3093                 trace_array_put(tr);
3094                 return 0;
3095         }
3096
3097         /* Writes do not use seq_file */
3098         iter = m->private;
3099         mutex_lock(&trace_types_lock);
3100
3101         for_each_tracing_cpu(cpu) {
3102                 if (iter->buffer_iter[cpu])
3103                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3104         }
3105
3106         if (iter->trace && iter->trace->close)
3107                 iter->trace->close(iter);
3108
3109         if (!iter->snapshot)
3110                 /* reenable tracing if it was previously enabled */
3111                 tracing_start_tr(tr);
3112
3113         __trace_array_put(tr);
3114
3115         mutex_unlock(&trace_types_lock);
3116
3117         mutex_destroy(&iter->mutex);
3118         free_cpumask_var(iter->started);
3119         kfree(iter->trace);
3120         kfree(iter->buffer_iter);
3121         seq_release_private(inode, file);
3122
3123         return 0;
3124 }
3125
3126 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3127 {
3128         struct trace_array *tr = inode->i_private;
3129
3130         trace_array_put(tr);
3131         return 0;
3132 }
3133
3134 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3135 {
3136         struct trace_array *tr = inode->i_private;
3137
3138         trace_array_put(tr);
3139
3140         return single_release(inode, file);
3141 }
3142
3143 static int tracing_open(struct inode *inode, struct file *file)
3144 {
3145         struct trace_array *tr = inode->i_private;
3146         struct trace_iterator *iter;
3147         int ret = 0;
3148
3149         if (trace_array_get(tr) < 0)
3150                 return -ENODEV;
3151
3152         /* If this file was open for write, then erase contents */
3153         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3154                 int cpu = tracing_get_cpu(inode);
3155
3156                 if (cpu == RING_BUFFER_ALL_CPUS)
3157                         tracing_reset_online_cpus(&tr->trace_buffer);
3158                 else
3159                         tracing_reset(&tr->trace_buffer, cpu);
3160         }
3161
3162         if (file->f_mode & FMODE_READ) {
3163                 iter = __tracing_open(inode, file, false);
3164                 if (IS_ERR(iter))
3165                         ret = PTR_ERR(iter);
3166                 else if (trace_flags & TRACE_ITER_LATENCY_FMT)
3167                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3168         }
3169
3170         if (ret < 0)
3171                 trace_array_put(tr);
3172
3173         return ret;
3174 }
3175
3176 /*
3177  * Some tracers are not suitable for instance buffers.
3178  * A tracer is always available for the global array (toplevel)
3179  * or if it explicitly states that it is.
3180  */
3181 static bool
3182 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3183 {
3184         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3185 }
3186
3187 /* Find the next tracer that this trace array may use */
3188 static struct tracer *
3189 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3190 {
3191         while (t && !trace_ok_for_array(t, tr))
3192                 t = t->next;
3193
3194         return t;
3195 }
3196
3197 static void *
3198 t_next(struct seq_file *m, void *v, loff_t *pos)
3199 {
3200         struct trace_array *tr = m->private;
3201         struct tracer *t = v;
3202
3203         (*pos)++;
3204
3205         if (t)
3206                 t = get_tracer_for_array(tr, t->next);
3207
3208         return t;
3209 }
3210
3211 static void *t_start(struct seq_file *m, loff_t *pos)
3212 {
3213         struct trace_array *tr = m->private;
3214         struct tracer *t;
3215         loff_t l = 0;
3216
3217         mutex_lock(&trace_types_lock);
3218
3219         t = get_tracer_for_array(tr, trace_types);
3220         for (; t && l < *pos; t = t_next(m, t, &l))
3221                         ;
3222
3223         return t;
3224 }
3225
3226 static void t_stop(struct seq_file *m, void *p)
3227 {
3228         mutex_unlock(&trace_types_lock);
3229 }
3230
3231 static int t_show(struct seq_file *m, void *v)
3232 {
3233         struct tracer *t = v;
3234
3235         if (!t)
3236                 return 0;
3237
3238         seq_printf(m, "%s", t->name);
3239         if (t->next)
3240                 seq_putc(m, ' ');
3241         else
3242                 seq_putc(m, '\n');
3243
3244         return 0;
3245 }
3246
3247 static const struct seq_operations show_traces_seq_ops = {
3248         .start          = t_start,
3249         .next           = t_next,
3250         .stop           = t_stop,
3251         .show           = t_show,
3252 };
3253
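/*
 * Backs the "available_tracers" file: reading it lists the tracers
 * this trace array may use, space separated on one line (see
 * t_show() above).
 */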
3254 static int show_traces_open(struct inode *inode, struct file *file)
3255 {
3256         struct trace_array *tr = inode->i_private;
3257         struct seq_file *m;
3258         int ret;
3259
3260         if (tracing_disabled)
3261                 return -ENODEV;
3262
3263         ret = seq_open(file, &show_traces_seq_ops);
3264         if (ret)
3265                 return ret;
3266
3267         m = file->private_data;
3268         m->private = tr;
3269
3270         return 0;
3271 }
3272
3273 static ssize_t
3274 tracing_write_stub(struct file *filp, const char __user *ubuf,
3275                    size_t count, loff_t *ppos)
3276 {
3277         return count;
3278 }
3279
3280 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3281 {
3282         int ret;
3283
3284         if (file->f_mode & FMODE_READ)
3285                 ret = seq_lseek(file, offset, whence);
3286         else
3287                 file->f_pos = ret = 0;
3288
3289         return ret;
3290 }
3291
3292 static const struct file_operations tracing_fops = {
3293         .open           = tracing_open,
3294         .read           = seq_read,
3295         .write          = tracing_write_stub,
3296         .llseek         = tracing_lseek,
3297         .release        = tracing_release,
3298 };
3299
3300 static const struct file_operations show_traces_fops = {
3301         .open           = show_traces_open,
3302         .read           = seq_read,
3303         .release        = seq_release,
3304         .llseek         = seq_lseek,
3305 };
3306
3307 /*
3308  * The tracer itself will not take this lock, but still we want
3309  * to provide a consistent cpumask to user-space:
3310  */
3311 static DEFINE_MUTEX(tracing_cpumask_update_lock);
3312
3313 /*
3314  * Temporary storage for the character representation of the
3315  * CPU bitmask (and one more byte for the newline):
3316  */
3317 static char mask_str[NR_CPUS + 1];
3318
3319 static ssize_t
3320 tracing_cpumask_read(struct file *filp, char __user *ubuf,
3321                      size_t count, loff_t *ppos)
3322 {
3323         struct trace_array *tr = file_inode(filp)->i_private;
3324         int len;
3325
3326         mutex_lock(&tracing_cpumask_update_lock);
3327
3328         len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask);
3329         if (count - len < 2) {
3330                 count = -EINVAL;
3331                 goto out_err;
3332         }
3333         len += sprintf(mask_str + len, "\n");
3334         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
3335
3336 out_err:
3337         mutex_unlock(&tracing_cpumask_update_lock);
3338
3339         return count;
3340 }
3341
3342 static ssize_t
3343 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
3344                       size_t count, loff_t *ppos)
3345 {
3346         struct trace_array *tr = file_inode(filp)->i_private;
3347         cpumask_var_t tracing_cpumask_new;
3348         int err, cpu;
3349
3350         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
3351                 return -ENOMEM;
3352
3353         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
3354         if (err)
3355                 goto err_unlock;
3356
3357         mutex_lock(&tracing_cpumask_update_lock);
3358
3359         local_irq_disable();
3360         arch_spin_lock(&tr->max_lock);
3361         for_each_tracing_cpu(cpu) {
3362                 /*
3363                  * Increase/decrease the disabled counter if we are
3364                  * about to flip a bit in the cpumask:
3365                  */
3366                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3367                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3368                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3369                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3370                 }
3371                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3372                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3373                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
3374                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3375                 }
3376         }
3377         arch_spin_unlock(&tr->max_lock);
3378         local_irq_enable();
3379
3380         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3381
3382         mutex_unlock(&tracing_cpumask_update_lock);
3383         free_cpumask_var(tracing_cpumask_new);
3384
3385         return count;
3386
3387 err_unlock:
3388         free_cpumask_var(tracing_cpumask_new);
3389
3390         return err;
3391 }
3392
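/*
 * The two handlers above implement the "tracing_cpumask" file.  The
 * mask is parsed as hex by cpumask_parse_user(), so, for example,
 * "echo 3 > tracing_cpumask" would limit tracing to CPUs 0 and 1:
 * recording is disabled on CPUs whose bit is cleared and re-enabled
 * on CPUs whose bit becomes set.
 */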
3393 static const struct file_operations tracing_cpumask_fops = {
3394         .open           = tracing_open_generic_tr,
3395         .read           = tracing_cpumask_read,
3396         .write          = tracing_cpumask_write,
3397         .release        = tracing_release_generic_tr,
3398         .llseek         = generic_file_llseek,
3399 };
3400
3401 static int tracing_trace_options_show(struct seq_file *m, void *v)
3402 {
3403         struct tracer_opt *trace_opts;
3404         struct trace_array *tr = m->private;
3405         u32 tracer_flags;
3406         int i;
3407
3408         mutex_lock(&trace_types_lock);
3409         tracer_flags = tr->current_trace->flags->val;
3410         trace_opts = tr->current_trace->flags->opts;
3411
3412         for (i = 0; trace_options[i]; i++) {
3413                 if (trace_flags & (1 << i))
3414                         seq_printf(m, "%s\n", trace_options[i]);
3415                 else
3416                         seq_printf(m, "no%s\n", trace_options[i]);
3417         }
3418
3419         for (i = 0; trace_opts[i].name; i++) {
3420                 if (tracer_flags & trace_opts[i].bit)
3421                         seq_printf(m, "%s\n", trace_opts[i].name);
3422                 else
3423                         seq_printf(m, "no%s\n", trace_opts[i].name);
3424         }
3425         mutex_unlock(&trace_types_lock);
3426
3427         return 0;
3428 }
3429
3430 static int __set_tracer_option(struct trace_array *tr,
3431                                struct tracer_flags *tracer_flags,
3432                                struct tracer_opt *opts, int neg)
3433 {
3434         struct tracer *trace = tr->current_trace;
3435         int ret;
3436
3437         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3438         if (ret)
3439                 return ret;
3440
3441         if (neg)
3442                 tracer_flags->val &= ~opts->bit;
3443         else
3444                 tracer_flags->val |= opts->bit;
3445         return 0;
3446 }
3447
3448 /* Try to assign a tracer specific option */
3449 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3450 {
3451         struct tracer *trace = tr->current_trace;
3452         struct tracer_flags *tracer_flags = trace->flags;
3453         struct tracer_opt *opts = NULL;
3454         int i;
3455
3456         for (i = 0; tracer_flags->opts[i].name; i++) {
3457                 opts = &tracer_flags->opts[i];
3458
3459                 if (strcmp(cmp, opts->name) == 0)
3460                         return __set_tracer_option(tr, trace->flags, opts, neg);
3461         }
3462
3463         return -EINVAL;
3464 }
3465
3466 /* Some tracers require overwrite to stay enabled */
3467 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
3468 {
3469         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
3470                 return -1;
3471
3472         return 0;
3473 }
3474
3475 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3476 {
3477         /* do nothing if flag is already set */
3478         if (!!(trace_flags & mask) == !!enabled)
3479                 return 0;
3480
3481         /* Give the tracer a chance to approve the change */
3482         if (tr->current_trace->flag_changed)
3483                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3484                         return -EINVAL;
3485
3486         if (enabled)
3487                 trace_flags |= mask;
3488         else
3489                 trace_flags &= ~mask;
3490
3491         if (mask == TRACE_ITER_RECORD_CMD)
3492                 trace_event_enable_cmd_record(enabled);
3493
3494         if (mask == TRACE_ITER_OVERWRITE) {
3495                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3496 #ifdef CONFIG_TRACER_MAX_TRACE
3497                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3498 #endif
3499         }
3500
3501         if (mask == TRACE_ITER_PRINTK)
3502                 trace_printk_start_stop_comm(enabled);
3503
3504         return 0;
3505 }
3506
3507 static int trace_set_options(struct trace_array *tr, char *option)
3508 {
3509         char *cmp;
3510         int neg = 0;
3511         int ret = -ENODEV;
3512         int i;
3513
3514         cmp = strstrip(option);
3515
3516         if (strncmp(cmp, "no", 2) == 0) {
3517                 neg = 1;
3518                 cmp += 2;
3519         }
3520
3521         mutex_lock(&trace_types_lock);
3522
3523         for (i = 0; trace_options[i]; i++) {
3524                 if (strcmp(cmp, trace_options[i]) == 0) {
3525                         ret = set_tracer_flag(tr, 1 << i, !neg);
3526                         break;
3527                 }
3528         }
3529
3530         /* If no option could be set, test the specific tracer options */
3531         if (!trace_options[i])
3532                 ret = set_tracer_option(tr, cmp, neg);
3533
3534         mutex_unlock(&trace_types_lock);
3535
3536         return ret;
3537 }
3538
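/*
 * trace_set_options() above takes either a generic flag from
 * trace_options[] or a tracer-specific option, with a "no" prefix
 * clearing the flag instead of setting it.  Assuming the generic
 * option backing TRACE_ITER_OVERWRITE is named "overwrite", something
 * like "echo nooverwrite > trace_options" would turn overwrite off.
 */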
3539 static ssize_t
3540 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
3541                         size_t cnt, loff_t *ppos)
3542 {
3543         struct seq_file *m = filp->private_data;
3544         struct trace_array *tr = m->private;
3545         char buf[64];
3546         int ret;
3547
3548         if (cnt >= sizeof(buf))
3549                 return -EINVAL;
3550
3551         if (copy_from_user(&buf, ubuf, cnt))
3552                 return -EFAULT;
3553
3554         buf[cnt] = 0;
3555
3556         ret = trace_set_options(tr, buf);
3557         if (ret < 0)
3558                 return ret;
3559
3560         *ppos += cnt;
3561
3562         return cnt;
3563 }
3564
3565 static int tracing_trace_options_open(struct inode *inode, struct file *file)
3566 {
3567         struct trace_array *tr = inode->i_private;
3568         int ret;
3569
3570         if (tracing_disabled)
3571                 return -ENODEV;
3572
3573         if (trace_array_get(tr) < 0)
3574                 return -ENODEV;
3575
3576         ret = single_open(file, tracing_trace_options_show, inode->i_private);
3577         if (ret < 0)
3578                 trace_array_put(tr);
3579
3580         return ret;
3581 }
3582
3583 static const struct file_operations tracing_iter_fops = {
3584         .open           = tracing_trace_options_open,
3585         .read           = seq_read,
3586         .llseek         = seq_lseek,
3587         .release        = tracing_single_release_tr,
3588         .write          = tracing_trace_options_write,
3589 };
3590
3591 static const char readme_msg[] =
3592         "tracing mini-HOWTO:\n\n"
3593         "# echo 0 > tracing_on : quick way to disable tracing\n"
3594         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
3595         " Important files:\n"
3596         "  trace\t\t\t- The static contents of the buffer\n"
3597         "\t\t\t  To clear the buffer, write into this file: echo > trace\n"
3598         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
3599         "  current_tracer\t- function and latency tracers\n"
3600         "  available_tracers\t- list of configured tracers for current_tracer\n"
3601         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
3602         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
3603         "  trace_clock\t\t- change the clock used to order events\n"
3604         "       local:   Per cpu clock but may not be synced across CPUs\n"
3605         "      global:   Synced across CPUs but slows tracing down.\n"
3606         "     counter:   Not a clock, but just an increment\n"
3607         "      uptime:   Jiffy counter from time of boot\n"
3608         "        perf:   Same clock that perf events use\n"
3609 #ifdef CONFIG_X86_64
3610         "     x86-tsc:   TSC cycle counter\n"
3611 #endif
3612         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
3613         "  tracing_cpumask\t- Limit which CPUs to trace\n"
3614         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
3615         "\t\t\t  Remove sub-buffer with rmdir\n"
3616         "  trace_options\t\t- Set format or modify how tracing happens\n"
3617         "\t\t\t  Disable an option by prefixing 'no' to the\n"
3618         "\t\t\t  option name\n"
3619         "  saved_cmdlines_size\t- echo the number of commands to store in the comm-pid list\n"
3620 #ifdef CONFIG_DYNAMIC_FTRACE
3621         "\n  available_filter_functions - list of functions that can be filtered on\n"
3622         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
3623         "\t\t\t  functions\n"
3624         "\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3625         "\t     modules: Can select a group via module\n"
3626         "\t      Format: :mod:<module-name>\n"
3627         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
3628         "\t    triggers: a command to perform when function is hit\n"
3629         "\t      Format: <function>:<trigger>[:count]\n"
3630         "\t     trigger: traceon, traceoff\n"
3631         "\t\t      enable_event:<system>:<event>\n"
3632         "\t\t      disable_event:<system>:<event>\n"
3633 #ifdef CONFIG_STACKTRACE
3634         "\t\t      stacktrace\n"
3635 #endif
3636 #ifdef CONFIG_TRACER_SNAPSHOT
3637         "\t\t      snapshot\n"
3638 #endif
3639         "\t\t      dump\n"
3640         "\t\t      cpudump\n"
3641         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
3642         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
3643         "\t     The first one will disable tracing every time do_fault is hit\n"
3644         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
3645         "\t       The first time do_trap is hit and it disables tracing, the\n"
3646         "\t       counter will decrement to 2. If tracing is already disabled,\n"
3647         "\t       the counter will not decrement. It only decrements when the\n"
3648         "\t       trigger did work\n"
3649         "\t     To remove a trigger without a count:\n"
3650         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
3651         "\t     To remove a trigger with a count:\n"
3652         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
3653         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
3654         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
3655         "\t    modules: Can select a group via module command :mod:\n"
3656         "\t    Does not accept triggers\n"
3657 #endif /* CONFIG_DYNAMIC_FTRACE */
3658 #ifdef CONFIG_FUNCTION_TRACER
3659         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
3660         "\t\t    (function)\n"
3661 #endif
3662 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
3663         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
3664         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
3665         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
3666 #endif
3667 #ifdef CONFIG_TRACER_SNAPSHOT
3668         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
3669         "\t\t\t  snapshot buffer. Read the contents for more\n"
3670         "\t\t\t  information\n"
3671 #endif
3672 #ifdef CONFIG_STACK_TRACER
3673         "  stack_trace\t\t- Shows the max stack trace when active\n"
3674         "  stack_max_size\t- Shows current max stack size that was traced\n"
3675         "\t\t\t  Write into this file to reset the max size (trigger a\n"
3676         "\t\t\t  new trace)\n"
3677 #ifdef CONFIG_DYNAMIC_FTRACE
3678         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
3679         "\t\t\t  traces\n"
3680 #endif
3681 #endif /* CONFIG_STACK_TRACER */
3682         "  events/\t\t- Directory containing all trace event subsystems:\n"
3683         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
3684         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
3685         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
3686         "\t\t\t  events\n"
3687         "      filter\t\t- If set, only events passing filter are traced\n"
3688         "  events/<system>/<event>/\t- Directory containing control files for\n"
3689         "\t\t\t  <event>:\n"
3690         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
3691         "      filter\t\t- If set, only events passing filter are traced\n"
3692         "      trigger\t\t- If set, a command to perform when event is hit\n"
3693         "\t    Format: <trigger>[:count][if <filter>]\n"
3694         "\t   trigger: traceon, traceoff\n"
3695         "\t            enable_event:<system>:<event>\n"
3696         "\t            disable_event:<system>:<event>\n"
3697 #ifdef CONFIG_STACKTRACE
3698         "\t\t    stacktrace\n"
3699 #endif
3700 #ifdef CONFIG_TRACER_SNAPSHOT
3701         "\t\t    snapshot\n"
3702 #endif
3703         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
3704         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
3705         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
3706         "\t                  events/block/block_unplug/trigger\n"
3707         "\t   The first disables tracing every time block_unplug is hit.\n"
3708         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
3709         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
3710         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
3711         "\t   Like function triggers, the counter is only decremented if the\n"
3712         "\t    trigger actually enabled or disabled tracing.\n"
3713         "\t   To remove a trigger without a count:\n"
3714         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
3715         "\t   To remove a trigger with a count:\n"
3716         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
3717         "\t   The filter (if any) can be left out when removing a trigger.\n"
3718 ;
3719
3720 static ssize_t
3721 tracing_readme_read(struct file *filp, char __user *ubuf,
3722                        size_t cnt, loff_t *ppos)
3723 {
3724         return simple_read_from_buffer(ubuf, cnt, ppos,
3725                                         readme_msg, strlen(readme_msg));
3726 }
3727
3728 static const struct file_operations tracing_readme_fops = {
3729         .open           = tracing_open_generic,
3730         .read           = tracing_readme_read,
3731         .llseek         = generic_file_llseek,
3732 };
3733
3734 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
3735 {
3736         unsigned int *ptr = v;
3737
3738         if (*pos || m->count)
3739                 ptr++;
3740
3741         (*pos)++;
3742
3743         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
3744              ptr++) {
3745                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
3746                         continue;
3747
3748                 return ptr;
3749         }
3750
3751         return NULL;
3752 }
3753
3754 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
3755 {
3756         void *v;
3757         loff_t l = 0;
3758
3759         preempt_disable();
3760         arch_spin_lock(&trace_cmdline_lock);
3761
3762         v = &savedcmd->map_cmdline_to_pid[0];
3763         while (l <= *pos) {
3764                 v = saved_cmdlines_next(m, v, &l);
3765                 if (!v)
3766                         return NULL;
3767         }
3768
3769         return v;
3770 }
3771
3772 static void saved_cmdlines_stop(struct seq_file *m, void *v)
3773 {
3774         arch_spin_unlock(&trace_cmdline_lock);
3775         preempt_enable();
3776 }
3777
3778 static int saved_cmdlines_show(struct seq_file *m, void *v)
3779 {
3780         char buf[TASK_COMM_LEN];
3781         unsigned int *pid = v;
3782
3783         __trace_find_cmdline(*pid, buf);
3784         seq_printf(m, "%d %s\n", *pid, buf);
3785         return 0;
3786 }
3787
3788 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
3789         .start          = saved_cmdlines_start,
3790         .next           = saved_cmdlines_next,
3791         .stop           = saved_cmdlines_stop,
3792         .show           = saved_cmdlines_show,
3793 };
3794
3795 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
3796 {
3797         if (tracing_disabled)
3798                 return -ENODEV;
3799
3800         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
3801 }
3802
3803 static const struct file_operations tracing_saved_cmdlines_fops = {
3804         .open           = tracing_saved_cmdlines_open,
3805         .read           = seq_read,
3806         .llseek         = seq_lseek,
3807         .release        = seq_release,
3808 };
3809
3810 static ssize_t
3811 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
3812                                  size_t cnt, loff_t *ppos)
3813 {
3814         char buf[64];
3815         int r;
3816
3817         arch_spin_lock(&trace_cmdline_lock);
3818         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
3819         arch_spin_unlock(&trace_cmdline_lock);
3820
3821         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3822 }
3823
3824 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
3825 {
3826         kfree(s->saved_cmdlines);
3827         kfree(s->map_cmdline_to_pid);
3828         kfree(s);
3829 }
3830
3831 static int tracing_resize_saved_cmdlines(unsigned int val)
3832 {
3833         struct saved_cmdlines_buffer *s, *savedcmd_temp;
3834
3835         s = kmalloc(sizeof(*s), GFP_KERNEL);
3836         if (!s)
3837                 return -ENOMEM;
3838
3839         if (allocate_cmdlines_buffer(val, s) < 0) {
3840                 kfree(s);
3841                 return -ENOMEM;
3842         }
3843
3844         arch_spin_lock(&trace_cmdline_lock);
3845         savedcmd_temp = savedcmd;
3846         savedcmd = s;
3847         arch_spin_unlock(&trace_cmdline_lock);
3848         free_saved_cmdlines_buffer(savedcmd_temp);
3849
3850         return 0;
3851 }
3852
3853 static ssize_t
3854 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
3855                                   size_t cnt, loff_t *ppos)
3856 {
3857         unsigned long val;
3858         int ret;
3859
3860         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
3861         if (ret)
3862                 return ret;
3863
3864         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
3865         if (!val || val > PID_MAX_DEFAULT)
3866                 return -EINVAL;
3867
3868         ret = tracing_resize_saved_cmdlines((unsigned int)val);
3869         if (ret < 0)
3870                 return ret;
3871
3872         *ppos += cnt;
3873
3874         return cnt;
3875 }
3876
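/*
 * The "saved_cmdlines_size" handlers above report how many comm/pid
 * entries are kept and accept a new count between 1 and
 * PID_MAX_DEFAULT (e.g. "echo 1024 > saved_cmdlines_size"), which
 * reallocates the map via tracing_resize_saved_cmdlines().
 */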
3877 static const struct file_operations tracing_saved_cmdlines_size_fops = {
3878         .open           = tracing_open_generic,
3879         .read           = tracing_saved_cmdlines_size_read,
3880         .write          = tracing_saved_cmdlines_size_write,
3881 };
3882
3883 static ssize_t
3884 tracing_set_trace_read(struct file *filp, char __user *ubuf,
3885                        size_t cnt, loff_t *ppos)
3886 {
3887         struct trace_array *tr = filp->private_data;
3888         char buf[MAX_TRACER_SIZE+2];
3889         int r;
3890
3891         mutex_lock(&trace_types_lock);
3892         r = sprintf(buf, "%s\n", tr->current_trace->name);
3893         mutex_unlock(&trace_types_lock);
3894
3895         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
3896 }
3897
3898 int tracer_init(struct tracer *t, struct trace_array *tr)
3899 {
3900         tracing_reset_online_cpus(&tr->trace_buffer);
3901         return t->init(tr);
3902 }
3903
3904 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
3905 {
3906         int cpu;
3907
3908         for_each_tracing_cpu(cpu)
3909                 per_cpu_ptr(buf->data, cpu)->entries = val;
3910 }
3911
3912 #ifdef CONFIG_TRACER_MAX_TRACE
3913 /* resize @trace_buf to the per-cpu entry counts recorded in @size_buf */
3914 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
3915                                         struct trace_buffer *size_buf, int cpu_id)
3916 {
3917         int cpu, ret = 0;
3918
3919         if (cpu_id == RING_BUFFER_ALL_CPUS) {
3920                 for_each_tracing_cpu(cpu) {
3921                         ret = ring_buffer_resize(trace_buf->buffer,
3922                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
3923                         if (ret < 0)
3924                                 break;
3925                         per_cpu_ptr(trace_buf->data, cpu)->entries =
3926                                 per_cpu_ptr(size_buf->data, cpu)->entries;
3927                 }
3928         } else {
3929                 ret = ring_buffer_resize(trace_buf->buffer,
3930                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
3931                 if (ret == 0)
3932                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
3933                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
3934         }
3935
3936         return ret;
3937 }
3938 #endif /* CONFIG_TRACER_MAX_TRACE */
3939
3940 static int __tracing_resize_ring_buffer(struct trace_array *tr,
3941                                         unsigned long size, int cpu)
3942 {
3943         int ret;
3944
3945         /*
3946          * If the kernel or the user changes the size of the ring buffer,
3947          * we use the size that was given, and we can forget about
3948          * expanding it later.
3949          */
3950         ring_buffer_expanded = true;
3951
3952         /* May be called before buffers are initialized */
3953         if (!tr->trace_buffer.buffer)
3954                 return 0;
3955
3956         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
3957         if (ret < 0)
3958                 return ret;
3959
3960 #ifdef CONFIG_TRACER_MAX_TRACE
3961         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
3962             !tr->current_trace->use_max_tr)
3963                 goto out;
3964
3965         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
3966         if (ret < 0) {
3967                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
3968                                                      &tr->trace_buffer, cpu);
3969                 if (r < 0) {
3970                         /*
3971                          * AARGH! We are left with different
3972                          * size max buffer!!!!
3973                          * The max buffer is our "snapshot" buffer.
3974                          * When a tracer needs a snapshot (one of the
3975                          * latency tracers), it swaps the max buffer
3976                          * with the saved snapshot. We succeeded in updating
3977                          * the size of the main buffer, but failed to update
3978                          * the size of the max buffer. But when we tried
3979                          * to reset the main buffer to the original size, we
3980                          * failed there too. This is very unlikely to
3981                          * happen, but if it does, warn and kill all
3982                          * tracing.
3983                          */
3984                         WARN_ON(1);
3985                         tracing_disabled = 1;
3986                 }
3987                 return ret;
3988         }
3989
3990         if (cpu == RING_BUFFER_ALL_CPUS)
3991                 set_buffer_entries(&tr->max_buffer, size);
3992         else
3993                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
3994
3995  out:
3996 #endif /* CONFIG_TRACER_MAX_TRACE */
3997
3998         if (cpu == RING_BUFFER_ALL_CPUS)
3999                 set_buffer_entries(&tr->trace_buffer, size);
4000         else
4001                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
4002
4003         return ret;
4004 }
4005
4006 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
4007                                           unsigned long size, int cpu_id)
4008 {
4009         int ret = size;
4010
4011         mutex_lock(&trace_types_lock);
4012
4013         if (cpu_id != RING_BUFFER_ALL_CPUS) {
4014                 /* make sure this cpu is enabled in the mask */
4015                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
4016                         ret = -EINVAL;
4017                         goto out;
4018                 }
4019         }
4020
4021         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
4022         if (ret < 0)
4023                 ret = -ENOMEM;
4024
4025 out:
4026         mutex_unlock(&trace_types_lock);
4027
4028         return ret;
4029 }
4030
4031
4032 /**
4033  * tracing_update_buffers - used by tracing facility to expand ring buffers
4034  *
4035  * To save memory on a system that has tracing configured in but never
4036  * uses it, the ring buffers are kept at a minimum size. Once a user
4037  * starts to use the tracing facility, the buffers need to grow to
4038  * their default size.
4039  *
4040  * This function is to be called when a tracer is about to be used.
4041  */
4042 int tracing_update_buffers(void)
4043 {
4044         int ret = 0;
4045
4046         mutex_lock(&trace_types_lock);
4047         if (!ring_buffer_expanded)
4048                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
4049                                                 RING_BUFFER_ALL_CPUS);
4050         mutex_unlock(&trace_types_lock);
4051
4052         return ret;
4053 }
4054
4055 struct trace_option_dentry;
4056
4057 static struct trace_option_dentry *
4058 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
4059
4060 static void
4061 destroy_trace_option_files(struct trace_option_dentry *topts);
4062
4063 /*
4064  * Used to clear out the tracer before deletion of an instance.
4065  * Must have trace_types_lock held.
4066  */
4067 static void tracing_set_nop(struct trace_array *tr)
4068 {
4069         if (tr->current_trace == &nop_trace)
4070                 return;
4071
4072         tr->current_trace->enabled--;
4073
4074         if (tr->current_trace->reset)
4075                 tr->current_trace->reset(tr);
4076
4077         tr->current_trace = &nop_trace;
4078 }
4079
4080 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
4081 {
4082         static struct trace_option_dentry *topts;
4083         struct tracer *t;
4084 #ifdef CONFIG_TRACER_MAX_TRACE
4085         bool had_max_tr;
4086 #endif
4087         int ret = 0;
4088
4089         mutex_lock(&trace_types_lock);
4090
4091         if (!ring_buffer_expanded) {
4092                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
4093                                                 RING_BUFFER_ALL_CPUS);
4094                 if (ret < 0)
4095                         goto out;
4096                 ret = 0;
4097         }
4098
4099         for (t = trace_types; t; t = t->next) {
4100                 if (strcmp(t->name, buf) == 0)
4101                         break;
4102         }
4103         if (!t) {
4104                 ret = -EINVAL;
4105                 goto out;
4106         }
4107         if (t == tr->current_trace)
4108                 goto out;
4109
4110         /* Some tracers are only allowed for the top level buffer */
4111         if (!trace_ok_for_array(t, tr)) {
4112                 ret = -EINVAL;
4113                 goto out;
4114         }
4115
4116         trace_branch_disable();
4117
4118         tr->current_trace->enabled--;
4119
4120         if (tr->current_trace->reset)
4121                 tr->current_trace->reset(tr);
4122
4123         /* Current trace needs to be nop_trace before synchronize_sched */
4124         tr->current_trace = &nop_trace;
4125
4126 #ifdef CONFIG_TRACER_MAX_TRACE
4127         had_max_tr = tr->allocated_snapshot;
4128
4129         if (had_max_tr && !t->use_max_tr) {
4130                 /*
4131                  * We need to make sure that the update_max_tr sees that
4132                  * current_trace changed to nop_trace to keep it from
4133                  * swapping the buffers after we resize it.
4134                  * The update_max_tr is called with interrupts disabled
4135                  * so a synchronize_sched() is sufficient.
4136                  */
4137                 synchronize_sched();
4138                 free_snapshot(tr);
4139         }
4140 #endif
4141         /* Currently, only the top instance has options */
4142         if (tr->flags & TRACE_ARRAY_FL_GLOBAL) {
4143                 destroy_trace_option_files(topts);
4144                 topts = create_trace_option_files(tr, t);
4145         }
4146
4147 #ifdef CONFIG_TRACER_MAX_TRACE
4148         if (t->use_max_tr && !had_max_tr) {
4149                 ret = alloc_snapshot(tr);
4150                 if (ret < 0)
4151                         goto out;
4152         }
4153 #endif
4154
4155         if (t->init) {
4156                 ret = tracer_init(t, tr);
4157                 if (ret)
4158                         goto out;
4159         }
4160
4161         tr->current_trace = t;
4162         tr->current_trace->enabled++;
4163         trace_branch_enable(tr);
4164  out:
4165         mutex_unlock(&trace_types_lock);
4166
4167         return ret;
4168 }
4169
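/*
 * Write handler for "current_tracer".  The buffer is copied from
 * userspace, trailing whitespace is stripped, and the name is handed
 * to tracing_set_tracer(); e.g. "echo nop > current_tracer" would
 * switch the instance back to the nop tracer (assuming it is
 * registered under that name).
 */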
4170 static ssize_t
4171 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
4172                         size_t cnt, loff_t *ppos)
4173 {
4174         struct trace_array *tr = filp->private_data;
4175         char buf[MAX_TRACER_SIZE+1];
4176         int i;
4177         size_t ret;
4178         int err;
4179
4180         ret = cnt;
4181
4182         if (cnt > MAX_TRACER_SIZE)
4183                 cnt = MAX_TRACER_SIZE;
4184
4185         if (copy_from_user(&buf, ubuf, cnt))
4186                 return -EFAULT;
4187
4188         buf[cnt] = 0;
4189
4190         /* strip ending whitespace. */
4191         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
4192                 buf[i] = 0;
4193
4194         err = tracing_set_tracer(tr, buf);
4195         if (err)
4196                 return err;
4197
4198         *ppos += ret;
4199
4200         return ret;
4201 }
4202
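/*
 * The next two handlers expose an unsigned long latency value, such
 * as the one kept by the latency tracers.  Values are presented and
 * accepted in microseconds, while the stored value is in nanoseconds
 * (hence nsecs_to_usecs() on read and the "* 1000" on write).
 */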
4203 static ssize_t
4204 tracing_max_lat_read(struct file *filp, char __user *ubuf,
4205                      size_t cnt, loff_t *ppos)
4206 {
4207         unsigned long *ptr = filp->private_data;
4208         char buf[64];
4209         int r;
4210
4211         r = snprintf(buf, sizeof(buf), "%ld\n",
4212                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
4213         if (r > sizeof(buf))
4214                 r = sizeof(buf);
4215         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4216 }
4217
4218 static ssize_t
4219 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
4220                       size_t cnt, loff_t *ppos)
4221 {
4222         unsigned long *ptr = filp->private_data;
4223         unsigned long val;
4224         int ret;
4225
4226         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4227         if (ret)
4228                 return ret;
4229
4230         *ptr = val * 1000;
4231
4232         return cnt;
4233 }
4234
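/*
 * "trace_pipe" open: unlike "trace", reads from this file consume
 * the entries they return (see the mini-HOWTO above).  The open path
 * copies the current tracer and allocates a cpumask so that a
 * concurrent tracer switch does not disturb an in-progress read.
 */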
4235 static int tracing_open_pipe(struct inode *inode, struct file *filp)
4236 {
4237         struct trace_array *tr = inode->i_private;
4238         struct trace_iterator *iter;
4239         int ret = 0;
4240
4241         if (tracing_disabled)
4242                 return -ENODEV;
4243
4244         if (trace_array_get(tr) < 0)
4245                 return -ENODEV;
4246
4247         mutex_lock(&trace_types_lock);
4248
4249         /* create a buffer to store the information to pass to userspace */
4250         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4251         if (!iter) {
4252                 ret = -ENOMEM;
4253                 __trace_array_put(tr);
4254                 goto out;
4255         }
4256
4257         /*
4258          * We make a copy of the current tracer to avoid concurrent
4259          * changes on it while we are reading.
4260          */
4261         iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL);
4262         if (!iter->trace) {
4263                 ret = -ENOMEM;
4264                 goto fail;
4265         }
4266         *iter->trace = *tr->current_trace;
4267
4268         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
4269                 ret = -ENOMEM;
4270                 goto fail;
4271         }
4272
4273         /* trace pipe does not show start of buffer */
4274         cpumask_setall(iter->started);
4275
4276         if (trace_flags & TRACE_ITER_LATENCY_FMT)
4277                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4278
4279         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4280         if (trace_clocks[tr->clock_id].in_ns)
4281                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4282
4283         iter->tr = tr;
4284         iter->trace_buffer = &tr->trace_buffer;
4285         iter->cpu_file = tracing_get_cpu(inode);
4286         mutex_init(&iter->mutex);
4287         filp->private_data = iter;
4288
4289         if (iter->trace->pipe_open)
4290                 iter->trace->pipe_open(iter);
4291
4292         nonseekable_open(inode, filp);
4293 out:
4294         mutex_unlock(&trace_types_lock);
4295         return ret;
4296
4297 fail:
4298         kfree(iter->trace);
4299         kfree(iter);
4300         __trace_array_put(tr);
4301         mutex_unlock(&trace_types_lock);
4302         return ret;
4303 }
4304
4305 static int tracing_release_pipe(struct inode *inode, struct file *file)
4306 {
4307         struct trace_iterator *iter = file->private_data;
4308         struct trace_array *tr = inode->i_private;
4309
4310         mutex_lock(&trace_types_lock);
4311
4312         if (iter->trace->pipe_close)
4313                 iter->trace->pipe_close(iter);
4314
4315         mutex_unlock(&trace_types_lock);
4316
4317         free_cpumask_var(iter->started);
4318         mutex_destroy(&iter->mutex);
4319         kfree(iter->trace);
4320         kfree(iter);
4321
4322         trace_array_put(tr);
4323
4324         return 0;
4325 }
4326
4327 static unsigned int
4328 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
4329 {
4330         /* Iterators are static, they should be filled or empty */
4331         if (trace_buffer_iter(iter, iter->cpu_file))
4332                 return POLLIN | POLLRDNORM;
4333
4334         if (trace_flags & TRACE_ITER_BLOCK)
4335                 /*
4336                  * Always select as readable when in blocking mode
4337                  */
4338                 return POLLIN | POLLRDNORM;
4339         else
4340                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
4341                                              filp, poll_table);
4342 }
4343
4344 static unsigned int
4345 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
4346 {
4347         struct trace_iterator *iter = filp->private_data;
4348
4349         return trace_poll(iter, filp, poll_table);
4350 }
4351
4352 /* Must be called with iter->mutex held. */
4353 static int tracing_wait_pipe(struct file *filp)
4354 {
4355         struct trace_iterator *iter = filp->private_data;
4356         int ret;
4357
4358         while (trace_empty(iter)) {
4359
4360                 if ((filp->f_flags & O_NONBLOCK)) {
4361                         return -EAGAIN;
4362                 }
4363
4364                 /*
4365                  * We block until we read something and tracing is disabled.
4366                  * We still block if tracing is disabled, but we have never
4367                  * read anything. This allows a user to cat this file, and
4368                  * then enable tracing. But after we have read something,
4369                  * we give an EOF when tracing is again disabled.
4370                  *
4371                  * iter->pos will be 0 if we haven't read anything.
4372                  */
4373                 if (!tracing_is_on() && iter->pos)
4374                         break;
4375
4376                 mutex_unlock(&iter->mutex);
4377
4378                 ret = wait_on_pipe(iter);
4379
4380                 mutex_lock(&iter->mutex);
4381
4382                 if (ret)
4383                         return ret;
4384
4385                 if (signal_pending(current))
4386                         return -EINTR;
4387         }
4388
4389         return 1;
4390 }
4391
4392 /*
4393  * Consumer reader.
4394  */
4395 static ssize_t
4396 tracing_read_pipe(struct file *filp, char __user *ubuf,
4397                   size_t cnt, loff_t *ppos)
4398 {
4399         struct trace_iterator *iter = filp->private_data;
4400         struct trace_array *tr = iter->tr;
4401         ssize_t sret;
4402
4403         /* return any leftover data */
4404         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4405         if (sret != -EBUSY)
4406                 return sret;
4407
4408         trace_seq_init(&iter->seq);
4409
4410         /* copy the tracer to avoid using a global lock all around */
4411         mutex_lock(&trace_types_lock);
4412         if (unlikely(iter->trace->name != tr->current_trace->name))
4413                 *iter->trace = *tr->current_trace;
4414         mutex_unlock(&trace_types_lock);
4415
4416         /*
4417          * Avoid more than one consumer on a single file descriptor.
4418          * This is just a matter of trace coherency; the ring buffer itself
4419          * is protected.
4420          */
4421         mutex_lock(&iter->mutex);
4422         if (iter->trace->read) {
4423                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
4424                 if (sret)
4425                         goto out;
4426         }
4427
4428 waitagain:
4429         sret = tracing_wait_pipe(filp);
4430         if (sret <= 0)
4431                 goto out;
4432
4433         /* stop when tracing is finished */
4434         if (trace_empty(iter)) {
4435                 sret = 0;
4436                 goto out;
4437         }
4438
4439         if (cnt >= PAGE_SIZE)
4440                 cnt = PAGE_SIZE - 1;
4441
4442         /* reset all but tr, trace, and overruns */
4443         memset(&iter->seq, 0,
4444                sizeof(struct trace_iterator) -
4445                offsetof(struct trace_iterator, seq));
4446         cpumask_clear(iter->started);
4447         iter->pos = -1;
4448
4449         trace_event_read_lock();
4450         trace_access_lock(iter->cpu_file);
4451         while (trace_find_next_entry_inc(iter) != NULL) {
4452                 enum print_line_t ret;
4453                 int len = iter->seq.len;
4454
4455                 ret = print_trace_line(iter);
4456                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4457                         /* don't print partial lines */
4458                         iter->seq.len = len;
4459                         break;
4460                 }
4461                 if (ret != TRACE_TYPE_NO_CONSUME)
4462                         trace_consume(iter);
4463
4464                 if (iter->seq.len >= cnt)
4465                         break;
4466
4467                 /*
4468                  * Setting the full flag means we reached the trace_seq buffer
4469                  * size and we should have left via the partial-line condition above.
4470                  * One of the trace_seq_* functions is not used properly.
4471                  */
4472                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
4473                           iter->ent->type);
4474         }
4475         trace_access_unlock(iter->cpu_file);
4476         trace_event_read_unlock();
4477
4478         /* Now copy what we have to the user */
4479         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
4480         if (iter->seq.readpos >= iter->seq.len)
4481                 trace_seq_init(&iter->seq);
4482
4483         /*
4484          * If there was nothing to send to user, in spite of consuming trace
4485          * entries, go back to wait for more entries.
4486          */
4487         if (sret == -EBUSY)
4488                 goto waitagain;
4489
4490 out:
4491         mutex_unlock(&iter->mutex);
4492
4493         return sret;
4494 }
4495
4496 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
4497                                      unsigned int idx)
4498 {
4499         __free_page(spd->pages[idx]);
4500 }
4501
4502 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
4503         .can_merge              = 0,
4504         .confirm                = generic_pipe_buf_confirm,
4505         .release                = generic_pipe_buf_release,
4506         .steal                  = generic_pipe_buf_steal,
4507         .get                    = generic_pipe_buf_get,
4508 };
4509
4510 static size_t
4511 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
4512 {
4513         size_t count;
4514         int ret;
4515
4516         /* Seq buffer is page-sized, exactly what we need. */
4517         for (;;) {
4518                 count = iter->seq.len;
4519                 ret = print_trace_line(iter);
4520                 count = iter->seq.len - count;
4521                 if (rem < count) {
4522                         rem = 0;
4523                         iter->seq.len -= count;
4524                         break;
4525                 }
4526                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4527                         iter->seq.len -= count;
4528                         break;
4529                 }
4530
4531                 if (ret != TRACE_TYPE_NO_CONSUME)
4532                         trace_consume(iter);
4533                 rem -= count;
4534                 if (!trace_find_next_entry_inc(iter))   {
4535                         rem = 0;
4536                         iter->ent = NULL;
4537                         break;
4538                 }
4539         }
4540
4541         return rem;
4542 }
4543
4544 static ssize_t tracing_splice_read_pipe(struct file *filp,
4545                                         loff_t *ppos,
4546                                         struct pipe_inode_info *pipe,
4547                                         size_t len,
4548                                         unsigned int flags)
4549 {
4550         struct page *pages_def[PIPE_DEF_BUFFERS];
4551         struct partial_page partial_def[PIPE_DEF_BUFFERS];
4552         struct trace_iterator *iter = filp->private_data;
4553         struct splice_pipe_desc spd = {
4554                 .pages          = pages_def,
4555                 .partial        = partial_def,
4556                 .nr_pages       = 0, /* This gets updated below. */
4557                 .nr_pages_max   = PIPE_DEF_BUFFERS,
4558                 .flags          = flags,
4559                 .ops            = &tracing_pipe_buf_ops,
4560                 .spd_release    = tracing_spd_release_pipe,
4561         };
4562         struct trace_array *tr = iter->tr;
4563         ssize_t ret;
4564         size_t rem;
4565         unsigned int i;
4566
4567         if (splice_grow_spd(pipe, &spd))
4568                 return -ENOMEM;
4569
4570         /* copy the tracer to avoid using a global lock all around */
4571         mutex_lock(&trace_types_lock);
4572         if (unlikely(iter->trace->name != tr->current_trace->name))
4573                 *iter->trace = *tr->current_trace;
4574         mutex_unlock(&trace_types_lock);
4575
4576         mutex_lock(&iter->mutex);
4577
4578         if (iter->trace->splice_read) {
4579                 ret = iter->trace->splice_read(iter, filp,
4580                                                ppos, pipe, len, flags);
4581                 if (ret)
4582                         goto out_err;
4583         }
4584
4585         ret = tracing_wait_pipe(filp);
4586         if (ret <= 0)
4587                 goto out_err;
4588
4589         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
4590                 ret = -EFAULT;
4591                 goto out_err;
4592         }
4593
4594         trace_event_read_lock();
4595         trace_access_lock(iter->cpu_file);
4596
4597         /* Fill as many pages as possible. */
4598         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
4599                 spd.pages[i] = alloc_page(GFP_KERNEL);
4600                 if (!spd.pages[i])
4601                         break;
4602
4603                 rem = tracing_fill_pipe_page(rem, iter);
4604
4605                 /* Copy the data into the page, so we can start over. */
4606                 ret = trace_seq_to_buffer(&iter->seq,
4607                                           page_address(spd.pages[i]),
4608                                           iter->seq.len);
4609                 if (ret < 0) {
4610                         __free_page(spd.pages[i]);
4611                         break;
4612                 }
4613                 spd.partial[i].offset = 0;
4614                 spd.partial[i].len = iter->seq.len;
4615
4616                 trace_seq_init(&iter->seq);
4617         }
4618
4619         trace_access_unlock(iter->cpu_file);
4620         trace_event_read_unlock();
4621         mutex_unlock(&iter->mutex);
4622
4623         spd.nr_pages = i;
4624
4625         ret = splice_to_pipe(pipe, &spd);
4626 out:
4627         splice_shrink_spd(&spd);
4628         return ret;
4629
4630 out_err:
4631         mutex_unlock(&iter->mutex);
4632         goto out;
4633 }
4634
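/*
 * "buffer_size_kb" handlers.  Sizes are reported and accepted in
 * kilobytes; the read side prints "X" when the per-cpu buffers differ
 * in size and appends "(expanded: N)" while the ring buffer is still
 * at its boot-time minimum.
 */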
4635 static ssize_t
4636 tracing_entries_read(struct file *filp, char __user *ubuf,
4637                      size_t cnt, loff_t *ppos)
4638 {
4639         struct inode *inode = file_inode(filp);
4640         struct trace_array *tr = inode->i_private;
4641         int cpu = tracing_get_cpu(inode);
4642         char buf[64];
4643         int r = 0;
4644         ssize_t ret;
4645
4646         mutex_lock(&trace_types_lock);
4647
4648         if (cpu == RING_BUFFER_ALL_CPUS) {
4649                 int cpu, buf_size_same;
4650                 unsigned long size;
4651
4652                 size = 0;
4653                 buf_size_same = 1;
4654                 /* check if all cpu sizes are same */
4655                 for_each_tracing_cpu(cpu) {
4656                         /* fill in the size from first enabled cpu */
4657                         if (size == 0)
4658                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
4659                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
4660                                 buf_size_same = 0;
4661                                 break;
4662                         }
4663                 }
4664
4665                 if (buf_size_same) {
4666                         if (!ring_buffer_expanded)
4667                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
4668                                             size >> 10,
4669                                             trace_buf_size >> 10);
4670                         else
4671                                 r = sprintf(buf, "%lu\n", size >> 10);
4672                 } else
4673                         r = sprintf(buf, "X\n");
4674         } else
4675                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
4676
4677         mutex_unlock(&trace_types_lock);
4678
4679         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4680         return ret;
4681 }
4682
4683 static ssize_t
4684 tracing_entries_write(struct file *filp, const char __user *ubuf,
4685                       size_t cnt, loff_t *ppos)
4686 {
4687         struct inode *inode = file_inode(filp);
4688         struct trace_array *tr = inode->i_private;
4689         unsigned long val;
4690         int ret;
4691
4692         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4693         if (ret)
4694                 return ret;
4695
4696         /* must have at least 1 entry */
4697         if (!val)
4698                 return -EINVAL;
4699
4700         /* value is in KB */
4701         val <<= 10;
4702         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
4703         if (ret < 0)
4704                 return ret;
4705
4706         *ppos += cnt;
4707
4708         return cnt;
4709 }
4710
4711 static ssize_t
4712 tracing_total_entries_read(struct file *filp, char __user *ubuf,
4713                                 size_t cnt, loff_t *ppos)
4714 {
4715         struct trace_array *tr = filp->private_data;
4716         char buf[64];
4717         int r, cpu;
4718         unsigned long size = 0, expanded_size = 0;
4719
4720         mutex_lock(&trace_types_lock);
4721         for_each_tracing_cpu(cpu) {
4722                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
4723                 if (!ring_buffer_expanded)
4724                         expanded_size += trace_buf_size >> 10;
4725         }
4726         if (ring_buffer_expanded)
4727                 r = sprintf(buf, "%lu\n", size);
4728         else
4729                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
4730         mutex_unlock(&trace_types_lock);
4731
4732         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4733 }
4734
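/*
 * The next two handlers accept (and ignore) writes; the real work is
 * done on the final close, which shrinks the ring buffer to zero and,
 * if TRACE_ITER_STOP_ON_FREE is set, turns tracing off first.
 */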
4735 static ssize_t
4736 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
4737                           size_t cnt, loff_t *ppos)
4738 {
4739         /*
4740          * There is no need to read what the user has written; this function
4741          * only exists so that an "echo" into this file does not fail
4742          */
4743
4744         *ppos += cnt;
4745
4746         return cnt;
4747 }
4748
4749 static int
4750 tracing_free_buffer_release(struct inode *inode, struct file *filp)
4751 {
4752         struct trace_array *tr = inode->i_private;
4753
4754         /* disable tracing? */
4755         if (trace_flags & TRACE_ITER_STOP_ON_FREE)
4756                 tracer_tracing_off(tr);
4757         /* resize the ring buffer to 0 */
4758         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
4759
4760         trace_array_put(tr);
4761
4762         return 0;
4763 }
4764
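/*
 * Write handler for "trace_marker".  For example, "echo hello >
 * trace_marker" would inject a TRACE_PRINT event into the ring
 * buffer; the user pages are pinned and copied directly, as the long
 * comment below explains.
 */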
4765 static ssize_t
4766 tracing_mark_write(struct file *filp, const char __user *ubuf,
4767                                         size_t cnt, loff_t *fpos)
4768 {
4769         unsigned long addr = (unsigned long)ubuf;
4770         struct trace_array *tr = filp->private_data;
4771         struct ring_buffer_event *event;
4772         struct ring_buffer *buffer;
4773         struct print_entry *entry;
4774         unsigned long irq_flags;
4775         struct page *pages[2];
4776         void *map_page[2];
4777         int nr_pages = 1;
4778         ssize_t written;
4779         int offset;
4780         int size;
4781         int len;
4782         int ret;
4783         int i;
4784
4785         if (tracing_disabled)
4786                 return -EINVAL;
4787
4788         if (!(trace_flags & TRACE_ITER_MARKERS))
4789                 return -EINVAL;
4790
4791         if (cnt > TRACE_BUF_SIZE)
4792                 cnt = TRACE_BUF_SIZE;
4793
4794         /*
4795          * Userspace is injecting traces into the kernel trace buffer.
4796          * We want to be as non-intrusive as possible.
4797          * To do so, we do not want to allocate any special buffers
4798          * or take any locks, but instead write the userspace data
4799          * straight into the ring buffer.
4800          *
4801          * First we need to pin the userspace buffer into memory. It is
4802          * most likely resident already, since userspace just referenced it,
4803          * but there is no guarantee of that. Using get_user_pages_fast()
4804          * and kmap_atomic()/kunmap_atomic() gives us direct access to the
4805          * pages, and we then copy the data straight into the
4806          * ring buffer.
4807          */
4808         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
4809
4810         /* check if we cross pages */
4811         if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
4812                 nr_pages = 2;
4813
4814         offset = addr & (PAGE_SIZE - 1);
4815         addr &= PAGE_MASK;
4816
4817         ret = get_user_pages_fast(addr, nr_pages, 0, pages);
4818         if (ret < nr_pages) {
4819                 while (--ret >= 0)
4820                         put_page(pages[ret]);
4821                 written = -EFAULT;
4822                 goto out;
4823         }
4824
4825         for (i = 0; i < nr_pages; i++)
4826                 map_page[i] = kmap_atomic(pages[i]);
4827
4828         local_save_flags(irq_flags);
4829         size = sizeof(*entry) + cnt + 2; /* possible \n added */
4830         buffer = tr->trace_buffer.buffer;
4831         event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
4832                                           irq_flags, preempt_count());
4833         if (!event) {
4834                 /* Ring buffer disabled, return as if not open for write */
4835                 written = -EBADF;
4836                 goto out_unlock;
4837         }
4838
4839         entry = ring_buffer_event_data(event);
4840         entry->ip = _THIS_IP_;
4841
4842         if (nr_pages == 2) {
4843                 len = PAGE_SIZE - offset;
4844                 memcpy(&entry->buf, map_page[0] + offset, len);
4845                 memcpy(&entry->buf[len], map_page[1], cnt - len);
4846         } else
4847                 memcpy(&entry->buf, map_page[0] + offset, cnt);
4848
4849         if (entry->buf[cnt - 1] != '\n') {
4850                 entry->buf[cnt] = '\n';
4851                 entry->buf[cnt + 1] = '\0';
4852         } else
4853                 entry->buf[cnt] = '\0';
4854
4855         __buffer_unlock_commit(buffer, event);
4856
4857         written = cnt;
4858
4859         *fpos += written;
4860
4861  out_unlock:
4862         for (i = 0; i < nr_pages; i++) {
4863                 kunmap_atomic(map_page[i]);
4864                 put_page(pages[i]);
4865         }
4866  out:
4867         return written;
4868 }
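/*
 * Illustrative usage sketch: any string written to trace_marker is recorded
 * as a print entry in the ring buffer (a trailing newline is added if
 * missing, and writes longer than TRACE_BUF_SIZE are truncated). The write
 * is refused when the "markers" trace option is cleared.
 *
 *   # echo "hit the interesting point" > /sys/kernel/debug/tracing/trace_marker
 */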
4869
4870 static int tracing_clock_show(struct seq_file *m, void *v)
4871 {
4872         struct trace_array *tr = m->private;
4873         int i;
4874
4875         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
4876                 seq_printf(m,
4877                         "%s%s%s%s", i ? " " : "",
4878                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
4879                         i == tr->clock_id ? "]" : "");
4880         seq_putc(m, '\n');
4881
4882         return 0;
4883 }
4884
4885 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
4886 {
4887         int i;
4888
4889         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
4890                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
4891                         break;
4892         }
4893         if (i == ARRAY_SIZE(trace_clocks))
4894                 return -EINVAL;
4895
4896         mutex_lock(&trace_types_lock);
4897
4898         tr->clock_id = i;
4899
4900         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
4901
4902         /*
4903          * New clock may not be consistent with the previous clock.
4904          * Reset the buffer so that it doesn't have incomparable timestamps.
4905          */
4906         tracing_reset_online_cpus(&tr->trace_buffer);
4907
4908 #ifdef CONFIG_TRACER_MAX_TRACE
4909         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
4910                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
4911         tracing_reset_online_cpus(&tr->max_buffer);
4912 #endif
4913
4914         mutex_unlock(&trace_types_lock);
4915
4916         return 0;
4917 }
4918
4919 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
4920                                    size_t cnt, loff_t *fpos)
4921 {
4922         struct seq_file *m = filp->private_data;
4923         struct trace_array *tr = m->private;
4924         char buf[64];
4925         const char *clockstr;
4926         int ret;
4927
4928         if (cnt >= sizeof(buf))
4929                 return -EINVAL;
4930
4931         if (copy_from_user(&buf, ubuf, cnt))
4932                 return -EFAULT;
4933
4934         buf[cnt] = 0;
4935
4936         clockstr = strstrip(buf);
4937
4938         ret = tracing_set_clock(tr, clockstr);
4939         if (ret)
4940                 return ret;
4941
4942         *fpos += cnt;
4943
4944         return cnt;
4945 }
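/*
 * Illustrative usage sketch: reading trace_clock lists the available clocks
 * with the current one in brackets; writing a clock name switches to it and
 * resets the buffers. The clock names below are examples and depend on the
 * build and architecture.
 *
 *   # cat /sys/kernel/debug/tracing/trace_clock
 *   [local] global counter uptime perf
 *   # echo global > /sys/kernel/debug/tracing/trace_clock
 */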
4946
4947 static int tracing_clock_open(struct inode *inode, struct file *file)
4948 {
4949         struct trace_array *tr = inode->i_private;
4950         int ret;
4951
4952         if (tracing_disabled)
4953                 return -ENODEV;
4954
4955         if (trace_array_get(tr))
4956                 return -ENODEV;
4957
4958         ret = single_open(file, tracing_clock_show, inode->i_private);
4959         if (ret < 0)
4960                 trace_array_put(tr);
4961
4962         return ret;
4963 }
4964
4965 struct ftrace_buffer_info {
4966         struct trace_iterator   iter;
4967         void                    *spare;
4968         unsigned int            read;
4969 };
4970
4971 #ifdef CONFIG_TRACER_SNAPSHOT
4972 static int tracing_snapshot_open(struct inode *inode, struct file *file)
4973 {
4974         struct trace_array *tr = inode->i_private;
4975         struct trace_iterator *iter;
4976         struct seq_file *m;
4977         int ret = 0;
4978
4979         if (trace_array_get(tr) < 0)
4980                 return -ENODEV;
4981
4982         if (file->f_mode & FMODE_READ) {
4983                 iter = __tracing_open(inode, file, true);
4984                 if (IS_ERR(iter))
4985                         ret = PTR_ERR(iter);
4986         } else {
4987                 /* Writes still need the seq_file to hold the private data */
4988                 ret = -ENOMEM;
4989                 m = kzalloc(sizeof(*m), GFP_KERNEL);
4990                 if (!m)
4991                         goto out;
4992                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
4993                 if (!iter) {
4994                         kfree(m);
4995                         goto out;
4996                 }
4997                 ret = 0;
4998
4999                 iter->tr = tr;
5000                 iter->trace_buffer = &tr->max_buffer;
5001                 iter->cpu_file = tracing_get_cpu(inode);
5002                 m->private = iter;
5003                 file->private_data = m;
5004         }
5005 out:
5006         if (ret < 0)
5007                 trace_array_put(tr);
5008
5009         return ret;
5010 }
5011
5012 static ssize_t
5013 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
5014                        loff_t *ppos)
5015 {
5016         struct seq_file *m = filp->private_data;
5017         struct trace_iterator *iter = m->private;
5018         struct trace_array *tr = iter->tr;
5019         unsigned long val;
5020         int ret;
5021
5022         ret = tracing_update_buffers();
5023         if (ret < 0)
5024                 return ret;
5025
5026         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5027         if (ret)
5028                 return ret;
5029
5030         mutex_lock(&trace_types_lock);
5031
5032         if (tr->current_trace->use_max_tr) {
5033                 ret = -EBUSY;
5034                 goto out;
5035         }
5036
5037         switch (val) {
5038         case 0:
5039                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5040                         ret = -EINVAL;
5041                         break;
5042                 }
5043                 if (tr->allocated_snapshot)
5044                         free_snapshot(tr);
5045                 break;
5046         case 1:
5047 /* Only allow per-cpu swap if the ring buffer supports it */
5048 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
5049                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
5050                         ret = -EINVAL;
5051                         break;
5052                 }
5053 #endif
5054                 if (!tr->allocated_snapshot) {
5055                         ret = alloc_snapshot(tr);
5056                         if (ret < 0)
5057                                 break;
5058                 }
5059                 local_irq_disable();
5060                 /* Now, we're going to swap */
5061                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5062                         update_max_tr(tr, current, smp_processor_id());
5063                 else
5064                         update_max_tr_single(tr, current, iter->cpu_file);
5065                 local_irq_enable();
5066                 break;
5067         default:
5068                 if (tr->allocated_snapshot) {
5069                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
5070                                 tracing_reset_online_cpus(&tr->max_buffer);
5071                         else
5072                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
5073                 }
5074                 break;
5075         }
5076
5077         if (ret >= 0) {
5078                 *ppos += cnt;
5079                 ret = cnt;
5080         }
5081 out:
5082         mutex_unlock(&trace_types_lock);
5083         return ret;
5084 }
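/*
 * Illustrative usage sketch, mirroring the switch above: writing 1 allocates
 * the spare buffer if needed and swaps it with the live buffer, writing 0
 * frees the spare buffer (top-level, all-CPUs file only), and any other
 * value just clears the snapshot contents. The result is read like "trace".
 * Writes fail with -EBUSY while the current tracer itself uses max_tr.
 *
 *   # echo 1 > /sys/kernel/debug/tracing/snapshot
 *   # cat /sys/kernel/debug/tracing/snapshot
 *   # echo 0 > /sys/kernel/debug/tracing/snapshot
 */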
5085
5086 static int tracing_snapshot_release(struct inode *inode, struct file *file)
5087 {
5088         struct seq_file *m = file->private_data;
5089         int ret;
5090
5091         ret = tracing_release(inode, file);
5092
5093         if (file->f_mode & FMODE_READ)
5094                 return ret;
5095
5096         /* If write only, the seq_file is just a stub */
5097         if (m)
5098                 kfree(m->private);
5099         kfree(m);
5100
5101         return 0;
5102 }
5103
5104 static int tracing_buffers_open(struct inode *inode, struct file *filp);
5105 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
5106                                     size_t count, loff_t *ppos);
5107 static int tracing_buffers_release(struct inode *inode, struct file *file);
5108 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5109                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
5110
5111 static int snapshot_raw_open(struct inode *inode, struct file *filp)
5112 {
5113         struct ftrace_buffer_info *info;
5114         int ret;
5115
5116         ret = tracing_buffers_open(inode, filp);
5117         if (ret < 0)
5118                 return ret;
5119
5120         info = filp->private_data;
5121
5122         if (info->iter.trace->use_max_tr) {
5123                 tracing_buffers_release(inode, filp);
5124                 return -EBUSY;
5125         }
5126
5127         info->iter.snapshot = true;
5128         info->iter.trace_buffer = &info->iter.tr->max_buffer;
5129
5130         return ret;
5131 }
5132
5133 #endif /* CONFIG_TRACER_SNAPSHOT */
5134
5135
5136 static const struct file_operations tracing_max_lat_fops = {
5137         .open           = tracing_open_generic,
5138         .read           = tracing_max_lat_read,
5139         .write          = tracing_max_lat_write,
5140         .llseek         = generic_file_llseek,
5141 };
5142
5143 static const struct file_operations set_tracer_fops = {
5144         .open           = tracing_open_generic,
5145         .read           = tracing_set_trace_read,
5146         .write          = tracing_set_trace_write,
5147         .llseek         = generic_file_llseek,
5148 };
5149
5150 static const struct file_operations tracing_pipe_fops = {
5151         .open           = tracing_open_pipe,
5152         .poll           = tracing_poll_pipe,
5153         .read           = tracing_read_pipe,
5154         .splice_read    = tracing_splice_read_pipe,
5155         .release        = tracing_release_pipe,
5156         .llseek         = no_llseek,
5157 };
5158
5159 static const struct file_operations tracing_entries_fops = {
5160         .open           = tracing_open_generic_tr,
5161         .read           = tracing_entries_read,
5162         .write          = tracing_entries_write,
5163         .llseek         = generic_file_llseek,
5164         .release        = tracing_release_generic_tr,
5165 };
5166
5167 static const struct file_operations tracing_total_entries_fops = {
5168         .open           = tracing_open_generic_tr,
5169         .read           = tracing_total_entries_read,
5170         .llseek         = generic_file_llseek,
5171         .release        = tracing_release_generic_tr,
5172 };
5173
5174 static const struct file_operations tracing_free_buffer_fops = {
5175         .open           = tracing_open_generic_tr,
5176         .write          = tracing_free_buffer_write,
5177         .release        = tracing_free_buffer_release,
5178 };
5179
5180 static const struct file_operations tracing_mark_fops = {
5181         .open           = tracing_open_generic_tr,
5182         .write          = tracing_mark_write,
5183         .llseek         = generic_file_llseek,
5184         .release        = tracing_release_generic_tr,
5185 };
5186
5187 static const struct file_operations trace_clock_fops = {
5188         .open           = tracing_clock_open,
5189         .read           = seq_read,
5190         .llseek         = seq_lseek,
5191         .release        = tracing_single_release_tr,
5192         .write          = tracing_clock_write,
5193 };
5194
5195 #ifdef CONFIG_TRACER_SNAPSHOT
5196 static const struct file_operations snapshot_fops = {
5197         .open           = tracing_snapshot_open,
5198         .read           = seq_read,
5199         .write          = tracing_snapshot_write,
5200         .llseek         = tracing_lseek,
5201         .release        = tracing_snapshot_release,
5202 };
5203
5204 static const struct file_operations snapshot_raw_fops = {
5205         .open           = snapshot_raw_open,
5206         .read           = tracing_buffers_read,
5207         .release        = tracing_buffers_release,
5208         .splice_read    = tracing_buffers_splice_read,
5209         .llseek         = no_llseek,
5210 };
5211
5212 #endif /* CONFIG_TRACER_SNAPSHOT */
5213
5214 static int tracing_buffers_open(struct inode *inode, struct file *filp)
5215 {
5216         struct trace_array *tr = inode->i_private;
5217         struct ftrace_buffer_info *info;
5218         int ret;
5219
5220         if (tracing_disabled)
5221                 return -ENODEV;
5222
5223         if (trace_array_get(tr) < 0)
5224                 return -ENODEV;
5225
5226         info = kzalloc(sizeof(*info), GFP_KERNEL);
5227         if (!info) {
5228                 trace_array_put(tr);
5229                 return -ENOMEM;
5230         }
5231
5232         mutex_lock(&trace_types_lock);
5233
5234         info->iter.tr           = tr;
5235         info->iter.cpu_file     = tracing_get_cpu(inode);
5236         info->iter.trace        = tr->current_trace;
5237         info->iter.trace_buffer = &tr->trace_buffer;
5238         info->spare             = NULL;
5239         /* Force reading ring buffer for first read */
5240         info->read              = (unsigned int)-1;
5241
5242         filp->private_data = info;
5243
5244         mutex_unlock(&trace_types_lock);
5245
5246         ret = nonseekable_open(inode, filp);
5247         if (ret < 0)
5248                 trace_array_put(tr);
5249
5250         return ret;
5251 }
5252
5253 static unsigned int
5254 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
5255 {
5256         struct ftrace_buffer_info *info = filp->private_data;
5257         struct trace_iterator *iter = &info->iter;
5258
5259         return trace_poll(iter, filp, poll_table);
5260 }
5261
5262 static ssize_t
5263 tracing_buffers_read(struct file *filp, char __user *ubuf,
5264                      size_t count, loff_t *ppos)
5265 {
5266         struct ftrace_buffer_info *info = filp->private_data;
5267         struct trace_iterator *iter = &info->iter;
5268         ssize_t ret;
5269         ssize_t size;
5270
5271         if (!count)
5272                 return 0;
5273
5274         mutex_lock(&trace_types_lock);
5275
5276 #ifdef CONFIG_TRACER_MAX_TRACE
5277         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5278                 size = -EBUSY;
5279                 goto out_unlock;
5280         }
5281 #endif
5282
5283         if (!info->spare)
5284                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
5285                                                           iter->cpu_file);
5286         size = -ENOMEM;
5287         if (!info->spare)
5288                 goto out_unlock;
5289
5290         /* Do we have previous read data to read? */
5291         if (info->read < PAGE_SIZE)
5292                 goto read;
5293
5294  again:
5295         trace_access_lock(iter->cpu_file);
5296         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
5297                                     &info->spare,
5298                                     count,
5299                                     iter->cpu_file, 0);
5300         trace_access_unlock(iter->cpu_file);
5301
5302         if (ret < 0) {
5303                 if (trace_empty(iter)) {
5304                         if ((filp->f_flags & O_NONBLOCK)) {
5305                                 size = -EAGAIN;
5306                                 goto out_unlock;
5307                         }
5308                         mutex_unlock(&trace_types_lock);
5309                         ret = wait_on_pipe(iter);
5310                         mutex_lock(&trace_types_lock);
5311                         if (ret) {
5312                                 size = ret;
5313                                 goto out_unlock;
5314                         }
5315                         if (signal_pending(current)) {
5316                                 size = -EINTR;
5317                                 goto out_unlock;
5318                         }
5319                         goto again;
5320                 }
5321                 size = 0;
5322                 goto out_unlock;
5323         }
5324
5325         info->read = 0;
5326  read:
5327         size = PAGE_SIZE - info->read;
5328         if (size > count)
5329                 size = count;
5330
5331         ret = copy_to_user(ubuf, info->spare + info->read, size);
5332         if (ret == size) {
5333                 size = -EFAULT;
5334                 goto out_unlock;
5335         }
5336         size -= ret;
5337
5338         *ppos += size;
5339         info->read += size;
5340
5341  out_unlock:
5342         mutex_unlock(&trace_types_lock);
5343
5344         return size;
5345 }
5346
5347 static int tracing_buffers_release(struct inode *inode, struct file *file)
5348 {
5349         struct ftrace_buffer_info *info = file->private_data;
5350         struct trace_iterator *iter = &info->iter;
5351
5352         mutex_lock(&trace_types_lock);
5353
5354         __trace_array_put(iter->tr);
5355
5356         if (info->spare)
5357                 ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
5358         kfree(info);
5359
5360         mutex_unlock(&trace_types_lock);
5361
5362         return 0;
5363 }
5364
5365 struct buffer_ref {
5366         struct ring_buffer      *buffer;
5367         void                    *page;
5368         int                     ref;
5369 };
5370
5371 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
5372                                     struct pipe_buffer *buf)
5373 {
5374         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5375
5376         if (--ref->ref)
5377                 return;
5378
5379         ring_buffer_free_read_page(ref->buffer, ref->page);
5380         kfree(ref);
5381         buf->private = 0;
5382 }
5383
5384 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
5385                                 struct pipe_buffer *buf)
5386 {
5387         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
5388
5389         ref->ref++;
5390 }
5391
5392 /* Pipe buffer operations for a buffer. */
5393 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
5394         .can_merge              = 0,
5395         .confirm                = generic_pipe_buf_confirm,
5396         .release                = buffer_pipe_buf_release,
5397         .steal                  = generic_pipe_buf_steal,
5398         .get                    = buffer_pipe_buf_get,
5399 };
5400
5401 /*
5402  * Callback from splice_to_pipe(), if we need to release some pages
5403  * at the end of the spd in case we errored out while filling the pipe.
5404  */
5405 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
5406 {
5407         struct buffer_ref *ref =
5408                 (struct buffer_ref *)spd->partial[i].private;
5409
5410         if (--ref->ref)
5411                 return;
5412
5413         ring_buffer_free_read_page(ref->buffer, ref->page);
5414         kfree(ref);
5415         spd->partial[i].private = 0;
5416 }
5417
5418 static ssize_t
5419 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
5420                             struct pipe_inode_info *pipe, size_t len,
5421                             unsigned int flags)
5422 {
5423         struct ftrace_buffer_info *info = file->private_data;
5424         struct trace_iterator *iter = &info->iter;
5425         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5426         struct page *pages_def[PIPE_DEF_BUFFERS];
5427         struct splice_pipe_desc spd = {
5428                 .pages          = pages_def,
5429                 .partial        = partial_def,
5430                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5431                 .flags          = flags,
5432                 .ops            = &buffer_pipe_buf_ops,
5433                 .spd_release    = buffer_spd_release,
5434         };
5435         struct buffer_ref *ref;
5436         int entries, size, i;
5437         ssize_t ret;
5438
5439         mutex_lock(&trace_types_lock);
5440
5441 #ifdef CONFIG_TRACER_MAX_TRACE
5442         if (iter->snapshot && iter->tr->current_trace->use_max_tr) {
5443                 ret = -EBUSY;
5444                 goto out;
5445         }
5446 #endif
5447
5448         if (splice_grow_spd(pipe, &spd)) {
5449                 ret = -ENOMEM;
5450                 goto out;
5451         }
5452
5453         if (*ppos & (PAGE_SIZE - 1)) {
5454                 ret = -EINVAL;
5455                 goto out;
5456         }
5457
5458         if (len & (PAGE_SIZE - 1)) {
5459                 if (len < PAGE_SIZE) {
5460                         ret = -EINVAL;
5461                         goto out;
5462                 }
5463                 len &= PAGE_MASK;
5464         }
5465
5466  again:
5467         trace_access_lock(iter->cpu_file);
5468         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5469
5470         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
5471                 struct page *page;
5472                 int r;
5473
5474                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
5475                 if (!ref)
5476                         break;
5477
5478                 ref->ref = 1;
5479                 ref->buffer = iter->trace_buffer->buffer;
5480                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
5481                 if (!ref->page) {
5482                         kfree(ref);
5483                         break;
5484                 }
5485
5486                 r = ring_buffer_read_page(ref->buffer, &ref->page,
5487                                           len, iter->cpu_file, 1);
5488                 if (r < 0) {
5489                         ring_buffer_free_read_page(ref->buffer, ref->page);
5490                         kfree(ref);
5491                         break;
5492                 }
5493
5494                 /*
5495                  * Zero out any leftover data, since this page is
5496                  * headed to userspace.
5497                  */
5498                 size = ring_buffer_page_len(ref->page);
5499                 if (size < PAGE_SIZE)
5500                         memset(ref->page + size, 0, PAGE_SIZE - size);
5501
5502                 page = virt_to_page(ref->page);
5503
5504                 spd.pages[i] = page;
5505                 spd.partial[i].len = PAGE_SIZE;
5506                 spd.partial[i].offset = 0;
5507                 spd.partial[i].private = (unsigned long)ref;
5508                 spd.nr_pages++;
5509                 *ppos += PAGE_SIZE;
5510
5511                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
5512         }
5513
5514         trace_access_unlock(iter->cpu_file);
5515         spd.nr_pages = i;
5516
5517         /* did we read anything? */
5518         if (!spd.nr_pages) {
5519                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) {
5520                         ret = -EAGAIN;
5521                         goto out;
5522                 }
5523                 mutex_unlock(&trace_types_lock);
5524                 ret = wait_on_pipe(iter);
5525                 mutex_lock(&trace_types_lock);
5526                 if (ret)
5527                         goto out;
5528                 if (signal_pending(current)) {
5529                         ret = -EINTR;
5530                         goto out;
5531                 }
5532                 goto again;
5533         }
5534
5535         ret = splice_to_pipe(pipe, &spd);
5536         splice_shrink_spd(&spd);
5537 out:
5538         mutex_unlock(&trace_types_lock);
5539
5540         return ret;
5541 }
5542
5543 static const struct file_operations tracing_buffers_fops = {
5544         .open           = tracing_buffers_open,
5545         .read           = tracing_buffers_read,
5546         .poll           = tracing_buffers_poll,
5547         .release        = tracing_buffers_release,
5548         .splice_read    = tracing_buffers_splice_read,
5549         .llseek         = no_llseek,
5550 };
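/*
 * Illustrative usage sketch: per_cpu/cpuN/trace_pipe_raw hands out raw
 * ring-buffer pages. read() returns at most one page per call, and splice()
 * (as used by tools such as trace-cmd) requires a page-aligned offset and at
 * least PAGE_SIZE of data, per the checks in tracing_buffers_splice_read().
 *
 *   # dd if=/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw \
 *        of=/tmp/cpu0.raw bs=4096 count=16
 */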
5551
5552 static ssize_t
5553 tracing_stats_read(struct file *filp, char __user *ubuf,
5554                    size_t count, loff_t *ppos)
5555 {
5556         struct inode *inode = file_inode(filp);
5557         struct trace_array *tr = inode->i_private;
5558         struct trace_buffer *trace_buf = &tr->trace_buffer;
5559         int cpu = tracing_get_cpu(inode);
5560         struct trace_seq *s;
5561         unsigned long cnt;
5562         unsigned long long t;
5563         unsigned long usec_rem;
5564
5565         s = kmalloc(sizeof(*s), GFP_KERNEL);
5566         if (!s)
5567                 return -ENOMEM;
5568
5569         trace_seq_init(s);
5570
5571         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
5572         trace_seq_printf(s, "entries: %ld\n", cnt);
5573
5574         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
5575         trace_seq_printf(s, "overrun: %ld\n", cnt);
5576
5577         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
5578         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
5579
5580         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
5581         trace_seq_printf(s, "bytes: %ld\n", cnt);
5582
5583         if (trace_clocks[tr->clock_id].in_ns) {
5584                 /* local or global for trace_clock */
5585                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5586                 usec_rem = do_div(t, USEC_PER_SEC);
5587                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
5588                                                                 t, usec_rem);
5589
5590                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
5591                 usec_rem = do_div(t, USEC_PER_SEC);
5592                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
5593         } else {
5594                 /* counter or tsc mode for trace_clock */
5595                 trace_seq_printf(s, "oldest event ts: %llu\n",
5596                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
5597
5598                 trace_seq_printf(s, "now ts: %llu\n",
5599                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
5600         }
5601
5602         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
5603         trace_seq_printf(s, "dropped events: %ld\n", cnt);
5604
5605         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
5606         trace_seq_printf(s, "read events: %ld\n", cnt);
5607
5608         count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
5609
5610         kfree(s);
5611
5612         return count;
5613 }
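/*
 * Illustrative output of per_cpu/cpuN/stats (numbers are examples only);
 * the fields correspond one to one to the trace_seq_printf() calls above:
 *
 *   # cat /sys/kernel/debug/tracing/per_cpu/cpu0/stats
 *   entries: 129
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 6724
 *   oldest event ts:  2725.441411
 *   now ts:  2726.018914
 *   dropped events: 0
 *   read events: 129
 */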
5614
5615 static const struct file_operations tracing_stats_fops = {
5616         .open           = tracing_open_generic_tr,
5617         .read           = tracing_stats_read,
5618         .llseek         = generic_file_llseek,
5619         .release        = tracing_release_generic_tr,
5620 };
5621
5622 #ifdef CONFIG_DYNAMIC_FTRACE
5623
5624 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
5625 {
5626         return 0;
5627 }
5628
5629 static ssize_t
5630 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
5631                   size_t cnt, loff_t *ppos)
5632 {
5633         static char ftrace_dyn_info_buffer[1024];
5634         static DEFINE_MUTEX(dyn_info_mutex);
5635         unsigned long *p = filp->private_data;
5636         char *buf = ftrace_dyn_info_buffer;
5637         int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
5638         int r;
5639
5640         mutex_lock(&dyn_info_mutex);
5641         r = sprintf(buf, "%ld ", *p);
5642
5643         r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
5644         buf[r++] = '\n';
5645
5646         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5647
5648         mutex_unlock(&dyn_info_mutex);
5649
5650         return r;
5651 }
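/*
 * Illustrative usage sketch (output is an example): dyn_ftrace_total_info
 * prints the count passed in via its private data, followed by any
 * arch-specific detail from ftrace_arch_read_dyn_info().
 *
 *   # cat /sys/kernel/debug/tracing/dyn_ftrace_total_info
 *   36625
 */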
5652
5653 static const struct file_operations tracing_dyn_info_fops = {
5654         .open           = tracing_open_generic,
5655         .read           = tracing_read_dyn_info,
5656         .llseek         = generic_file_llseek,
5657 };
5658 #endif /* CONFIG_DYNAMIC_FTRACE */
5659
5660 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
5661 static void
5662 ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5663 {
5664         tracing_snapshot();
5665 }
5666
5667 static void
5668 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
5669 {
5670         unsigned long *count = (unsigned long *)data;
5671
5672         if (!*count)
5673                 return;
5674
5675         if (*count != -1)
5676                 (*count)--;
5677
5678         tracing_snapshot();
5679 }
5680
5681 static int
5682 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
5683                       struct ftrace_probe_ops *ops, void *data)
5684 {
5685         long count = (long)data;
5686
5687         seq_printf(m, "%ps:", (void *)ip);
5688
5689         seq_printf(m, "snapshot");
5690
5691         if (count == -1)
5692                 seq_printf(m, ":unlimited\n");
5693         else
5694                 seq_printf(m, ":count=%ld\n", count);
5695
5696         return 0;
5697 }
5698
5699 static struct ftrace_probe_ops snapshot_probe_ops = {
5700         .func                   = ftrace_snapshot,
5701         .print                  = ftrace_snapshot_print,
5702 };
5703
5704 static struct ftrace_probe_ops snapshot_count_probe_ops = {
5705         .func                   = ftrace_count_snapshot,
5706         .print                  = ftrace_snapshot_print,
5707 };
5708
5709 static int
5710 ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
5711                                char *glob, char *cmd, char *param, int enable)
5712 {
5713         struct ftrace_probe_ops *ops;
5714         void *count = (void *)-1;
5715         char *number;
5716         int ret;
5717
5718         /* hash funcs only work with set_ftrace_filter */
5719         if (!enable)
5720                 return -EINVAL;
5721
5722         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
5723
5724         if (glob[0] == '!') {
5725                 unregister_ftrace_function_probe_func(glob+1, ops);
5726                 return 0;
5727         }
5728
5729         if (!param)
5730                 goto out_reg;
5731
5732         number = strsep(&param, ":");
5733
5734         if (!strlen(number))
5735                 goto out_reg;
5736
5737         /*
5738          * We use the callback data field (which is a pointer)
5739          * as our counter.
5740          */
5741         ret = kstrtoul(number, 0, (unsigned long *)&count);
5742         if (ret)
5743                 return ret;
5744
5745  out_reg:
5746         ret = register_ftrace_function_probe(glob, ops, count);
5747
5748         if (ret >= 0)
5749                 alloc_snapshot(&global_trace);
5750
5751         return ret < 0 ? ret : 0;
5752 }
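/*
 * Illustrative usage sketch of the "snapshot" function command registered
 * below (only usable through set_ftrace_filter): take a snapshot every time
 * the named function is hit, optionally limited to a count, and remove the
 * probe again with a leading '!'. The function name is an example.
 *
 *   # echo 'do_page_fault:snapshot' > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo 'do_page_fault:snapshot:5' > /sys/kernel/debug/tracing/set_ftrace_filter
 *   # echo '!do_page_fault:snapshot' > /sys/kernel/debug/tracing/set_ftrace_filter
 */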
5753
5754 static struct ftrace_func_command ftrace_snapshot_cmd = {
5755         .name                   = "snapshot",
5756         .func                   = ftrace_trace_snapshot_callback,
5757 };
5758
5759 static __init int register_snapshot_cmd(void)
5760 {
5761         return register_ftrace_command(&ftrace_snapshot_cmd);
5762 }
5763 #else
5764 static inline __init int register_snapshot_cmd(void) { return 0; }
5765 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
5766
5767 struct dentry *tracing_init_dentry_tr(struct trace_array *tr)
5768 {
5769         if (tr->dir)
5770                 return tr->dir;
5771
5772         if (!debugfs_initialized())
5773                 return NULL;
5774
5775         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
5776                 tr->dir = debugfs_create_dir("tracing", NULL);
5777
5778         if (!tr->dir)
5779                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
5780
5781         return tr->dir;
5782 }
5783
5784 struct dentry *tracing_init_dentry(void)
5785 {
5786         return tracing_init_dentry_tr(&global_trace);
5787 }
5788
5789 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
5790 {
5791         struct dentry *d_tracer;
5792
5793         if (tr->percpu_dir)
5794                 return tr->percpu_dir;
5795
5796         d_tracer = tracing_init_dentry_tr(tr);
5797         if (!d_tracer)
5798                 return NULL;
5799
5800         tr->percpu_dir = debugfs_create_dir("per_cpu", d_tracer);
5801
5802         WARN_ONCE(!tr->percpu_dir,
5803                   "Could not create debugfs directory 'per_cpu/%d'\n", cpu);
5804
5805         return tr->percpu_dir;
5806 }
5807
5808 static struct dentry *
5809 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
5810                       void *data, long cpu, const struct file_operations *fops)
5811 {
5812         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
5813
5814         if (ret) /* See tracing_get_cpu() */
5815                 ret->d_inode->i_cdev = (void *)(cpu + 1);
5816         return ret;
5817 }
5818
5819 static void
5820 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
5821 {
5822         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
5823         struct dentry *d_cpu;
5824         char cpu_dir[30]; /* 30 characters should be more than enough */
5825
5826         if (!d_percpu)
5827                 return;
5828
5829         snprintf(cpu_dir, 30, "cpu%ld", cpu);
5830         d_cpu = debugfs_create_dir(cpu_dir, d_percpu);
5831         if (!d_cpu) {
5832                 pr_warning("Could not create debugfs '%s' entry\n", cpu_dir);
5833                 return;
5834         }
5835
5836         /* per cpu trace_pipe */
5837         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
5838                                 tr, cpu, &tracing_pipe_fops);
5839
5840         /* per cpu trace */
5841         trace_create_cpu_file("trace", 0644, d_cpu,
5842                                 tr, cpu, &tracing_fops);
5843
5844         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
5845                                 tr, cpu, &tracing_buffers_fops);
5846
5847         trace_create_cpu_file("stats", 0444, d_cpu,
5848                                 tr, cpu, &tracing_stats_fops);
5849
5850         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
5851                                 tr, cpu, &tracing_entries_fops);
5852
5853 #ifdef CONFIG_TRACER_SNAPSHOT
5854         trace_create_cpu_file("snapshot", 0644, d_cpu,
5855                                 tr, cpu, &snapshot_fops);
5856
5857         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
5858                                 tr, cpu, &snapshot_raw_fops);
5859 #endif
5860 }
5861
5862 #ifdef CONFIG_FTRACE_SELFTEST
5863 /* Let selftest have access to static functions in this file */
5864 #include "trace_selftest.c"
5865 #endif
5866
5867 struct trace_option_dentry {
5868         struct tracer_opt               *opt;
5869         struct tracer_flags             *flags;
5870         struct trace_array              *tr;
5871         struct dentry                   *entry;
5872 };
5873
5874 static ssize_t
5875 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
5876                         loff_t *ppos)
5877 {
5878         struct trace_option_dentry *topt = filp->private_data;
5879         char *buf;
5880
5881         if (topt->flags->val & topt->opt->bit)
5882                 buf = "1\n";
5883         else
5884                 buf = "0\n";
5885
5886         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5887 }
5888
5889 static ssize_t
5890 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
5891                          loff_t *ppos)
5892 {
5893         struct trace_option_dentry *topt = filp->private_data;
5894         unsigned long val;
5895         int ret;
5896
5897         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5898         if (ret)
5899                 return ret;
5900
5901         if (val != 0 && val != 1)
5902                 return -EINVAL;
5903
5904         if (!!(topt->flags->val & topt->opt->bit) != val) {
5905                 mutex_lock(&trace_types_lock);
5906                 ret = __set_tracer_option(topt->tr, topt->flags,
5907                                           topt->opt, !val);
5908                 mutex_unlock(&trace_types_lock);
5909                 if (ret)
5910                         return ret;
5911         }
5912
5913         *ppos += cnt;
5914
5915         return cnt;
5916 }
5917
5918
5919 static const struct file_operations trace_options_fops = {
5920         .open = tracing_open_generic,
5921         .read = trace_options_read,
5922         .write = trace_options_write,
5923         .llseek = generic_file_llseek,
5924 };
5925
5926 static ssize_t
5927 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
5928                         loff_t *ppos)
5929 {
5930         long index = (long)filp->private_data;
5931         char *buf;
5932
5933         if (trace_flags & (1 << index))
5934                 buf = "1\n";
5935         else
5936                 buf = "0\n";
5937
5938         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
5939 }
5940
5941 static ssize_t
5942 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
5943                          loff_t *ppos)
5944 {
5945         struct trace_array *tr = &global_trace;
5946         long index = (long)filp->private_data;
5947         unsigned long val;
5948         int ret;
5949
5950         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5951         if (ret)
5952                 return ret;
5953
5954         if (val != 0 && val != 1)
5955                 return -EINVAL;
5956
5957         mutex_lock(&trace_types_lock);
5958         ret = set_tracer_flag(tr, 1 << index, val);
5959         mutex_unlock(&trace_types_lock);
5960
5961         if (ret < 0)
5962                 return ret;
5963
5964         *ppos += cnt;
5965
5966         return cnt;
5967 }
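/*
 * Illustrative usage sketch: each core trace option also appears as a file
 * under options/, accepting a bare 0 or 1 (the option name below is one
 * example of a core option):
 *
 *   # echo 1 > /sys/kernel/debug/tracing/options/sym-offset
 *   # echo 0 > /sys/kernel/debug/tracing/options/sym-offset
 */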
5968
5969 static const struct file_operations trace_options_core_fops = {
5970         .open = tracing_open_generic,
5971         .read = trace_options_core_read,
5972         .write = trace_options_core_write,
5973         .llseek = generic_file_llseek,
5974 };
5975
5976 struct dentry *trace_create_file(const char *name,
5977                                  umode_t mode,
5978                                  struct dentry *parent,
5979                                  void *data,
5980                                  const struct file_operations *fops)
5981 {
5982         struct dentry *ret;
5983
5984         ret = debugfs_create_file(name, mode, parent, data, fops);
5985         if (!ret)
5986                 pr_warning("Could not create debugfs '%s' entry\n", name);
5987
5988         return ret;
5989 }
5990
5991
5992 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
5993 {
5994         struct dentry *d_tracer;
5995
5996         if (tr->options)
5997                 return tr->options;
5998
5999         d_tracer = tracing_init_dentry_tr(tr);
6000         if (!d_tracer)
6001                 return NULL;
6002
6003         tr->options = debugfs_create_dir("options", d_tracer);
6004         if (!tr->options) {
6005                 pr_warning("Could not create debugfs directory 'options'\n");
6006                 return NULL;
6007         }
6008
6009         return tr->options;
6010 }
6011
6012 static void
6013 create_trace_option_file(struct trace_array *tr,
6014                          struct trace_option_dentry *topt,
6015                          struct tracer_flags *flags,
6016                          struct tracer_opt *opt)
6017 {
6018         struct dentry *t_options;
6019
6020         t_options = trace_options_init_dentry(tr);
6021         if (!t_options)
6022                 return;
6023
6024         topt->flags = flags;
6025         topt->opt = opt;
6026         topt->tr = tr;
6027
6028         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
6029                                     &trace_options_fops);
6030
6031 }
6032
6033 static struct trace_option_dentry *
6034 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
6035 {
6036         struct trace_option_dentry *topts;
6037         struct tracer_flags *flags;
6038         struct tracer_opt *opts;
6039         int cnt;
6040
6041         if (!tracer)
6042                 return NULL;
6043
6044         flags = tracer->flags;
6045
6046         if (!flags || !flags->opts)
6047                 return NULL;
6048
6049         opts = flags->opts;
6050
6051         for (cnt = 0; opts[cnt].name; cnt++)
6052                 ;
6053
6054         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
6055         if (!topts)
6056                 return NULL;
6057
6058         for (cnt = 0; opts[cnt].name; cnt++)
6059                 create_trace_option_file(tr, &topts[cnt], flags,
6060                                          &opts[cnt]);
6061
6062         return topts;
6063 }
6064
6065 static void
6066 destroy_trace_option_files(struct trace_option_dentry *topts)
6067 {
6068         int cnt;
6069
6070         if (!topts)
6071                 return;
6072
6073         for (cnt = 0; topts[cnt].opt; cnt++)
6074                 debugfs_remove(topts[cnt].entry);
6075
6076         kfree(topts);
6077 }
6078
6079 static struct dentry *
6080 create_trace_option_core_file(struct trace_array *tr,
6081                               const char *option, long index)
6082 {
6083         struct dentry *t_options;
6084
6085         t_options = trace_options_init_dentry(tr);
6086         if (!t_options)
6087                 return NULL;
6088
6089         return trace_create_file(option, 0644, t_options, (void *)index,
6090                                     &trace_options_core_fops);
6091 }
6092
6093 static __init void create_trace_options_dir(struct trace_array *tr)
6094 {
6095         struct dentry *t_options;
6096         int i;
6097
6098         t_options = trace_options_init_dentry(tr);
6099         if (!t_options)
6100                 return;
6101
6102         for (i = 0; trace_options[i]; i++)
6103                 create_trace_option_core_file(tr, trace_options[i], i);
6104 }
6105
6106 static ssize_t
6107 rb_simple_read(struct file *filp, char __user *ubuf,
6108                size_t cnt, loff_t *ppos)
6109 {
6110         struct trace_array *tr = filp->private_data;
6111         char buf[64];
6112         int r;
6113
6114         r = tracer_tracing_is_on(tr);
6115         r = sprintf(buf, "%d\n", r);
6116
6117         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6118 }
6119
6120 static ssize_t
6121 rb_simple_write(struct file *filp, const char __user *ubuf,
6122                 size_t cnt, loff_t *ppos)
6123 {
6124         struct trace_array *tr = filp->private_data;
6125         struct ring_buffer *buffer = tr->trace_buffer.buffer;
6126         unsigned long val;
6127         int ret;
6128
6129         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6130         if (ret)
6131                 return ret;
6132
6133         if (buffer) {
6134                 mutex_lock(&trace_types_lock);
6135                 if (val) {
6136                         tracer_tracing_on(tr);
6137                         if (tr->current_trace->start)
6138                                 tr->current_trace->start(tr);
6139                 } else {
6140                         tracer_tracing_off(tr);
6141                         if (tr->current_trace->stop)
6142                                 tr->current_trace->stop(tr);
6143                 }
6144                 mutex_unlock(&trace_types_lock);
6145         }
6146
6147         (*ppos)++;
6148
6149         return cnt;
6150 }
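/*
 * Illustrative usage sketch: tracing_on flips recording into the ring buffer
 * without tearing down the current tracer (its start/stop callbacks are
 * invoked above if it has them):
 *
 *   # echo 0 > /sys/kernel/debug/tracing/tracing_on
 *   # cat /sys/kernel/debug/tracing/tracing_on
 *   0
 *   # echo 1 > /sys/kernel/debug/tracing/tracing_on
 */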
6151
6152 static const struct file_operations rb_simple_fops = {
6153         .open           = tracing_open_generic_tr,
6154         .read           = rb_simple_read,
6155         .write          = rb_simple_write,
6156         .release        = tracing_release_generic_tr,
6157         .llseek         = default_llseek,
6158 };
6159
6160 struct dentry *trace_instance_dir;
6161
6162 static void
6163 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
6164
6165 static int
6166 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
6167 {
6168         enum ring_buffer_flags rb_flags;
6169
6170         rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
6171
6172         buf->tr = tr;
6173
6174         buf->buffer = ring_buffer_alloc(size, rb_flags);
6175         if (!buf->buffer)
6176                 return -ENOMEM;
6177
6178         buf->data = alloc_percpu(struct trace_array_cpu);
6179         if (!buf->data) {
6180                 ring_buffer_free(buf->buffer);
6181                 return -ENOMEM;
6182         }
6183
6184         /* Allocate the first page for all buffers */
6185         set_buffer_entries(buf,
6186                            ring_buffer_size(buf->buffer, 0));
6187
6188         return 0;
6189 }
6190
6191 static int allocate_trace_buffers(struct trace_array *tr, int size)
6192 {
6193         int ret;
6194
6195         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
6196         if (ret)
6197                 return ret;
6198
6199 #ifdef CONFIG_TRACER_MAX_TRACE
6200         ret = allocate_trace_buffer(tr, &tr->max_buffer,
6201                                     allocate_snapshot ? size : 1);
6202         if (WARN_ON(ret)) {
6203                 ring_buffer_free(tr->trace_buffer.buffer);
6204                 free_percpu(tr->trace_buffer.data);
6205                 return -ENOMEM;
6206         }
6207         tr->allocated_snapshot = allocate_snapshot;
6208
6209         /*
6210          * Only the top level trace array gets its snapshot allocated
6211          * from the kernel command line.
6212          */
6213         allocate_snapshot = false;
6214 #endif
6215         return 0;
6216 }
6217
6218 static void free_trace_buffer(struct trace_buffer *buf)
6219 {
6220         if (buf->buffer) {
6221                 ring_buffer_free(buf->buffer);
6222                 buf->buffer = NULL;
6223                 free_percpu(buf->data);
6224                 buf->data = NULL;
6225         }
6226 }
6227
6228 static void free_trace_buffers(struct trace_array *tr)
6229 {
6230         if (!tr)
6231                 return;
6232
6233         free_trace_buffer(&tr->trace_buffer);
6234
6235 #ifdef CONFIG_TRACER_MAX_TRACE
6236         free_trace_buffer(&tr->max_buffer);
6237 #endif
6238 }
6239
6240 static int new_instance_create(const char *name)
6241 {
6242         struct trace_array *tr;
6243         int ret;
6244
6245         mutex_lock(&trace_types_lock);
6246
6247         ret = -EEXIST;
6248         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6249                 if (tr->name && strcmp(tr->name, name) == 0)
6250                         goto out_unlock;
6251         }
6252
6253         ret = -ENOMEM;
6254         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
6255         if (!tr)
6256                 goto out_unlock;
6257
6258         tr->name = kstrdup(name, GFP_KERNEL);
6259         if (!tr->name)
6260                 goto out_free_tr;
6261
6262         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
6263                 goto out_free_tr;
6264
6265         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
6266
6267         raw_spin_lock_init(&tr->start_lock);
6268
6269         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6270
6271         tr->current_trace = &nop_trace;
6272
6273         INIT_LIST_HEAD(&tr->systems);
6274         INIT_LIST_HEAD(&tr->events);
6275
6276         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
6277                 goto out_free_tr;
6278
6279         tr->dir = debugfs_create_dir(name, trace_instance_dir);
6280         if (!tr->dir)
6281                 goto out_free_tr;
6282
6283         ret = event_trace_add_tracer(tr->dir, tr);
6284         if (ret) {
6285                 debugfs_remove_recursive(tr->dir);
6286                 goto out_free_tr;
6287         }
6288
6289         init_tracer_debugfs(tr, tr->dir);
6290
6291         list_add(&tr->list, &ftrace_trace_arrays);
6292
6293         mutex_unlock(&trace_types_lock);
6294
6295         return 0;
6296
6297  out_free_tr:
6298         free_trace_buffers(tr);
6299         free_cpumask_var(tr->tracing_cpumask);
6300         kfree(tr->name);
6301         kfree(tr);
6302
6303  out_unlock:
6304         mutex_unlock(&trace_types_lock);
6305
6306         return ret;
6307
6308 }
6309
6310 static int instance_delete(const char *name)
6311 {
6312         struct trace_array *tr;
6313         int found = 0;
6314         int ret;
6315
6316         mutex_lock(&trace_types_lock);
6317
6318         ret = -ENODEV;
6319         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
6320                 if (tr->name && strcmp(tr->name, name) == 0) {
6321                         found = 1;
6322                         break;
6323                 }
6324         }
6325         if (!found)
6326                 goto out_unlock;
6327
6328         ret = -EBUSY;
6329         if (tr->ref)
6330                 goto out_unlock;
6331
6332         list_del(&tr->list);
6333
6334         tracing_set_nop(tr);
6335         event_trace_del_tracer(tr);
6336         ftrace_destroy_function_files(tr);
6337         debugfs_remove_recursive(tr->dir);
6338         free_trace_buffers(tr);
6339
6340         kfree(tr->name);
6341         kfree(tr);
6342
6343         ret = 0;
6344
6345  out_unlock:
6346         mutex_unlock(&trace_types_lock);
6347
6348         return ret;
6349 }
6350
6351 static int instance_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
6352 {
6353         struct dentry *parent;
6354         int ret;
6355
6356         /* Paranoid: Make sure the parent is the "instances" directory */
6357         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6358         if (WARN_ON_ONCE(parent != trace_instance_dir))
6359                 return -ENOENT;
6360
6361         /*
6362          * The inode mutex is locked, but debugfs_create_dir() will also
6363          * take the mutex. As the instances directory can not be destroyed
6364          * or changed in any other way, it is safe to unlock it, and
6365          * let the dentry try. If two users try to make the same dir at
6366          * the same time, then the new_instance_create() will determine the
6367          * winner.
6368          */
6369         mutex_unlock(&inode->i_mutex);
6370
6371         ret = new_instance_create(dentry->d_iname);
6372
6373         mutex_lock(&inode->i_mutex);
6374
6375         return ret;
6376 }
6377
6378 static int instance_rmdir(struct inode *inode, struct dentry *dentry)
6379 {
6380         struct dentry *parent;
6381         int ret;
6382
6383         /* Paranoid: Make sure the parent is the "instances" directory */
6384         parent = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
6385         if (WARN_ON_ONCE(parent != trace_instance_dir))
6386                 return -ENOENT;
6387
6388         /* The caller did a dget() on dentry */
6389         mutex_unlock(&dentry->d_inode->i_mutex);
6390
6391         /*
6392          * The inode mutex is locked, but debugfs_remove_recursive() will also
6393          * take the mutex. As the instances directory can not be destroyed
6394          * or changed in any other way, it is safe to unlock it, and
6395          * let the dentry try. If two users try to delete the same dir at
6396          * the same time, then the instance_delete() will determine the
6397          * winner.
6398          */
6399         mutex_unlock(&inode->i_mutex);
6400
6401         ret = instance_delete(dentry->d_iname);
6402
6403         mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
6404         mutex_lock(&dentry->d_inode->i_mutex);
6405
6406         return ret;
6407 }
6408
6409 static const struct inode_operations instance_dir_inode_operations = {
6410         .lookup         = simple_lookup,
6411         .mkdir          = instance_mkdir,
6412         .rmdir          = instance_rmdir,
6413 };
6414
6415 static __init void create_trace_instances(struct dentry *d_tracer)
6416 {
6417         trace_instance_dir = debugfs_create_dir("instances", d_tracer);
6418         if (WARN_ON(!trace_instance_dir))
6419                 return;
6420
6421         /* Hijack the dir inode operations, to allow mkdir and rmdir */
6422         trace_instance_dir->d_inode->i_op = &instance_dir_inode_operations;
6423 }
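/*
 * Illustrative usage sketch: because the instances directory's inode
 * operations are replaced above, a plain mkdir/rmdir from userspace creates
 * or tears down a complete trace_array with its own buffers, events and
 * debugfs files (rmdir fails with -EBUSY while the instance is referenced):
 *
 *   # mkdir /sys/kernel/debug/tracing/instances/foo
 *   # echo 1 > /sys/kernel/debug/tracing/instances/foo/tracing_on
 *   # rmdir /sys/kernel/debug/tracing/instances/foo
 */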
6424
6425 static void
6426 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer)
6427 {
6428         int cpu;
6429
6430         trace_create_file("available_tracers", 0444, d_tracer,
6431                         tr, &show_traces_fops);
6432
6433         trace_create_file("current_tracer", 0644, d_tracer,
6434                         tr, &set_tracer_fops);
6435
6436         trace_create_file("tracing_cpumask", 0644, d_tracer,
6437                           tr, &tracing_cpumask_fops);
6438
6439         trace_create_file("trace_options", 0644, d_tracer,
6440                           tr, &tracing_iter_fops);
6441
6442         trace_create_file("trace", 0644, d_tracer,
6443                           tr, &tracing_fops);
6444
6445         trace_create_file("trace_pipe", 0444, d_tracer,
6446                           tr, &tracing_pipe_fops);
6447
6448         trace_create_file("buffer_size_kb", 0644, d_tracer,
6449                           tr, &tracing_entries_fops);
6450
6451         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
6452                           tr, &tracing_total_entries_fops);
6453
6454         trace_create_file("free_buffer", 0200, d_tracer,
6455                           tr, &tracing_free_buffer_fops);
6456
6457         trace_create_file("trace_marker", 0220, d_tracer,
6458                           tr, &tracing_mark_fops);
6459
6460         trace_create_file("trace_clock", 0644, d_tracer, tr,
6461                           &trace_clock_fops);
6462
6463         trace_create_file("tracing_on", 0644, d_tracer,
6464                           tr, &rb_simple_fops);
6465
6466 #ifdef CONFIG_TRACER_MAX_TRACE
6467         trace_create_file("tracing_max_latency", 0644, d_tracer,
6468                         &tr->max_latency, &tracing_max_lat_fops);
6469 #endif
6470
6471         if (ftrace_create_function_files(tr, d_tracer))
6472                 WARN(1, "Could not allocate function filter files");
6473
6474 #ifdef CONFIG_TRACER_SNAPSHOT
6475         trace_create_file("snapshot", 0644, d_tracer,
6476                           tr, &snapshot_fops);
6477 #endif
6478
6479         for_each_tracing_cpu(cpu)
6480                 tracing_init_debugfs_percpu(tr, cpu);
6481
6482 }
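
/*
 * A rough usage sketch for the files created above, assuming the
 * top-level instance at /sys/kernel/debug/tracing (per-instance
 * directories get the same set of files):
 *
 *   echo function > /sys/kernel/debug/tracing/current_tracer
 *   echo 1 > /sys/kernel/debug/tracing/tracing_on
 *   cat /sys/kernel/debug/tracing/trace
 *   echo 0 > /sys/kernel/debug/tracing/tracing_on
 *
 * trace_pipe consumes events as they arrive, trace_marker injects a
 * user-supplied annotation, and buffer_size_kb resizes the per-CPU
 * ring buffers.
 */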
6483
6484 static __init int tracer_init_debugfs(void)
6485 {
6486         struct dentry *d_tracer;
6487
6488         trace_access_lock_init();
6489
6490         d_tracer = tracing_init_dentry();
6491         if (!d_tracer)
6492                 return 0;
6493
6494         init_tracer_debugfs(&global_trace, d_tracer);
6495
6496         trace_create_file("tracing_thresh", 0644, d_tracer,
6497                         &tracing_thresh, &tracing_max_lat_fops);
6498
6499         trace_create_file("README", 0444, d_tracer,
6500                         NULL, &tracing_readme_fops);
6501
6502         trace_create_file("saved_cmdlines", 0444, d_tracer,
6503                         NULL, &tracing_saved_cmdlines_fops);
6504
6505         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
6506                           NULL, &tracing_saved_cmdlines_size_fops);
6507
6508 #ifdef CONFIG_DYNAMIC_FTRACE
6509         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
6510                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
6511 #endif
6512
6513         create_trace_instances(d_tracer);
6514
6515         create_trace_options_dir(&global_trace);
6516
6517         return 0;
6518 }
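
/*
 * The files above exist only in the top-level tracing directory. A
 * small usage sketch (paths assume debugfs at /sys/kernel/debug):
 *
 *   cat /sys/kernel/debug/tracing/README
 *   cat /sys/kernel/debug/tracing/saved_cmdlines
 *   echo 200 > /sys/kernel/debug/tracing/tracing_thresh
 *
 * tracing_thresh is in microseconds and is consulted by the latency
 * tracers.
 */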
6519
6520 static int trace_panic_handler(struct notifier_block *this,
6521                                unsigned long event, void *unused)
6522 {
6523         if (ftrace_dump_on_oops)
6524                 ftrace_dump(ftrace_dump_on_oops);
6525         return NOTIFY_OK;
6526 }
6527
6528 static struct notifier_block trace_panic_notifier = {
6529         .notifier_call  = trace_panic_handler,
6530         .next           = NULL,
6531         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
6532 };
6533
6534 static int trace_die_handler(struct notifier_block *self,
6535                              unsigned long val,
6536                              void *data)
6537 {
6538         switch (val) {
6539         case DIE_OOPS:
6540                 if (ftrace_dump_on_oops)
6541                         ftrace_dump(ftrace_dump_on_oops);
6542                 break;
6543         default:
6544                 break;
6545         }
6546         return NOTIFY_OK;
6547 }
6548
6549 static struct notifier_block trace_die_notifier = {
6550         .notifier_call = trace_die_handler,
6551         .priority = 200
6552 };
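
/*
 * Sketch of how these notifiers typically get something to dump: the
 * dump is armed either with the "ftrace_dump_on_oops" kernel boot
 * parameter or at run time through the sysctl of the same name, e.g.:
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * A value of 1 dumps all CPU buffers (DUMP_ALL); 2 dumps only the
 * buffer of the CPU that triggered the oops (DUMP_ORIG).
 */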
6553
6554 /*
6555  * printk is limited to a maximum of 1024 characters; we really don't
6556  * need it that big. Nothing should be printing 1000 characters anyway.
6557  */
6558 #define TRACE_MAX_PRINT         1000
6559
6560 /*
6561  * Define here KERN_TRACE so that we have one place to modify
6562  * it if we decide to change what log level the ftrace dump
6563  * should be at.
6564  */
6565 #define KERN_TRACE              KERN_EMERG
6566
6567 void
6568 trace_printk_seq(struct trace_seq *s)
6569 {
6570         /* Probably should print a warning here. */
6571         if (s->len >= TRACE_MAX_PRINT)
6572                 s->len = TRACE_MAX_PRINT;
6573
6574         /* Should be zero terminated, but we are paranoid. */
6575         s->buffer[s->len] = 0;
6576
6577         printk(KERN_TRACE "%s", s->buffer);
6578
6579         trace_seq_init(s);
6580 }
6581
6582 void trace_init_global_iter(struct trace_iterator *iter)
6583 {
6584         iter->tr = &global_trace;
6585         iter->trace = iter->tr->current_trace;
6586         iter->cpu_file = RING_BUFFER_ALL_CPUS;
6587         iter->trace_buffer = &global_trace.trace_buffer;
6588
6589         if (iter->trace && iter->trace->open)
6590                 iter->trace->open(iter);
6591
6592         /* Annotate start of buffers if we had overruns */
6593         if (ring_buffer_overruns(iter->trace_buffer->buffer))
6594                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
6595
6596         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6597         if (trace_clocks[iter->tr->clock_id].in_ns)
6598                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6599 }
6600
6601 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
6602 {
6603         /* use static because iter can be a bit big for the stack */
6604         static struct trace_iterator iter;
6605         static atomic_t dump_running;
6606         unsigned int old_userobj;
6607         unsigned long flags;
6608         int cnt = 0, cpu;
6609
6610         /* Only allow one dump user at a time. */
6611         if (atomic_inc_return(&dump_running) != 1) {
6612                 atomic_dec(&dump_running);
6613                 return;
6614         }
6615
6616         /*
6617          * Always turn off tracing when we dump.
6618          * We don't need to show trace output of what happens
6619          * between multiple crashes.
6620          *
6621          * If the user does a sysrq-z, then they can re-enable
6622          * tracing with echo 1 > tracing_on.
6623          */
6624         tracing_off();
6625
6626         local_irq_save(flags);
6627
6628         /* Simulate the iterator */
6629         trace_init_global_iter(&iter);
6630
6631         for_each_tracing_cpu(cpu) {
6632                 atomic_inc(&per_cpu_ptr(iter.tr->trace_buffer.data, cpu)->disabled);
6633         }
6634
6635         old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ;
6636
6637         /* don't look at user memory in panic mode */
6638         trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
6639
6640         switch (oops_dump_mode) {
6641         case DUMP_ALL:
6642                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6643                 break;
6644         case DUMP_ORIG:
6645                 iter.cpu_file = raw_smp_processor_id();
6646                 break;
6647         case DUMP_NONE:
6648                 goto out_enable;
6649         default:
6650                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
6651                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
6652         }
6653
6654         printk(KERN_TRACE "Dumping ftrace buffer:\n");
6655
6656         /* Did function tracer already get disabled? */
6657         if (ftrace_is_dead()) {
6658                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
6659                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
6660         }
6661
6662         /*
6663          * We need to stop all tracing on all CPUs to read
6664          * the next buffer. This is a bit expensive, but is
6665          * not done often. We print all that we can read,
6666          * and then release the locks again.
6667          */
6668
6669         while (!trace_empty(&iter)) {
6670
6671                 if (!cnt)
6672                         printk(KERN_TRACE "---------------------------------\n");
6673
6674                 cnt++;
6675
6676                 /* reset all but tr, trace, and overruns */
6677                 memset(&iter.seq, 0,
6678                        sizeof(struct trace_iterator) -
6679                        offsetof(struct trace_iterator, seq));
6680                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
6681                 iter.pos = -1;
6682
6683                 if (trace_find_next_entry_inc(&iter) != NULL) {
6684                         int ret;
6685
6686                         ret = print_trace_line(&iter);
6687                         if (ret != TRACE_TYPE_NO_CONSUME)
6688                                 trace_consume(&iter);
6689                 }
6690                 touch_nmi_watchdog();
6691
6692                 trace_printk_seq(&iter.seq);
6693         }
6694
6695         if (!cnt)
6696                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
6697         else
6698                 printk(KERN_TRACE "---------------------------------\n");
6699
6700  out_enable:
6701         trace_flags |= old_userobj;
6702
6703         for_each_tracing_cpu(cpu) {
6704                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
6705         }
6706         atomic_dec(&dump_running);
6707         local_irq_restore(flags);
6708 }
6709 EXPORT_SYMBOL_GPL(ftrace_dump);
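
/*
 * Besides the panic/die notifiers above, ftrace_dump() can be invoked
 * by hand; a sketch, assuming the magic SysRq interface is enabled:
 *
 *   echo z > /proc/sysrq-trigger
 *
 * As noted in the function, tracing stays off after a dump and can be
 * resumed with "echo 1 > tracing_on".
 */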
6710
6711 __init static int tracer_alloc_buffers(void)
6712 {
6713         int ring_buf_size;
6714         int ret = -ENOMEM;
6715
6716
6717         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
6718                 goto out;
6719
6720         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
6721                 goto out_free_buffer_mask;
6722
6723         /* Only allocate trace_printk buffers if a trace_printk exists */
6724         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
6725                 /* Must be called before global_trace.buffer is allocated */
6726                 trace_printk_init_buffers();
6727
6728         /* To save memory, keep the ring buffer size to its minimum */
6729         if (ring_buffer_expanded)
6730                 ring_buf_size = trace_buf_size;
6731         else
6732                 ring_buf_size = 1;
6733
6734         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
6735         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
6736
6737         raw_spin_lock_init(&global_trace.start_lock);
6738
6739         /* Used for event triggers */
6740         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
6741         if (!temp_buffer)
6742                 goto out_free_cpumask;
6743
6744         if (trace_create_savedcmd() < 0)
6745                 goto out_free_temp_buffer;
6746
6747         /* TODO: make the number of buffers hot pluggable with CPUs */
6748         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
6749                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
6750                 WARN_ON(1);
6751                 goto out_free_savedcmd;
6752         }
6753
6754         if (global_trace.buffer_disabled)
6755                 tracing_off();
6756
6757         if (trace_boot_clock) {
6758                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
6759                 if (ret < 0)
6760                         pr_warning("Trace clock %s not defined, going back to default\n",
6761                                    trace_boot_clock);
6762         }
6763
6764         /*
6765          * register_tracer() might reference current_trace, so it
6766          * needs to be set before we register anything. This is
6767          * just a bootstrap of current_trace anyway.
6768          */
6769         global_trace.current_trace = &nop_trace;
6770
6771         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
6772
6773         ftrace_init_global_array_ops(&global_trace);
6774
6775         register_tracer(&nop_trace);
6776
6777         /* All seems OK, enable tracing */
6778         tracing_disabled = 0;
6779
6780         atomic_notifier_chain_register(&panic_notifier_list,
6781                                        &trace_panic_notifier);
6782
6783         register_die_notifier(&trace_die_notifier);
6784
6785         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
6786
6787         INIT_LIST_HEAD(&global_trace.systems);
6788         INIT_LIST_HEAD(&global_trace.events);
6789         list_add(&global_trace.list, &ftrace_trace_arrays);
6790
6791         while (trace_boot_options) {
6792                 char *option;
6793
6794                 option = strsep(&trace_boot_options, ",");
6795                 trace_set_options(&global_trace, option);
6796         }
6797
6798         register_snapshot_cmd();
6799
6800         return 0;
6801
6802 out_free_savedcmd:
6803         free_saved_cmdlines_buffer(savedcmd);
6804 out_free_temp_buffer:
6805         ring_buffer_free(temp_buffer);
6806 out_free_cpumask:
6807         free_cpumask_var(global_trace.tracing_cpumask);
6808 out_free_buffer_mask:
6809         free_cpumask_var(tracing_buffer_mask);
6810 out:
6811         return ret;
6812 }
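
/*
 * The boot-time state consumed above comes from the kernel command
 * line; a sketch of typical parameters (not an exhaustive list):
 *
 *   ftrace=function trace_buf_size=10k trace_clock=global
 *   trace_options=sym-addr,stacktrace
 *
 * ftrace= picks default_bootup_tracer, trace_buf_size= sets
 * trace_buf_size, trace_clock= fills trace_boot_clock, and
 * trace_options= is split on commas and fed to trace_set_options()
 * in the loop above.
 */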
6813
6814 __init static int clear_boot_tracer(void)
6815 {
6816         /*
6817          * The default bootup tracer name is kept in a buffer that lives
6818          * in an init section. This function is called as a late initcall.
6819          * If the boot tracer was never found, clear the pointer out to
6820          * prevent a later registration from accessing the buffer that is
6821          * about to be freed.
6822          */
6823         if (!default_bootup_tracer)
6824                 return 0;
6825
6826         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
6827                default_bootup_tracer);
6828         default_bootup_tracer = NULL;
6829
6830         return 0;
6831 }
6832
6833 early_initcall(tracer_alloc_buffers);
6834 fs_initcall(tracer_init_debugfs);
6835 late_initcall(clear_boot_tracer);