1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 William Lee Irwin III
13  */
14 #include <linux/utsrelease.h>
15 #include <linux/kallsyms.h>
16 #include <linux/seq_file.h>
17 #include <linux/debugfs.h>
18 #include <linux/pagemap.h>
19 #include <linux/hardirq.h>
20 #include <linux/linkage.h>
21 #include <linux/uaccess.h>
22 #include <linux/ftrace.h>
23 #include <linux/module.h>
24 #include <linux/percpu.h>
25 #include <linux/ctype.h>
26 #include <linux/init.h>
27 #include <linux/poll.h>
28 #include <linux/gfp.h>
29 #include <linux/fs.h>
30
31 #include <linux/stacktrace.h>
32
33 #include "trace.h"
34
35 unsigned long __read_mostly     tracing_max_latency = (cycle_t)ULONG_MAX;
36 unsigned long __read_mostly     tracing_thresh;
37
38 static unsigned long __read_mostly      tracing_nr_buffers;
39 static cpumask_t __read_mostly          tracing_buffer_mask;
40
41 #define for_each_tracing_cpu(cpu)       \
42         for_each_cpu_mask(cpu, tracing_buffer_mask)
43
44 /* dummy tracer used to disable tracing */
45 static struct tracer no_tracer __read_mostly = {
46         .name           = "none",
47 };
48
49 static int trace_alloc_page(void);
50 static int trace_free_page(void);
51
52 static int tracing_disabled = 1;
53
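/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */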
54 long
55 ns2usecs(cycle_t nsec)
56 {
57         nsec += 500;
58         do_div(nsec, 1000);
59         return nsec;
60 }
61
62 cycle_t ftrace_now(int cpu)
63 {
64         return cpu_clock(cpu);
65 }
66
67 static struct trace_array       global_trace;
68
69 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
70
71 static struct trace_array       max_tr;
72
73 static DEFINE_PER_CPU(struct trace_array_cpu, max_data);
74
75 static int                      tracer_enabled = 1;
76 static unsigned long            trace_nr_entries = 65536UL;
77
78 static struct tracer            *trace_types __read_mostly;
79 static struct tracer            *current_trace __read_mostly;
80 static int                      max_tracer_type_len;
81
82 static DEFINE_MUTEX(trace_types_lock);
83 static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
84
85 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT;
86
87 void trace_wake_up(void)
88 {
89         /*
90          * The runqueue_is_locked() can fail, but this is the best we
91          * have for now:
92          */
93         if (!(trace_flags & TRACE_ITER_BLOCK) && !runqueue_is_locked())
94                 wake_up(&trace_wait);
95 }
96
97 #define ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(struct trace_entry))
98
99 static int __init set_nr_entries(char *str)
100 {
101         unsigned long nr_entries;
102         int ret;
103
104         if (!str)
105                 return 0;
106         ret = strict_strtoul(str, 0, &nr_entries);
107         /* nr_entries cannot be zero */
108         if (ret < 0 || nr_entries == 0)
109                 return 0;
110         trace_nr_entries = nr_entries;
111         return 1;
112 }
113 __setup("trace_entries=", set_nr_entries);
114
115 unsigned long nsecs_to_usecs(unsigned long nsecs)
116 {
117         return nsecs / 1000;
118 }
119
120 enum trace_flag_type {
121         TRACE_FLAG_IRQS_OFF             = 0x01,
122         TRACE_FLAG_NEED_RESCHED         = 0x02,
123         TRACE_FLAG_HARDIRQ              = 0x04,
124         TRACE_FLAG_SOFTIRQ              = 0x08,
125 };
126
127 #define TRACE_ITER_SYM_MASK \
128         (TRACE_ITER_PRINT_PARENT|TRACE_ITER_SYM_OFFSET|TRACE_ITER_SYM_ADDR)
129
130 /* These must match the bit positions above */
131 static const char *trace_options[] = {
132         "print-parent",
133         "sym-offset",
134         "sym-addr",
135         "verbose",
136         "raw",
137         "hex",
138         "bin",
139         "block",
140         "stacktrace",
141         "sched-tree",
142         NULL
143 };
144
145 static raw_spinlock_t ftrace_max_lock =
146         (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
147
148 /*
149  * Copy the new maximum trace into the separate maximum-trace
150  * structure. (this way the maximum trace is permanently saved,
151  * for later retrieval via /debugfs/tracing/latency_trace)
152  */
153 static void
154 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
155 {
156         struct trace_array_cpu *data = tr->data[cpu];
157
158         max_tr.cpu = cpu;
159         max_tr.time_start = data->preempt_timestamp;
160
161         data = max_tr.data[cpu];
162         data->saved_latency = tracing_max_latency;
163
164         memcpy(data->comm, tsk->comm, TASK_COMM_LEN);
165         data->pid = tsk->pid;
166         data->uid = tsk->uid;
167         data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
168         data->policy = tsk->policy;
169         data->rt_priority = tsk->rt_priority;
170
171         /* record this task's comm */
172         tracing_record_cmdline(current);
173 }
174
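/*
 * Sanity-check the circular list of trace pages for a CPU buffer;
 * any corrupted list linkage is a bug.
 */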
175 void check_pages(struct trace_array_cpu *data)
176 {
177         struct page *page, *tmp;
178
179         BUG_ON(data->trace_pages.next->prev != &data->trace_pages);
180         BUG_ON(data->trace_pages.prev->next != &data->trace_pages);
181
182         list_for_each_entry_safe(page, tmp, &data->trace_pages, lru) {
183                 BUG_ON(page->lru.next->prev != &page->lru);
184                 BUG_ON(page->lru.prev->next != &page->lru);
185         }
186 }
187
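/*
 * Return the virtual address of the first page in this CPU's trace
 * page list, or NULL if no pages have been allocated.
 */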
188 void *head_page(struct trace_array_cpu *data)
189 {
190         struct page *page;
191
192         check_pages(data);
193         if (list_empty(&data->trace_pages))
194                 return NULL;
195
196         page = list_entry(data->trace_pages.next, struct page, lru);
197         BUG_ON(&page->lru == &data->trace_pages);
198
199         return page_address(page);
200 }
201
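/*
 * Format into the iterator's one-page sequence buffer. If the result
 * would not fit in the remaining space, nothing is written and 0 is
 * returned.
 */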
202 int
203 trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
204 {
205         int len = (PAGE_SIZE - 1) - s->len;
206         va_list ap;
207         int ret;
208
209         if (!len)
210                 return 0;
211
212         va_start(ap, fmt);
213         ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
214         va_end(ap);
215
216         /* If we can't write it all, don't bother writing anything */
217         if (ret >= len)
218                 return 0;
219
220         s->len += ret;
221
222         return len;
223 }
224
225 static int
226 trace_seq_puts(struct trace_seq *s, const char *str)
227 {
228         int len = strlen(str);
229
230         if (len > ((PAGE_SIZE - 1) - s->len))
231                 return 0;
232
233         memcpy(s->buffer + s->len, str, len);
234         s->len += len;
235
236         return len;
237 }
238
239 static int
240 trace_seq_putc(struct trace_seq *s, unsigned char c)
241 {
242         if (s->len >= (PAGE_SIZE - 1))
243                 return 0;
244
245         s->buffer[s->len++] = c;
246
247         return 1;
248 }
249
250 static int
251 trace_seq_putmem(struct trace_seq *s, void *mem, size_t len)
252 {
253         if (len > ((PAGE_SIZE - 1) - s->len))
254                 return 0;
255
256         memcpy(s->buffer + s->len, mem, len);
257         s->len += len;
258
259         return len;
260 }
261
262 #define HEX_CHARS 17
263 static const char hex2asc[] = "0123456789abcdef";
264
265 static int
266 trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len)
267 {
268         unsigned char hex[HEX_CHARS];
269         unsigned char *data = mem;
270         unsigned char byte;
271         int i, j;
272
273         BUG_ON(len >= HEX_CHARS);
274
275 #ifdef __BIG_ENDIAN
276         for (i = 0, j = 0; i < len; i++) {
277 #else
278         for (i = len-1, j = 0; i >= 0; i--) {
279 #endif
280                 byte = data[i];
281
282                 hex[j++] = hex2asc[byte & 0x0f];
283                 hex[j++] = hex2asc[byte >> 4];
284         }
285         hex[j++] = ' ';
286
287         return trace_seq_putmem(s, hex, j);
288 }
289
290 static void
291 trace_seq_reset(struct trace_seq *s)
292 {
293         s->len = 0;
294 }
295
296 static void
297 trace_print_seq(struct seq_file *m, struct trace_seq *s)
298 {
299         int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
300
301         s->buffer[len] = 0;
302         seq_puts(m, s->buffer);
303
304         trace_seq_reset(s);
305 }
306
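/*
 * Exchange the trace page lists of two per-CPU buffers and copy tr2's
 * bookkeeping fields (trace_head_idx onward) into tr1. This lets the
 * live buffer and the max-latency snapshot trade places without
 * copying any trace data.
 */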
307 static void
308 flip_trace(struct trace_array_cpu *tr1, struct trace_array_cpu *tr2)
309 {
310         struct list_head flip_pages;
311
312         INIT_LIST_HEAD(&flip_pages);
313
314         memcpy(&tr1->trace_head_idx, &tr2->trace_head_idx,
315                 sizeof(struct trace_array_cpu) -
316                 offsetof(struct trace_array_cpu, trace_head_idx));
317
318         check_pages(tr1);
319         check_pages(tr2);
320         list_splice_init(&tr1->trace_pages, &flip_pages);
321         list_splice_init(&tr2->trace_pages, &tr1->trace_pages);
322         list_splice_init(&flip_pages, &tr2->trace_pages);
323         BUG_ON(!list_empty(&flip_pages));
324         check_pages(tr1);
325         check_pages(tr2);
326 }
327
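/**
 * update_max_tr - snapshot the current trace as the new maximum
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: the cpu of the buffer that hit the new maximum
 *
 * Swaps every CPU buffer with its max_tr counterpart and resets the
 * (now stale) live side.
 */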
328 void
329 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
330 {
331         struct trace_array_cpu *data;
332         int i;
333
334         WARN_ON_ONCE(!irqs_disabled());
335         __raw_spin_lock(&ftrace_max_lock);
336         /* clear out all the previous traces */
337         for_each_tracing_cpu(i) {
338                 data = tr->data[i];
339                 flip_trace(max_tr.data[i], data);
340                 tracing_reset(data);
341         }
342
343         __update_max_tr(tr, tsk, cpu);
344         __raw_spin_unlock(&ftrace_max_lock);
345 }
346
347 /**
348  * update_max_tr_single - only copy one trace over, and reset the rest
349  * @tr: tracer
350  * @tsk: task with the latency
351  * @cpu: the cpu of the buffer to copy.
352  */
353 void
354 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
355 {
356         struct trace_array_cpu *data = tr->data[cpu];
357         int i;
358
359         WARN_ON_ONCE(!irqs_disabled());
360         __raw_spin_lock(&ftrace_max_lock);
361         for_each_tracing_cpu(i)
362                 tracing_reset(max_tr.data[i]);
363
364         flip_trace(max_tr.data[cpu], data);
365         tracing_reset(data);
366
367         __update_max_tr(tr, tsk, cpu);
368         __raw_spin_unlock(&ftrace_max_lock);
369 }
370
371 int register_tracer(struct tracer *type)
372 {
373         struct tracer *t;
374         int len;
375         int ret = 0;
376
377         if (!type->name) {
378                 pr_info("Tracer must have a name\n");
379                 return -1;
380         }
381
382         mutex_lock(&trace_types_lock);
383         for (t = trace_types; t; t = t->next) {
384                 if (strcmp(type->name, t->name) == 0) {
385                         /* already found */
386                         pr_info("Trace %s already registered\n",
387                                 type->name);
388                         ret = -1;
389                         goto out;
390                 }
391         }
392
393 #ifdef CONFIG_FTRACE_STARTUP_TEST
394         if (type->selftest) {
395                 struct tracer *saved_tracer = current_trace;
396                 struct trace_array_cpu *data;
397                 struct trace_array *tr = &global_trace;
398                 int saved_ctrl = tr->ctrl;
399                 int i;
400                 /*
401                  * Run a selftest on this tracer.
402                  * Here we reset the trace buffer, and set the current
403                  * tracer to be this tracer. The tracer can then run some
404                  * internal tracing to verify that everything is in order.
405                  * If we fail, we do not register this tracer.
406                  */
407                 for_each_tracing_cpu(i) {
408                         data = tr->data[i];
409                         if (!head_page(data))
410                                 continue;
411                         tracing_reset(data);
412                 }
413                 current_trace = type;
414                 tr->ctrl = 0;
415                 /* the test is responsible for initializing and enabling */
416                 pr_info("Testing tracer %s: ", type->name);
417                 ret = type->selftest(type, tr);
418                 /* the test is responsible for resetting too */
419                 current_trace = saved_tracer;
420                 tr->ctrl = saved_ctrl;
421                 if (ret) {
422                         printk(KERN_CONT "FAILED!\n");
423                         goto out;
424                 }
425                 /* Only reset on passing, to avoid touching corrupted buffers */
426                 for_each_tracing_cpu(i) {
427                         data = tr->data[i];
428                         if (!head_page(data))
429                                 continue;
430                         tracing_reset(data);
431                 }
432                 printk(KERN_CONT "PASSED\n");
433         }
434 #endif
435
436         type->next = trace_types;
437         trace_types = type;
438         len = strlen(type->name);
439         if (len > max_tracer_type_len)
440                 max_tracer_type_len = len;
441
442  out:
443         mutex_unlock(&trace_types_lock);
444
445         return ret;
446 }
447
448 void unregister_tracer(struct tracer *type)
449 {
450         struct tracer **t;
451         int len;
452
453         mutex_lock(&trace_types_lock);
454         for (t = &trace_types; *t; t = &(*t)->next) {
455                 if (*t == type)
456                         goto found;
457         }
458         pr_info("Trace %s not registered\n", type->name);
459         goto out;
460
461  found:
462         *t = (*t)->next;
463         if (strlen(type->name) != max_tracer_type_len)
464                 goto out;
465
466         max_tracer_type_len = 0;
467         for (t = &trace_types; *t; t = &(*t)->next) {
468                 len = strlen((*t)->name);
469                 if (len > max_tracer_type_len)
470                         max_tracer_type_len = len;
471         }
472  out:
473         mutex_unlock(&trace_types_lock);
474 }
475
476 void tracing_reset(struct trace_array_cpu *data)
477 {
478         data->trace_idx = 0;
479         data->trace_head = data->trace_tail = head_page(data);
480         data->trace_head_idx = 0;
481         data->trace_tail_idx = 0;
482 }
483
484 #define SAVED_CMDLINES 128
485 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
486 static unsigned map_cmdline_to_pid[SAVED_CMDLINES];
487 static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN];
488 static int cmdline_idx;
489 static DEFINE_SPINLOCK(trace_cmdline_lock);
490 atomic_t trace_record_cmdline_disabled;
491
492 static void trace_init_cmdlines(void)
493 {
494         memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline));
495         memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid));
496         cmdline_idx = 0;
497 }
498
499 void trace_stop_cmdline_recording(void);
500
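/*
 * Remember tsk->comm in a small pid-indexed cache so that trace output
 * can later map a pid back to a command name without the task.
 */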
501 static void trace_save_cmdline(struct task_struct *tsk)
502 {
503         unsigned map;
504         unsigned idx;
505
506         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
507                 return;
508
509         /*
510          * It's not the end of the world if we don't get
511          * the lock, but we also don't want to spin
512          * nor do we want to disable interrupts,
513          * so if we miss here, then better luck next time.
514          */
515         if (!spin_trylock(&trace_cmdline_lock))
516                 return;
517
518         idx = map_pid_to_cmdline[tsk->pid];
519         if (idx >= SAVED_CMDLINES) {
520                 idx = (cmdline_idx + 1) % SAVED_CMDLINES;
521
522                 map = map_cmdline_to_pid[idx];
523                 if (map <= PID_MAX_DEFAULT)
524                         map_pid_to_cmdline[map] = (unsigned)-1;
525
526                 map_pid_to_cmdline[tsk->pid] = idx;
527
528                 cmdline_idx = idx;
529         }
530
531         memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN);
532
533         spin_unlock(&trace_cmdline_lock);
534 }
535
536 static char *trace_find_cmdline(int pid)
537 {
538         char *cmdline = "<...>";
539         unsigned map;
540
541         if (!pid)
542                 return "<idle>";
543
544         if (pid > PID_MAX_DEFAULT)
545                 goto out;
546
547         map = map_pid_to_cmdline[pid];
548         if (map >= SAVED_CMDLINES)
549                 goto out;
550
551         cmdline = saved_cmdlines[map];
552
553  out:
554         return cmdline;
555 }
556
557 void tracing_record_cmdline(struct task_struct *tsk)
558 {
559         if (atomic_read(&trace_record_cmdline_disabled))
560                 return;
561
562         trace_save_cmdline(tsk);
563 }
564
565 static inline struct list_head *
566 trace_next_list(struct trace_array_cpu *data, struct list_head *next)
567 {
568         /*
569          * Round-robin - but skip the head (which is not a real page):
570          */
571         next = next->next;
572         if (unlikely(next == &data->trace_pages))
573                 next = next->next;
574         BUG_ON(next == &data->trace_pages);
575
576         return next;
577 }
578
579 static inline void *
580 trace_next_page(struct trace_array_cpu *data, void *addr)
581 {
582         struct list_head *next;
583         struct page *page;
584
585         page = virt_to_page(addr);
586
587         next = trace_next_list(data, &page->lru);
588         page = list_entry(next, struct page, lru);
589
590         return page_address(page);
591 }
592
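/*
 * Reserve the next entry slot in the CPU buffer and advance the head.
 * If the head catches up with the tail, the oldest entry is overwritten
 * (an overrun) and the tail is pushed forward.
 */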
593 static inline struct trace_entry *
594 tracing_get_trace_entry(struct trace_array *tr, struct trace_array_cpu *data)
595 {
596         unsigned long idx, idx_next;
597         struct trace_entry *entry;
598
599         data->trace_idx++;
600         idx = data->trace_head_idx;
601         idx_next = idx + 1;
602
603         BUG_ON(idx * TRACE_ENTRY_SIZE >= PAGE_SIZE);
604
605         entry = data->trace_head + idx * TRACE_ENTRY_SIZE;
606
607         if (unlikely(idx_next >= ENTRIES_PER_PAGE)) {
608                 data->trace_head = trace_next_page(data, data->trace_head);
609                 idx_next = 0;
610         }
611
612         if (data->trace_head == data->trace_tail &&
613             idx_next == data->trace_tail_idx) {
614                 /* overrun */
615                 data->trace_tail_idx++;
616                 if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
617                         data->trace_tail =
618                                 trace_next_page(data, data->trace_tail);
619                         data->trace_tail_idx = 0;
620                 }
621         }
622
623         data->trace_head_idx = idx_next;
624
625         return entry;
626 }
627
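/*
 * Fill in the fields common to all entry types: pid, timestamp,
 * preempt count and the irq/softirq/need-resched flags.
 */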
628 static inline void
629 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags)
630 {
631         struct task_struct *tsk = current;
632         unsigned long pc;
633
634         pc = preempt_count();
635
636         entry->preempt_count    = pc & 0xff;
637         entry->pid              = (tsk) ? tsk->pid : 0;
638         entry->t                = ftrace_now(raw_smp_processor_id());
639         entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
640                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
641                 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
642                 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
643 }
644
645 void
646 trace_function(struct trace_array *tr, struct trace_array_cpu *data,
647                unsigned long ip, unsigned long parent_ip, unsigned long flags)
648 {
649         struct trace_entry *entry;
650         unsigned long irq_flags;
651
652         raw_local_irq_save(irq_flags);
653         __raw_spin_lock(&data->lock);
654         entry                   = tracing_get_trace_entry(tr, data);
655         tracing_generic_entry_update(entry, flags);
656         entry->type             = TRACE_FN;
657         entry->fn.ip            = ip;
658         entry->fn.parent_ip     = parent_ip;
659         __raw_spin_unlock(&data->lock);
660         raw_local_irq_restore(irq_flags);
661 }
662
663 void
664 ftrace(struct trace_array *tr, struct trace_array_cpu *data,
665        unsigned long ip, unsigned long parent_ip, unsigned long flags)
666 {
667         if (likely(!atomic_read(&data->disabled)))
668                 trace_function(tr, data, ip, parent_ip, flags);
669 }
670
671 void
672 __trace_special(void *__tr, void *__data,
673                 unsigned long arg1, unsigned long arg2, unsigned long arg3)
674 {
675         struct trace_array_cpu *data = __data;
676         struct trace_array *tr = __tr;
677         struct trace_entry *entry;
678         unsigned long irq_flags;
679
680         raw_local_irq_save(irq_flags);
681         __raw_spin_lock(&data->lock);
682         entry                   = tracing_get_trace_entry(tr, data);
683         tracing_generic_entry_update(entry, 0);
684         entry->type             = TRACE_SPECIAL;
685         entry->special.arg1     = arg1;
686         entry->special.arg2     = arg2;
687         entry->special.arg3     = arg3;
688         __raw_spin_unlock(&data->lock);
689         raw_local_irq_restore(irq_flags);
690
691         trace_wake_up();
692 }
693
694 void __trace_stack(struct trace_array *tr,
695                    struct trace_array_cpu *data,
696                    unsigned long flags,
697                    int skip)
698 {
699         struct trace_entry *entry;
700         struct stack_trace trace;
701
702         if (!(trace_flags & TRACE_ITER_STACKTRACE))
703                 return;
704
705         entry                   = tracing_get_trace_entry(tr, data);
706         tracing_generic_entry_update(entry, flags);
707         entry->type             = TRACE_STACK;
708
709         memset(&entry->stack, 0, sizeof(entry->stack));
710
711         trace.nr_entries        = 0;
712         trace.max_entries       = FTRACE_STACK_ENTRIES;
713         trace.skip              = skip;
714         trace.entries           = entry->stack.caller;
715
716         save_stack_trace(&trace);
717 }
718
719 void
720 tracing_sched_switch_trace(struct trace_array *tr,
721                            struct trace_array_cpu *data,
722                            struct task_struct *prev,
723                            struct task_struct *next,
724                            unsigned long flags)
725 {
726         struct trace_entry *entry;
727         unsigned long irq_flags;
728
729         raw_local_irq_save(irq_flags);
730         __raw_spin_lock(&data->lock);
731         entry                   = tracing_get_trace_entry(tr, data);
732         tracing_generic_entry_update(entry, flags);
733         entry->type             = TRACE_CTX;
734         entry->ctx.prev_pid     = prev->pid;
735         entry->ctx.prev_prio    = prev->prio;
736         entry->ctx.prev_state   = prev->state;
737         entry->ctx.next_pid     = next->pid;
738         entry->ctx.next_prio    = next->prio;
739         entry->ctx.next_state   = next->state;
740         __trace_stack(tr, data, flags, 4);
741         __raw_spin_unlock(&data->lock);
742         raw_local_irq_restore(irq_flags);
743 }
744
745 void
746 tracing_sched_wakeup_trace(struct trace_array *tr,
747                            struct trace_array_cpu *data,
748                            struct task_struct *wakee,
749                            struct task_struct *curr,
750                            unsigned long flags)
751 {
752         struct trace_entry *entry;
753         unsigned long irq_flags;
754
755         raw_local_irq_save(irq_flags);
756         __raw_spin_lock(&data->lock);
757         entry                   = tracing_get_trace_entry(tr, data);
758         tracing_generic_entry_update(entry, flags);
759         entry->type             = TRACE_WAKE;
760         entry->ctx.prev_pid     = curr->pid;
761         entry->ctx.prev_prio    = curr->prio;
762         entry->ctx.prev_state   = curr->state;
763         entry->ctx.next_pid     = wakee->pid;
764         entry->ctx.next_prio    = wakee->prio;
765         entry->ctx.next_state   = wakee->state;
766         __trace_stack(tr, data, flags, 5);
767         __raw_spin_unlock(&data->lock);
768         raw_local_irq_restore(irq_flags);
769
770         trace_wake_up();
771 }
772
773 #ifdef CONFIG_FTRACE
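/*
 * Callback invoked from the mcount hook for every traced function.
 * Records one TRACE_FN entry unless this CPU is already inside the
 * tracer (recursion is guarded by the per-cpu disabled counter).
 */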
774 static void
775 function_trace_call(unsigned long ip, unsigned long parent_ip)
776 {
777         struct trace_array *tr = &global_trace;
778         struct trace_array_cpu *data;
779         unsigned long flags;
780         long disabled;
781         int cpu;
782
783         if (unlikely(!tracer_enabled))
784                 return;
785
786         local_irq_save(flags);
787         cpu = raw_smp_processor_id();
788         data = tr->data[cpu];
789         disabled = atomic_inc_return(&data->disabled);
790
791         if (likely(disabled == 1))
792                 trace_function(tr, data, ip, parent_ip, flags);
793
794         atomic_dec(&data->disabled);
795         local_irq_restore(flags);
796 }
797
798 static struct ftrace_ops trace_ops __read_mostly =
799 {
800         .func = function_trace_call,
801 };
802
803 void tracing_start_function_trace(void)
804 {
805         register_ftrace_function(&trace_ops);
806 }
807
808 void tracing_stop_function_trace(void)
809 {
810         unregister_ftrace_function(&trace_ops);
811 }
812 #endif
813
814 enum trace_file_type {
815         TRACE_FILE_LAT_FMT      = 1,
816 };
817
818 static struct trace_entry *
819 trace_entry_idx(struct trace_array *tr, struct trace_array_cpu *data,
820                 struct trace_iterator *iter, int cpu)
821 {
822         struct page *page;
823         struct trace_entry *array;
824
825         if (iter->next_idx[cpu] >= tr->entries ||
826             iter->next_idx[cpu] >= data->trace_idx ||
827             (data->trace_head == data->trace_tail &&
828              data->trace_head_idx == data->trace_tail_idx))
829                 return NULL;
830
831         if (!iter->next_page[cpu]) {
832                 /* Initialize the iterator for this cpu trace buffer */
833                 WARN_ON(!data->trace_tail);
834                 page = virt_to_page(data->trace_tail);
835                 iter->next_page[cpu] = &page->lru;
836                 iter->next_page_idx[cpu] = data->trace_tail_idx;
837         }
838
839         page = list_entry(iter->next_page[cpu], struct page, lru);
840         BUG_ON(&data->trace_pages == &page->lru);
841
842         array = page_address(page);
843
844         WARN_ON(iter->next_page_idx[cpu] >= ENTRIES_PER_PAGE);
845         return &array[iter->next_page_idx[cpu]];
846 }
847
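/*
 * Find the oldest pending entry (smallest timestamp) across all CPU
 * buffers; optionally report which CPU it came from via @ent_cpu.
 */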
848 static struct trace_entry *
849 find_next_entry(struct trace_iterator *iter, int *ent_cpu)
850 {
851         struct trace_array *tr = iter->tr;
852         struct trace_entry *ent, *next = NULL;
853         int next_cpu = -1;
854         int cpu;
855
856         for_each_tracing_cpu(cpu) {
857                 if (!head_page(tr->data[cpu]))
858                         continue;
859                 ent = trace_entry_idx(tr, tr->data[cpu], iter, cpu);
860                 /*
861                  * Pick the entry with the smallest timestamp:
862                  */
863                 if (ent && (!next || ent->t < next->t)) {
864                         next = ent;
865                         next_cpu = cpu;
866                 }
867         }
868
869         if (ent_cpu)
870                 *ent_cpu = next_cpu;
871
872         return next;
873 }
874
875 static void trace_iterator_increment(struct trace_iterator *iter)
876 {
877         iter->idx++;
878         iter->next_idx[iter->cpu]++;
879         iter->next_page_idx[iter->cpu]++;
880
881         if (iter->next_page_idx[iter->cpu] >= ENTRIES_PER_PAGE) {
882                 struct trace_array_cpu *data = iter->tr->data[iter->cpu];
883
884                 iter->next_page_idx[iter->cpu] = 0;
885                 iter->next_page[iter->cpu] =
886                         trace_next_list(data, iter->next_page[iter->cpu]);
887         }
888 }
889
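/*
 * Discard the entry at the tail of the current CPU buffer, advancing
 * the tail; once the buffer is empty, the entry index is reset.
 */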
890 static void trace_consume(struct trace_iterator *iter)
891 {
892         struct trace_array_cpu *data = iter->tr->data[iter->cpu];
893
894         data->trace_tail_idx++;
895         if (data->trace_tail_idx >= ENTRIES_PER_PAGE) {
896                 data->trace_tail = trace_next_page(data, data->trace_tail);
897                 data->trace_tail_idx = 0;
898         }
899
900         /* If the buffer is now empty, reset the index */
901         if (data->trace_head == data->trace_tail &&
902             data->trace_head_idx == data->trace_tail_idx)
903                 data->trace_idx = 0;
904 }
905
906 static void *find_next_entry_inc(struct trace_iterator *iter)
907 {
908         struct trace_entry *next;
909         int next_cpu = -1;
910
911         next = find_next_entry(iter, &next_cpu);
912
913         iter->prev_ent = iter->ent;
914         iter->prev_cpu = iter->cpu;
915
916         iter->ent = next;
917         iter->cpu = next_cpu;
918
919         if (next)
920                 trace_iterator_increment(iter);
921
922         return next ? iter : NULL;
923 }
924
925 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
926 {
927         struct trace_iterator *iter = m->private;
928         void *last_ent = iter->ent;
929         int i = (int)*pos;
930         void *ent;
931
932         (*pos)++;
933
934         /* can't go backwards */
935         if (iter->idx > i)
936                 return NULL;
937
938         if (iter->idx < 0)
939                 ent = find_next_entry_inc(iter);
940         else
941                 ent = iter;
942
943         while (ent && iter->idx < i)
944                 ent = find_next_entry_inc(iter);
945
946         iter->pos = *pos;
947
948         if (last_ent && !ent)
949                 seq_puts(m, "\n\nvim:ft=help\n");
950
951         return ent;
952 }
953
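/*
 * seq_file start: take the trace_types lock, disable cmdline recording
 * and position the iterator at *pos, re-walking from the beginning of
 * the buffer whenever the requested position does not follow on from
 * the iterator's current position.
 */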
954 static void *s_start(struct seq_file *m, loff_t *pos)
955 {
956         struct trace_iterator *iter = m->private;
957         void *p = NULL;
958         loff_t l = 0;
959         int i;
960
961         mutex_lock(&trace_types_lock);
962
963         if (!current_trace || current_trace != iter->trace) {
964                 mutex_unlock(&trace_types_lock);
965                 return NULL;
966         }
967
968         atomic_inc(&trace_record_cmdline_disabled);
969
970         /* let the tracer grab locks here if needed */
971         if (current_trace->start)
972                 current_trace->start(iter);
973
974         if (*pos != iter->pos) {
975                 iter->ent = NULL;
976                 iter->cpu = 0;
977                 iter->idx = -1;
978                 iter->prev_ent = NULL;
979                 iter->prev_cpu = -1;
980
981                 for_each_tracing_cpu(i) {
982                         iter->next_idx[i] = 0;
983                         iter->next_page[i] = NULL;
984                 }
985
986                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
987                         ;
988
989         } else {
990                 l = *pos - 1;
991                 p = s_next(m, p, &l);
992         }
993
994         return p;
995 }
996
997 static void s_stop(struct seq_file *m, void *p)
998 {
999         struct trace_iterator *iter = m->private;
1000
1001         atomic_dec(&trace_record_cmdline_disabled);
1002
1003         /* let the tracer release locks here if needed */
1004         if (current_trace && current_trace == iter->trace && iter->trace->stop)
1005                 iter->trace->stop(iter);
1006
1007         mutex_unlock(&trace_types_lock);
1008 }
1009
1010 static int
1011 seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address)
1012 {
1013 #ifdef CONFIG_KALLSYMS
1014         char str[KSYM_SYMBOL_LEN];
1015
1016         kallsyms_lookup(address, NULL, NULL, NULL, str);
1017
1018         return trace_seq_printf(s, fmt, str);
1019 #endif
1020         return 1;
1021 }
1022
1023 static int
1024 seq_print_sym_offset(struct trace_seq *s, const char *fmt,
1025                      unsigned long address)
1026 {
1027 #ifdef CONFIG_KALLSYMS
1028         char str[KSYM_SYMBOL_LEN];
1029
1030         sprint_symbol(str, address);
1031         return trace_seq_printf(s, fmt, str);
1032 #endif
1033         return 1;
1034 }
1035
1036 #ifndef CONFIG_64BIT
1037 # define IP_FMT "%08lx"
1038 #else
1039 # define IP_FMT "%016lx"
1040 #endif
1041
1042 static int
1043 seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags)
1044 {
1045         int ret;
1046
1047         if (!ip)
1048                 return trace_seq_printf(s, "0");
1049
1050         if (sym_flags & TRACE_ITER_SYM_OFFSET)
1051                 ret = seq_print_sym_offset(s, "%s", ip);
1052         else
1053                 ret = seq_print_sym_short(s, "%s", ip);
1054
1055         if (!ret)
1056                 return 0;
1057
1058         if (sym_flags & TRACE_ITER_SYM_ADDR)
1059                 ret = trace_seq_printf(s, " <" IP_FMT ">", ip);
1060         return ret;
1061 }
1062
1063 static void print_lat_help_header(struct seq_file *m)
1064 {
1065         seq_puts(m, "#                _------=> CPU#            \n");
1066         seq_puts(m, "#               / _-----=> irqs-off        \n");
1067         seq_puts(m, "#              | / _----=> need-resched    \n");
1068         seq_puts(m, "#              || / _---=> hardirq/softirq \n");
1069         seq_puts(m, "#              ||| / _--=> preempt-depth   \n");
1070         seq_puts(m, "#              |||| /                      \n");
1071         seq_puts(m, "#              |||||     delay             \n");
1072         seq_puts(m, "#  cmd     pid ||||| time  |   caller      \n");
1073         seq_puts(m, "#     \\   /    |||||   \\   |   /           \n");
1074 }
1075
1076 static void print_func_help_header(struct seq_file *m)
1077 {
1078         seq_puts(m, "#           TASK-PID   CPU#    TIMESTAMP  FUNCTION\n");
1079         seq_puts(m, "#              | |      |          |         |\n");
1080 }
1081
1082
1083 static void
1084 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
1085 {
1086         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1087         struct trace_array *tr = iter->tr;
1088         struct trace_array_cpu *data = tr->data[tr->cpu];
1089         struct tracer *type = current_trace;
1090         unsigned long total   = 0;
1091         unsigned long entries = 0;
1092         int cpu;
1093         const char *name = "preemption";
1094
1095         if (type)
1096                 name = type->name;
1097
1098         for_each_tracing_cpu(cpu) {
1099                 if (head_page(tr->data[cpu])) {
1100                         total += tr->data[cpu]->trace_idx;
1101                         if (tr->data[cpu]->trace_idx > tr->entries)
1102                                 entries += tr->entries;
1103                         else
1104                                 entries += tr->data[cpu]->trace_idx;
1105                 }
1106         }
1107
1108         seq_printf(m, "%s latency trace v1.1.5 on %s\n",
1109                    name, UTS_RELEASE);
1110         seq_puts(m, "-----------------------------------"
1111                  "---------------------------------\n");
1112         seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |"
1113                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
1114                    nsecs_to_usecs(data->saved_latency),
1115                    entries,
1116                    total,
1117                    tr->cpu,
1118 #if defined(CONFIG_PREEMPT_NONE)
1119                    "server",
1120 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
1121                    "desktop",
1122 #elif defined(CONFIG_PREEMPT_DESKTOP)
1123                    "preempt",
1124 #else
1125                    "unknown",
1126 #endif
1127                    /* These are reserved for later use */
1128                    0, 0, 0, 0);
1129 #ifdef CONFIG_SMP
1130         seq_printf(m, " #P:%d)\n", num_online_cpus());
1131 #else
1132         seq_puts(m, ")\n");
1133 #endif
1134         seq_puts(m, "    -----------------\n");
1135         seq_printf(m, "    | task: %.16s-%d "
1136                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
1137                    data->comm, data->pid, data->uid, data->nice,
1138                    data->policy, data->rt_priority);
1139         seq_puts(m, "    -----------------\n");
1140
1141         if (data->critical_start) {
1142                 seq_puts(m, " => started at: ");
1143                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
1144                 trace_print_seq(m, &iter->seq);
1145                 seq_puts(m, "\n => ended at:   ");
1146                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
1147                 trace_print_seq(m, &iter->seq);
1148                 seq_puts(m, "\n");
1149         }
1150
1151         seq_puts(m, "\n");
1152 }
1153
1154 static void
1155 lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
1156 {
1157         int hardirq, softirq;
1158         char *comm;
1159
1160         comm = trace_find_cmdline(entry->pid);
1161
1162         trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid);
1163         trace_seq_printf(s, "%d", cpu);
1164         trace_seq_printf(s, "%c%c",
1165                         (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.',
1166                         ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.'));
1167
1168         hardirq = entry->flags & TRACE_FLAG_HARDIRQ;
1169         softirq = entry->flags & TRACE_FLAG_SOFTIRQ;
1170         if (hardirq && softirq) {
1171                 trace_seq_putc(s, 'H');
1172         } else {
1173                 if (hardirq) {
1174                         trace_seq_putc(s, 'h');
1175                 } else {
1176                         if (softirq)
1177                                 trace_seq_putc(s, 's');
1178                         else
1179                                 trace_seq_putc(s, '.');
1180                 }
1181         }
1182
1183         if (entry->preempt_count)
1184                 trace_seq_printf(s, "%x", entry->preempt_count);
1185         else
1186                 trace_seq_puts(s, ".");
1187 }
1188
1189 unsigned long preempt_mark_thresh = 100;
1190
1191 static void
1192 lat_print_timestamp(struct trace_seq *s, unsigned long long abs_usecs,
1193                     unsigned long rel_usecs)
1194 {
1195         trace_seq_printf(s, " %4lldus", abs_usecs);
1196         if (rel_usecs > preempt_mark_thresh)
1197                 trace_seq_puts(s, "!: ");
1198         else if (rel_usecs > 1)
1199                 trace_seq_puts(s, "+: ");
1200         else
1201                 trace_seq_puts(s, " : ");
1202 }
1203
1204 static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
1205
1206 static int
1207 print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu)
1208 {
1209         struct trace_seq *s = &iter->seq;
1210         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1211         struct trace_entry *next_entry = find_next_entry(iter, NULL);
1212         unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
1213         struct trace_entry *entry = iter->ent;
1214         unsigned long abs_usecs;
1215         unsigned long rel_usecs;
1216         char *comm;
1217         int S, T;
1218         int i;
1219         unsigned state;
1220
1221         if (!next_entry)
1222                 next_entry = entry;
1223         rel_usecs = ns2usecs(next_entry->t - entry->t);
1224         abs_usecs = ns2usecs(entry->t - iter->tr->time_start);
1225
1226         if (verbose) {
1227                 comm = trace_find_cmdline(entry->pid);
1228                 trace_seq_printf(s, "%16s %5d %d %d %08x %08x [%08lx]"
1229                                  " %ld.%03ldms (+%ld.%03ldms): ",
1230                                  comm,
1231                                  entry->pid, cpu, entry->flags,
1232                                  entry->preempt_count, trace_idx,
1233                                  ns2usecs(entry->t),
1234                                  abs_usecs/1000,
1235                                  abs_usecs % 1000, rel_usecs/1000,
1236                                  rel_usecs % 1000);
1237         } else {
1238                 lat_print_generic(s, entry, cpu);
1239                 lat_print_timestamp(s, abs_usecs, rel_usecs);
1240         }
1241         switch (entry->type) {
1242         case TRACE_FN:
1243                 seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1244                 trace_seq_puts(s, " (");
1245                 seq_print_ip_sym(s, entry->fn.parent_ip, sym_flags);
1246                 trace_seq_puts(s, ")\n");
1247                 break;
1248         case TRACE_CTX:
1249         case TRACE_WAKE:
1250                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1251                         state_to_char[entry->ctx.next_state] : 'X';
1252
1253                 state = entry->ctx.prev_state ? __ffs(entry->ctx.prev_state) + 1 : 0;
1254                 S = state < sizeof(state_to_char) - 1 ? state_to_char[state] : 'X';
1255                 comm = trace_find_cmdline(entry->ctx.next_pid);
1256                 trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c %s\n",
1257                                  entry->ctx.prev_pid,
1258                                  entry->ctx.prev_prio,
1259                                  S, entry->type == TRACE_CTX ? "==>" : "  +",
1260                                  entry->ctx.next_pid,
1261                                  entry->ctx.next_prio,
1262                                  T, comm);
1263                 break;
1264         case TRACE_SPECIAL:
1265                 trace_seq_printf(s, "# %ld %ld %ld\n",
1266                                  entry->special.arg1,
1267                                  entry->special.arg2,
1268                                  entry->special.arg3);
1269                 break;
1270         case TRACE_STACK:
1271                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1272                         if (i)
1273                                 trace_seq_puts(s, " <= ");
1274                         seq_print_ip_sym(s, entry->stack.caller[i], sym_flags);
1275                 }
1276                 trace_seq_puts(s, "\n");
1277                 break;
1278         default:
1279                 trace_seq_printf(s, "Unknown type %d\n", entry->type);
1280         }
1281         return 1;
1282 }
1283
1284 static int print_trace_fmt(struct trace_iterator *iter)
1285 {
1286         struct trace_seq *s = &iter->seq;
1287         unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
1288         struct trace_entry *entry;
1289         unsigned long usec_rem;
1290         unsigned long long t;
1291         unsigned long secs;
1292         char *comm;
1293         int ret;
1294         int S, T;
1295         int i;
1296
1297         entry = iter->ent;
1298
1299         comm = trace_find_cmdline(iter->ent->pid);
1300
1301         t = ns2usecs(entry->t);
1302         usec_rem = do_div(t, 1000000ULL);
1303         secs = (unsigned long)t;
1304
1305         ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
1306         if (!ret)
1307                 return 0;
1308         ret = trace_seq_printf(s, "[%02d] ", iter->cpu);
1309         if (!ret)
1310                 return 0;
1311         ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
1312         if (!ret)
1313                 return 0;
1314
1315         switch (entry->type) {
1316         case TRACE_FN:
1317                 ret = seq_print_ip_sym(s, entry->fn.ip, sym_flags);
1318                 if (!ret)
1319                         return 0;
1320                 if ((sym_flags & TRACE_ITER_PRINT_PARENT) &&
1321                                                 entry->fn.parent_ip) {
1322                         ret = trace_seq_printf(s, " <-");
1323                         if (!ret)
1324                                 return 0;
1325                         ret = seq_print_ip_sym(s, entry->fn.parent_ip,
1326                                                sym_flags);
1327                         if (!ret)
1328                                 return 0;
1329                 }
1330                 ret = trace_seq_printf(s, "\n");
1331                 if (!ret)
1332                         return 0;
1333                 break;
1334         case TRACE_CTX:
1335         case TRACE_WAKE:
1336                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1337                         state_to_char[entry->ctx.prev_state] : 'X';
1338                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1339                         state_to_char[entry->ctx.next_state] : 'X';
1340                 ret = trace_seq_printf(s, " %5d:%3d:%c %s %5d:%3d:%c\n",
1341                                        entry->ctx.prev_pid,
1342                                        entry->ctx.prev_prio,
1343                                        S,
1344                                        entry->type == TRACE_CTX ? "==>" : "  +",
1345                                        entry->ctx.next_pid,
1346                                        entry->ctx.next_prio,
1347                                        T);
1348                 if (!ret)
1349                         return 0;
1350                 break;
1351         case TRACE_SPECIAL:
1352                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1353                                  entry->special.arg1,
1354                                  entry->special.arg2,
1355                                  entry->special.arg3);
1356                 if (!ret)
1357                         return 0;
1358                 break;
1359         case TRACE_STACK:
1360                 for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
1361                         if (i) {
1362                                 ret = trace_seq_puts(s, " <= ");
1363                                 if (!ret)
1364                                         return 0;
1365                         }
1366                         ret = seq_print_ip_sym(s, entry->stack.caller[i],
1367                                                sym_flags);
1368                         if (!ret)
1369                                 return 0;
1370                 }
1371                 ret = trace_seq_puts(s, "\n");
1372                 if (!ret)
1373                         return 0;
1374                 break;
1375         }
1376         return 1;
1377 }
1378
1379 static int print_raw_fmt(struct trace_iterator *iter)
1380 {
1381         struct trace_seq *s = &iter->seq;
1382         struct trace_entry *entry;
1383         int ret;
1384         int S, T;
1385
1386         entry = iter->ent;
1387
1388         ret = trace_seq_printf(s, "%d %d %llu ",
1389                 entry->pid, iter->cpu, entry->t);
1390         if (!ret)
1391                 return 0;
1392
1393         switch (entry->type) {
1394         case TRACE_FN:
1395                 ret = trace_seq_printf(s, "%x %x\n",
1396                                         entry->fn.ip, entry->fn.parent_ip);
1397                 if (!ret)
1398                         return 0;
1399                 break;
1400         case TRACE_CTX:
1401         case TRACE_WAKE:
1402                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1403                         state_to_char[entry->ctx.prev_state] : 'X';
1404                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1405                         state_to_char[entry->ctx.next_state] : 'X';
1406                 if (entry->type == TRACE_WAKE)
1407                         S = '+';
1408                 ret = trace_seq_printf(s, "%d %d %c %d %d %c\n",
1409                                        entry->ctx.prev_pid,
1410                                        entry->ctx.prev_prio,
1411                                        S,
1412                                        entry->ctx.next_pid,
1413                                        entry->ctx.next_prio,
1414                                        T);
1415                 if (!ret)
1416                         return 0;
1417                 break;
1418         case TRACE_SPECIAL:
1419         case TRACE_STACK:
1420                 ret = trace_seq_printf(s, "# %ld %ld %ld\n",
1421                                  entry->special.arg1,
1422                                  entry->special.arg2,
1423                                  entry->special.arg3);
1424                 if (!ret)
1425                         return 0;
1426                 break;
1427         }
1428         return 1;
1429 }
1430
1431 #define SEQ_PUT_FIELD_RET(s, x)                         \
1432 do {                                                    \
1433         if (!trace_seq_putmem(s, &(x), sizeof(x)))      \
1434                 return 0;                               \
1435 } while (0)
1436
1437 #define SEQ_PUT_HEX_FIELD_RET(s, x)                     \
1438 do {                                                    \
1439         if (!trace_seq_putmem_hex(s, &(x), sizeof(x)))  \
1440                 return 0;                               \
1441 } while (0)
1442
1443 static int print_hex_fmt(struct trace_iterator *iter)
1444 {
1445         struct trace_seq *s = &iter->seq;
1446         unsigned char newline = '\n';
1447         struct trace_entry *entry;
1448         int S, T;
1449
1450         entry = iter->ent;
1451
1452         SEQ_PUT_HEX_FIELD_RET(s, entry->pid);
1453         SEQ_PUT_HEX_FIELD_RET(s, iter->cpu);
1454         SEQ_PUT_HEX_FIELD_RET(s, entry->t);
1455
1456         switch (entry->type) {
1457         case TRACE_FN:
1458                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.ip);
1459                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1460                 break;
1461         case TRACE_CTX:
1462         case TRACE_WAKE:
1463                 S = entry->ctx.prev_state < sizeof(state_to_char) ?
1464                         state_to_char[entry->ctx.prev_state] : 'X';
1465                 T = entry->ctx.next_state < sizeof(state_to_char) ?
1466                         state_to_char[entry->ctx.next_state] : 'X';
1467                 if (entry->type == TRACE_WAKE)
1468                         S = '+';
1469                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_pid);
1470                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.prev_prio);
1471                 SEQ_PUT_HEX_FIELD_RET(s, S);
1472                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_pid);
1473                 SEQ_PUT_HEX_FIELD_RET(s, entry->ctx.next_prio);
1474                 SEQ_PUT_HEX_FIELD_RET(s, entry->fn.parent_ip);
1475                 SEQ_PUT_HEX_FIELD_RET(s, T);
1476                 break;
1477         case TRACE_SPECIAL:
1478         case TRACE_STACK:
1479                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg1);
1480                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg2);
1481                 SEQ_PUT_HEX_FIELD_RET(s, entry->special.arg3);
1482                 break;
1483         }
1484         SEQ_PUT_FIELD_RET(s, newline);
1485
1486         return 1;
1487 }
1488
1489 static int print_bin_fmt(struct trace_iterator *iter)
1490 {
1491         struct trace_seq *s = &iter->seq;
1492         struct trace_entry *entry;
1493
1494         entry = iter->ent;
1495
1496         SEQ_PUT_FIELD_RET(s, entry->pid);
1497         SEQ_PUT_FIELD_RET(s, entry->cpu);
1498         SEQ_PUT_FIELD_RET(s, entry->t);
1499
1500         switch (entry->type) {
1501         case TRACE_FN:
1502                 SEQ_PUT_FIELD_RET(s, entry->fn.ip);
1503                 SEQ_PUT_FIELD_RET(s, entry->fn.parent_ip);
1504                 break;
1505         case TRACE_CTX:
1506                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_pid);
1507                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_prio);
1508                 SEQ_PUT_FIELD_RET(s, entry->ctx.prev_state);
1509                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_pid);
1510                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_prio);
1511                 SEQ_PUT_FIELD_RET(s, entry->ctx.next_state);
1512                 break;
1513         case TRACE_SPECIAL:
1514         case TRACE_STACK:
1515                 SEQ_PUT_FIELD_RET(s, entry->special.arg1);
1516                 SEQ_PUT_FIELD_RET(s, entry->special.arg2);
1517                 SEQ_PUT_FIELD_RET(s, entry->special.arg3);
1518                 break;
1519         }
1520         return 1;
1521 }
1522
1523 static int trace_empty(struct trace_iterator *iter)
1524 {
1525         struct trace_array_cpu *data;
1526         int cpu;
1527
1528         for_each_tracing_cpu(cpu) {
1529                 data = iter->tr->data[cpu];
1530
1531                 if (head_page(data) && data->trace_idx &&
1532                     (data->trace_tail != data->trace_head ||
1533                      data->trace_tail_idx != data->trace_head_idx))
1534                         return 0;
1535         }
1536         return 1;
1537 }
1538
1539 static int print_trace_line(struct trace_iterator *iter)
1540 {
1541         if (iter->trace && iter->trace->print_line)
1542                 return iter->trace->print_line(iter);
1543
1544         if (trace_flags & TRACE_ITER_BIN)
1545                 return print_bin_fmt(iter);
1546
1547         if (trace_flags & TRACE_ITER_HEX)
1548                 return print_hex_fmt(iter);
1549
1550         if (trace_flags & TRACE_ITER_RAW)
1551                 return print_raw_fmt(iter);
1552
1553         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
1554                 return print_lat_fmt(iter, iter->idx, iter->cpu);
1555
1556         return print_trace_fmt(iter);
1557 }
1558
1559 static int s_show(struct seq_file *m, void *v)
1560 {
1561         struct trace_iterator *iter = v;
1562
1563         if (iter->ent == NULL) {
1564                 if (iter->tr) {
1565                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
1566                         seq_puts(m, "#\n");
1567                 }
1568                 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
1569                         /* print nothing if the buffers are empty */
1570                         if (trace_empty(iter))
1571                                 return 0;
1572                         print_trace_header(m, iter);
1573                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1574                                 print_lat_help_header(m);
1575                 } else {
1576                         if (!(trace_flags & TRACE_ITER_VERBOSE))
1577                                 print_func_help_header(m);
1578                 }
1579         } else {
1580                 print_trace_line(iter);
1581                 trace_print_seq(m, &iter->seq);
1582         }
1583
1584         return 0;
1585 }
1586
1587 static struct seq_operations tracer_seq_ops = {
1588         .start          = s_start,
1589         .next           = s_next,
1590         .stop           = s_stop,
1591         .show           = s_show,
1592 };
1593
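/*
 * Common open path for the trace and latency_trace files: allocate the
 * iterator, pick the max or live trace array, hook it up to seq_file
 * and pause tracing while the buffer is being read.
 */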
1594 static struct trace_iterator *
1595 __tracing_open(struct inode *inode, struct file *file, int *ret)
1596 {
1597         struct trace_iterator *iter;
1598
1599         if (tracing_disabled) {
1600                 *ret = -ENODEV;
1601                 return NULL;
1602         }
1603
1604         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
1605         if (!iter) {
1606                 *ret = -ENOMEM;
1607                 goto out;
1608         }
1609
1610         mutex_lock(&trace_types_lock);
1611         if (current_trace && current_trace->print_max)
1612                 iter->tr = &max_tr;
1613         else
1614                 iter->tr = inode->i_private;
1615         iter->trace = current_trace;
1616         iter->pos = -1;
1617
1618         /* TODO stop tracer */
1619         *ret = seq_open(file, &tracer_seq_ops);
1620         if (!*ret) {
1621                 struct seq_file *m = file->private_data;
1622                 m->private = iter;
1623
1624                 /* stop the trace while dumping */
1625                 if (iter->tr->ctrl)
1626                         tracer_enabled = 0;
1627
1628                 if (iter->trace && iter->trace->open)
1629                         iter->trace->open(iter);
1630         } else {
1631                 kfree(iter);
1632                 iter = NULL;
1633         }
1634         mutex_unlock(&trace_types_lock);
1635
1636  out:
1637         return iter;
1638 }
1639
1640 int tracing_open_generic(struct inode *inode, struct file *filp)
1641 {
1642         if (tracing_disabled)
1643                 return -ENODEV;
1644
1645         filp->private_data = inode->i_private;
1646         return 0;
1647 }
1648
1649 int tracing_release(struct inode *inode, struct file *file)
1650 {
1651         struct seq_file *m = (struct seq_file *)file->private_data;
1652         struct trace_iterator *iter = m->private;
1653
1654         mutex_lock(&trace_types_lock);
1655         if (iter->trace && iter->trace->close)
1656                 iter->trace->close(iter);
1657
1658         /* reenable tracing if it was previously enabled */
1659         if (iter->tr->ctrl)
1660                 tracer_enabled = 1;
1661         mutex_unlock(&trace_types_lock);
1662
1663         seq_release(inode, file);
1664         kfree(iter);
1665         return 0;
1666 }
1667
1668 static int tracing_open(struct inode *inode, struct file *file)
1669 {
1670         int ret;
1671
1672         __tracing_open(inode, file, &ret);
1673
1674         return ret;
1675 }
1676
1677 static int tracing_lt_open(struct inode *inode, struct file *file)
1678 {
1679         struct trace_iterator *iter;
1680         int ret;
1681
1682         iter = __tracing_open(inode, file, &ret);
1683
1684         if (!ret)
1685                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
1686
1687         return ret;
1688 }
1689
1690
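/*
 * seq_file iterators for the available_tracers file: walk the list of
 * registered tracers (trace_types) under trace_types_lock and print
 * their names separated by spaces.
 */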
1691 static void *
1692 t_next(struct seq_file *m, void *v, loff_t *pos)
1693 {
1694         struct tracer *t = m->private;
1695
1696         (*pos)++;
1697
1698         if (t)
1699                 t = t->next;
1700
1701         m->private = t;
1702
1703         return t;
1704 }
1705
1706 static void *t_start(struct seq_file *m, loff_t *pos)
1707 {
1708         struct tracer *t = m->private;
1709         loff_t l = 0;
1710
1711         mutex_lock(&trace_types_lock);
1712         for (; t && l < *pos; t = t_next(m, t, &l))
1713                 ;
1714
1715         return t;
1716 }
1717
1718 static void t_stop(struct seq_file *m, void *p)
1719 {
1720         mutex_unlock(&trace_types_lock);
1721 }
1722
1723 static int t_show(struct seq_file *m, void *v)
1724 {
1725         struct tracer *t = v;
1726
1727         if (!t)
1728                 return 0;
1729
1730         seq_printf(m, "%s", t->name);
1731         if (t->next)
1732                 seq_putc(m, ' ');
1733         else
1734                 seq_putc(m, '\n');
1735
1736         return 0;
1737 }
1738
1739 static struct seq_operations show_traces_seq_ops = {
1740         .start          = t_start,
1741         .next           = t_next,
1742         .stop           = t_stop,
1743         .show           = t_show,
1744 };
1745
1746 static int show_traces_open(struct inode *inode, struct file *file)
1747 {
1748         int ret;
1749
1750         if (tracing_disabled)
1751                 return -ENODEV;
1752
1753         ret = seq_open(file, &show_traces_seq_ops);
1754         if (!ret) {
1755                 struct seq_file *m = file->private_data;
1756                 m->private = trace_types;
1757         }
1758
1759         return ret;
1760 }
1761
1762 static struct file_operations tracing_fops = {
1763         .open           = tracing_open,
1764         .read           = seq_read,
1765         .llseek         = seq_lseek,
1766         .release        = tracing_release,
1767 };
1768
1769 static struct file_operations tracing_lt_fops = {
1770         .open           = tracing_lt_open,
1771         .read           = seq_read,
1772         .llseek         = seq_lseek,
1773         .release        = tracing_release,
1774 };
1775
1776 static struct file_operations show_traces_fops = {
1777         .open           = show_traces_open,
1778         .read           = seq_read,
1779         .release        = seq_release,
1780 };
1781
1782 /*
1783  * Only trace on a CPU if the bitmask is set:
1784  */
1785 static cpumask_t tracing_cpumask = CPU_MASK_ALL;
1786
1787 /*
1788  * When tracing/tracing_cpu_mask is modified then this holds
1789  * the new bitmask we are about to install:
1790  */
1791 static cpumask_t tracing_cpumask_new;
1792
1793 /*
1794  * The tracer itself will not take this lock, but still we want
1795  * to provide a consistent cpumask to user-space:
1796  */
1797 static DEFINE_MUTEX(tracing_cpumask_update_lock);
1798
1799 /*
1800  * Temporary storage for the character representation of the
1801  * CPU bitmask (and one more byte for the newline):
1802  */
1803 static char mask_str[NR_CPUS + 1];
1804
1805 static ssize_t
1806 tracing_cpumask_read(struct file *filp, char __user *ubuf,
1807                      size_t count, loff_t *ppos)
1808 {
1809         int len;
1810
1811         mutex_lock(&tracing_cpumask_update_lock);
1812
1813         len = cpumask_scnprintf(mask_str, count, tracing_cpumask);
1814         if (count - len < 2) {
1815                 count = -EINVAL;
1816                 goto out_err;
1817         }
1818         len += sprintf(mask_str + len, "\n");
1819         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
1820
1821 out_err:
1822         mutex_unlock(&tracing_cpumask_update_lock);
1823
1824         return count;
1825 }
1826
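/*
 * Install a new tracing cpumask: for every CPU whose bit flips, bump or
 * drop the per-CPU disabled counter so tracing stops or resumes there,
 * holding ftrace_max_lock with interrupts off while switching.
 */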
1827 static ssize_t
1828 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
1829                       size_t count, loff_t *ppos)
1830 {
1831         int err, cpu;
1832
1833         mutex_lock(&tracing_cpumask_update_lock);
1834         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
1835         if (err)
1836                 goto err_unlock;
1837
1838         raw_local_irq_disable();
1839         __raw_spin_lock(&ftrace_max_lock);
1840         for_each_tracing_cpu(cpu) {
1841                 /*
1842                  * Increase/decrease the disabled counter if we are
1843                  * about to flip a bit in the cpumask:
1844                  */
1845                 if (cpu_isset(cpu, tracing_cpumask) &&
1846                                 !cpu_isset(cpu, tracing_cpumask_new)) {
1847                         atomic_inc(&global_trace.data[cpu]->disabled);
1848                 }
1849                 if (!cpu_isset(cpu, tracing_cpumask) &&
1850                                 cpu_isset(cpu, tracing_cpumask_new)) {
1851                         atomic_dec(&global_trace.data[cpu]->disabled);
1852                 }
1853         }
1854         __raw_spin_unlock(&ftrace_max_lock);
1855         raw_local_irq_enable();
1856
1857         tracing_cpumask = tracing_cpumask_new;
1858
1859         mutex_unlock(&tracing_cpumask_update_lock);
1860
1861         return count;
1862
1863 err_unlock:
1864         mutex_unlock(&tracing_cpumask_update_lock);
1865
1866         return err;
1867 }
1868
1869 static struct file_operations tracing_cpumask_fops = {
1870         .open           = tracing_open_generic,
1871         .read           = tracing_cpumask_read,
1872         .write          = tracing_cpumask_write,
1873 };
1874
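/*
 * Show the iter_ctrl options as a space separated list; options that are
 * currently cleared are printed with a "no" prefix.
 */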
1875 static ssize_t
1876 tracing_iter_ctrl_read(struct file *filp, char __user *ubuf,
1877                        size_t cnt, loff_t *ppos)
1878 {
1879         char *buf;
1880         int r = 0;
1881         int len = 0;
1882         int i;
1883
1884         /* calculate the maximum size */
1885         for (i = 0; trace_options[i]; i++) {
1886                 len += strlen(trace_options[i]);
1887                 len += 3; /* "no" and space */
1888         }
1889
1890         /* +2 for \n and \0 */
1891         buf = kmalloc(len + 2, GFP_KERNEL);
1892         if (!buf)
1893                 return -ENOMEM;
1894
1895         for (i = 0; trace_options[i]; i++) {
1896                 if (trace_flags & (1 << i))
1897                         r += sprintf(buf + r, "%s ", trace_options[i]);
1898                 else
1899                         r += sprintf(buf + r, "no%s ", trace_options[i]);
1900         }
1901
1902         r += sprintf(buf + r, "\n");
1903         WARN_ON(r >= len + 2);
1904
1905         r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
1906
1907         kfree(buf);
1908
1909         return r;
1910 }
1911
1912 static ssize_t
1913 tracing_iter_ctrl_write(struct file *filp, const char __user *ubuf,
1914                         size_t cnt, loff_t *ppos)
1915 {
1916         char buf[64];
1917         char *cmp = buf;
1918         int neg = 0;
1919         int i;
1920
1921         if (cnt >= sizeof(buf))
1922                 return -EINVAL;
1923
1924         if (copy_from_user(&buf, ubuf, cnt))
1925                 return -EFAULT;
1926
1927         buf[cnt] = 0;
1928
1929         if (strncmp(buf, "no", 2) == 0) {
1930                 neg = 1;
1931                 cmp += 2;
1932         }
1933
1934         for (i = 0; trace_options[i]; i++) {
1935                 int len = strlen(trace_options[i]);
1936
1937                 if (strncmp(cmp, trace_options[i], len) == 0) {
1938                         if (neg)
1939                                 trace_flags &= ~(1 << i);
1940                         else
1941                                 trace_flags |= (1 << i);
1942                         break;
1943                 }
1944         }
1945         /*
1946          * If no option could be set, return an error:
1947          */
1948         if (!trace_options[i])
1949                 return -EINVAL;
1950
1951         filp->f_pos += cnt;
1952
1953         return cnt;
1954 }
1955
1956 static struct file_operations tracing_iter_fops = {
1957         .open           = tracing_open_generic,
1958         .read           = tracing_iter_ctrl_read,
1959         .write          = tracing_iter_ctrl_write,
1960 };
1961
1962 static const char readme_msg[] =
1963         "tracing mini-HOWTO:\n\n"
1964         "# mkdir /debug\n"
1965         "# mount -t debugfs nodev /debug\n\n"
1966         "# cat /debug/tracing/available_tracers\n"
1967         "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n"
1968         "# cat /debug/tracing/current_tracer\n"
1969         "none\n"
1970         "# echo sched_switch > /debug/tracing/current_tracer\n"
1971         "# cat /debug/tracing/current_tracer\n"
1972         "sched_switch\n"
1973         "# cat /debug/tracing/iter_ctrl\n"
1974         "noprint-parent nosym-offset nosym-addr noverbose\n"
1975         "# echo print-parent > /debug/tracing/iter_ctrl\n"
1976         "# echo 1 > /debug/tracing/tracing_enabled\n"
1977         "# cat /debug/tracing/trace > /tmp/trace.txt\n"
1978         "# echo 0 > /debug/tracing/tracing_enabled\n"
1979 ;
1980
1981 static ssize_t
1982 tracing_readme_read(struct file *filp, char __user *ubuf,
1983                        size_t cnt, loff_t *ppos)
1984 {
1985         return simple_read_from_buffer(ubuf, cnt, ppos,
1986                                         readme_msg, strlen(readme_msg));
1987 }
1988
1989 static struct file_operations tracing_readme_fops = {
1990         .open           = tracing_open_generic,
1991         .read           = tracing_readme_read,
1992 };
1993
1994 static ssize_t
1995 tracing_ctrl_read(struct file *filp, char __user *ubuf,
1996                   size_t cnt, loff_t *ppos)
1997 {
1998         struct trace_array *tr = filp->private_data;
1999         char buf[64];
2000         int r;
2001
2002         r = sprintf(buf, "%ld\n", tr->ctrl);
2003         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2004 }
2005
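/*
 * Write 0 or 1 to tracing_enabled: update tracer_enabled and tr->ctrl,
 * and let the current tracer react through its ctrl_update() callback.
 */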
2006 static ssize_t
2007 tracing_ctrl_write(struct file *filp, const char __user *ubuf,
2008                    size_t cnt, loff_t *ppos)
2009 {
2010         struct trace_array *tr = filp->private_data;
2011         char buf[64];
2012         long val;
2013         int ret;
2014
2015         if (cnt >= sizeof(buf))
2016                 return -EINVAL;
2017
2018         if (copy_from_user(&buf, ubuf, cnt))
2019                 return -EFAULT;
2020
2021         buf[cnt] = 0;
2022
2023         ret = strict_strtoul(buf, 10, &val);
2024         if (ret < 0)
2025                 return ret;
2026
2027         val = !!val;
2028
2029         mutex_lock(&trace_types_lock);
2030         if (tr->ctrl ^ val) {
2031                 if (val)
2032                         tracer_enabled = 1;
2033                 else
2034                         tracer_enabled = 0;
2035
2036                 tr->ctrl = val;
2037
2038                 if (current_trace && current_trace->ctrl_update)
2039                         current_trace->ctrl_update(tr);
2040         }
2041         mutex_unlock(&trace_types_lock);
2042
2043         filp->f_pos += cnt;
2044
2045         return cnt;
2046 }
2047
2048 static ssize_t
2049 tracing_set_trace_read(struct file *filp, char __user *ubuf,
2050                        size_t cnt, loff_t *ppos)
2051 {
2052         char buf[max_tracer_type_len+2];
2053         int r;
2054
2055         mutex_lock(&trace_types_lock);
2056         if (current_trace)
2057                 r = sprintf(buf, "%s\n", current_trace->name);
2058         else
2059                 r = sprintf(buf, "\n");
2060         mutex_unlock(&trace_types_lock);
2061
2062         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2063 }
2064
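/*
 * Select the current tracer by name: look it up in the registered list,
 * reset the old tracer and initialize the new one for the global trace.
 */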
2065 static ssize_t
2066 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
2067                         size_t cnt, loff_t *ppos)
2068 {
2069         struct trace_array *tr = &global_trace;
2070         struct tracer *t;
2071         char buf[max_tracer_type_len+1];
2072         int i;
2073
2074         if (cnt > max_tracer_type_len)
2075                 cnt = max_tracer_type_len;
2076
2077         if (copy_from_user(&buf, ubuf, cnt))
2078                 return -EFAULT;
2079
2080         buf[cnt] = 0;
2081
2082         /* strip trailing whitespace */
2083         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
2084                 buf[i] = 0;
2085
2086         mutex_lock(&trace_types_lock);
2087         for (t = trace_types; t; t = t->next) {
2088                 if (strcmp(t->name, buf) == 0)
2089                         break;
2090         }
2091         if (!t || t == current_trace)
2092                 goto out;
2093
2094         if (current_trace && current_trace->reset)
2095                 current_trace->reset(tr);
2096
2097         current_trace = t;
2098         if (t->init)
2099                 t->init(tr);
2100
2101  out:
2102         mutex_unlock(&trace_types_lock);
2103
2104         filp->f_pos += cnt;
2105
2106         return cnt;
2107 }
2108
2109 static ssize_t
2110 tracing_max_lat_read(struct file *filp, char __user *ubuf,
2111                      size_t cnt, loff_t *ppos)
2112 {
2113         unsigned long *ptr = filp->private_data;
2114         char buf[64];
2115         int r;
2116
2117         r = snprintf(buf, sizeof(buf), "%ld\n",
2118                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
2119         if (r > sizeof(buf))
2120                 r = sizeof(buf);
2121         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2122 }
2123
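/*
 * The latency value is written in microseconds and stored internally in
 * nanoseconds.
 */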
2124 static ssize_t
2125 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
2126                       size_t cnt, loff_t *ppos)
2127 {
2128         long *ptr = filp->private_data;
2129         char buf[64];
2130         long val;
2131         int ret;
2132
2133         if (cnt >= sizeof(buf))
2134                 return -EINVAL;
2135
2136         if (copy_from_user(&buf, ubuf, cnt))
2137                 return -EFAULT;
2138
2139         buf[cnt] = 0;
2140
2141         ret = strict_strtoul(buf, 10, &val);
2142         if (ret < 0)
2143                 return ret;
2144
2145         *ptr = val * 1000;
2146
2147         return cnt;
2148 }
2149
2150 static atomic_t tracing_reader;
2151
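/*
 * trace_pipe is a consuming reader; only a single reader is allowed at a
 * time, which the tracing_reader count enforces.
 */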
2152 static int tracing_open_pipe(struct inode *inode, struct file *filp)
2153 {
2154         struct trace_iterator *iter;
2155
2156         if (tracing_disabled)
2157                 return -ENODEV;
2158
2159         /* We allow only one reader of the pipe at a time */
2160         if (atomic_inc_return(&tracing_reader) != 1) {
2161                 atomic_dec(&tracing_reader);
2162                 return -EBUSY;
2163         }
2164
2165         /* create a buffer to store the information to pass to userspace */
2166         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
2167         if (!iter)
2168                 return -ENOMEM;
2169
2170         iter->tr = &global_trace;
2171         iter->trace = current_trace;
2172
2173         filp->private_data = iter;
2174
2175         return 0;
2176 }
2177
2178 static int tracing_release_pipe(struct inode *inode, struct file *file)
2179 {
2180         struct trace_iterator *iter = file->private_data;
2181
2182         kfree(iter);
2183         atomic_dec(&tracing_reader);
2184
2185         return 0;
2186 }
2187
2188 static unsigned int
2189 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
2190 {
2191         struct trace_iterator *iter = filp->private_data;
2192
2193         if (trace_flags & TRACE_ITER_BLOCK) {
2194                 /*
2195                  * Always select as readable when in blocking mode
2196                  */
2197                 return POLLIN | POLLRDNORM;
2198         } else {
2199                 if (!trace_empty(iter))
2200                         return POLLIN | POLLRDNORM;
2201                 poll_wait(filp, &trace_wait, poll_table);
2202                 if (!trace_empty(iter))
2203                         return POLLIN | POLLRDNORM;
2204
2205                 return 0;
2206         }
2207 }
2208
2209 /*
2210  * Consumer reader.
2211  */
2212 static ssize_t
2213 tracing_read_pipe(struct file *filp, char __user *ubuf,
2214                   size_t cnt, loff_t *ppos)
2215 {
2216         struct trace_iterator *iter = filp->private_data;
2217         struct trace_array_cpu *data;
2218         struct trace_array *tr = iter->tr;
2219         struct tracer *tracer = iter->trace;
2220         static cpumask_t mask;
2221         static int start;
2222         unsigned long flags;
2223 #ifdef CONFIG_FTRACE
2224         int ftrace_save;
2225 #endif
2226         int read = 0;
2227         int cpu;
2228         int len;
2229         int ret;
2230
2231         /* return any leftover data */
2232         if (iter->seq.len > start) {
2233                 len = iter->seq.len - start;
2234                 if (cnt > len)
2235                         cnt = len;
2236                 ret = copy_to_user(ubuf, iter->seq.buffer + start, cnt);
2237                 if (ret)
2238                         cnt = -EFAULT;
2239
2240                 start += len;
2241
2242                 return cnt;
2243         }
2244
2245         trace_seq_reset(&iter->seq);
2246         start = 0;
2247
2248         while (trace_empty(iter)) {
2249
2250                 if ((filp->f_flags & O_NONBLOCK))
2251                         return -EAGAIN;
2252
2253                 /*
2254                  * This is a makeshift waitqueue. The reason we don't use
2255                  * an actual wait queue is because:
2256                  *  1) we only ever have one waiter
2257                  *  2) tracing covers all functions, and we don't want the
2258                  *     overhead of calling wake_up and friends
2259                  *     (and of tracing them too).
2260                  *  Anyway, this really is a very primitive wakeup.
2261                  */
2262                 set_current_state(TASK_INTERRUPTIBLE);
2263                 iter->tr->waiter = current;
2264
2265                 /* sleep for one second, and try again. */
2266                 schedule_timeout(HZ);
2267
2268                 iter->tr->waiter = NULL;
2269
2270                 if (signal_pending(current))
2271                         return -EINTR;
2272
2273                 if (iter->trace != current_trace)
2274                         return 0;
2275
2276                 /*
2277                  * We keep blocking until we have read something and
2278                  * tracing has been disabled. We still block if tracing is
2279                  * disabled but we have never read anything. This allows a
2280                  * user to cat this file, and then enable tracing. But after
2281                  * we have read something, we give an EOF when tracing is
2282                  * again disabled.
2282                  *
2283                  * iter->pos will be 0 if we haven't read anything.
2284                  */
2285                 if (!tracer_enabled && iter->pos)
2286                         break;
2287
2288                 continue;
2289         }
2290
2291         /* stop when tracing is finished */
2292         if (trace_empty(iter))
2293                 return 0;
2294
2295         if (cnt >= PAGE_SIZE)
2296                 cnt = PAGE_SIZE - 1;
2297
2298         memset(iter, 0, sizeof(*iter));
2299         iter->tr = tr;
2300         iter->trace = tracer;
2301         iter->pos = -1;
2302
2303         /*
2304          * We need to stop all tracing on all CPUs to read the
2305          * next buffer. This is a bit expensive, but is not done
2306          * often. We fill in as much as we can read, and then
2307          * release the locks again.
2308          */
2309
2310         cpus_clear(mask);
2311         local_irq_save(flags);
2312 #ifdef CONFIG_FTRACE
2313         ftrace_save = ftrace_enabled;
2314         ftrace_enabled = 0;
2315 #endif
2316         smp_wmb();
2317         for_each_tracing_cpu(cpu) {
2318                 data = iter->tr->data[cpu];
2319
2320                 if (!head_page(data) || !data->trace_idx)
2321                         continue;
2322
2323                 atomic_inc(&data->disabled);
2324                 cpu_set(cpu, mask);
2325         }
2326
2327         for_each_cpu_mask(cpu, mask) {
2328                 data = iter->tr->data[cpu];
2329                 __raw_spin_lock(&data->lock);
2330         }
2331
2332         while (find_next_entry_inc(iter) != NULL) {
2333                 int len = iter->seq.len;
2334
2335                 ret = print_trace_line(iter);
2336                 if (!ret) {
2337                         /* don't print partial lines */
2338                         iter->seq.len = len;
2339                         break;
2340                 }
2341
2342                 trace_consume(iter);
2343
2344                 if (iter->seq.len >= cnt)
2345                         break;
2346         }
2347
2348         for_each_cpu_mask(cpu, mask) {
2349                 data = iter->tr->data[cpu];
2350                 __raw_spin_unlock(&data->lock);
2351         }
2352
2353         for_each_cpu_mask(cpu, mask) {
2354                 data = iter->tr->data[cpu];
2355                 atomic_dec(&data->disabled);
2356         }
2357 #ifdef CONFIG_FTRACE
2358         ftrace_enabled = ftrace_save;
2359 #endif
2360         local_irq_restore(flags);
2361
2362         /* Now copy what we have to the user */
2363         read = iter->seq.len;
2364         if (read > cnt)
2365                 read = cnt;
2366
2367         ret = copy_to_user(ubuf, iter->seq.buffer, read);
2368
2369         if (read < iter->seq.len)
2370                 start = read;
2371         else
2372                 trace_seq_reset(&iter->seq);
2373
2374         if (ret)
2375                 read = -EFAULT;
2376
2377         return read;
2378 }
2379
2380 static ssize_t
2381 tracing_entries_read(struct file *filp, char __user *ubuf,
2382                      size_t cnt, loff_t *ppos)
2383 {
2384         struct trace_array *tr = filp->private_data;
2385         char buf[64];
2386         int r;
2387
2388         r = sprintf(buf, "%lu\n", tr->entries);
2389         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2390 }
2391
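/*
 * Resize the trace buffers. The current tracer must be "none" while the
 * size changes; the buffers grow and shrink one page per CPU at a time
 * through trace_alloc_page() and trace_free_page().
 */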
2392 static ssize_t
2393 tracing_entries_write(struct file *filp, const char __user *ubuf,
2394                       size_t cnt, loff_t *ppos)
2395 {
2396         unsigned long val;
2397         char buf[64];
2398         int ret;
2399
2400         if (cnt >= sizeof(buf))
2401                 return -EINVAL;
2402
2403         if (copy_from_user(&buf, ubuf, cnt))
2404                 return -EFAULT;
2405
2406         buf[cnt] = 0;
2407
2408         ret = strict_strtoul(buf, 10, &val);
2409         if (ret < 0)
2410                 return ret;
2411
2412         /* must have at least 1 entry */
2413         if (!val)
2414                 return -EINVAL;
2415
2416         mutex_lock(&trace_types_lock);
2417
2418         if (current_trace != &no_tracer) {
2419                 cnt = -EBUSY;
2420                 pr_info("ftrace: set current_tracer to none"
2421                         " before modifying buffer size\n");
2422                 goto out;
2423         }
2424
2425         if (val > global_trace.entries) {
2426                 while (global_trace.entries < val) {
2427                         if (trace_alloc_page()) {
2428                                 cnt = -ENOMEM;
2429                                 goto out;
2430                         }
2431                 }
2432         } else {
2433                 /* round val up to a whole page worth of entries before shrinking */
2434                 while (global_trace.entries > val + (ENTRIES_PER_PAGE - 1))
2435                         trace_free_page();
2436         }
2437
2438         filp->f_pos += cnt;
2439
2440  out:
2441         max_tr.entries = global_trace.entries;
2442         mutex_unlock(&trace_types_lock);
2443
2444         return cnt;
2445 }
2446
2447 static struct file_operations tracing_max_lat_fops = {
2448         .open           = tracing_open_generic,
2449         .read           = tracing_max_lat_read,
2450         .write          = tracing_max_lat_write,
2451 };
2452
2453 static struct file_operations tracing_ctrl_fops = {
2454         .open           = tracing_open_generic,
2455         .read           = tracing_ctrl_read,
2456         .write          = tracing_ctrl_write,
2457 };
2458
2459 static struct file_operations set_tracer_fops = {
2460         .open           = tracing_open_generic,
2461         .read           = tracing_set_trace_read,
2462         .write          = tracing_set_trace_write,
2463 };
2464
2465 static struct file_operations tracing_pipe_fops = {
2466         .open           = tracing_open_pipe,
2467         .poll           = tracing_poll_pipe,
2468         .read           = tracing_read_pipe,
2469         .release        = tracing_release_pipe,
2470 };
2471
2472 static struct file_operations tracing_entries_fops = {
2473         .open           = tracing_open_generic,
2474         .read           = tracing_entries_read,
2475         .write          = tracing_entries_write,
2476 };
2477
2478 #ifdef CONFIG_DYNAMIC_FTRACE
2479
2480 static ssize_t
2481 tracing_read_long(struct file *filp, char __user *ubuf,
2482                   size_t cnt, loff_t *ppos)
2483 {
2484         unsigned long *p = filp->private_data;
2485         char buf[64];
2486         int r;
2487
2488         r = sprintf(buf, "%ld\n", *p);
2489
2490         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
2491 }
2492
2493 static struct file_operations tracing_read_long_fops = {
2494         .open           = tracing_open_generic,
2495         .read           = tracing_read_long,
2496 };
2497 #endif
2498
2499 static struct dentry *d_tracer;
2500
2501 struct dentry *tracing_init_dentry(void)
2502 {
2503         static int once;
2504
2505         if (d_tracer)
2506                 return d_tracer;
2507
2508         d_tracer = debugfs_create_dir("tracing", NULL);
2509
2510         if (!d_tracer && !once) {
2511                 once = 1;
2512                 pr_warning("Could not create debugfs directory 'tracing'\n");
2513                 return NULL;
2514         }
2515
2516         return d_tracer;
2517 }
2518
2519 #ifdef CONFIG_FTRACE_SELFTEST
2520 /* Let selftest have access to static functions in this file */
2521 #include "trace_selftest.c"
2522 #endif
2523
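/* Create the tracer control and output files in the debugfs "tracing" directory. */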
2524 static __init void tracer_init_debugfs(void)
2525 {
2526         struct dentry *d_tracer;
2527         struct dentry *entry;
2528
2529         d_tracer = tracing_init_dentry();
2530
2531         entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
2532                                     &global_trace, &tracing_ctrl_fops);
2533         if (!entry)
2534                 pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
2535
2536         entry = debugfs_create_file("iter_ctrl", 0644, d_tracer,
2537                                     NULL, &tracing_iter_fops);
2538         if (!entry)
2539                 pr_warning("Could not create debugfs 'iter_ctrl' entry\n");
2540
2541         entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
2542                                     NULL, &tracing_cpumask_fops);
2543         if (!entry)
2544                 pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
2545
2546         entry = debugfs_create_file("latency_trace", 0444, d_tracer,
2547                                     &global_trace, &tracing_lt_fops);
2548         if (!entry)
2549                 pr_warning("Could not create debugfs 'latency_trace' entry\n");
2550
2551         entry = debugfs_create_file("trace", 0444, d_tracer,
2552                                     &global_trace, &tracing_fops);
2553         if (!entry)
2554                 pr_warning("Could not create debugfs 'trace' entry\n");
2555
2556         entry = debugfs_create_file("available_tracers", 0444, d_tracer,
2557                                     &global_trace, &show_traces_fops);
2558         if (!entry)
2559                 pr_warning("Could not create debugfs 'available_tracers' entry\n");
2560
2561         entry = debugfs_create_file("current_tracer", 0444, d_tracer,
2562                                     &global_trace, &set_tracer_fops);
2563         if (!entry)
2564                 pr_warning("Could not create debugfs 'current_tracer' entry\n");
2565
2566         entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
2567                                     &tracing_max_latency,
2568                                     &tracing_max_lat_fops);
2569         if (!entry)
2570                 pr_warning("Could not create debugfs "
2571                            "'tracing_max_latency' entry\n");
2572
2573         entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
2574                                     &tracing_thresh, &tracing_max_lat_fops);
2575         if (!entry)
2576                 pr_warning("Could not create debugfs "
2577                            "'tracing_thresh' entry\n");
2578         entry = debugfs_create_file("README", 0644, d_tracer,
2579                                     NULL, &tracing_readme_fops);
2580         if (!entry)
2581                 pr_warning("Could not create debugfs 'README' entry\n");
2582
2583         entry = debugfs_create_file("trace_pipe", 0644, d_tracer,
2584                                     NULL, &tracing_pipe_fops);
2585         if (!entry)
2586                 pr_warning("Could not create debugfs "
2587                            "'trace_pipe' entry\n");
2588
2589         entry = debugfs_create_file("trace_entries", 0644, d_tracer,
2590                                     &global_trace, &tracing_entries_fops);
2591         if (!entry)
2592                 pr_warning("Could not create debugfs "
2593                            "'trace_entries' entry\n");
2594
2595 #ifdef CONFIG_DYNAMIC_FTRACE
2596         entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
2597                                     &ftrace_update_tot_cnt,
2598                                     &tracing_read_long_fops);
2599         if (!entry)
2600                 pr_warning("Could not create debugfs "
2601                            "'dyn_ftrace_total_info' entry\n");
2602 #endif
2603 }
2604
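/*
 * Grow every per-CPU buffer (and the max_tr snapshot buffers when
 * CONFIG_TRACER_MAX_TRACE is set) by one page; if any allocation fails,
 * the pages gathered so far are freed again.
 */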
2605 static int trace_alloc_page(void)
2606 {
2607         struct trace_array_cpu *data;
2608         struct page *page, *tmp;
2609         LIST_HEAD(pages);
2610         void *array;
2611         int i;
2612
2613         /* first allocate a page for each CPU */
2614         for_each_tracing_cpu(i) {
2615                 array = (void *)__get_free_page(GFP_KERNEL);
2616                 if (array == NULL) {
2617                         printk(KERN_ERR "tracer: failed to allocate page "
2618                                "for trace buffer!\n");
2619                         goto free_pages;
2620                 }
2621
2622                 page = virt_to_page(array);
2623                 list_add(&page->lru, &pages);
2624
2625 /* Only allocate if we are actually using the max trace */
2626 #ifdef CONFIG_TRACER_MAX_TRACE
2627                 array = (void *)__get_free_page(GFP_KERNEL);
2628                 if (array == NULL) {
2629                         printk(KERN_ERR "tracer: failed to allocate page "
2630                                "for trace buffer!\n");
2631                         goto free_pages;
2632                 }
2633                 page = virt_to_page(array);
2634                 list_add(&page->lru, &pages);
2635 #endif
2636         }
2637
2638         /* Now that we successfully allocated a page per CPU, add them */
2639         for_each_tracing_cpu(i) {
2640                 data = global_trace.data[i];
2641                 page = list_entry(pages.next, struct page, lru);
2642                 list_del_init(&page->lru);
2643                 list_add_tail(&page->lru, &data->trace_pages);
2644                 ClearPageLRU(page);
2645
2646 #ifdef CONFIG_TRACER_MAX_TRACE
2647                 data = max_tr.data[i];
2648                 page = list_entry(pages.next, struct page, lru);
2649                 list_del_init(&page->lru);
2650                 list_add_tail(&page->lru, &data->trace_pages);
2651                 SetPageLRU(page);
2652 #endif
2653         }
2654         global_trace.entries += ENTRIES_PER_PAGE;
2655
2656         return 0;
2657
2658  free_pages:
2659         list_for_each_entry_safe(page, tmp, &pages, lru) {
2660                 list_del_init(&page->lru);
2661                 __free_page(page);
2662         }
2663         return -ENOMEM;
2664 }
2665
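/*
 * Shrink every per-CPU buffer (and the max_tr snapshot buffers) by one
 * page and reset the affected buffers.
 */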
2666 static int trace_free_page(void)
2667 {
2668         struct trace_array_cpu *data;
2669         struct page *page;
2670         struct list_head *p;
2671         int i;
2672         int ret = 0;
2673
2674         /* free one page from each buffer */
2675         for_each_tracing_cpu(i) {
2676                 data = global_trace.data[i];
2677                 p = data->trace_pages.next;
2678                 if (p == &data->trace_pages) {
2679                         /* should never happen */
2680                         WARN_ON(1);
2681                         tracing_disabled = 1;
2682                         ret = -1;
2683                         break;
2684                 }
2685                 page = list_entry(p, struct page, lru);
2686                 ClearPageLRU(page);
2687                 list_del(&page->lru);
2688                 __free_page(page);
2689
2690                 tracing_reset(data);
2691
2692 #ifdef CONFIG_TRACER_MAX_TRACE
2693                 data = max_tr.data[i];
2694                 p = data->trace_pages.next;
2695                 if (p == &data->trace_pages) {
2696                         /* should never happen */
2697                         WARN_ON(1);
2698                         tracing_disabled = 1;
2699                         ret = -1;
2700                         break;
2701                 }
2702                 page = list_entry(p, struct page, lru);
2703                 ClearPageLRU(page);
2704                 list_del(&page->lru);
2705                 __free_page(page);
2706
2707                 tracing_reset(data);
2708 #endif
2709         }
2710         global_trace.entries -= ENTRIES_PER_PAGE;
2711
2712         return ret;
2713 }
2714
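/*
 * Boot-time setup: allocate the first page of every per-CPU buffer, then
 * keep adding pages until trace_nr_entries entries fit. The page LRU flag
 * is (ab)used to tell the global_trace pages from the max_tr pages.
 */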
2715 __init static int tracer_alloc_buffers(void)
2716 {
2717         struct trace_array_cpu *data;
2718         void *array;
2719         struct page *page;
2720         int pages = 0;
2721         int ret = -ENOMEM;
2722         int i;
2723
2724         global_trace.ctrl = tracer_enabled;
2725
2726         /* TODO: make the number of buffers hot-pluggable with CPUs */
2727         tracing_nr_buffers = num_possible_cpus();
2728         tracing_buffer_mask = cpu_possible_map;
2729
2730         /* Allocate the first page for all buffers */
2731         for_each_tracing_cpu(i) {
2732                 data = global_trace.data[i] = &per_cpu(global_trace_cpu, i);
2733                 max_tr.data[i] = &per_cpu(max_data, i);
2734
2735                 array = (void *)__get_free_page(GFP_KERNEL);
2736                 if (array == NULL) {
2737                         printk(KERN_ERR "tracer: failed to allocate page "
2738                                "for trace buffer!\n");
2739                         goto free_buffers;
2740                 }
2741
2742                 /* add the page backing this array to the per-CPU page list */
2743                 INIT_LIST_HEAD(&data->trace_pages);
2744                 page = virt_to_page(array);
2745                 list_add(&page->lru, &data->trace_pages);
2746                 /* use the LRU flag to differentiate the two buffers */
2747                 ClearPageLRU(page);
2748
2749                 data->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
2750                 max_tr.data[i]->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
2751
2752 /* Only allocate if we are actually using the max trace */
2753 #ifdef CONFIG_TRACER_MAX_TRACE
2754                 array = (void *)__get_free_page(GFP_KERNEL);
2755                 if (array == NULL) {
2756                         printk(KERN_ERR "tracer: failed to allocate page "
2757                                "for trace buffer!\n");
2758                         goto free_buffers;
2759                 }
2760
2761                 INIT_LIST_HEAD(&max_tr.data[i]->trace_pages);
2762                 page = virt_to_page(array);
2763                 list_add(&page->lru, &max_tr.data[i]->trace_pages);
2764                 SetPageLRU(page);
2765 #endif
2766         }
2767
2768         /*
2769          * Since we allocate by orders of pages, we may be able to
2770          * round up a bit.
2771          */
2772         global_trace.entries = ENTRIES_PER_PAGE;
2773         pages++;
2774
2775         while (global_trace.entries < trace_nr_entries) {
2776                 if (trace_alloc_page())
2777                         break;
2778                 pages++;
2779         }
2780         max_tr.entries = global_trace.entries;
2781
2782         pr_info("tracer: %d pages allocated for %ld",
2783                 pages, trace_nr_entries);
2784         pr_info(" entries of %ld bytes\n", (long)TRACE_ENTRY_SIZE);
2785         pr_info("   actual entries %ld\n", global_trace.entries);
2786
2787         tracer_init_debugfs();
2788
2789         trace_init_cmdlines();
2790
2791         register_tracer(&no_tracer);
2792         current_trace = &no_tracer;
2793
2794         /* All seems OK, enable tracing */
2795         tracing_disabled = 0;
2796
2797         return 0;
2798
2799  free_buffers:
2800         for (i-- ; i >= 0; i--) {
2801                 struct page *page, *tmp;
2802                 struct trace_array_cpu *data = global_trace.data[i];
2803
2804                 if (data) {
2805                         list_for_each_entry_safe(page, tmp,
2806                                                  &data->trace_pages, lru) {
2807                                 list_del_init(&page->lru);
2808                                 __free_page(page);
2809                         }
2810                 }
2811
2812 #ifdef CONFIG_TRACER_MAX_TRACE
2813                 data = max_tr.data[i];
2814                 if (data) {
2815                         list_for_each_entry_safe(page, tmp,
2816                                                  &data->trace_pages, lru) {
2817                                 list_del_init(&page->lru);
2818                                 __free_page(page);
2819                         }
2820                 }
2821 #endif
2822         }
2823         return ret;
2824 }
2825 fs_initcall(tracer_alloc_buffers);