1 /*
2  * Performance events:
3  *
4  *    Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
5  *    Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar
6  *    Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra
7  *
8  * Data type definitions, declarations, prototypes.
9  *
10  *    Started by: Thomas Gleixner and Ingo Molnar
11  *
12  * For licensing details see kernel-base/COPYING
13  */
14 #ifndef _LINUX_PERF_EVENT_H
15 #define _LINUX_PERF_EVENT_H
16
17 #include <linux/types.h>
18 #include <linux/ioctl.h>
19 #include <asm/byteorder.h>
20
21 /*
22  * User-space ABI bits:
23  */
24
25 /*
26  * attr.type
27  */
28 enum perf_type_id {
29         PERF_TYPE_HARDWARE                      = 0,
30         PERF_TYPE_SOFTWARE                      = 1,
31         PERF_TYPE_TRACEPOINT                    = 2,
32         PERF_TYPE_HW_CACHE                      = 3,
33         PERF_TYPE_RAW                           = 4,
34         PERF_TYPE_BREAKPOINT                    = 5,
35
36         PERF_TYPE_MAX,                          /* non-ABI */
37 };
38
39 /*
40  * Generalized performance event event_id types, used by the
41  * attr.config parameter of the sys_perf_event_open()
42  * syscall:
43  */
44 enum perf_hw_id {
45         /*
46          * Common hardware events, generalized by the kernel:
47          */
48         PERF_COUNT_HW_CPU_CYCLES                = 0,
49         PERF_COUNT_HW_INSTRUCTIONS              = 1,
50         PERF_COUNT_HW_CACHE_REFERENCES          = 2,
51         PERF_COUNT_HW_CACHE_MISSES              = 3,
52         PERF_COUNT_HW_BRANCH_INSTRUCTIONS       = 4,
53         PERF_COUNT_HW_BRANCH_MISSES             = 5,
54         PERF_COUNT_HW_BUS_CYCLES                = 6,
55         PERF_COUNT_HW_STALLED_CYCLES            = 7,
56
57         PERF_COUNT_HW_MAX,                      /* non-ABI */
58 };
59
60 /*
61  * Generalized hardware cache events:
62  *
63  *       { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x
64  *       { read, write, prefetch } x
65  *       { accesses, misses }
66  */
67 enum perf_hw_cache_id {
68         PERF_COUNT_HW_CACHE_L1D                 = 0,
69         PERF_COUNT_HW_CACHE_L1I                 = 1,
70         PERF_COUNT_HW_CACHE_LL                  = 2,
71         PERF_COUNT_HW_CACHE_DTLB                = 3,
72         PERF_COUNT_HW_CACHE_ITLB                = 4,
73         PERF_COUNT_HW_CACHE_BPU                 = 5,
74
75         PERF_COUNT_HW_CACHE_MAX,                /* non-ABI */
76 };
77
78 enum perf_hw_cache_op_id {
79         PERF_COUNT_HW_CACHE_OP_READ             = 0,
80         PERF_COUNT_HW_CACHE_OP_WRITE            = 1,
81         PERF_COUNT_HW_CACHE_OP_PREFETCH         = 2,
82
83         PERF_COUNT_HW_CACHE_OP_MAX,             /* non-ABI */
84 };
85
86 enum perf_hw_cache_op_result_id {
87         PERF_COUNT_HW_CACHE_RESULT_ACCESS       = 0,
88         PERF_COUNT_HW_CACHE_RESULT_MISS         = 1,
89
90         PERF_COUNT_HW_CACHE_RESULT_MAX,         /* non-ABI */
91 };
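
/*
 * Illustrative note (not an ABI addition): for PERF_TYPE_HW_CACHE events
 * the three enums above are combined into attr.config as
 *
 *      config = (perf_hw_cache_id) |
 *               (perf_hw_cache_op_id << 8) |
 *               (perf_hw_cache_op_result_id << 16);
 *
 * e.g. counting L1-D read misses:
 *
 *      attr.type   = PERF_TYPE_HW_CACHE;
 *      attr.config = PERF_COUNT_HW_CACHE_L1D |
 *                    (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *                    (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
 */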
92
93 /*
94  * Special "software" events provided by the kernel, even if the hardware
95  * does not support performance events. These events measure various
96  * physical and software events in the kernel (and allow profiling them as
97  * well):
98  */
99 enum perf_sw_ids {
100         PERF_COUNT_SW_CPU_CLOCK                 = 0,
101         PERF_COUNT_SW_TASK_CLOCK                = 1,
102         PERF_COUNT_SW_PAGE_FAULTS               = 2,
103         PERF_COUNT_SW_CONTEXT_SWITCHES          = 3,
104         PERF_COUNT_SW_CPU_MIGRATIONS            = 4,
105         PERF_COUNT_SW_PAGE_FAULTS_MIN           = 5,
106         PERF_COUNT_SW_PAGE_FAULTS_MAJ           = 6,
107         PERF_COUNT_SW_ALIGNMENT_FAULTS          = 7,
108         PERF_COUNT_SW_EMULATION_FAULTS          = 8,
109
110         PERF_COUNT_SW_MAX,                      /* non-ABI */
111 };
112
113 /*
114  * Bits that can be set in attr.sample_type to request information
115  * in the overflow packets.
116  */
117 enum perf_event_sample_format {
118         PERF_SAMPLE_IP                          = 1U << 0,
119         PERF_SAMPLE_TID                         = 1U << 1,
120         PERF_SAMPLE_TIME                        = 1U << 2,
121         PERF_SAMPLE_ADDR                        = 1U << 3,
122         PERF_SAMPLE_READ                        = 1U << 4,
123         PERF_SAMPLE_CALLCHAIN                   = 1U << 5,
124         PERF_SAMPLE_ID                          = 1U << 6,
125         PERF_SAMPLE_CPU                         = 1U << 7,
126         PERF_SAMPLE_PERIOD                      = 1U << 8,
127         PERF_SAMPLE_STREAM_ID                   = 1U << 9,
128         PERF_SAMPLE_RAW                         = 1U << 10,
129
130         PERF_SAMPLE_MAX = 1U << 11,             /* non-ABI */
131 };
132
133 /*
134  * The format of the data returned by read() on a perf event fd,
135  * as specified by attr.read_format:
136  *
137  * struct read_format {
138  *      { u64           value;
139  *      { u64           time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
140  *      { u64           time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
141  *        { u64         id;           } && PERF_FORMAT_ID
142  *      } && !PERF_FORMAT_GROUP
143  *
144  *      { u64           nr;
145  *        { u64         time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
146  *        { u64         time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
147  *        { u64         value;
148  *          { u64       id;           } && PERF_FORMAT_ID
149  *        }             cntr[nr];
150  *      } && PERF_FORMAT_GROUP
151  * };
152  */
153 enum perf_event_read_format {
154         PERF_FORMAT_TOTAL_TIME_ENABLED          = 1U << 0,
155         PERF_FORMAT_TOTAL_TIME_RUNNING          = 1U << 1,
156         PERF_FORMAT_ID                          = 1U << 2,
157         PERF_FORMAT_GROUP                       = 1U << 3,
158
159         PERF_FORMAT_MAX = 1U << 4,              /* non-ABI */
160 };
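
/*
 * Illustrative sketch (user-space, not part of this header): reading and
 * scaling a single, non-group counter opened with read_format set to
 * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING.
 * Scaling by time_enabled/time_running estimates what the count would have
 * been had the event not been multiplexed off the PMU; "fd" is assumed to
 * be the event file descriptor.
 *
 *      struct { __u64 value, time_enabled, time_running; } rf;
 *      __u64 count = 0;
 *
 *      if (read(fd, &rf, sizeof(rf)) == sizeof(rf) && rf.time_running)
 *              count = rf.value * rf.time_enabled / rf.time_running;
 */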
161
162 #define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */
163
164 /*
165  * Hardware event_id to monitor via a performance monitoring event:
166  */
167 struct perf_event_attr {
168
169         /*
170          * Major type: hardware/software/tracepoint/etc.
171          */
172         __u32                   type;
173
174         /*
175          * Size of the attr structure, for fwd/bwd compat.
176          */
177         __u32                   size;
178
179         /*
180          * Type specific configuration information.
181          */
182         __u64                   config;
183
184         union {
185                 __u64           sample_period;
186                 __u64           sample_freq;
187         };
188
189         __u64                   sample_type;
190         __u64                   read_format;
191
192         __u64                   disabled       :  1, /* off by default        */
193                                 inherit        :  1, /* children inherit it   */
194                                 pinned         :  1, /* must always be on PMU */
195                                 exclusive      :  1, /* only group on PMU     */
196                                 exclude_user   :  1, /* don't count user      */
197                                 exclude_kernel :  1, /* ditto kernel          */
198                                 exclude_hv     :  1, /* ditto hypervisor      */
199                                 exclude_idle   :  1, /* don't count when idle */
200                                 mmap           :  1, /* include mmap data     */
201                                 comm           :  1, /* include comm data     */
202                                 freq           :  1, /* use freq, not period  */
203                                 inherit_stat   :  1, /* per task counts       */
204                                 enable_on_exec :  1, /* next exec enables     */
205                                 task           :  1, /* trace fork/exit       */
206                                 watermark      :  1, /* wakeup_watermark      */
207                                 /*
208                                  * precise_ip:
209                                  *
210                                  *  0 - SAMPLE_IP can have arbitrary skid
211                                  *  1 - SAMPLE_IP must have constant skid
212                                  *  2 - SAMPLE_IP requested to have 0 skid
213                                  *  3 - SAMPLE_IP must have 0 skid
214                                  *
215                                  *  See also PERF_RECORD_MISC_EXACT_IP
216                                  */
217                                 precise_ip     :  2, /* skid constraint       */
218                                 mmap_data      :  1, /* non-exec mmap data    */
219                                 sample_id_all  :  1, /* sample_type all events */
220
221                                 __reserved_1   : 45;
222
223         union {
224                 __u32           wakeup_events;    /* wakeup every n events */
225                 __u32           wakeup_watermark; /* bytes before wakeup   */
226         };
227
228         __u32                   bp_type;
229         union {
230                 __u64           bp_addr;
231                 __u64           config1; /* extension of config */
232         };
233         union {
234                 __u64           bp_len;
235                 __u64           config2; /* extension of config1 */
236         };
237 };
238
239 /*
240  * Ioctls that can be done on a perf event fd:
241  */
242 #define PERF_EVENT_IOC_ENABLE           _IO ('$', 0)
243 #define PERF_EVENT_IOC_DISABLE          _IO ('$', 1)
244 #define PERF_EVENT_IOC_REFRESH          _IO ('$', 2)
245 #define PERF_EVENT_IOC_RESET            _IO ('$', 3)
246 #define PERF_EVENT_IOC_PERIOD           _IOW('$', 4, __u64)
247 #define PERF_EVENT_IOC_SET_OUTPUT       _IO ('$', 5)
248 #define PERF_EVENT_IOC_SET_FILTER       _IOW('$', 6, char *)
249
250 enum perf_event_ioc_flags {
251         PERF_IOC_FLAG_GROUP             = 1U << 0,
252 };
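
/*
 * Illustrative sketch (user-space, not part of this header): counting
 * instructions across a region of code using the attr struct, syscall and
 * ioctls above.  The raw syscall() is used because libc provides no
 * perf_event_open() wrapper; pid=0/cpu=-1 means "this task, any CPU".
 *
 *      struct perf_event_attr attr = {
 *              .type           = PERF_TYPE_HARDWARE,
 *              .size           = sizeof(struct perf_event_attr),
 *              .config         = PERF_COUNT_HW_INSTRUCTIONS,
 *              .disabled       = 1,
 *              .exclude_kernel = 1,
 *      };
 *      __u64 count;
 *      int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *
 *      ioctl(fd, PERF_EVENT_IOC_RESET, 0);
 *      ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
 *      ...region being measured...
 *      ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
 *      read(fd, &count, sizeof(count));
 */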
253
254 /*
255  * Structure of the page that can be mapped via mmap
256  */
257 struct perf_event_mmap_page {
258         __u32   version;                /* version number of this structure */
259         __u32   compat_version;         /* lowest version this is compat with */
260
261         /*
262          * Bits needed to read the hw events in user-space.
263          *
264          *   u32 seq;
265          *   s64 count;
266          *
267          *   do {
268          *     seq = pc->lock;
269          *
270          *     barrier()
271          *     if (pc->index) {
272          *       count = pmc_read(pc->index - 1);
273          *       count += pc->offset;
274          *     } else
275          *       goto regular_read;
276          *
277          *     barrier();
278          *   } while (pc->lock != seq);
279          *
280          * NOTE: for obvious reasons this only works on self-monitoring
281          *       processes.
282          */
283         __u32   lock;                   /* seqlock for synchronization */
284         __u32   index;                  /* hardware event identifier */
285         __s64   offset;                 /* add to hardware event value */
286         __u64   time_enabled;           /* time event active */
287         __u64   time_running;           /* time event on cpu */
288
289                 /*
290                  * Hole for extension of the self monitor capabilities
291                  */
292
293         __u64   __reserved[123];        /* align to 1k */
294
295         /*
296          * Control data for the mmap() data buffer.
297          *
298          * On SMP capable platforms, user-space reading the @data_head value
299          * should issue an rmb() after reading this value -- see
300          * perf_event_wakeup().
301          *
302          * When the mapping is PROT_WRITE the @data_tail value should be
303          * written by userspace to reflect the last read data. In this case
304          * the kernel will not over-write unread data.
305          */
306         __u64   data_head;              /* head in the data section */
307         __u64   data_tail;              /* user-space written tail */
308 };
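
/*
 * Illustrative sketch (user-space, not part of this header): consuming the
 * mmap()ed data area described by the struct above.  The mapping is one
 * perf_event_mmap_page followed by 2^n data pages; "base", "page_size" and
 * "data_size" are assumed to be set up by the caller, and records that wrap
 * around the end of the buffer are ignored for brevity.  Advancing
 * data_tail only has effect on PROT_WRITE mappings, as noted above.
 *
 *      struct perf_event_mmap_page *pc = base;
 *      char *data = (char *)base + page_size;
 *      __u64 head = pc->data_head;
 *
 *      rmb();
 *
 *      while (pc->data_tail < head) {
 *              struct perf_event_header *ev = (struct perf_event_header *)
 *                      (data + (pc->data_tail & (data_size - 1)));
 *
 *              ...dispatch on ev->type, e.g. PERF_RECORD_SAMPLE...
 *
 *              pc->data_tail += ev->size;
 *      }
 */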
309
310 #define PERF_RECORD_MISC_CPUMODE_MASK           (7 << 0)
311 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN        (0 << 0)
312 #define PERF_RECORD_MISC_KERNEL                 (1 << 0)
313 #define PERF_RECORD_MISC_USER                   (2 << 0)
314 #define PERF_RECORD_MISC_HYPERVISOR             (3 << 0)
315 #define PERF_RECORD_MISC_GUEST_KERNEL           (4 << 0)
316 #define PERF_RECORD_MISC_GUEST_USER             (5 << 0)
317
318 /*
319  * Indicates that the content of PERF_SAMPLE_IP points to
320  * the actual instruction that triggered the event. See also
321  * perf_event_attr::precise_ip.
322  */
323 #define PERF_RECORD_MISC_EXACT_IP               (1 << 14)
324 /*
325  * Reserve the last bit to indicate some extended misc field
326  */
327 #define PERF_RECORD_MISC_EXT_RESERVED           (1 << 15)
328
329 struct perf_event_header {
330         __u32   type;
331         __u16   misc;
332         __u16   size;
333 };
334
335 enum perf_event_type {
336
337         /*
338          * If perf_event_attr.sample_id_all is set then all event types will
339          * carry the sample_type-selected fields related to where/when
340          * (identity) an event took place (TID, TIME, ID, CPU, STREAM_ID), as
341          * described for PERF_RECORD_SAMPLE below. They are stashed just after
342          * the perf_event_header and the fields already present for the
343          * existing record type, i.e. at the end of the payload. That way a
344          * newer perf.data file will be supported by older perf tools, with
345          * these new optional fields being ignored.
346          *
347          * The MMAP events record the PROT_EXEC mappings so that we can
348          * correlate userspace IPs to code. They have the following structure:
349          *
350          * struct {
351          *      struct perf_event_header        header;
352          *
353          *      u32                             pid, tid;
354          *      u64                             addr;
355          *      u64                             len;
356          *      u64                             pgoff;
357          *      char                            filename[];
358          * };
359          */
360         PERF_RECORD_MMAP                        = 1,
361
362         /*
363          * struct {
364          *      struct perf_event_header        header;
365          *      u64                             id;
366          *      u64                             lost;
367          * };
368          */
369         PERF_RECORD_LOST                        = 2,
370
371         /*
372          * struct {
373          *      struct perf_event_header        header;
374          *
375          *      u32                             pid, tid;
376          *      char                            comm[];
377          * };
378          */
379         PERF_RECORD_COMM                        = 3,
380
381         /*
382          * struct {
383          *      struct perf_event_header        header;
384          *      u32                             pid, ppid;
385          *      u32                             tid, ptid;
386          *      u64                             time;
387          * };
388          */
389         PERF_RECORD_EXIT                        = 4,
390
391         /*
392          * struct {
393          *      struct perf_event_header        header;
394          *      u64                             time;
395          *      u64                             id;
396          *      u64                             stream_id;
397          * };
398          */
399         PERF_RECORD_THROTTLE                    = 5,
400         PERF_RECORD_UNTHROTTLE                  = 6,
401
402         /*
403          * struct {
404          *      struct perf_event_header        header;
405          *      u32                             pid, ppid;
406          *      u32                             tid, ptid;
407          *      u64                             time;
408          * };
409          */
410         PERF_RECORD_FORK                        = 7,
411
412         /*
413          * struct {
414          *      struct perf_event_header        header;
415          *      u32                             pid, tid;
416          *
417          *      struct read_format              values;
418          * };
419          */
420         PERF_RECORD_READ                        = 8,
421
422         /*
423          * struct {
424          *      struct perf_event_header        header;
425          *
426          *      { u64                   ip;       } && PERF_SAMPLE_IP
427          *      { u32                   pid, tid; } && PERF_SAMPLE_TID
428          *      { u64                   time;     } && PERF_SAMPLE_TIME
429          *      { u64                   addr;     } && PERF_SAMPLE_ADDR
430          *      { u64                   id;       } && PERF_SAMPLE_ID
431          *      { u64                   stream_id;} && PERF_SAMPLE_STREAM_ID
432          *      { u32                   cpu, res; } && PERF_SAMPLE_CPU
433          *      { u64                   period;   } && PERF_SAMPLE_PERIOD
434          *
435          *      { struct read_format    values;   } && PERF_SAMPLE_READ
436          *
437          *      { u64                   nr,
438          *        u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
439          *
440          *      #
441          *      # The RAW record below is opaque data wrt the ABI
442          *      #
443          *      # That is, the ABI doesn't make any promises wrt
444          *      # the stability of its content, it may vary depending
445          *      # on event, hardware, kernel version and phase of
446          *      # the moon.
447          *      #
448          *      # In other words, PERF_SAMPLE_RAW contents are not an ABI.
449          *      #
450          *
451          *      { u32                   size;
452          *        char                  data[size];}&& PERF_SAMPLE_RAW
453          * };
454          */
455         PERF_RECORD_SAMPLE                      = 9,
456
457         PERF_RECORD_MAX,                        /* non-ABI */
458 };
459
460 enum perf_callchain_context {
461         PERF_CONTEXT_HV                 = (__u64)-32,
462         PERF_CONTEXT_KERNEL             = (__u64)-128,
463         PERF_CONTEXT_USER               = (__u64)-512,
464
465         PERF_CONTEXT_GUEST              = (__u64)-2048,
466         PERF_CONTEXT_GUEST_KERNEL       = (__u64)-2176,
467         PERF_CONTEXT_GUEST_USER         = (__u64)-2560,
468
469         PERF_CONTEXT_MAX                = (__u64)-4095,
470 };
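
/*
 * Illustrative note (not an ABI addition): these values are sentinels
 * stored inline in the PERF_SAMPLE_CALLCHAIN ips[] array to mark which
 * context the addresses that follow belong to, e.g.:
 *
 *      ips[] = { PERF_CONTEXT_KERNEL, k_ip0, k_ip1,
 *                PERF_CONTEXT_USER,   u_ip0, u_ip1, ... };
 *
 * They sit at the very top of the address space, so they are not confused
 * with real instruction pointers.
 */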
471
472 #define PERF_FLAG_FD_NO_GROUP   (1U << 0)
473 #define PERF_FLAG_FD_OUTPUT     (1U << 1)
474 #define PERF_FLAG_PID_CGROUP    (1U << 2) /* pid=cgroup id, per-cpu mode only */
475
476 #ifdef __KERNEL__
477 /*
478  * Kernel-internal data types and definitions:
479  */
480
481 #ifdef CONFIG_PERF_EVENTS
482 # include <linux/cgroup.h>
483 # include <asm/perf_event.h>
484 # include <asm/local64.h>
485 #endif
486
487 struct perf_guest_info_callbacks {
488         int (*is_in_guest) (void);
489         int (*is_user_mode) (void);
490         unsigned long (*get_guest_ip) (void);
491 };
492
493 #ifdef CONFIG_HAVE_HW_BREAKPOINT
494 #include <asm/hw_breakpoint.h>
495 #endif
496
497 #include <linux/list.h>
498 #include <linux/mutex.h>
499 #include <linux/rculist.h>
500 #include <linux/rcupdate.h>
501 #include <linux/spinlock.h>
502 #include <linux/hrtimer.h>
503 #include <linux/fs.h>
504 #include <linux/pid_namespace.h>
505 #include <linux/workqueue.h>
506 #include <linux/ftrace.h>
507 #include <linux/cpu.h>
508 #include <linux/irq_work.h>
509 #include <linux/jump_label_ref.h>
510 #include <asm/atomic.h>
511 #include <asm/local.h>
512
513 #define PERF_MAX_STACK_DEPTH            255
514
515 struct perf_callchain_entry {
516         __u64                           nr;
517         __u64                           ip[PERF_MAX_STACK_DEPTH];
518 };
519
520 struct perf_raw_record {
521         u32                             size;
522         void                            *data;
523 };
524
525 struct perf_branch_entry {
526         __u64                           from;
527         __u64                           to;
528         __u64                           flags;
529 };
530
531 struct perf_branch_stack {
532         __u64                           nr;
533         struct perf_branch_entry        entries[0];
534 };
535
536 struct task_struct;
537
538 /**
539  * struct hw_perf_event - performance event hardware details:
540  */
541 struct hw_perf_event {
542 #ifdef CONFIG_PERF_EVENTS
543         union {
544                 struct { /* hardware */
545                         u64             config;
546                         u64             last_tag;
547                         unsigned long   config_base;
548                         unsigned long   event_base;
549                         int             idx;
550                         int             last_cpu;
551                         unsigned int    extra_reg;
552                         u64             extra_config;
553                         int             extra_alloc;
554                 };
555                 struct { /* software */
556                         struct hrtimer  hrtimer;
557                 };
558 #ifdef CONFIG_HAVE_HW_BREAKPOINT
559                 struct { /* breakpoint */
560                         struct arch_hw_breakpoint       info;
561                         struct list_head                bp_list;
562                         /*
563                          * Crufty hack to avoid the chicken and egg
564                          * problem hw_breakpoint has with context
565                          * creation and event initialization.
566                          */
567                         struct task_struct              *bp_target;
568                 };
569 #endif
570         };
571         int                             state;
572         local64_t                       prev_count;
573         u64                             sample_period;
574         u64                             last_period;
575         local64_t                       period_left;
576         u64                             interrupts;
577
578         u64                             freq_time_stamp;
579         u64                             freq_count_stamp;
580 #endif
581 };
582
583 /*
584  * hw_perf_event::state flags
585  */
586 #define PERF_HES_STOPPED        0x01 /* the counter is stopped */
587 #define PERF_HES_UPTODATE       0x02 /* event->count up-to-date */
588 #define PERF_HES_ARCH           0x04
589
590 struct perf_event;
591
592 /*
593  * Common implementation detail of pmu::{start,commit,cancel}_txn
594  */
595 #define PERF_EVENT_TXN 0x1
596
597 /**
598  * struct pmu - generic performance monitoring unit
599  */
600 struct pmu {
601         struct list_head                entry;
602
603         struct device                   *dev;
604         char                            *name;
605         int                             type;
606
607         int * __percpu                  pmu_disable_count;
608         struct perf_cpu_context * __percpu pmu_cpu_context;
609         int                             task_ctx_nr;
610
611         /*
612          * Fully disable/enable this PMU, can be used to protect from the PMI
613          * as well as for lazy/batch writing of the MSRs.
614          */
615         void (*pmu_enable)              (struct pmu *pmu); /* optional */
616         void (*pmu_disable)             (struct pmu *pmu); /* optional */
617
618         /*
619          * Try to initialize the event for this PMU.
620          * Should return -ENOENT when the @event doesn't match this PMU.
621          */
622         int (*event_init)               (struct perf_event *event);
623
624 #define PERF_EF_START   0x01            /* start the counter when adding    */
625 #define PERF_EF_RELOAD  0x02            /* reload the counter when starting */
626 #define PERF_EF_UPDATE  0x04            /* update the counter when stopping */
627
628         /*
629          * Adds/Removes a counter to/from the PMU, can be done inside
630          * a transaction, see the ->*_txn() methods.
631          */
632         int  (*add)                     (struct perf_event *event, int flags);
633         void (*del)                     (struct perf_event *event, int flags);
634
635         /*
636          * Starts/Stops a counter present on the PMU. The PMI handler
637          * should stop the counter when perf_event_overflow() returns
638          * !0. ->start() will be used to continue.
639          */
640         void (*start)                   (struct perf_event *event, int flags);
641         void (*stop)                    (struct perf_event *event, int flags);
642
643         /*
644          * Updates the counter value of the event.
645          */
646         void (*read)                    (struct perf_event *event);
647
648         /*
649          * Group event scheduling is treated as a transaction: add the
650          * group's events as a whole and perform one schedulability test.
651          * If the test fails, roll back the whole group.
652          *
653          * Start the transaction, after this ->add() doesn't need to
654          * do schedulability tests.
655          */
656         void (*start_txn)       (struct pmu *pmu); /* optional */
657         /*
658          * If ->start_txn() disabled the ->add() schedulability test
659          * then ->commit_txn() is required to perform one. On success
660          * the transaction is closed. On error the transaction is kept
661          * open until ->cancel_txn() is called.
662          */
663         int  (*commit_txn)      (struct pmu *pmu); /* optional */
664         /*
665          * Will cancel the transaction, assumes ->del() is called
666          * for each successful ->add() during the transaction.
667          */
668         void (*cancel_txn)      (struct pmu *pmu); /* optional */
669 };
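
/*
 * Illustrative sketch (hypothetical driver code, not part of this header):
 * the callbacks a minimal PMU driver fills in before registration.  All
 * "my_*" names are made up; my_event_init() is expected to return -ENOENT
 * for attr.type values this PMU does not recognize, and my_add() should
 * honor PERF_EF_START.  Passing -1 as the type to perf_pmu_register()
 * (declared further down) is assumed to request a dynamically allocated
 * PERF_TYPE_* id.
 *
 *      static struct pmu my_pmu = {
 *              .event_init     = my_event_init,
 *              .add            = my_add,
 *              .del            = my_del,
 *              .start          = my_start,
 *              .stop           = my_stop,
 *              .read           = my_read,
 *      };
 *
 *      perf_pmu_register(&my_pmu, "my_pmu", -1);
 */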
670
671 /**
672  * enum perf_event_active_state - the states of an event
673  */
674 enum perf_event_active_state {
675         PERF_EVENT_STATE_ERROR          = -2,
676         PERF_EVENT_STATE_OFF            = -1,
677         PERF_EVENT_STATE_INACTIVE       =  0,
678         PERF_EVENT_STATE_ACTIVE         =  1,
679 };
680
681 struct file;
682
683 #define PERF_BUFFER_WRITABLE            0x01
684
685 struct perf_buffer {
686         atomic_t                        refcount;
687         struct rcu_head                 rcu_head;
688 #ifdef CONFIG_PERF_USE_VMALLOC
689         struct work_struct              work;
690         int                             page_order;     /* allocation order  */
691 #endif
692         int                             nr_pages;       /* nr of data pages  */
693         int                             writable;       /* are we writable   */
694
695         atomic_t                        poll;           /* POLL_ for wakeups */
696
697         local_t                         head;           /* write position    */
698         local_t                         nest;           /* nested writers    */
699         local_t                         events;         /* event limit       */
700         local_t                         wakeup;         /* wakeup stamp      */
701         local_t                         lost;           /* nr records lost   */
702
703         long                            watermark;      /* wakeup watermark  */
704
705         struct perf_event_mmap_page     *user_page;
706         void                            *data_pages[0];
707 };
708
709 struct perf_sample_data;
710
711 typedef void (*perf_overflow_handler_t)(struct perf_event *, int,
712                                         struct perf_sample_data *,
713                                         struct pt_regs *regs);
714
715 enum perf_group_flag {
716         PERF_GROUP_SOFTWARE = 0x1,
717 };
718
719 #define SWEVENT_HLIST_BITS      8
720 #define SWEVENT_HLIST_SIZE      (1 << SWEVENT_HLIST_BITS)
721
722 struct swevent_hlist {
723         struct hlist_head       heads[SWEVENT_HLIST_SIZE];
724         struct rcu_head         rcu_head;
725 };
726
727 #define PERF_ATTACH_CONTEXT     0x01
728 #define PERF_ATTACH_GROUP       0x02
729 #define PERF_ATTACH_TASK        0x04
730
731 #ifdef CONFIG_CGROUP_PERF
732 /*
733  * perf_cgroup_info keeps track of time_enabled for a cgroup.
734  * This is a per-cpu dynamically allocated data structure.
735  */
736 struct perf_cgroup_info {
737         u64 time;
738         u64 timestamp;
739 };
740
741 struct perf_cgroup {
742         struct cgroup_subsys_state css;
743         struct perf_cgroup_info *info;  /* timing info, one per cpu */
744 };
745 #endif
746
747 /**
748  * struct perf_event - performance event kernel representation:
749  */
750 struct perf_event {
751 #ifdef CONFIG_PERF_EVENTS
752         struct list_head                group_entry;
753         struct list_head                event_entry;
754         struct list_head                sibling_list;
755         struct hlist_node               hlist_entry;
756         int                             nr_siblings;
757         int                             group_flags;
758         struct perf_event               *group_leader;
759         struct pmu                      *pmu;
760
761         enum perf_event_active_state    state;
762         unsigned int                    attach_state;
763         local64_t                       count;
764         atomic64_t                      child_count;
765
766         /*
767          * These are the total time in nanoseconds that the event
768          * has been enabled (i.e. eligible to run, and the task has
769          * been scheduled in, if this is a per-task event)
770          * and running (scheduled onto the CPU), respectively.
771          *
772          * They are computed from tstamp_enabled, tstamp_running and
773          * tstamp_stopped when the event is in INACTIVE or ACTIVE state.
774          */
775         u64                             total_time_enabled;
776         u64                             total_time_running;
777
778         /*
779          * These are timestamps used for computing total_time_enabled
780          * and total_time_running when the event is in INACTIVE or
781          * ACTIVE state, measured in nanoseconds from an arbitrary point
782          * in time.
783          * tstamp_enabled: the notional time when the event was enabled
784          * tstamp_running: the notional time when the event was scheduled on
785          * tstamp_stopped: in INACTIVE state, the notional time when the
786          *      event was scheduled off.
787          */
788         u64                             tstamp_enabled;
789         u64                             tstamp_running;
790         u64                             tstamp_stopped;
791
792         /*
793          * timestamp shadows the actual context timing but it can
794          * be safely used in NMI interrupt context. It reflects the
795          * context time as it was when the event was last scheduled in.
796          *
797          * ctx_time already accounts for ctx->timestamp. Therefore to
798          * compute ctx_time for a sample, simply add perf_clock().
799          */
800         u64                             shadow_ctx_time;
801
802         struct perf_event_attr          attr;
803         u16                             header_size;
804         u16                             id_header_size;
805         u16                             read_size;
806         struct hw_perf_event            hw;
807
808         struct perf_event_context       *ctx;
809         struct file                     *filp;
810
811         /*
812          * These accumulate total time (in nanoseconds) that children
813          * events have been enabled and running, respectively.
814          */
815         atomic64_t                      child_total_time_enabled;
816         atomic64_t                      child_total_time_running;
817
818         /*
819          * Protect attach/detach and child_list:
820          */
821         struct mutex                    child_mutex;
822         struct list_head                child_list;
823         struct perf_event               *parent;
824
825         int                             oncpu;
826         int                             cpu;
827
828         struct list_head                owner_entry;
829         struct task_struct              *owner;
830
831         /* mmap bits */
832         struct mutex                    mmap_mutex;
833         atomic_t                        mmap_count;
834         int                             mmap_locked;
835         struct user_struct              *mmap_user;
836         struct perf_buffer              *buffer;
837
838         /* poll related */
839         wait_queue_head_t               waitq;
840         struct fasync_struct            *fasync;
841
842         /* delayed work for NMIs and such */
843         int                             pending_wakeup;
844         int                             pending_kill;
845         int                             pending_disable;
846         struct irq_work                 pending;
847
848         atomic_t                        event_limit;
849
850         void (*destroy)(struct perf_event *);
851         struct rcu_head                 rcu_head;
852
853         struct pid_namespace            *ns;
854         u64                             id;
855
856         perf_overflow_handler_t         overflow_handler;
857
858 #ifdef CONFIG_EVENT_TRACING
859         struct ftrace_event_call        *tp_event;
860         struct event_filter             *filter;
861 #endif
862
863 #ifdef CONFIG_CGROUP_PERF
864         struct perf_cgroup              *cgrp; /* cgroup the event is attached to */
865         int                             cgrp_defer_enabled;
866 #endif
867
868 #endif /* CONFIG_PERF_EVENTS */
869 };
870
871 enum perf_event_context_type {
872         task_context,
873         cpu_context,
874 };
875
876 /**
877  * struct perf_event_context - event context structure
878  *
879  * Used as a container for task events and CPU events as well:
880  */
881 struct perf_event_context {
882         struct pmu                      *pmu;
883         enum perf_event_context_type    type;
884         /*
885          * Protect the states of the events in the list,
886          * nr_active, and the list:
887          */
888         raw_spinlock_t                  lock;
889         /*
890          * Protect the list of events.  Locking either mutex or lock
891          * is sufficient to ensure the list doesn't change; to change
892          * the list you need to lock both the mutex and the spinlock.
893          */
894         struct mutex                    mutex;
895
896         struct list_head                pinned_groups;
897         struct list_head                flexible_groups;
898         struct list_head                event_list;
899         int                             nr_events;
900         int                             nr_active;
901         int                             is_active;
902         int                             nr_stat;
903         int                             rotate_disable;
904         atomic_t                        refcount;
905         struct task_struct              *task;
906
907         /*
908          * Context clock, runs when context enabled.
909          */
910         u64                             time;
911         u64                             timestamp;
912
913         /*
914          * These fields let us detect when two contexts have both
915          * been cloned (inherited) from a common ancestor.
916          */
917         struct perf_event_context       *parent_ctx;
918         u64                             parent_gen;
919         u64                             generation;
920         int                             pin_count;
921         struct rcu_head                 rcu_head;
922         int                             nr_cgroups; /* cgroup events present */
923 };
924
925 /*
926  * Number of contexts where an event can trigger:
927  *      task, softirq, hardirq, nmi.
928  */
929 #define PERF_NR_CONTEXTS        4
930
931 /**
932  * struct perf_cpu_context - per cpu event context structure
933  */
934 struct perf_cpu_context {
935         struct perf_event_context       ctx;
936         struct perf_event_context       *task_ctx;
937         int                             active_oncpu;
938         int                             exclusive;
939         struct list_head                rotation_list;
940         int                             jiffies_interval;
941         struct pmu                      *active_pmu;
942         struct perf_cgroup              *cgrp;
943 };
944
945 struct perf_output_handle {
946         struct perf_event               *event;
947         struct perf_buffer              *buffer;
948         unsigned long                   wakeup;
949         unsigned long                   size;
950         void                            *addr;
951         int                             page;
952         int                             nmi;
953         int                             sample;
954 };
955
956 #ifdef CONFIG_PERF_EVENTS
957
958 extern int perf_pmu_register(struct pmu *pmu, char *name, int type);
959 extern void perf_pmu_unregister(struct pmu *pmu);
960
961 extern int perf_num_counters(void);
962 extern const char *perf_pmu_name(void);
963 extern void __perf_event_task_sched_in(struct task_struct *task);
964 extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
965 extern int perf_event_init_task(struct task_struct *child);
966 extern void perf_event_exit_task(struct task_struct *child);
967 extern void perf_event_free_task(struct task_struct *task);
968 extern void perf_event_delayed_put(struct task_struct *task);
969 extern void perf_event_print_debug(void);
970 extern void perf_pmu_disable(struct pmu *pmu);
971 extern void perf_pmu_enable(struct pmu *pmu);
972 extern int perf_event_task_disable(void);
973 extern int perf_event_task_enable(void);
974 extern void perf_event_update_userpage(struct perf_event *event);
975 extern int perf_event_release_kernel(struct perf_event *event);
976 extern struct perf_event *
977 perf_event_create_kernel_counter(struct perf_event_attr *attr,
978                                 int cpu,
979                                 struct task_struct *task,
980                                 perf_overflow_handler_t callback);
981 extern u64 perf_event_read_value(struct perf_event *event,
982                                  u64 *enabled, u64 *running);
983
984 struct perf_sample_data {
985         u64                             type;
986
987         u64                             ip;
988         struct {
989                 u32     pid;
990                 u32     tid;
991         }                               tid_entry;
992         u64                             time;
993         u64                             addr;
994         u64                             id;
995         u64                             stream_id;
996         struct {
997                 u32     cpu;
998                 u32     reserved;
999         }                               cpu_entry;
1000         u64                             period;
1001         struct perf_callchain_entry     *callchain;
1002         struct perf_raw_record          *raw;
1003 };
1004
1005 static inline
1006 void perf_sample_data_init(struct perf_sample_data *data, u64 addr)
1007 {
1008         data->addr = addr;
1009         data->raw  = NULL;
1010 }
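
/*
 * Illustrative sketch (hypothetical arch PMI-handler code, not part of this
 * header): how perf_sample_data and perf_event_overflow() (declared below)
 * fit together.  A non-zero return from perf_event_overflow() asks the
 * handler to stop the event, matching the pmu::start/stop comment above;
 * "my_pmu_stop" is a made-up callback name.
 *
 *      struct perf_sample_data data;
 *
 *      perf_sample_data_init(&data, 0);
 *      data.period = event->hw.last_period;
 *
 *      if (perf_event_overflow(event, 1, &data, regs))
 *              my_pmu_stop(event, 0);
 */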
1011
1012 extern void perf_output_sample(struct perf_output_handle *handle,
1013                                struct perf_event_header *header,
1014                                struct perf_sample_data *data,
1015                                struct perf_event *event);
1016 extern void perf_prepare_sample(struct perf_event_header *header,
1017                                 struct perf_sample_data *data,
1018                                 struct perf_event *event,
1019                                 struct pt_regs *regs);
1020
1021 extern int perf_event_overflow(struct perf_event *event, int nmi,
1022                                  struct perf_sample_data *data,
1023                                  struct pt_regs *regs);
1024
1025 static inline bool is_sampling_event(struct perf_event *event)
1026 {
1027         return event->attr.sample_period != 0;
1028 }
1029
1030 /*
1031  * Return 1 for a software event, 0 for a hardware event
1032  */
1033 static inline int is_software_event(struct perf_event *event)
1034 {
1035         return event->pmu->task_ctx_nr == perf_sw_context;
1036 }
1037
1038 extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
1039
1040 extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
1041
1042 #ifndef perf_arch_fetch_caller_regs
1043 static inline void
1044 perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
1045 #endif
1046
1047 /*
1048  * Take a snapshot of the regs. Skip ip and frame pointer to
1049  * the nth caller. We only need a few of the regs:
1050  * - ip for PERF_SAMPLE_IP
1051  * - cs for user_mode() tests
1052  * - bp for callchains
1053  * - eflags, for future purposes, just in case
1054  */
1055 static inline void perf_fetch_caller_regs(struct pt_regs *regs)
1056 {
1057         memset(regs, 0, sizeof(*regs));
1058
1059         perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
1060 }
1061
1062 static __always_inline void
1063 perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
1064 {
1065         struct pt_regs hot_regs;
1066
1067         JUMP_LABEL(&perf_swevent_enabled[event_id], have_event);
1068         return;
1069
1070 have_event:
1071         if (!regs) {
1072                 perf_fetch_caller_regs(&hot_regs);
1073                 regs = &hot_regs;
1074         }
1075         __perf_sw_event(event_id, nr, nmi, regs, addr);
1076 }
1077
1078 extern atomic_t perf_sched_events;
1079
1080 static inline void perf_event_task_sched_in(struct task_struct *task)
1081 {
1082         COND_STMT(&perf_sched_events, __perf_event_task_sched_in(task));
1083 }
1084
1085 static inline
1086 void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
1087 {
1088         perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
1089
1090         __perf_event_task_sched_out(task, next);
1091 }
1092
1093 extern void perf_event_mmap(struct vm_area_struct *vma);
1094 extern struct perf_guest_info_callbacks *perf_guest_cbs;
1095 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
1096 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
1097
1098 extern void perf_event_comm(struct task_struct *tsk);
1099 extern void perf_event_fork(struct task_struct *tsk);
1100
1101 /* Callchains */
1102 DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
1103
1104 extern void perf_callchain_user(struct perf_callchain_entry *entry,
1105                                 struct pt_regs *regs);
1106 extern void perf_callchain_kernel(struct perf_callchain_entry *entry,
1107                                   struct pt_regs *regs);
1108
1109
1110 static inline void
1111 perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
1112 {
1113         if (entry->nr < PERF_MAX_STACK_DEPTH)
1114                 entry->ip[entry->nr++] = ip;
1115 }
1116
1117 extern int sysctl_perf_event_paranoid;
1118 extern int sysctl_perf_event_mlock;
1119 extern int sysctl_perf_event_sample_rate;
1120
1121 extern int perf_proc_update_handler(struct ctl_table *table, int write,
1122                 void __user *buffer, size_t *lenp,
1123                 loff_t *ppos);
1124
1125 static inline bool perf_paranoid_tracepoint_raw(void)
1126 {
1127         return sysctl_perf_event_paranoid > -1;
1128 }
1129
1130 static inline bool perf_paranoid_cpu(void)
1131 {
1132         return sysctl_perf_event_paranoid > 0;
1133 }
1134
1135 static inline bool perf_paranoid_kernel(void)
1136 {
1137         return sysctl_perf_event_paranoid > 1;
1138 }
1139
1140 extern void perf_event_init(void);
1141 extern void perf_tp_event(u64 addr, u64 count, void *record,
1142                           int entry_size, struct pt_regs *regs,
1143                           struct hlist_head *head, int rctx);
1144 extern void perf_bp_event(struct perf_event *event, void *data);
1145
1146 #ifndef perf_misc_flags
1147 #define perf_misc_flags(regs)   (user_mode(regs) ? PERF_RECORD_MISC_USER : \
1148                                  PERF_RECORD_MISC_KERNEL)
1149 #define perf_instruction_pointer(regs)  instruction_pointer(regs)
1150 #endif
1151
1152 extern int perf_output_begin(struct perf_output_handle *handle,
1153                              struct perf_event *event, unsigned int size,
1154                              int nmi, int sample);
1155 extern void perf_output_end(struct perf_output_handle *handle);
1156 extern void perf_output_copy(struct perf_output_handle *handle,
1157                              const void *buf, unsigned int len);
1158 extern int perf_swevent_get_recursion_context(void);
1159 extern void perf_swevent_put_recursion_context(int rctx);
1160 extern void perf_event_enable(struct perf_event *event);
1161 extern void perf_event_disable(struct perf_event *event);
1162 extern void perf_event_task_tick(void);
1163 #else
1164 static inline void
1165 perf_event_task_sched_in(struct task_struct *task)                      { }
1166 static inline void
1167 perf_event_task_sched_out(struct task_struct *task,
1168                             struct task_struct *next)                   { }
1169 static inline int perf_event_init_task(struct task_struct *child)       { return 0; }
1170 static inline void perf_event_exit_task(struct task_struct *child)      { }
1171 static inline void perf_event_free_task(struct task_struct *task)       { }
1172 static inline void perf_event_delayed_put(struct task_struct *task)     { }
1173 static inline void perf_event_print_debug(void)                         { }
1174 static inline int perf_event_task_disable(void)                         { return -EINVAL; }
1175 static inline int perf_event_task_enable(void)                          { return -EINVAL; }
1176
1177 static inline void
1178 perf_sw_event(u32 event_id, u64 nr, int nmi,
1179                      struct pt_regs *regs, u64 addr)                    { }
1180 static inline void
1181 perf_bp_event(struct perf_event *event, void *data)                     { }
1182
1183 static inline int perf_register_guest_info_callbacks
1184 (struct perf_guest_info_callbacks *callbacks) { return 0; }
1185 static inline int perf_unregister_guest_info_callbacks
1186 (struct perf_guest_info_callbacks *callbacks) { return 0; }
1187
1188 static inline void perf_event_mmap(struct vm_area_struct *vma)          { }
1189 static inline void perf_event_comm(struct task_struct *tsk)             { }
1190 static inline void perf_event_fork(struct task_struct *tsk)             { }
1191 static inline void perf_event_init(void)                                { }
1192 static inline int  perf_swevent_get_recursion_context(void)             { return -1; }
1193 static inline void perf_swevent_put_recursion_context(int rctx)         { }
1194 static inline void perf_event_enable(struct perf_event *event)          { }
1195 static inline void perf_event_disable(struct perf_event *event)         { }
1196 static inline void perf_event_task_tick(void)                           { }
1197 #endif
1198
1199 #define perf_output_put(handle, x) \
1200         perf_output_copy((handle), &(x), sizeof(x))
1201
1202 /*
1203  * This has to have a higher priority than migration_notifier in sched.c.
1204  */
1205 #define perf_cpu_notifier(fn)                                   \
1206 do {                                                            \
1207         static struct notifier_block fn##_nb __cpuinitdata =    \
1208                 { .notifier_call = fn, .priority = CPU_PRI_PERF }; \
1209         fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,             \
1210                 (void *)(unsigned long)smp_processor_id());     \
1211         fn(&fn##_nb, (unsigned long)CPU_STARTING,               \
1212                 (void *)(unsigned long)smp_processor_id());     \
1213         fn(&fn##_nb, (unsigned long)CPU_ONLINE,                 \
1214                 (void *)(unsigned long)smp_processor_id());     \
1215         register_cpu_notifier(&fn##_nb);                        \
1216 } while (0)
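
/*
 * Illustrative use (hypothetical callback name): a PMU driver typically
 * hooks CPU hotplug with
 *
 *      perf_cpu_notifier(my_pmu_cpu_notifier);
 *
 * which, per the macro above, immediately replays CPU_UP_PREPARE,
 * CPU_STARTING and CPU_ONLINE for the current CPU and then registers the
 * notifier block for future hotplug transitions.
 */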
1217
1218 #endif /* __KERNEL__ */
1219 #endif /* _LINUX_PERF_EVENT_H */