3 * P4 model-specific MSR operations
5 * @remark Copyright 2002 OProfile authors
6 * @remark Read the file COPYING
8 * @author Graydon Hoare
11 #include <linux/oprofile.h>
12 #include <linux/smp.h>
13 #include <linux/ptrace.h>
14 #include <linux/nmi.h>
16 #include <asm/fixmap.h>
20 #include "op_x86_model.h"
21 #include "op_counter.h"
/* Register counts for the non-HT configuration: 8 counters and
   45 ESCRs + 18 CCCRs = 63 control registers. */
25 #define NUM_COUNTERS_NON_HT 8
26 #define NUM_ESCRS_NON_HT 45
27 #define NUM_CCCRS_NON_HT 18
28 #define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)
/* Register counts selected by setup_num_counters() when
   smp_num_siblings == 2: 4 counters and 23 ESCRs + 9 CCCRs = 32
   control registers. */
30 #define NUM_COUNTERS_HT2 4
31 #define NUM_ESCRS_HT2 23
32 #define NUM_CCCRS_HT2 9
33 #define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)
/* Bit 31 of a counter value.  p4_check_ctrs() treats a counter whose
   bit 31 is clear as having overflowed. */
35 #define OP_CTR_OVERFLOW (1ULL<<31)
/* Counts currently in effect.  They start at the non-HT values and are
   replaced with the HT2 values by setup_num_counters() when
   smp_num_siblings == 2. */
37 static unsigned int num_counters = NUM_COUNTERS_NON_HT;
38 static unsigned int num_controls = NUM_CONTROLS_NON_HT;
40 /* this has to be checked dynamically since the
41 hyper-threadedness of a chip is discovered at kernel boot-time. */
/* Switch num_counters and num_controls to the HT2 values when
   smp_num_siblings == 2; otherwise leave them unchanged. */
43 static inline void setup_num_counters(void)
46 if (smp_num_siblings == 2) {
47 num_counters = NUM_COUNTERS_HT2;
48 num_controls = NUM_CONTROLS_HT2;
/* Step used by p4_fill_in_addresses() when walking a range of ESCR
   addresses: 2 when smp_num_siblings == 2, otherwise 1. */
53 static int inline addr_increment(void)
56 return smp_num_siblings == 2 ? 2 : 1;
63 /* tables to simulate simplified hardware view of p4 registers */
/* One row of p4_counters.  The member declarations are not visible in
   this listing; the table initialisers supply a CTR_* bit, a counter
   MSR and a CCCR MSR per row, and the code reads the members
   counter_address and cccr_address. */
64 struct p4_counter_binding {
/* One row of p4_events: the select values for an event plus the
   counter/ESCR pairs it can be bound to.
   NOTE(review): virt_counter and escr_address are accessed in the code
   as ev->bindings[i].virt_counter / .escr_address, so they belong to a
   nested "bindings" array whose declaration lines are missing from this
   listing.  virt_counter is compared against a CTR_* bit mask. */
70 struct p4_event_binding {
71 int escr_select; /* value to put in CCCR */
72 int event_select; /* value to put in ESCR */
74 int virt_counter; /* for this counter... */
75 int escr_address; /* use this ESCR */
79 /* nb: these CTR_* defines are a duplicate of defines in
80 event/i386.p4*events. */
/* One bit per counter.  Bit n corresponds to row n of p4_counters, and
   pmc_setup_one_p4_counter() builds the same bit with
   1 << VIRT_CTR(stag, ctr) to match it against an event's bindings. */
83 #define CTR_BPU_0 (1 << 0)
84 #define CTR_MS_0 (1 << 1)
85 #define CTR_FLAME_0 (1 << 2)
86 #define CTR_IQ_4 (1 << 3)
87 #define CTR_BPU_2 (1 << 4)
88 #define CTR_MS_2 (1 << 5)
89 #define CTR_FLAME_2 (1 << 6)
90 #define CTR_IQ_5 (1 << 7)
/* Maps a counter slot to its CTR_* bit, counter MSR and CCCR MSR.
   Indexed with VIRT_CTR(stagger, i); row n carries the CTR_* mask
   (1 << n). */
92 static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
93 { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 },
94 { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 },
95 { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
96 { CTR_IQ_4, MSR_P4_IQ_PERFCTR4, MSR_P4_IQ_CCCR4 },
97 { CTR_BPU_2, MSR_P4_BPU_PERFCTR2, MSR_P4_BPU_CCCR2 },
98 { CTR_MS_2, MSR_P4_MS_PERFCTR2, MSR_P4_MS_CCCR2 },
99 { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
100 { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 }
/* CCCRs in the non-HT configuration that have no entry in p4_counters
   (18 - 8 = 10).  Not referenced in the visible part of this file. */
103 #define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
105 /* p4 event codes in libop/op_event.h are indices into this table. */
/* One entry per event.  pmc_setup_one_p4_counter() indexes this table
   with (event code - 1) and uses the binding whose CTR_* mask contains
   the bit of the counter being programmed.  Each visible binding pairs
   a CTR_* mask with the ESCR MSR to use for that counter.
   NOTE(review): several entries in this listing have lost their name
   comment and/or their select-value line. */
107 static struct p4_event_binding p4_events[NUM_EVENTS] = {
109 { /* BRANCH_RETIRED */
111 { {CTR_IQ_4, MSR_P4_CRU_ESCR2},
112 {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
115 { /* MISPRED_BRANCH_RETIRED */
117 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
118 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
121 { /* TC_DELIVER_MODE */
123 { { CTR_MS_0, MSR_P4_TC_ESCR0},
124 { CTR_MS_2, MSR_P4_TC_ESCR1} }
127 { /* BPU_FETCH_REQUEST */
129 { { CTR_BPU_0, MSR_P4_BPU_ESCR0},
130 { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
133 { /* ITLB_REFERENCE */
135 { { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
136 { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
139 { /* MEMORY_CANCEL */
141 { { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
142 { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
145 { /* MEMORY_COMPLETE */
147 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
148 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
151 { /* LOAD_PORT_REPLAY */
153 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
154 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
157 { /* STORE_PORT_REPLAY */
159 { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
160 { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
163 { /* MOB_LOAD_REPLAY */
165 { { CTR_BPU_0, MSR_P4_MOB_ESCR0},
166 { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
169 { /* PAGE_WALK_TYPE */
171 { { CTR_BPU_0, MSR_P4_PMH_ESCR0},
172 { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
175 { /* BSQ_CACHE_REFERENCE */
177 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
178 { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
181 { /* IOQ_ALLOCATION */
183 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
187 { /* IOQ_ACTIVE_ENTRIES */
189 { { CTR_BPU_2, MSR_P4_FSB_ESCR1},
193 { /* FSB_DATA_ACTIVITY */
195 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
196 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
199 { /* BSQ_ALLOCATION */
201 { { CTR_BPU_0, MSR_P4_BSU_ESCR0},
205 { /* BSQ_ACTIVE_ENTRIES */
207 { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
213 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
214 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
217 { /* SSE_INPUT_ASSIST */
219 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
220 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
223 { /* PACKED_SP_UOP */
225 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
226 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
229 { /* PACKED_DP_UOP */
231 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
232 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
235 { /* SCALAR_SP_UOP */
237 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
238 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
241 { /* SCALAR_DP_UOP */
243 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
244 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
247 { /* 64BIT_MMX_UOP */
249 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
250 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
253 { /* 128BIT_MMX_UOP */
255 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
256 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
261 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
262 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
265 { /* X87_SIMD_MOVES_UOP */
267 { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
268 { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
271 { /* MACHINE_CLEAR */
273 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
274 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
277 { /* GLOBAL_POWER_EVENTS */
278 0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
279 { { CTR_BPU_0, MSR_P4_FSB_ESCR0},
280 { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
285 { { CTR_MS_0, MSR_P4_MS_ESCR0},
286 { CTR_MS_2, MSR_P4_MS_ESCR1} }
289 { /* UOP_QUEUE_WRITES */
291 { { CTR_MS_0, MSR_P4_MS_ESCR0},
292 { CTR_MS_2, MSR_P4_MS_ESCR1} }
295 { /* FRONT_END_EVENT */
297 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
298 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
301 { /* EXECUTION_EVENT */
303 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
304 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
309 { { CTR_IQ_4, MSR_P4_CRU_ESCR2},
310 { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
313 { /* INSTR_RETIRED */
315 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
316 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
321 { { CTR_IQ_4, MSR_P4_CRU_ESCR0},
322 { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
327 { { CTR_IQ_4, MSR_P4_RAT_ESCR0},
328 { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
331 { /* RETIRED_MISPRED_BRANCH_TYPE */
333 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
334 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
337 { /* RETIRED_BRANCH_TYPE */
339 { { CTR_MS_0, MSR_P4_TBPU_ESCR0},
340 { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
/* Non-zero when bit 7 of x is set.  p4_setup_ctrs() applies it to the
   low word read from MSR_IA32_MISC_ENABLE. */
345 #define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)
/* ESCR helpers.  Each one modifies its first argument in place.
   ESCR_CLEAR keeps only the bits in ESCR_RESERVED_BITS.  The _0 setters
   write bit 2 (USR) and bit 3 (OS); the _1 setters write bit 0 (USR)
   and bit 1 (OS).  The event select is a 6-bit field starting at
   bit 25 and the event mask a 16-bit field starting at bit 9. */
347 #define ESCR_RESERVED_BITS 0x80000003
348 #define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
349 #define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
350 #define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
351 #define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
352 #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
353 #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
354 #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))
/* CCCR helpers.  All but CCCR_OVF_P modify their argument in place.
   CCCR_CLEAR keeps only the bits in CCCR_RESERVED_BITS;
   CCCR_SET_REQUIRED_BITS sets bits 16 and 17; the ESCR select is a
   3-bit field starting at bit 13; PMI_OVF_0 / PMI_OVF_1 are bits 26
   and 27; the enable flag is bit 12; the overflow flag is bit 31. */
356 #define CCCR_RESERVED_BITS 0x38030FFF
357 #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
358 #define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
359 #define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
360 #define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
361 #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
362 #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
363 #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
364 #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
365 #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))
368 /* this assigns a "stagger" to the current CPU, which is used throughout
369 the code in this module as an extra array offset, to select the "even"
370 or "odd" part of all the divided resources. */
/* Returns 0 when the current CPU is the first CPU in its
   cpu_sibling_map, and 1 otherwise. */
371 static unsigned int get_stagger(void)
374 int cpu = smp_processor_id();
375 return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
381 /* finally, mediate access to a real hardware counter
382 by passing a "virtual" counter number to this macro,
383 along with your stagger setting.
   Expands to i + num_counters * stagger, so stagger 0 selects indices
   0..num_counters-1 and stagger 1 the num_counters indices after
   them. */
384 #define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
/* Reload count per virtual counter.  p4_setup_ctrs() stores
   counter_config[i].count here and p4_check_ctrs() writes the negated
   value back into the counter MSR after an overflow.  Indexed by the
   virtual counter number i, not by VIRT_CTR(). */
386 static unsigned long reset_value[NUM_COUNTERS_NON_HT];
/* Record in msrs the MSR addresses this CPU will use.
   After selecting the counter counts and the stagger, the first loop
   reserves each counter MSR with reserve_perfctr_nmi() and, on success,
   stores the counter address in msrs->counters[i] and the matching CCCR
   address in msrs->controls[i].  The following loops walk several ESCR
   address ranges, stepping by addr_increment(), and store each address
   that reserve_evntsel_nmi() accepts in msrs->controls[i]; i keeps
   counting up from where the counter loop left it.  The last two ESCRs
   (CRU_ESCR4/5) are assigned separately depending on num_counters and
   the stagger. */
389 static void p4_fill_in_addresses(struct op_msrs * const msrs)
392 unsigned int addr, cccraddr, stag;
394 setup_num_counters();
395 stag = get_stagger();
397 /* the counter & cccr registers we pay attention to */
398 for (i = 0; i < num_counters; ++i) {
399 addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
400 cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
401 if (reserve_perfctr_nmi(addr)) {
402 msrs->counters[i].addr = addr;
403 msrs->controls[i].addr = cccraddr;
407 /* 43 ESCR registers in three or four discontiguous groups */
408 for (addr = MSR_P4_BSU_ESCR0 + stag;
409 addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
410 if (reserve_evntsel_nmi(addr))
411 msrs->controls[i].addr = addr;
414 /* no IQ_ESCR0/1 on some models, we save BSU_ESCR0/1 a second time
415 * to avoid special case in nmi_{save|restore}_registers() */
416 if (boot_cpu_data.x86_model >= 0x3) {
417 for (addr = MSR_P4_BSU_ESCR0 + stag;
418 addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
419 if (reserve_evntsel_nmi(addr))
420 msrs->controls[i].addr = addr;
423 for (addr = MSR_P4_IQ_ESCR0 + stag;
424 addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
425 if (reserve_evntsel_nmi(addr))
426 msrs->controls[i].addr = addr;
430 for (addr = MSR_P4_RAT_ESCR0 + stag;
431 addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
432 if (reserve_evntsel_nmi(addr))
433 msrs->controls[i].addr = addr;
436 for (addr = MSR_P4_MS_ESCR0 + stag;
437 addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
438 if (reserve_evntsel_nmi(addr))
439 msrs->controls[i].addr = addr;
442 for (addr = MSR_P4_IX_ESCR0 + stag;
443 addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
444 if (reserve_evntsel_nmi(addr))
445 msrs->controls[i].addr = addr;
448 /* there are 2 remaining non-contiguously located ESCRs */
450 if (num_counters == NUM_COUNTERS_NON_HT) {
451 /* standard non-HT CPUs handle both remaining ESCRs */
452 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
453 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
454 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
455 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
457 } else if (stag == 0) {
458 /* HT CPUs give the first remainder to the even thread, as
459 the 32nd control register */
460 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
461 msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
464 /* and two copies of the second to the odd thread,
465 for the 22nd and 23rd control registers */
466 if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
467 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
468 msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
/* Program the ESCR and CCCR for virtual counter ctr from
   counter_config[ctr].
   The event code must lie in 1..NUM_EVENTS; otherwise the "out of
   range" message is emitted.  The event's p4_events entry is searched
   (at most maxbind = 2 bindings) for one whose virt_counter mask
   contains this counter's bit.  For that binding the ESCR is read,
   given the user/kernel flags, event select and unit mask, and written
   back; then the counter's CCCR is read, given the required bits, the
   ESCR select and a PMI-on-overflow bit, and written back.  If no
   binding matches, the "no binding" message is emitted.
   NOTE(review): the lines that choose between the _0 and _1 variants
   (presumably based on stag) and the early returns are missing from
   this listing. */
474 static void pmc_setup_one_p4_counter(unsigned int ctr)
477 int const maxbind = 2;
478 unsigned int cccr = 0;
479 unsigned int escr = 0;
480 unsigned int high = 0;
481 unsigned int counter_bit;
482 struct p4_event_binding *ev = NULL;
485 stag = get_stagger();
487 /* convert from counter *number* to counter *bit* */
488 counter_bit = 1 << VIRT_CTR(stag, ctr);
490 /* find our event binding structure. */
491 if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
493 "oprofile: P4 event code 0x%lx out of range\n",
494 counter_config[ctr].event);
/* event codes are 1-based; the table is 0-based */
498 ev = &(p4_events[counter_config[ctr].event - 1]);
500 for (i = 0; i < maxbind; i++) {
501 if (ev->bindings[i].virt_counter & counter_bit) {
504 rdmsr(ev->bindings[i].escr_address, escr, high);
507 ESCR_SET_USR_0(escr, counter_config[ctr].user);
508 ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
510 ESCR_SET_USR_1(escr, counter_config[ctr].user);
511 ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
513 ESCR_SET_EVENT_SELECT(escr, ev->event_select);
514 ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
515 wrmsr(ev->bindings[i].escr_address, escr, high);
518 rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
521 CCCR_SET_REQUIRED_BITS(cccr);
522 CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
524 CCCR_SET_PMI_OVF_0(cccr);
526 CCCR_SET_PMI_OVF_1(cccr);
527 wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
534 "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
535 counter_config[ctr].event, stag, ctr);
/* Prepare all counters of the current CPU for profiling.
   Reads MSR_IA32_MISC_ENABLE and prints an error when
   MISC_PMC_ENABLED_P() reports bit 7 clear.  It then rewrites the CCCR
   of each counter with the required bits set, writes zero to every
   ESCR recorded in msrs->controls[num_counters..num_controls-1], and
   for each counter that is enabled and has a control address it stores
   the reload count in reset_value[], programs the event through
   pmc_setup_one_p4_counter() and loads the counter MSR with the
   negated count.
   NOTE(review): the statements following the two
   "!msrs->controls[i].addr" tests and the CCCR clear step are missing
   from this listing. */
539 static void p4_setup_ctrs(struct op_x86_model_spec const *model,
540 struct op_msrs const * const msrs)
543 unsigned int low, high;
546 stag = get_stagger();
548 rdmsr(MSR_IA32_MISC_ENABLE, low, high);
549 if (!MISC_PMC_ENABLED_P(low)) {
550 printk(KERN_ERR "oprofile: P4 PMC not available\n");
554 /* clear the cccrs we will use */
555 for (i = 0; i < num_counters; i++) {
556 if (unlikely(!msrs->controls[i].addr))
558 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
560 CCCR_SET_REQUIRED_BITS(low);
561 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
564 /* clear all escrs (including those outside our concern) */
565 for (i = num_counters; i < num_controls; i++) {
566 if (unlikely(!msrs->controls[i].addr))
568 wrmsr(msrs->controls[i].addr, 0, 0);
571 /* setup all counters */
572 for (i = 0; i < num_counters; ++i) {
573 if (counter_config[i].enabled && msrs->controls[i].addr) {
574 reset_value[i] = counter_config[i].count;
575 pmc_setup_one_p4_counter(i);
576 wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
577 -(u64)counter_config[i].count);
/* Check every counter of the current CPU for overflow.
   For each virtual counter the CCCR and the counter MSR are read.  An
   overflow is assumed when the CCCR overflow flag is set or bit 31 of
   the counter value is clear; in that case a sample is recorded with
   oprofile_add_sample(), the counter is reloaded with -reset_value[i],
   the CCCR is written back and the counter is reloaded a second time.
   Afterwards the APIC_LVT_MASKED bit is cleared in APIC_LVTPC.
   NOTE(review): the return statement, the step that clears the CCCR
   overflow flag and any per-counter skip condition are missing from
   this listing. */
585 static int p4_check_ctrs(struct pt_regs * const regs,
586 struct op_msrs const * const msrs)
588 unsigned long ctr, low, high, stag, real;
591 stag = get_stagger();
593 for (i = 0; i < num_counters; ++i) {
599 * there is some eccentricity in the hardware which
600 * requires that we perform 2 extra corrections:
602 * - check both the CCCR:OVF flag for overflow and the
603 * counter high bit for un-flagged overflows.
605 * - write the counter back twice to ensure it gets
608 * the former seems to be related to extra NMIs happening
609 * during the current NMI; the latter is reported as errata
610 * N15 in intel doc 249199-029, pentium 4 specification
611 * update, though their suggested work-around does not
612 * appear to solve the problem.
615 real = VIRT_CTR(stag, i);
617 rdmsr(p4_counters[real].cccr_address, low, high);
618 rdmsr(p4_counters[real].counter_address, ctr, high);
619 if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
620 oprofile_add_sample(regs, i);
621 wrmsrl(p4_counters[real].counter_address,
622 -(u64)reset_value[i]);
624 wrmsr(p4_counters[real].cccr_address, low, high);
625 wrmsrl(p4_counters[real].counter_address,
626 -(u64)reset_value[i]);
630 /* P4 quirk: you have to re-unmask the apic vector */
631 apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
633 /* See op_model_ppro.c */
/* Start counting: for each counter index of the current CPU, read the
   CCCR selected by VIRT_CTR(stag, i), set the enable bit (bit 12) and
   write it back.
   NOTE(review): lines are missing inside the loop in this listing, so
   any condition that skips a counter is not visible. */
638 static void p4_start(struct op_msrs const * const msrs)
640 unsigned int low, high, stag;
643 stag = get_stagger();
645 for (i = 0; i < num_counters; ++i) {
648 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
649 CCCR_SET_ENABLE(low);
650 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
/* Stop counting: for each counter index of the current CPU, read the
   CCCR selected by VIRT_CTR(stag, i), clear the enable bit (bit 12)
   and write it back.
   NOTE(review): lines are missing inside the loop in this listing, so
   any condition that skips a counter is not visible. */
655 static void p4_stop(struct op_msrs const * const msrs)
657 unsigned int low, high, stag;
660 stag = get_stagger();
662 for (i = 0; i < num_counters; ++i) {
665 rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
666 CCCR_SET_DISABLE(low);
667 wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
/* Release the MSR reservations recorded in msrs.
   Counter MSRs with a non-zero address are released with
   release_perfctr_nmi().  Control MSRs are released with
   release_evntsel_nmi() only for indices num_counters..num_controls-1;
   the first num_counters control entries are not released here. */
671 static void p4_shutdown(struct op_msrs const * const msrs)
675 for (i = 0; i < num_counters; ++i) {
676 if (msrs->counters[i].addr)
677 release_perfctr_nmi(msrs->counters[i].addr);
680 * some of the control registers are specially reserved in
681 * conjunction with the counter registers (hence the starting offset).
682 * This saves a few bits.
684 for (i = num_counters; i < num_controls; ++i) {
685 if (msrs->controls[i].addr)
686 release_evntsel_nmi(msrs->controls[i].addr);
/* Model description using the HT2 counts (NUM_COUNTERS_HT2 counters,
   NUM_CONTROLS_HT2 controls) and the p4_* callbacks defined in this
   file.
   NOTE(review): two initialiser lines between .check_ctrs and .shutdown
   are missing from this listing. */
692 struct op_x86_model_spec op_p4_ht2_spec = {
693 .num_counters = NUM_COUNTERS_HT2,
694 .num_controls = NUM_CONTROLS_HT2,
695 .fill_in_addresses = &p4_fill_in_addresses,
696 .setup_ctrs = &p4_setup_ctrs,
697 .check_ctrs = &p4_check_ctrs,
700 .shutdown = &p4_shutdown
704 struct op_x86_model_spec op_p4_spec = {
705 .num_counters = NUM_COUNTERS_NON_HT,
706 .num_controls = NUM_CONTROLS_NON_HT,
707 .fill_in_addresses = &p4_fill_in_addresses,
708 .setup_ctrs = &p4_setup_ctrs,
709 .check_ctrs = &p4_check_ctrs,
712 .shutdown = &p4_shutdown