op_model_p4.c (Linux kernel 3.7.1)
/**
 * @file op_model_p4.c
 * P4 model-specific MSR operations
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Graydon Hoare
 */
#include <linux/oprofile.h>
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/fixmap.h>
#include <asm/apic.h>


#include "op_x86_model.h"
#include "op_counter.h"

#define NUM_EVENTS 39

#define NUM_COUNTERS_NON_HT 8
#define NUM_ESCRS_NON_HT 45
#define NUM_CCCRS_NON_HT 18
#define NUM_CONTROLS_NON_HT (NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

#define NUM_COUNTERS_HT2 4
#define NUM_ESCRS_HT2 23
#define NUM_CCCRS_HT2 9
#define NUM_CONTROLS_HT2 (NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

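/* bit 31 of the low counter half: counters are reloaded with -(count) and
   count upward, so for typical reset counts this bit starts out set and is
   found clear once the counter has crossed zero; p4_check_ctrs() uses it to
   spot overflows that the CCCR OVF flag failed to record. */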
#define OP_CTR_OVERFLOW (1ULL<<31)

static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;

/* this has to be checked dynamically since the
   hyper-threadedness of a chip is discovered at
   kernel boot-time. */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
        if (smp_num_siblings == 2) {
                num_counters = NUM_COUNTERS_HT2;
                num_controls = NUM_CONTROLS_HT2;
        }
#endif
}

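/* under hyper-threading the two siblings share the physical PMU, and this
   driver splits the even/odd halves of each MSR group between them; walks
   over the counter and ESCR address ranges therefore step by 2 per CPU. */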
static inline int addr_increment(void)
{
#ifdef CONFIG_SMP
        return smp_num_siblings == 2 ? 2 : 1;
#else
        return 1;
#endif
}


/* tables to simulate simplified hardware view of p4 registers */
struct p4_counter_binding {
        int virt_counter;
        int counter_address;
        int cccr_address;
};

struct p4_event_binding {
        int escr_select;  /* value to put in CCCR */
        int event_select; /* value to put in ESCR */
        struct {
                int virt_counter; /* for this counter... */
                int escr_address; /* use this ESCR       */
        } bindings[2];
};

/* nb: these CTR_* defines are a duplicate of defines in
   event/i386.p4*events. */


#define CTR_BPU_0      (1 << 0)
#define CTR_MS_0       (1 << 1)
#define CTR_FLAME_0    (1 << 2)
#define CTR_IQ_4       (1 << 3)
#define CTR_BPU_2      (1 << 4)
#define CTR_MS_2       (1 << 5)
#define CTR_FLAME_2    (1 << 6)
#define CTR_IQ_5       (1 << 7)

static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
        { CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
        { CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
        { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
        { CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
        { CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
        { CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
        { CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
        { CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
};

#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)

/* p4 event codes in libop/op_event.h are indices into this table. */

static struct p4_event_binding p4_events[NUM_EVENTS] = {

        { /* BRANCH_RETIRED */
                0x05, 0x06,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
        },

        { /* MISPRED_BRANCH_RETIRED */
                0x04, 0x03,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
        },

        { /* TC_DELIVER_MODE */
                0x01, 0x01,
                { { CTR_MS_0, MSR_P4_TC_ESCR0 },
                  { CTR_MS_2, MSR_P4_TC_ESCR1 } }
        },

        { /* BPU_FETCH_REQUEST */
                0x00, 0x03,
                { { CTR_BPU_0, MSR_P4_BPU_ESCR0 },
                  { CTR_BPU_2, MSR_P4_BPU_ESCR1 } }
        },

        { /* ITLB_REFERENCE */
                0x03, 0x18,
                { { CTR_BPU_0, MSR_P4_ITLB_ESCR0 },
                  { CTR_BPU_2, MSR_P4_ITLB_ESCR1 } }
        },

        { /* MEMORY_CANCEL */
                0x05, 0x02,
                { { CTR_FLAME_0, MSR_P4_DAC_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_DAC_ESCR1 } }
        },

        { /* MEMORY_COMPLETE */
                0x02, 0x08,
                { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
        },

        { /* LOAD_PORT_REPLAY */
                0x02, 0x04,
                { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
        },

        { /* STORE_PORT_REPLAY */
                0x02, 0x05,
                { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1 } }
        },

        { /* MOB_LOAD_REPLAY */
                0x02, 0x03,
                { { CTR_BPU_0, MSR_P4_MOB_ESCR0 },
                  { CTR_BPU_2, MSR_P4_MOB_ESCR1 } }
        },

        { /* PAGE_WALK_TYPE */
                0x04, 0x01,
                { { CTR_BPU_0, MSR_P4_PMH_ESCR0 },
                  { CTR_BPU_2, MSR_P4_PMH_ESCR1 } }
        },

        { /* BSQ_CACHE_REFERENCE */
                0x07, 0x0c,
                { { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
                  { CTR_BPU_2, MSR_P4_BSU_ESCR1 } }
        },

        { /* IOQ_ALLOCATION */
                0x06, 0x03,
                { { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
                  { 0, 0 } }
        },

        { /* IOQ_ACTIVE_ENTRIES */
                0x06, 0x1a,
                { { CTR_BPU_2, MSR_P4_FSB_ESCR1 },
                  { 0, 0 } }
        },

        { /* FSB_DATA_ACTIVITY */
                0x06, 0x17,
                { { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
                  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
        },

        { /* BSQ_ALLOCATION */
                0x07, 0x05,
                { { CTR_BPU_0, MSR_P4_BSU_ESCR0 },
                  { 0, 0 } }
        },

        { /* BSQ_ACTIVE_ENTRIES */
                0x07, 0x06,
                { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */ },
                  { 0, 0 } }
        },

        { /* X87_ASSIST */
                0x05, 0x03,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
        },

        { /* SSE_INPUT_ASSIST */
                0x01, 0x34,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* PACKED_SP_UOP */
                0x01, 0x08,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* PACKED_DP_UOP */
                0x01, 0x0c,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* SCALAR_SP_UOP */
                0x01, 0x0a,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* SCALAR_DP_UOP */
                0x01, 0x0e,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* 64BIT_MMX_UOP */
                0x01, 0x02,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* 128BIT_MMX_UOP */
                0x01, 0x1a,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* X87_FP_UOP */
                0x01, 0x04,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* X87_SIMD_MOVES_UOP */
                0x01, 0x2e,
                { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0 },
                  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1 } }
        },

        { /* MACHINE_CLEAR */
                0x05, 0x02,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
        },

        { /* GLOBAL_POWER_EVENTS */
                0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
                { { CTR_BPU_0, MSR_P4_FSB_ESCR0 },
                  { CTR_BPU_2, MSR_P4_FSB_ESCR1 } }
        },

        { /* TC_MS_XFER */
                0x00, 0x05,
                { { CTR_MS_0, MSR_P4_MS_ESCR0 },
                  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
        },

        { /* UOP_QUEUE_WRITES */
                0x00, 0x09,
                { { CTR_MS_0, MSR_P4_MS_ESCR0 },
                  { CTR_MS_2, MSR_P4_MS_ESCR1 } }
        },

        { /* FRONT_END_EVENT */
                0x05, 0x08,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
        },

        { /* EXECUTION_EVENT */
                0x05, 0x0c,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
        },

        { /* REPLAY_EVENT */
                0x05, 0x09,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR2 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR3 } }
        },

        { /* INSTR_RETIRED */
                0x04, 0x02,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
        },

        { /* UOPS_RETIRED */
                0x04, 0x01,
                { { CTR_IQ_4, MSR_P4_CRU_ESCR0 },
                  { CTR_IQ_5, MSR_P4_CRU_ESCR1 } }
        },

        { /* UOP_TYPE */
                0x02, 0x02,
                { { CTR_IQ_4, MSR_P4_RAT_ESCR0 },
                  { CTR_IQ_5, MSR_P4_RAT_ESCR1 } }
        },

        { /* RETIRED_MISPRED_BRANCH_TYPE */
                0x02, 0x05,
                { { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
                  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
        },

        { /* RETIRED_BRANCH_TYPE */
                0x02, 0x04,
                { { CTR_MS_0, MSR_P4_TBPU_ESCR0 },
                  { CTR_MS_2, MSR_P4_TBPU_ESCR1 } }
        }
};


#define MISC_PMC_ENABLED_P(x) ((x) & 1 << 7)

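/* P4 performance monitoring pairs each counter with a CCCR (enable,
   ESCR-select, PMI-on-overflow) and an ESCR (event select, event mask,
   and per-thread USR/OS qualifiers).  The _0/_1 variants below set the
   qualifier bits for logical processor 0 or 1 respectively; the helpers
   build the two register images written in pmc_setup_one_p4_counter(). */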
#define ESCR_RESERVED_BITS 0x80000003
#define ESCR_CLEAR(escr) ((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr) ((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os) ((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr) ((escr) |= (((usr) & 1)))
#define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9))

#define CCCR_RESERVED_BITS 0x38030FFF
#define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr) ((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel) ((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr) ((cccr) |= (1<<26))
#define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27))
#define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12))
#define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12))
#define CCCR_OVF_P(cccr) ((cccr) & (1U<<31))
#define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31)))


/* this assigns a "stagger" to the current CPU, which is used throughout
   the code in this module as an extra array offset, to select the "even"
   or "odd" part of all the divided resources. */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
        int cpu = smp_processor_id();
        return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
        return 0;
}


/* finally, mediate access to a real hardware counter
   by passing a "virtual" counter number to this macro,
   along with your stagger setting. */
#define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
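/* e.g. with hyper-threading active (num_counters == 4), the odd sibling's
   virtual counter 0 is VIRT_CTR(1, 0) == 4, i.e. the CTR_BPU_2 slot of
   p4_counters[]. */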

static unsigned long reset_value[NUM_COUNTERS_NON_HT];

static void p4_shutdown(struct op_msrs const * const msrs)
{
        int i;

        for (i = 0; i < num_counters; ++i) {
                if (msrs->counters[i].addr)
                        release_perfctr_nmi(msrs->counters[i].addr);
        }
        /*
         * some of the control registers are specially reserved in
         * conjunction with the counter registers (hence the starting offset).
         * This saves a few bits.
         */
        for (i = num_counters; i < num_controls; ++i) {
                if (msrs->controls[i].addr)
                        release_evntsel_nmi(msrs->controls[i].addr);
        }
}

static int p4_fill_in_addresses(struct op_msrs * const msrs)
{
        unsigned int i;
        unsigned int addr, cccraddr, stag;

        setup_num_counters();
        stag = get_stagger();

        /* the counter & cccr registers we pay attention to */
        for (i = 0; i < num_counters; ++i) {
                addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
                cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
                if (reserve_perfctr_nmi(addr)) {
                        msrs->counters[i].addr = addr;
                        msrs->controls[i].addr = cccraddr;
                }
        }

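        /* note: i carries on from num_counters here, so the ESCRs reserved
           below land in msrs->controls[] directly after the CCCR slots
           claimed above (p4_shutdown() releases them with the same offset). */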
        /* 43 ESCR registers in three or four discontiguous groups */
        for (addr = MSR_P4_BSU_ESCR0 + stag;
             addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
                if (reserve_evntsel_nmi(addr))
                        msrs->controls[i].addr = addr;
        }

        /* no IQ_ESCR0/1 on some models; reserve BSU_ESCR0/1 a second time
         * to avoid a special case in nmi_{save|restore}_registers() */
        if (boot_cpu_data.x86_model >= 0x3) {
                for (addr = MSR_P4_BSU_ESCR0 + stag;
                     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
                        if (reserve_evntsel_nmi(addr))
                                msrs->controls[i].addr = addr;
                }
        } else {
                for (addr = MSR_P4_IQ_ESCR0 + stag;
                     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
                        if (reserve_evntsel_nmi(addr))
                                msrs->controls[i].addr = addr;
                }
        }

        for (addr = MSR_P4_RAT_ESCR0 + stag;
             addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
                if (reserve_evntsel_nmi(addr))
                        msrs->controls[i].addr = addr;
        }

        for (addr = MSR_P4_MS_ESCR0 + stag;
             addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
                if (reserve_evntsel_nmi(addr))
                        msrs->controls[i].addr = addr;
        }

        for (addr = MSR_P4_IX_ESCR0 + stag;
             addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
                if (reserve_evntsel_nmi(addr))
                        msrs->controls[i].addr = addr;
        }

        /* there are 2 remaining non-contiguously located ESCRs */

        if (num_counters == NUM_COUNTERS_NON_HT) {
                /* standard non-HT CPUs handle both remaining ESCRs */
                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

        } else if (stag == 0) {
                /* HT CPUs give the first remainder to the even thread, as
                   the 32nd control register */
                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;

        } else {
                /* and two copies of the second to the odd thread,
                   for the 22nd and 23rd control registers */
                if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
                        msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
                }
        }

        for (i = 0; i < num_counters; ++i) {
                if (!counter_config[i].enabled)
                        continue;
                if (msrs->controls[i].addr)
                        continue;
                op_x86_warn_reserved(i);
                p4_shutdown(msrs);
                return -EBUSY;
        }

        return 0;
}


static void pmc_setup_one_p4_counter(unsigned int ctr)
{
        int i;
        int const maxbind = 2;
        unsigned int cccr = 0;
        unsigned int escr = 0;
        unsigned int high = 0;
        unsigned int counter_bit;
        struct p4_event_binding *ev = NULL;
        unsigned int stag;

        stag = get_stagger();

        /* convert from counter *number* to counter *bit* */
        counter_bit = 1 << VIRT_CTR(stag, ctr);

        /* find our event binding structure. */
        if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
                printk(KERN_ERR
                       "oprofile: P4 event code 0x%lx out of range\n",
                       counter_config[ctr].event);
                return;
        }

        ev = &(p4_events[counter_config[ctr].event - 1]);

        for (i = 0; i < maxbind; i++) {
                if (ev->bindings[i].virt_counter & counter_bit) {

                        /* modify ESCR */
                        rdmsr(ev->bindings[i].escr_address, escr, high);
                        ESCR_CLEAR(escr);
                        if (stag == 0) {
                                ESCR_SET_USR_0(escr, counter_config[ctr].user);
                                ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
                        } else {
                                ESCR_SET_USR_1(escr, counter_config[ctr].user);
                                ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
                        }
                        ESCR_SET_EVENT_SELECT(escr, ev->event_select);
                        ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
                        wrmsr(ev->bindings[i].escr_address, escr, high);

                        /* modify CCCR */
                        rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
                              cccr, high);
                        CCCR_CLEAR(cccr);
                        CCCR_SET_REQUIRED_BITS(cccr);
                        CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
                        if (stag == 0)
                                CCCR_SET_PMI_OVF_0(cccr);
                        else
                                CCCR_SET_PMI_OVF_1(cccr);
                        wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
                              cccr, high);
                        return;
                }
        }

        printk(KERN_ERR
               "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
               counter_config[ctr].event, stag, ctr);
}


static void p4_setup_ctrs(struct op_x86_model_spec const *model,
                          struct op_msrs const * const msrs)
{
        unsigned int i;
        unsigned int low, high;
        unsigned int stag;

        stag = get_stagger();

        rdmsr(MSR_IA32_MISC_ENABLE, low, high);
        if (!MISC_PMC_ENABLED_P(low)) {
                printk(KERN_ERR "oprofile: P4 PMC not available\n");
                return;
        }

        /* clear the cccrs we will use */
        for (i = 0; i < num_counters; i++) {
                if (unlikely(!msrs->controls[i].addr))
                        continue;
                rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
                CCCR_CLEAR(low);
                CCCR_SET_REQUIRED_BITS(low);
                wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
        }

        /* clear all escrs (including those outside our concern) */
        for (i = num_counters; i < num_controls; i++) {
                if (unlikely(!msrs->controls[i].addr))
                        continue;
                wrmsr(msrs->controls[i].addr, 0, 0);
        }

        /* setup all counters */
        for (i = 0; i < num_counters; ++i) {
                if (counter_config[i].enabled && msrs->controls[i].addr) {
                        reset_value[i] = counter_config[i].count;
                        pmc_setup_one_p4_counter(i);
                        wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
                               -(u64)counter_config[i].count);
                } else {
                        reset_value[i] = 0;
                }
        }
}


static int p4_check_ctrs(struct pt_regs * const regs,
                         struct op_msrs const * const msrs)
{
        unsigned long ctr, low, high, stag, real;
        int i;

        stag = get_stagger();

        for (i = 0; i < num_counters; ++i) {

                if (!reset_value[i])
                        continue;

                /*
                 * there is some eccentricity in the hardware which
                 * requires that we perform 2 extra corrections:
                 *
                 * - check both the CCCR:OVF flag for overflow and the
                 *   counter high bit for un-flagged overflows.
                 *
                 * - write the counter back twice to ensure it gets
                 *   updated properly.
                 *
                 * the former seems to be related to extra NMIs happening
                 * during the current NMI; the latter is reported as errata
                 * N15 in intel doc 249199-029, pentium 4 specification
                 * update, though their suggested work-around does not
                 * appear to solve the problem.
                 */

                real = VIRT_CTR(stag, i);

                rdmsr(p4_counters[real].cccr_address, low, high);
                rdmsr(p4_counters[real].counter_address, ctr, high);
                if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
                        oprofile_add_sample(regs, i);
                        wrmsrl(p4_counters[real].counter_address,
                               -(u64)reset_value[i]);
                        CCCR_CLEAR_OVF(low);
                        wrmsr(p4_counters[real].cccr_address, low, high);
                        wrmsrl(p4_counters[real].counter_address,
                               -(u64)reset_value[i]);
                }
        }

        /* P4 quirk: you have to re-unmask the apic vector */
        apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);

        /* See op_model_ppro.c */
        return 1;
}


static void p4_start(struct op_msrs const * const msrs)
{
        unsigned int low, high, stag;
        int i;

        stag = get_stagger();

        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
                CCCR_SET_ENABLE(low);
                wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
        }
}


static void p4_stop(struct op_msrs const * const msrs)
{
        unsigned int low, high, stag;
        int i;

        stag = get_stagger();

        for (i = 0; i < num_counters; ++i) {
                if (!reset_value[i])
                        continue;
                rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
                CCCR_SET_DISABLE(low);
                wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
        }
}

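/* exported model descriptors: nmi_int.c selects op_p4_ht2_spec when the CPU
   reports two hyper-threaded siblings (smp_num_siblings == 2), and op_p4_spec
   otherwise. */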
#ifdef CONFIG_SMP
struct op_x86_model_spec op_p4_ht2_spec = {
        .num_counters = NUM_COUNTERS_HT2,
        .num_controls = NUM_CONTROLS_HT2,
        .fill_in_addresses = &p4_fill_in_addresses,
        .setup_ctrs = &p4_setup_ctrs,
        .check_ctrs = &p4_check_ctrs,
        .start = &p4_start,
        .stop = &p4_stop,
        .shutdown = &p4_shutdown
};
#endif

struct op_x86_model_spec op_p4_spec = {
        .num_counters = NUM_COUNTERS_NON_HT,
        .num_controls = NUM_CONTROLS_NON_HT,
        .fill_in_addresses = &p4_fill_in_addresses,
        .setup_ctrs = &p4_setup_ctrs,
        .check_ctrs = &p4_check_ctrs,
        .start = &p4_start,
        .stop = &p4_stop,
        .shutdown = &p4_shutdown
};