Linux Kernel 3.7.1
perf_event_p4.h
/*
 * Netburst Performance Events (P4, old Xeon)
 */

#ifndef PERF_EVENT_P4_H
#define PERF_EVENT_P4_H

#include <linux/cpu.h>
#include <linux/bitops.h>

/*
 * NetBurst has performance MSRs shared between
 * threads if HT is turned on, i.e. for both logical
 * processors (note: on Atom with HT support, by contrast,
 * perf MSRs are not shared and every thread has its
 * own set of perf MSRs)
 */
#define ARCH_P4_TOTAL_ESCR	(46)
#define ARCH_P4_RESERVED_ESCR	(2) /* IQ_ESCR(0,1) not always present */
#define ARCH_P4_MAX_ESCR	(ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
#define ARCH_P4_MAX_CCCR	(18)

#define ARCH_P4_CNTRVAL_BITS	(40)
#define ARCH_P4_CNTRVAL_MASK	((1ULL << ARCH_P4_CNTRVAL_BITS) - 1)
#define ARCH_P4_UNFLAGGED_BIT	((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1))

#define P4_ESCR_EVENT_MASK	0x7e000000U
#define P4_ESCR_EVENT_SHIFT	25
#define P4_ESCR_EVENTMASK_MASK	0x01fffe00U
#define P4_ESCR_EVENTMASK_SHIFT	9
#define P4_ESCR_TAG_MASK	0x000001e0U
#define P4_ESCR_TAG_SHIFT	5
#define P4_ESCR_TAG_ENABLE	0x00000010U
#define P4_ESCR_T0_OS		0x00000008U
#define P4_ESCR_T0_USR		0x00000004U
#define P4_ESCR_T1_OS		0x00000002U
#define P4_ESCR_T1_USR		0x00000001U

#define P4_ESCR_EVENT(v)	((v) << P4_ESCR_EVENT_SHIFT)
#define P4_ESCR_EMASK(v)	((v) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_TAG(v)		((v) << P4_ESCR_TAG_SHIFT)

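/*
 * An illustrative sketch (hypothetical helper, not a kernel API) of how
 * the helpers above compose a raw ESCR value; the event code 0x3 and
 * event-mask bit 0 are arbitrary sample numbers:
 */
static inline u32 p4_example_escr(void)
{
	return P4_ESCR_EVENT(0x3)	|	/* event select code       */
	       P4_ESCR_EMASK(1 << 0)	|	/* event-mask bit 0        */
	       P4_ESCR_T0_OS		|	/* count T0 in kernel mode */
	       P4_ESCR_T0_USR;			/* count T0 in user mode   */
}
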
#define P4_CCCR_OVF			0x80000000U
#define P4_CCCR_CASCADE			0x40000000U
#define P4_CCCR_OVF_PMI_T0		0x04000000U
#define P4_CCCR_OVF_PMI_T1		0x08000000U
#define P4_CCCR_FORCE_OVF		0x02000000U
#define P4_CCCR_EDGE			0x01000000U
#define P4_CCCR_THRESHOLD_MASK		0x00f00000U
#define P4_CCCR_THRESHOLD_SHIFT		20
#define P4_CCCR_COMPLEMENT		0x00080000U
#define P4_CCCR_COMPARE			0x00040000U
#define P4_CCCR_ESCR_SELECT_MASK	0x0000e000U
#define P4_CCCR_ESCR_SELECT_SHIFT	13
#define P4_CCCR_ENABLE			0x00001000U
#define P4_CCCR_THREAD_SINGLE		0x00010000U
#define P4_CCCR_THREAD_BOTH		0x00020000U
#define P4_CCCR_THREAD_ANY		0x00030000U
#define P4_CCCR_RESERVED		0x00000fffU

#define P4_CCCR_THRESHOLD(v)		((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v)			((v) << P4_CCCR_ESCR_SELECT_SHIFT)

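/*
 * An illustrative sketch (hypothetical helper, not a kernel API): a CCCR
 * value that enables its counter, routes it to ESCR select 0x4 (an
 * arbitrary sample value) and counts regardless of the sibling thread:
 */
static inline u32 p4_example_cccr(void)
{
	return P4_CCCR_ENABLE		|	/* turn the counter on    */
	       P4_CCCR_ESEL(0x4)	|	/* which ESCR drives it   */
	       P4_CCCR_THREAD_ANY;		/* count on either thread */
}
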
#define P4_GEN_ESCR_EMASK(class, name, bit) \
	class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT)
#define P4_ESCR_EMASK_BIT(class, name)	class##__##name

/*
 * The config field is 64 bits wide and consists of
 * HT << 63 | ESCR << 32 | CCCR
 * where HT is the HyperThreading bit (since ESCR
 * has it reserved we may use it for our own purpose)
 *
 * note that these are NOT the addresses of the respective
 * ESCR and CCCR registers but rather a packed value, which
 * should be unpacked and written to the proper addresses
 *
 * the basic idea is to pack as much info as possible
 */
#define p4_config_pack_escr(v)		(((u64)(v)) << 32)
#define p4_config_pack_cccr(v)		(((u64)(v)) & 0xffffffffULL)
#define p4_config_unpack_escr(v)	(((u64)(v)) >> 32)
#define p4_config_unpack_cccr(v)	(((u64)(v)) & 0xffffffffULL)

#define p4_config_unpack_emask(v)			\
	({						\
		u32 t = p4_config_unpack_escr((v));	\
		t = t & P4_ESCR_EVENTMASK_MASK;		\
		t = t >> P4_ESCR_EVENTMASK_SHIFT;	\
		t;					\
	})

#define p4_config_unpack_event(v)			\
	({						\
		u32 t = p4_config_unpack_escr((v));	\
		t = t & P4_ESCR_EVENT_MASK;		\
		t = t >> P4_ESCR_EVENT_SHIFT;		\
		t;					\
	})

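/*
 * An illustrative sketch (hypothetical helper, not a kernel API) of the
 * pack/unpack round trip; the 0x0badf00d/0xcafe values are arbitrary:
 */
static inline int p4_example_pack_roundtrip(void)
{
	u64 config = p4_config_pack_escr(0x0badf00d) |
		     p4_config_pack_cccr(0xcafe);

	/* the high 32 bits hold the ESCR image, the low 32 bits the CCCR */
	return p4_config_unpack_escr(config) == 0x0badf00d &&
	       p4_config_unpack_cccr(config) == 0xcafe;
}
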
#define P4_CONFIG_HT_SHIFT		63
#define P4_CONFIG_HT			(1ULL << P4_CONFIG_HT_SHIFT)

/*
 * If an event has an alias it should be marked
 * with a special bit. (Don't forget to check
 * P4_PEBS_CONFIG_MASK and related bits on
 * modification.)
 */
#define P4_CONFIG_ALIASABLE		(1 << 9)

/*
 * The bits we allow to pass for RAW events
 */
#define P4_CONFIG_MASK_ESCR		\
	(P4_ESCR_EVENT_MASK	|	\
	 P4_ESCR_EVENTMASK_MASK	|	\
	 P4_ESCR_TAG_MASK	|	\
	 P4_ESCR_TAG_ENABLE)

#define P4_CONFIG_MASK_CCCR		\
	(P4_CCCR_EDGE		|	\
	 P4_CCCR_THRESHOLD_MASK	|	\
	 P4_CCCR_COMPLEMENT	|	\
	 P4_CCCR_COMPARE	|	\
	 P4_CCCR_THREAD_ANY	|	\
	 P4_CCCR_RESERVED)

/* some dangerous bits are reserved for kernel internals */
#define P4_CONFIG_MASK					\
	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR) |	\
	 p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))

/*
 * In case of event aliasing we need to preserve some
 * caller bits, otherwise the mapping won't be complete.
 */
#define P4_CONFIG_EVENT_ALIAS_MASK			\
	(p4_config_pack_escr(P4_CONFIG_MASK_ESCR) |	\
	 p4_config_pack_cccr(P4_CCCR_EDGE	|	\
			     P4_CCCR_THRESHOLD_MASK |	\
			     P4_CCCR_COMPLEMENT	|	\
			     P4_CCCR_COMPARE))

#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS		\
	((P4_CONFIG_HT)				|	\
	 p4_config_pack_escr(P4_ESCR_T0_OS	|	\
			     P4_ESCR_T0_USR	|	\
			     P4_ESCR_T1_OS	|	\
			     P4_ESCR_T1_USR)	|	\
	 p4_config_pack_cccr(P4_CCCR_OVF	|	\
			     P4_CCCR_CASCADE	|	\
			     P4_CCCR_FORCE_OVF	|	\
			     P4_CCCR_THREAD_ANY	|	\
			     P4_CCCR_OVF_PMI_T0	|	\
			     P4_CCCR_OVF_PMI_T1	|	\
			     P4_CONFIG_ALIASABLE))

static inline bool p4_is_event_cascaded(u64 config)
{
	u32 cccr = p4_config_unpack_cccr(config);

	return !!(cccr & P4_CCCR_CASCADE);
}

static inline int p4_ht_config_thread(u64 config)
{
	return !!(config & P4_CONFIG_HT);
}

static inline u64 p4_set_ht_bit(u64 config)
{
	return config | P4_CONFIG_HT;
}

static inline u64 p4_clear_ht_bit(u64 config)
{
	return config & ~P4_CONFIG_HT;
}

static inline int p4_ht_active(void)
{
#ifdef CONFIG_SMP
	return smp_num_siblings > 1;
#endif
	return 0;
}

static inline int p4_ht_thread(int cpu)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings == 2)
		return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
#endif
	return 0;
}

static inline int p4_should_swap_ts(u64 config, int cpu)
{
	return p4_ht_config_thread(config) ^ p4_ht_thread(cpu);
}

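/*
 * An illustrative sketch (hypothetical helper, not a kernel API): the HT
 * bit records which sibling an event was configured on, so a caller
 * would tag a config with the current sibling like this:
 */
static inline u64 p4_example_tag_thread(u64 config, int cpu)
{
	/* thread 1 gets the borrowed ESCR-reserved bit set, thread 0 clear */
	return p4_ht_thread(cpu) ? p4_set_ht_bit(config) :
				   p4_clear_ht_bit(config);
}
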
static inline u32 p4_default_cccr_conf(int cpu)
{
	/*
	 * Note that P4_CCCR_THREAD_ANY is "required" on
	 * non-HT machines (on HT machines we count TS events
	 * regardless of the state of the second logical processor)
	 */
	u32 cccr = P4_CCCR_THREAD_ANY;

	if (!p4_ht_thread(cpu))
		cccr |= P4_CCCR_OVF_PMI_T0;
	else
		cccr |= P4_CCCR_OVF_PMI_T1;

	return cccr;
}

static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
{
	u32 escr = 0;

	if (!p4_ht_thread(cpu)) {
		if (!exclude_os)
			escr |= P4_ESCR_T0_OS;
		if (!exclude_usr)
			escr |= P4_ESCR_T0_USR;
	} else {
		if (!exclude_os)
			escr |= P4_ESCR_T1_OS;
		if (!exclude_usr)
			escr |= P4_ESCR_T1_USR;
	}

	return escr;
}

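/*
 * An illustrative sketch (hypothetical helper, not a kernel API) that
 * combines the two default helpers above into a single packed config
 * counting both kernel and user mode; no event is selected here:
 */
static inline u64 p4_example_default_config(int cpu)
{
	u32 escr = p4_default_escr_conf(cpu, 0, 0);
	u32 cccr = p4_default_cccr_conf(cpu);

	return p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr);
}
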
/*
 * These are the events which should be used in the "Event Select"
 * field of the ESCR register; they are like unique keys which allow
 * the kernel to determine which CCCR and COUNTER should be
 * used to track an event
 */
enum P4_EVENTS {
	/* ... */
};

#define P4_OPCODE(event)		event##_OPCODE
#define P4_OPCODE_ESEL(opcode)		((opcode & 0x00ff) >> 0)
#define P4_OPCODE_EVNT(opcode)		((opcode & 0xff00) >> 8)
#define P4_OPCODE_PACK(event, sel)	(((event) << 8) | sel)

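/*
 * An illustrative sketch (hypothetical helper, not a kernel API): an
 * opcode carries the event-select code in its high byte and the ESCR
 * select in its low byte, as the BNR opcode below demonstrates:
 */
static inline int p4_example_opcode_unpack(void)
{
	/* P4_OPCODE_PACK(0x08, 0x03) yields 0x0803 */
	return P4_OPCODE_EVNT(P4_OPCODE_PACK(0x08, 0x03)) == 0x08 &&
	       P4_OPCODE_ESEL(P4_OPCODE_PACK(0x08, 0x03)) == 0x03;
}
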
/*
 * Comments below the event represent the ESCR restriction
 * for this event and the counter index per ESCR
 *
 * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early
 * processor builds (family 0FH, models 01H-02H). These MSRs
 * are not available on later versions, so we don't use
 * them at all
 *
 * Also note that CCCR1 does not have the P4_CCCR_ENABLE bit
 * working properly, so as a result we should not use that CCCR
 * or its respective counter
 */
enum P4_EVENT_OPCODES {
	/*
	 * MSR_P4_TC_ESCR0: 4, 5
	 * MSR_P4_TC_ESCR1: 6, 7
	 */

	/*
	 * MSR_P4_BPU_ESCR0: 0, 1
	 * MSR_P4_BPU_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_ITLB_ESCR0: 0, 1
	 * MSR_P4_ITLB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_DAC_ESCR0: 8, 9
	 * MSR_P4_DAC_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_SAAT_ESCR0: 8, 9
	 * MSR_P4_SAAT_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_SAAT_ESCR0: 8, 9
	 * MSR_P4_SAAT_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_SAAT_ESCR0: 8, 9
	 * MSR_P4_SAAT_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_MOB_ESCR0: 0, 1
	 * MSR_P4_MOB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_PMH_ESCR0: 0, 1
	 * MSR_P4_PMH_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_BSU_ESCR0: 0, 1
	 * MSR_P4_BSU_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_BSU_ESCR0: 0, 1
	 */

	/*
	 * NOTE: no ESCR name in docs, it's guessed
	 * MSR_P4_BSU_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FIRM_ESCR0: 8, 9
	 * MSR_P4_FIRM_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_TC_ESCR0: 4, 5
	 * MSR_P4_TC_ESCR1: 6, 7
	 */

	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_MS_ESCR0: 4, 5
	 * MSR_P4_MS_ESCR1: 6, 7
	 */

	/*
	 * MSR_P4_MS_ESCR0: 4, 5
	 * MSR_P4_MS_ESCR1: 6, 7
	 */

	/*
	 * MSR_P4_TBPU_ESCR0: 4, 5
	 * MSR_P4_TBPU_ESCR1: 6, 7
	 */

	/*
	 * MSR_P4_TBPU_ESCR0: 4, 5
	 * MSR_P4_TBPU_ESCR1: 6, 7
	 */

	/*
	 * MSR_P4_ALF_ESCR0: 12, 13, 16
	 * MSR_P4_ALF_ESCR1: 14, 15, 17
	 */

	/*
	 * MSR_P4_DAC_ESCR0: 8, 9
	 * MSR_P4_DAC_ESCR1: 10, 11
	 */

	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03),
	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03),
	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_FSB_ESCR0: 0, 1
	 * MSR_P4_FSB_ESCR1: 2, 3
	 */

	/*
	 * MSR_P4_CRU_ESCR2: 12, 13, 16
	 * MSR_P4_CRU_ESCR3: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR2: 12, 13, 16
	 * MSR_P4_CRU_ESCR3: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR2: 12, 13, 16
	 * MSR_P4_CRU_ESCR3: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR0: 12, 13, 16
	 * MSR_P4_CRU_ESCR1: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR0: 12, 13, 16
	 * MSR_P4_CRU_ESCR1: 14, 15, 17
	 */

	/*
	 * MSR_P4_RAT_ESCR0: 12, 13, 16
	 * MSR_P4_RAT_ESCR1: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR2: 12, 13, 16
	 * MSR_P4_CRU_ESCR3: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR0: 12, 13, 16
	 * MSR_P4_CRU_ESCR1: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR2: 12, 13, 16
	 * MSR_P4_CRU_ESCR3: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR2: 12, 13, 16
	 * MSR_P4_CRU_ESCR3: 14, 15, 17
	 */

	/*
	 * MSR_P4_CRU_ESCR0: 12, 13, 16
	 * MSR_P4_CRU_ESCR1: 14, 15, 17
	 */
};

/*
 * a caller should use the P4_ESCR_EMASK_BIT helper to
 * pick the EventMask needed, for example
 *
 *	P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
 */
enum P4_ESCR_EMASKS {
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9),
	P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10),

	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0),
	P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1),

	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1),
	P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2),
};

/*
 * Note we have UOP and PEBS bits reserved for now,
 * just in case we need them later
 */
#define P4_PEBS_CONFIG_ENABLE		(1 << 7)
#define P4_PEBS_CONFIG_UOP_TAG		(1 << 8)
#define P4_PEBS_CONFIG_METRIC_MASK	0x3f
#define P4_PEBS_CONFIG_MASK		0xff

/*
 * note: only counters MSR_IQ_COUNTER4 (16) and
 * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
 */
#define P4_PEBS_ENABLE			0x02000000U
#define P4_PEBS_ENABLE_UOP_TAG		0x01000000U

#define p4_config_unpack_metric(v)	(((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
#define p4_config_unpack_pebs(v)	(((u64)(v)) & P4_PEBS_CONFIG_MASK)

#define p4_config_pebs_has(v, mask)	(p4_config_unpack_pebs(v) & (mask))

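/*
 * An illustrative sketch (hypothetical helper, not a kernel API): any
 * bit set inside the low PEBS byte of a config means PEBS is requested:
 */
static inline int p4_example_wants_pebs(u64 config)
{
	return !!p4_config_pebs_has(config, P4_PEBS_CONFIG_MASK);
}
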
enum P4_PEBS_METRIC {
	/* ... */
};

/*
 * Notes on internal configuration of ESCR+CCCR tuples
 *
 * Since the P4 has quite a different architecture of
 * performance registers compared with the "architectural"
 * ones, and we have only 64 bits to keep the configuration
 * of a performance event, the following trick is used.
 *
 * 1) Since both the ESCR and CCCR registers have only their low
 * 32 bits valuable, we pack them into a single 64 bit
 * configuration. The low 32 bits of such a config correspond
 * to the low 32 bits of the CCCR register and the high 32 bits
 * correspond to the low 32 bits of the ESCR register.
 *
 * 2) The meaning of every bit of such a config field can
 * be found in the Intel SDM, but it should be noted that
 * we "borrow" some reserved bits for our own usage and
 * clear them or set them to a proper value when we do
 * a real write to the hardware registers.
 *
 * 3) The format of the bits of the config is the following,
 * and they should be either 0 or set to some predefined
 * values:
 *
 * Low 32 bits
 * -----------
 *   0-6: P4_PEBS_METRIC enum
 *  7-11: reserved
 *    12: reserved (Enable)
 * 13-15: reserved (ESCR select)
 * 16-17: Active Thread
 *    18: Compare
 *    19: Complement
 * 20-23: Threshold
 *    24: Edge
 *    25: reserved (FORCE_OVF)
 *    26: reserved (OVF_PMI_T0)
 *    27: reserved (OVF_PMI_T1)
 * 28-29: reserved
 *    30: reserved (Cascade)
 *    31: reserved (OVF)
 *
 * High 32 bits
 * ------------
 *     0: reserved (T1_USR)
 *     1: reserved (T1_OS)
 *     2: reserved (T0_USR)
 *     3: reserved (T0_OS)
 *     4: Tag Enable
 *   5-8: Tag Value
 *  9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper)
 * 25-30: enum P4_EVENTS
 *    31: reserved (HT thread)
 */

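/*
 * An illustrative sketch (hypothetical helper, not a kernel API) of how
 * the masks above sanitize a user-supplied RAW config, dropping the
 * "dangerous" kernel-internal bits:
 */
static inline u64 p4_example_clamp_raw_config(u64 config)
{
	return config & P4_CONFIG_MASK;
}
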
#endif /* PERF_EVENT_P4_H */