/*
 * nmi_int.c - Linux kernel 3.7.1, arch/x86/oprofile
 */
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/oprofile.h>
#include <linux/syscore_ops.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/kdebug.h>
#include <linux/cpu.h>
#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/apic.h>

#include "op_counter.h"
#include "op_x86_model.h"

static struct op_x86_model_spec *model;
static DEFINE_PER_CPU(struct op_msrs, cpu_msrs);
static DEFINE_PER_CPU(unsigned long, saved_lvtpc);

/* must be protected with get_online_cpus()/put_online_cpus(): */
static int nmi_enabled;
static int ctr_running;
struct op_counter_config counter_config[OP_MAX_COUNTER];

/* common functions */
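
/*
 * Build the event-select (control) register value for one counter from
 * its oprofile configuration (event, unit mask, user/kernel flags and
 * extra bits), on top of the model's reserved bits.
 */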
u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
                    struct op_counter_config *counter_config)
{
        u64 val = 0;
        u16 event = (u16)counter_config->event;

        val |= model->reserved;
        val |= counter_config->user ? ARCH_PERFMON_EVENTSEL_USR : 0;
        val |= counter_config->kernel ? ARCH_PERFMON_EVENTSEL_OS : 0;
        val |= (counter_config->unit_mask & 0xFF) << 8;
        counter_config->extra &= (ARCH_PERFMON_EVENTSEL_INV |
                                  ARCH_PERFMON_EVENTSEL_EDGE |
                                  ARCH_PERFMON_EVENTSEL_CMASK);
        val |= counter_config->extra;
        event &= model->event_mask ? model->event_mask : 0xFF;
        val |= event & 0xFF;
        val |= (u64)(event & 0x0F00) << 24;

        return val;
}

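
/*
 * The oprofile NMI handler: while counters are running, hand the NMI to
 * the model's check_ctrs(); if oprofile is enabled but the counters are
 * stopped, silence the stray NMI by stopping the counters again.
 */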
static int profile_exceptions_notify(unsigned int val, struct pt_regs *regs)
{
        if (ctr_running)
                model->check_ctrs(regs, &__get_cpu_var(cpu_msrs));
        else if (!nmi_enabled)
                return NMI_DONE;
        else
                model->stop(&__get_cpu_var(cpu_msrs));
        return NMI_HANDLED;
}

static void nmi_cpu_save_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < model->num_counters; ++i) {
                if (counters[i].addr)
                        rdmsrl(counters[i].addr, counters[i].saved);
        }

        for (i = 0; i < model->num_controls; ++i) {
                if (controls[i].addr)
                        rdmsrl(controls[i].addr, controls[i].saved);
        }
}

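
/*
 * Start/stop paths: nmi_start()/nmi_stop() run the per-CPU helpers on
 * every online CPU via on_each_cpu(), with CPU hotplug excluded.
 */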
static void nmi_cpu_start(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        if (!msrs->controls)
                WARN_ON_ONCE(1);
        else
                model->start(msrs);
}

static int nmi_start(void)
{
        get_online_cpus();
        ctr_running = 1;
        /* make ctr_running visible to the nmi handler: */
        smp_mb();
        on_each_cpu(nmi_cpu_start, NULL, 1);
        put_online_cpus();
        return 0;
}

static void nmi_cpu_stop(void *dummy)
{
        struct op_msrs const *msrs = &__get_cpu_var(cpu_msrs);
        if (!msrs->controls)
                WARN_ON_ONCE(1);
        else
                model->stop(msrs);
}

static void nmi_stop(void)
{
        get_online_cpus();
        on_each_cpu(nmi_cpu_stop, NULL, 1);
        ctr_running = 0;
        put_online_cpus();
}

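
/*
 * Event multiplexing: when more events are configured than the CPU has
 * physical counters, the physical counters are periodically rotated
 * over the configured "virtual" counters.
 */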
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

static DEFINE_PER_CPU(int, switch_index);

static inline int has_mux(void)
{
        return !!model->switch_ctrl;
}

inline int op_x86_phys_to_virt(int phys)
{
        return __this_cpu_read(switch_index) + phys;
}

inline int op_x86_virt_to_phys(int virt)
{
        return virt % model->num_counters;
}

static void nmi_shutdown_mux(void)
{
        int i;

        if (!has_mux())
                return;

        for_each_possible_cpu(i) {
                kfree(per_cpu(cpu_msrs, i).multiplex);
                per_cpu(cpu_msrs, i).multiplex = NULL;
                per_cpu(switch_index, i) = 0;
        }
}

static int nmi_setup_mux(void)
{
        size_t multiplex_size =
                sizeof(struct op_msr) * model->num_virt_counters;
        int i;

        if (!has_mux())
                return 1;

        for_each_possible_cpu(i) {
                per_cpu(cpu_msrs, i).multiplex =
                        kzalloc(multiplex_size, GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).multiplex)
                        return 0;
        }

        return 1;
}

static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs)
{
        int i;
        struct op_msr *multiplex = msrs->multiplex;

        if (!has_mux())
                return;

        for (i = 0; i < model->num_virt_counters; ++i) {
                if (counter_config[i].enabled) {
                        multiplex[i].saved = -(u64)counter_config[i].count;
                } else {
                        multiplex[i].saved = 0;
                }
        }

        per_cpu(switch_index, cpu) = 0;
}

static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *multiplex = msrs->multiplex;
        int i;

        for (i = 0; i < model->num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (counters[i].addr)
                        rdmsrl(counters[i].addr, multiplex[virt].saved);
        }
}

static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *multiplex = msrs->multiplex;
        int i;

        for (i = 0; i < model->num_counters; ++i) {
                int virt = op_x86_phys_to_virt(i);
                if (counters[i].addr)
                        wrmsrl(counters[i].addr, multiplex[virt].saved);
        }
}

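
/*
 * Per-CPU rotation to the next set of virtual counters: stop the
 * counters, save the current counts, advance switch_index, reprogram
 * the control registers and restart.
 */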
static void nmi_cpu_switch(void *dummy)
{
        int cpu = smp_processor_id();
        int si = per_cpu(switch_index, cpu);
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

        nmi_cpu_stop(NULL);
        nmi_cpu_save_mpx_registers(msrs);

        /* move to next set */
        si += model->num_counters;
        if ((si >= model->num_virt_counters) || (counter_config[si].count == 0))
                per_cpu(switch_index, cpu) = 0;
        else
                per_cpu(switch_index, cpu) = si;

        model->switch_ctrl(model, msrs);
        nmi_cpu_restore_mpx_registers(msrs);

        nmi_cpu_start(NULL);
}


/*
 * Quick check to see if multiplexing is necessary.
 * The check should be sufficient since counters are used
 * in order.
 */
static int nmi_multiplex_on(void)
{
        return counter_config[model->num_counters].count ? 0 : -EINVAL;
}

static int nmi_switch_event(void)
{
        if (!has_mux())
                return -ENOSYS;         /* not implemented */
        if (nmi_multiplex_on() < 0)
                return -EINVAL;         /* not necessary */

        get_online_cpus();
        if (ctr_running)
                on_each_cpu(nmi_cpu_switch, NULL, 1);
        put_online_cpus();

        return 0;
}

static inline void mux_init(struct oprofile_operations *ops)
{
        if (has_mux())
                ops->switch_events = nmi_switch_event;
}

static void mux_clone(int cpu)
{
        if (!has_mux())
                return;

        memcpy(per_cpu(cpu_msrs, cpu).multiplex,
               per_cpu(cpu_msrs, 0).multiplex,
               sizeof(struct op_msr) * model->num_virt_counters);
}

#else

inline int op_x86_phys_to_virt(int phys) { return phys; }
inline int op_x86_virt_to_phys(int virt) { return virt; }
static inline void nmi_shutdown_mux(void) { }
static inline int nmi_setup_mux(void) { return 1; }
static inline void
nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) { }
static inline void mux_init(struct oprofile_operations *ops) { }
static void mux_clone(int cpu) { }

#endif

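
/*
 * Allocation and release of the per-CPU shadow copies of the counter
 * and control MSRs (plus the multiplexing state when configured).
 */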
static void free_msrs(void)
{
        int i;
        for_each_possible_cpu(i) {
                kfree(per_cpu(cpu_msrs, i).counters);
                per_cpu(cpu_msrs, i).counters = NULL;
                kfree(per_cpu(cpu_msrs, i).controls);
                per_cpu(cpu_msrs, i).controls = NULL;
        }
        nmi_shutdown_mux();
}

static int allocate_msrs(void)
{
        size_t controls_size = sizeof(struct op_msr) * model->num_controls;
        size_t counters_size = sizeof(struct op_msr) * model->num_counters;

        int i;
        for_each_possible_cpu(i) {
                per_cpu(cpu_msrs, i).counters = kzalloc(counters_size,
                                                        GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).counters)
                        goto fail;
                per_cpu(cpu_msrs, i).controls = kzalloc(controls_size,
                                                        GFP_KERNEL);
                if (!per_cpu(cpu_msrs, i).controls)
                        goto fail;
        }

        if (!nmi_setup_mux())
                goto fail;

        return 1;

fail:
        free_msrs();
        return 0;
}

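
/*
 * Per-CPU setup: save the current MSR state, let the model program the
 * counters, and point the local APIC performance-counter LVT entry at
 * the NMI vector.
 */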
static void nmi_cpu_setup(void *dummy)
{
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);
        nmi_cpu_save_registers(msrs);
        raw_spin_lock(&oprofilefs_lock);
        model->setup_ctrs(model, msrs);
        nmi_cpu_setup_mux(cpu, msrs);
        raw_spin_unlock(&oprofilefs_lock);
        per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC);
        apic_write(APIC_LVTPC, APIC_DM_NMI);
}

static void nmi_cpu_restore_registers(struct op_msrs *msrs)
{
        struct op_msr *counters = msrs->counters;
        struct op_msr *controls = msrs->controls;
        unsigned int i;

        for (i = 0; i < model->num_controls; ++i) {
                if (controls[i].addr)
                        wrmsrl(controls[i].addr, controls[i].saved);
        }

        for (i = 0; i < model->num_counters; ++i) {
                if (counters[i].addr)
                        wrmsrl(counters[i].addr, counters[i].saved);
        }
}

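
/*
 * Per-CPU teardown: put the saved LVTPC value back (with APIC errors
 * temporarily masked, see below) and restore the saved MSR state.
 */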
static void nmi_cpu_shutdown(void *dummy)
{
        unsigned int v;
        int cpu = smp_processor_id();
        struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu);

        /* restoring APIC_LVTPC can trigger an apic error because the delivery
         * mode and vector nr combination can be illegal. That's by design: on
         * power on the apic lvt contains a zero vector nr, which is legal only
         * for NMI delivery mode. So inhibit apic err before restoring lvtpc
         */
        v = apic_read(APIC_LVTERR);
        apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
        apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu));
        apic_write(APIC_LVTERR, v);
        nmi_cpu_restore_registers(msrs);
}

static void nmi_cpu_up(void *dummy)
{
        if (nmi_enabled)
                nmi_cpu_setup(dummy);
        if (ctr_running)
                nmi_cpu_start(dummy);
}

static void nmi_cpu_down(void *dummy)
{
        if (ctr_running)
                nmi_cpu_stop(dummy);
        if (nmi_enabled)
                nmi_cpu_shutdown(dummy);
}

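
/*
 * Create the per-counter configuration files in oprofilefs
 * (enabled, event, count, unit_mask, kernel, user, extra).
 */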
static int nmi_create_files(struct super_block *sb, struct dentry *root)
{
        unsigned int i;

        for (i = 0; i < model->num_virt_counters; ++i) {
                struct dentry *dir;
                char buf[4];

                /* quick little hack to _not_ expose a counter if it is not
                 * available for use. This should protect userspace app.
                 * NOTE: assumes 1:1 mapping here (that counters are organized
                 *       sequentially in their struct assignment).
                 */
                if (!avail_to_resrv_perfctr_nmi_bit(op_x86_virt_to_phys(i)))
                        continue;

                snprintf(buf, sizeof(buf), "%d", i);
                dir = oprofilefs_mkdir(sb, root, buf);
                oprofilefs_create_ulong(sb, dir, "enabled", &counter_config[i].enabled);
                oprofilefs_create_ulong(sb, dir, "event", &counter_config[i].event);
                oprofilefs_create_ulong(sb, dir, "count", &counter_config[i].count);
                oprofilefs_create_ulong(sb, dir, "unit_mask", &counter_config[i].unit_mask);
                oprofilefs_create_ulong(sb, dir, "kernel", &counter_config[i].kernel);
                oprofilefs_create_ulong(sb, dir, "user", &counter_config[i].user);
                oprofilefs_create_ulong(sb, dir, "extra", &counter_config[i].extra);
        }

        return 0;
}

static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action,
                                 void *data)
{
        int cpu = (unsigned long)data;
        switch (action) {
        case CPU_DOWN_FAILED:
        case CPU_ONLINE:
                smp_call_function_single(cpu, nmi_cpu_up, NULL, 0);
                break;
        case CPU_DOWN_PREPARE:
                smp_call_function_single(cpu, nmi_cpu_down, NULL, 1);
                break;
        }
        return NOTIFY_DONE;
}

static struct notifier_block oprofile_cpu_nb = {
        .notifier_call = oprofile_cpu_notifier
};

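
/*
 * Bring oprofile's NMI profiling up: allocate and fill the per-CPU MSR
 * state, register the NMI handler and CPU-hotplug notifier, then run
 * nmi_cpu_setup() on every online CPU.
 */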
static int nmi_setup(void)
{
        int err = 0;
        int cpu;

        if (!allocate_msrs())
                return -ENOMEM;

        /* We need to serialize save and setup for HT because the subsets
         * of msrs are distinct for save and setup operations
         */

        /* Assume saved/restored counters are the same on all CPUs */
        err = model->fill_in_addresses(&per_cpu(cpu_msrs, 0));
        if (err)
                goto fail;

        for_each_possible_cpu(cpu) {
                if (!cpu)
                        continue;

                memcpy(per_cpu(cpu_msrs, cpu).counters,
                       per_cpu(cpu_msrs, 0).counters,
                       sizeof(struct op_msr) * model->num_counters);

                memcpy(per_cpu(cpu_msrs, cpu).controls,
                       per_cpu(cpu_msrs, 0).controls,
                       sizeof(struct op_msr) * model->num_controls);

                mux_clone(cpu);
        }

        nmi_enabled = 0;
        ctr_running = 0;
        /* make variables visible to the nmi handler: */
        smp_mb();
        err = register_nmi_handler(NMI_LOCAL, profile_exceptions_notify,
                                   0, "oprofile");
        if (err)
                goto fail;

        get_online_cpus();
        register_cpu_notifier(&oprofile_cpu_nb);
        nmi_enabled = 1;
        /* make nmi_enabled visible to the nmi handler: */
        smp_mb();
        on_each_cpu(nmi_cpu_setup, NULL, 1);
        put_online_cpus();

        return 0;
fail:
        free_msrs();
        return err;
}

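
/*
 * Tear everything down in the reverse order: CPU notifier, per-CPU
 * APIC/MSR state, NMI handler, and finally the per-CPU allocations.
 */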
static void nmi_shutdown(void)
{
        struct op_msrs *msrs;

        get_online_cpus();
        unregister_cpu_notifier(&oprofile_cpu_nb);
        on_each_cpu(nmi_cpu_shutdown, NULL, 1);
        nmi_enabled = 0;
        ctr_running = 0;
        put_online_cpus();
        /* make variables visible to the nmi handler: */
        smp_mb();
        unregister_nmi_handler(NMI_LOCAL, "oprofile");
        msrs = &get_cpu_var(cpu_msrs);
        model->shutdown(msrs);
        free_msrs();
        put_cpu_var(cpu_msrs);
}

#ifdef CONFIG_PM

static int nmi_suspend(void)
{
        /* Only one CPU left, just stop that one */
        if (nmi_enabled == 1)
                nmi_cpu_stop(NULL);
        return 0;
}

static void nmi_resume(void)
{
        if (nmi_enabled == 1)
                nmi_cpu_start(NULL);
}

static struct syscore_ops oprofile_syscore_ops = {
        .resume = nmi_resume,
        .suspend = nmi_suspend,
};

static void __init init_suspend_resume(void)
{
        register_syscore_ops(&oprofile_syscore_ops);
}

static void exit_suspend_resume(void)
{
        unregister_syscore_ops(&oprofile_syscore_ops);
}

#else

static inline void init_suspend_resume(void) { }
static inline void exit_suspend_resume(void) { }

#endif /* CONFIG_PM */

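
/*
 * Pentium 4 detection: choose between the P4 and P4-HT models based on
 * the number of hyper-threading siblings.
 */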
static int __init p4_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;

        if (cpu_model > 6 || cpu_model == 5)
                return 0;

#ifndef CONFIG_SMP
        *cpu_type = "i386/p4";
        model = &op_p4_spec;
        return 1;
#else
        switch (smp_num_siblings) {
        case 1:
                *cpu_type = "i386/p4";
                model = &op_p4_spec;
                return 1;

        case 2:
                *cpu_type = "i386/p4-ht";
                model = &op_p4_ht2_spec;
                return 1;
        }
#endif

        printk(KERN_INFO "oprofile: P4 HyperThreading detected with > 2 threads\n");
        printk(KERN_INFO "oprofile: Reverting to timer mode.\n");
        return 0;
}

enum __force_cpu_type {
        reserved = 0,           /* do not force */
        timer,
        arch_perfmon,
};

static int force_cpu_type;

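
/*
 * "cpu_type" module parameter: lets the user force timer mode or the
 * generic architectural perfmon model instead of the auto-detected one
 * (e.g. cpu_type=timer when loading the oprofile module, or
 * oprofile.cpu_type=timer on the kernel command line if built in).
 */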
static int set_cpu_type(const char *str, struct kernel_param *kp)
{
        if (!strcmp(str, "timer")) {
                force_cpu_type = timer;
                printk(KERN_INFO "oprofile: forcing NMI timer mode\n");
        } else if (!strcmp(str, "arch_perfmon")) {
                force_cpu_type = arch_perfmon;
                printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
        } else {
                force_cpu_type = 0;
        }

        return 0;
}
module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);

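
/*
 * Map P6-family (family 6) model numbers to a cpu_type string and the
 * matching op_x86_model_spec; returns 0 for unknown models so the
 * caller can fall back to architectural perfmon.
 */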
static int __init ppro_init(char **cpu_type)
{
        __u8 cpu_model = boot_cpu_data.x86_model;
        struct op_x86_model_spec *spec = &op_ppro_spec; /* default */

        if (force_cpu_type == arch_perfmon && cpu_has_arch_perfmon)
                return 0;

        /*
         * Documentation on identifying Intel processors by CPU family
         * and model can be found in the Intel Software Developer's
         * Manuals (SDM):
         *
         *  http://www.intel.com/products/processor/manuals/
         *
         * As of May 2010 the documentation for this was in the:
         * "Intel 64 and IA-32 Architectures Software Developer's
         * Manual Volume 3B: System Programming Guide", "Table B-1
         * CPUID Signature Values of DisplayFamily_DisplayModel".
         */
        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
                break;
        case 3 ... 5:
                *cpu_type = "i386/pii";
                break;
        case 6 ... 8:
        case 10 ... 11:
                *cpu_type = "i386/piii";
                break;
        case 9:
        case 13:
                *cpu_type = "i386/p6_mobile";
                break;
        case 14:
                *cpu_type = "i386/core";
                break;
        case 0x0f:
        case 0x16:
        case 0x17:
        case 0x1d:
                *cpu_type = "i386/core_2";
                break;
        case 0x1a:
        case 0x1e:
        case 0x2e:
                spec = &op_arch_perfmon_spec;
                *cpu_type = "i386/core_i7";
                break;
        case 0x1c:
                *cpu_type = "i386/atom";
                break;
        default:
                /* Unknown */
                return 0;
        }

        model = spec;
        return 1;
}

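
/*
 * Main entry point (called from oprofile_arch_init()): detect a
 * supported CPU, install the NMI-based oprofile_operations callbacks
 * and let the model override them where needed.
 */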
int __init op_nmi_init(struct oprofile_operations *ops)
{
        __u8 vendor = boot_cpu_data.x86_vendor;
        __u8 family = boot_cpu_data.x86;
        char *cpu_type = NULL;
        int ret = 0;

        if (!cpu_has_apic)
                return -ENODEV;

        if (force_cpu_type == timer)
                return -ENODEV;

        switch (vendor) {
        case X86_VENDOR_AMD:
                /* Needs to be at least an Athlon (or hammer in 32bit mode) */

                switch (family) {
                case 6:
                        cpu_type = "i386/athlon";
                        break;
                case 0xf:
                        /*
                         * Actually it could be i386/hammer too, but
                         * give user space a consistent name.
                         */
                        cpu_type = "x86-64/hammer";
                        break;
                case 0x10:
                        cpu_type = "x86-64/family10";
                        break;
                case 0x11:
                        cpu_type = "x86-64/family11h";
                        break;
                case 0x12:
                        cpu_type = "x86-64/family12h";
                        break;
                case 0x14:
                        cpu_type = "x86-64/family14h";
                        break;
                case 0x15:
                        cpu_type = "x86-64/family15h";
                        break;
                default:
                        return -ENODEV;
                }
                model = &op_amd_spec;
                break;

        case X86_VENDOR_INTEL:
                switch (family) {
                /* Pentium IV */
                case 0xf:
                        p4_init(&cpu_type);
                        break;

                /* A P6-class processor */
                case 6:
                        ppro_init(&cpu_type);
                        break;

                default:
                        break;
                }

                if (cpu_type)
                        break;

                if (!cpu_has_arch_perfmon)
                        return -ENODEV;

                /* use arch perfmon as fallback */
                cpu_type = "i386/arch_perfmon";
                model = &op_arch_perfmon_spec;
                break;

        default:
                return -ENODEV;
        }

        /* default values, can be overwritten by model */
        ops->create_files = nmi_create_files;
        ops->setup = nmi_setup;
        ops->shutdown = nmi_shutdown;
        ops->start = nmi_start;
        ops->stop = nmi_stop;
        ops->cpu_type = cpu_type;

        if (model->init)
                ret = model->init(ops);
        if (ret)
                return ret;

        if (!model->num_virt_counters)
                model->num_virt_counters = model->num_counters;

        mux_init(ops);

        init_suspend_resume();

        printk(KERN_INFO "oprofile: using NMI interrupt.\n");
        return 0;
}

void op_nmi_exit(void)
{
        exit_suspend_resume();
}