therm_throt.c
/*
 * Thermal throttle event support code (such as syslog messaging and rate
 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
 *
 * This allows consistent reporting of CPU thermal throttle events.
 *
 * Maintains a counter in /sys that keeps track of the number of thermal
 * events, such that the user knows how bad the thermal problem might be
 * (since the logging to syslog and mcelog is rate limited).
 *
 * Author: Dmitriy Zavin ([email protected])
 *
 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
 *          Inspired by Ross Biro's and Al Borchers' counter code.
 */
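/*
 * Illustrative sketch (not part of this file): the counters maintained
 * here are plain sysfs files, so user space can poll them with ordinary
 * file I/O. Assuming the default sysfs mount point:
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		unsigned long count;
 *		FILE *f = fopen("/sys/devices/system/cpu/cpu0/"
 *				"thermal_throttle/core_throttle_count", "r");
 *
 *		if (f && fscanf(f, "%lu", &count) == 1)
 *			printf("cpu0 core throttle events: %lu\n", count);
 *		if (f)
 *			fclose(f);
 *		return 0;
 *	}
 */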
#include <linux/interrupt.h>
#include <linux/notifier.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/idle.h>
#include <asm/mce.h>
#include <asm/msr.h>

/*
 * How long to wait between reporting thermal events:
 * 300 * HZ jiffies, i.e. 300 seconds.
 */
#define CHECK_INTERVAL		(300 * HZ)

#define THERMAL_THROTTLING_EVENT	0
#define POWER_LIMIT_EVENT		1

/*
 * Current thermal event state:
 */
struct _thermal_state {
	bool			new_event;
	int			event;
	u64			next_check;
	unsigned long		count;
	unsigned long		last_count;
};

struct thermal_state {
	struct _thermal_state core_throttle;
	struct _thermal_state core_power_limit;
	struct _thermal_state package_throttle;
	struct _thermal_state package_power_limit;
	struct _thermal_state core_thresh0;
	struct _thermal_state core_thresh1;
};

/* Callback to handle core threshold interrupts */
int (*platform_thermal_notify)(__u64 msr_val);
EXPORT_SYMBOL(platform_thermal_notify);
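/*
 * Illustrative sketch (not part of this file): platform_thermal_notify
 * lets an external driver receive the raw IA32_THERM_STATUS value when a
 * programmed threshold is crossed; notify_thresholds() below only invokes
 * it when non-NULL. The names my_therm_notify/my_driver_init are
 * hypothetical:
 *
 *	static int my_therm_notify(__u64 msr_val)
 *	{
 *		pr_info("thermal threshold crossed, status=%llx\n",
 *			(unsigned long long)msr_val);
 *		return 0;
 *	}
 *
 *	static int __init my_driver_init(void)
 *	{
 *		platform_thermal_notify = my_therm_notify;
 *		return 0;
 *	}
 */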

static DEFINE_PER_CPU(struct thermal_state, thermal_state);

static atomic_t therm_throt_en	= ATOMIC_INIT(0);

static u32 lvtthmr_init __read_mostly;

#ifdef CONFIG_SYSFS
#define define_therm_throt_device_one_ro(_name)				\
	static DEVICE_ATTR(_name, 0444,					\
			   therm_throt_device_show_##_name,		\
				   NULL)				\

#define define_therm_throt_device_show_func(event, name)		\
									\
static ssize_t therm_throt_device_show_##event##_##name(		\
			struct device *dev,				\
			struct device_attribute *attr,			\
			char *buf)					\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret;							\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu)) {						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_state, cpu).event.name);	\
	} else								\
		ret = 0;						\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_device_show_func(core_throttle, count);
define_therm_throt_device_one_ro(core_throttle_count);
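/*
 * For reference, the pair of invocations above expands (roughly) to:
 *
 *	static ssize_t therm_throt_device_show_core_throttle_count(
 *			struct device *dev, struct device_attribute *attr,
 *			char *buf)
 *	{
 *		...sprintf(buf, "%lu\n",
 *			   per_cpu(thermal_state, dev->id).core_throttle.count);
 *	}
 *
 *	static DEVICE_ATTR(core_throttle_count, 0444,
 *			   therm_throt_device_show_core_throttle_count, NULL);
 */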

define_therm_throt_device_show_func(core_power_limit, count);
define_therm_throt_device_one_ro(core_power_limit_count);

define_therm_throt_device_show_func(package_throttle, count);
define_therm_throt_device_one_ro(package_throttle_count);

define_therm_throt_device_show_func(package_power_limit, count);
define_therm_throt_device_one_ro(package_power_limit_count);

static struct attribute *thermal_throttle_attrs[] = {
	&dev_attr_core_throttle_count.attr,
	NULL
};

static struct attribute_group thermal_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
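/*
 * Note: only core_throttle_count is created unconditionally through this
 * group. The core_power_limit_count, package_throttle_count and
 * package_power_limit_count files are attached at runtime by
 * thermal_throttle_add_dev() below, depending on X86_FEATURE_PLN/PTS, so
 * cpuN/thermal_throttle/ may expose between one and four files.
 */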
#endif /* CONFIG_SYSFS */

#define CORE_LEVEL	0
#define PACKAGE_LEVEL	1

/***
 * therm_throt_process - Process thermal throttling event from interrupt
 * @new_event: Whether the condition is currently active (boolean), since
 *             the thermal interrupt normally gets called both when the
 *             thermal event begins and once the event has ended.
 * @event:     THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT.
 * @level:     CORE_LEVEL or PACKAGE_LEVEL.
 *
 * This function is called by the thermal interrupt after the
 * IRQ has been acknowledged.
 *
 * It will take care of rate limiting and printing messages to the syslog.
 *
 * Returns: 0 : Event should NOT be further logged, i.e. still in
 *              "timeout" from previous log message.
 *          1 : Event should be logged further, and a message has been
 *              printed to the syslog.
 */
static int therm_throt_process(bool new_event, int event, int level)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	bool old_event;
	u64 now;
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);

	now = get_jiffies_64();
	if (level == CORE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->core_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->core_power_limit;
		else
			return 0;
	} else if (level == PACKAGE_LEVEL) {
		if (event == THERMAL_THROTTLING_EVENT)
			state = &pstate->package_throttle;
		else if (event == POWER_LIMIT_EVENT)
			state = &pstate->package_power_limit;
		else
			return 0;
	} else
		return 0;

	old_event = state->new_event;
	state->new_event = new_event;

	if (new_event)
		state->count++;

	if (time_before64(now, state->next_check) &&
			state->count != state->last_count)
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	state->last_count = state->count;

	/* if we just entered the thermal event */
	if (new_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		else
			printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package",
				state->count);
		return 1;
	}
	if (old_event) {
		if (event == THERMAL_THROTTLING_EVENT)
			printk(KERN_INFO "CPU%d: %s temperature/speed normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		else
			printk(KERN_INFO "CPU%d: %s power limit normal\n",
				this_cpu,
				level == CORE_LEVEL ? "Core" : "Package");
		return 1;
	}

	return 0;
}
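/*
 * Worked example of the rate limiting above: with CHECK_INTERVAL set to
 * 300 seconds, a core that keeps bouncing in and out of PROCHOT gets at
 * most one KERN_CRIT "throttled" message per 5-minute window, because
 * repeat events inside the window hit the time_before64()/last_count
 * check and return 0. state->count still increments on every assertion,
 * so the sysfs counter records events that syslog never reports.
 */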

static int thresh_event_valid(int event)
{
	struct _thermal_state *state;
	unsigned int this_cpu = smp_processor_id();
	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
	u64 now = get_jiffies_64();

	state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;

	if (time_before64(now, state->next_check))
		return 0;

	state->next_check = now + CHECK_INTERVAL;
	return 1;
}

#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static __cpuinit int thermal_throttle_add_dev(struct device *dev,
				unsigned int cpu)
{
	int err;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
	if (err)
		return err;

	if (cpu_has(c, X86_FEATURE_PLN))
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_core_power_limit_count.attr,
					      thermal_attr_group.name);
	if (cpu_has(c, X86_FEATURE_PTS)) {
		err = sysfs_add_file_to_group(&dev->kobj,
					      &dev_attr_package_throttle_count.attr,
					      thermal_attr_group.name);
		if (cpu_has(c, X86_FEATURE_PLN))
			err = sysfs_add_file_to_group(&dev->kobj,
					&dev_attr_package_power_limit_count.attr,
					thermal_attr_group.name);
	}

	return err;
}

static __cpuinit void thermal_throttle_remove_dev(struct device *dev)
{
	sysfs_remove_group(&dev->kobj, &thermal_attr_group);
}

/* Mutex protecting device creation against CPU hotplug: */
static DEFINE_MUTEX(therm_cpu_lock);

/* Get notified when a cpu comes on/off. Be hotplug friendly. */
static __cpuinit int
thermal_throttle_cpu_callback(struct notifier_block *nfb,
			      unsigned long action,
			      void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct device *dev;
	int err = 0;

	dev = get_cpu_device(cpu);

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		mutex_lock(&therm_cpu_lock);
		err = thermal_throttle_add_dev(dev, cpu);
		mutex_unlock(&therm_cpu_lock);
		WARN_ON(err);
		break;
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		mutex_lock(&therm_cpu_lock);
		thermal_throttle_remove_dev(dev);
		mutex_unlock(&therm_cpu_lock);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =
{
	.notifier_call = thermal_throttle_cpu_callback,
};

static __init int thermal_throttle_init_device(void)
{
	unsigned int cpu = 0;
	int err;

	if (!atomic_read(&therm_throt_en))
		return 0;

	register_hotcpu_notifier(&thermal_throttle_cpu_notifier);

#ifdef CONFIG_HOTPLUG_CPU
	mutex_lock(&therm_cpu_lock);
#endif
	/* connect live CPUs to sysfs */
	for_each_online_cpu(cpu) {
		err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu);
		WARN_ON(err);
	}
#ifdef CONFIG_HOTPLUG_CPU
	mutex_unlock(&therm_cpu_lock);
#endif

	return 0;
}
device_initcall(thermal_throttle_init_device);

#endif /* CONFIG_SYSFS */

static void notify_thresholds(__u64 msr_val)
{
	/*
	 * Check whether the interrupt handler is defined;
	 * otherwise simply return.
	 */
	if (!platform_thermal_notify)
		return;

	/* lower threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
		platform_thermal_notify(msr_val);
	/* higher threshold reached */
	if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
		platform_thermal_notify(msr_val);
}

/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
	__u64 msr_val;

	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);

	/* Check for violation of core thermal thresholds */
	notify_thresholds(msr_val);

	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
				THERMAL_THROTTLING_EVENT,
				CORE_LEVEL) != 0)
		mce_log_therm_throt_event(msr_val);

	if (this_cpu_has(X86_FEATURE_PLN))
		therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					CORE_LEVEL);

	if (this_cpu_has(X86_FEATURE_PTS)) {
		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
		therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
					THERMAL_THROTTLING_EVENT,
					PACKAGE_LEVEL);
		if (this_cpu_has(X86_FEATURE_PLN))
			therm_throt_process(msr_val &
					PACKAGE_THERM_STATUS_POWER_LIMIT,
					POWER_LIMIT_EVENT,
					PACKAGE_LEVEL);
	}
}

static void unexpected_thermal_interrupt(void)
{
	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
		smp_processor_id());
}

static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;

asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
	irq_enter();
	exit_idle();
	inc_irq_stat(irq_thermal_count);
	smp_thermal_vector();
	irq_exit();
	/* Ack only at the end to avoid potential reentry */
	ack_APIC_irq();
}

/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
	if (!cpu_has_apic)
		return 0;
	if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
		return 0;
	return 1;
}

void __init mcheck_intel_therm_init(void)
{
	/*
	 * This function is only called on the boot CPU. Save the init
	 * thermal LVT value on the BSP and use that value to restore the
	 * APs' thermal LVT entry that the BIOS programmed later.
	 */
	if (intel_thermal_supported(&boot_cpu_data))
		lvtthmr_init = apic_read(APIC_LVTTHMR);
}

void intel_init_thermal(struct cpuinfo_x86 *c)
{
	unsigned int cpu = smp_processor_id();
	int tm2 = 0;
	u32 l, h;

	if (!intel_thermal_supported(c))
		return;

	/*
	 * First check if it's enabled already, in which case there might
	 * be some SMM goo which handles it, so we can't even put a handler
	 * since it might be delivered via SMI already:
	 */
	rdmsr(MSR_IA32_MISC_ENABLE, l, h);

	h = lvtthmr_init;
	/*
	 * The initial value of thermal LVT entries on all APs always reads
	 * 0x10000 because APs are woken up by the BSP issuing an
	 * INIT-SIPI-SIPI sequence to them and LVT registers are reset to 0s
	 * except for the mask bits, which are set to 1s when APs receive the
	 * INIT IPI. If BIOS takes over the thermal interrupt and sets its
	 * interrupt delivery mode to SMI (not fixed), this restores on the
	 * AP the value that the BIOS programmed, based on the BSP's info we
	 * saved, since BIOS always sets the same value for all
	 * threads/cores.
	 */
	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
		apic_write(APIC_LVTTHMR, lvtthmr_init);

	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal monitoring handled by SMI\n", cpu);
		return;
	}

	/* Check whether a vector already exists */
	if (h & APIC_VECTOR_MASK) {
		printk(KERN_DEBUG
		       "CPU%d: Thermal LVT vector (%#x) already installed\n",
		       cpu, (h & APIC_VECTOR_MASK));
		return;
	}

	/* early Pentium M models use a different method for enabling TM2 */
	if (cpu_has(c, X86_FEATURE_TM2)) {
		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
			rdmsr(MSR_THERM2_CTL, l, h);
			if (l & MSR_THERM2_CTL_TM_SELECT)
				tm2 = 1;
		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
			tm2 = 1;
	}

	/* We'll mask the thermal vector in the lapic till we're ready: */
	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
	apic_write(APIC_LVTTHMR, h);

	rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
	if (cpu_has(c, X86_FEATURE_PLN))
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE
			| THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
	else
		wrmsr(MSR_IA32_THERM_INTERRUPT,
		      l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);

	if (cpu_has(c, X86_FEATURE_PTS)) {
		rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
		if (cpu_has(c, X86_FEATURE_PLN))
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE
				| PACKAGE_THERM_INT_PLN_ENABLE), h);
		else
			wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
			      l | (PACKAGE_THERM_INT_LOW_ENABLE
				| PACKAGE_THERM_INT_HIGH_ENABLE), h);
	}

	smp_thermal_vector = intel_thermal_interrupt;

	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
	wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);

	/* Unmask the thermal vector: */
	l = apic_read(APIC_LVTTHMR);
	apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);

	printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n",
		       tm2 ? "TM2" : "TM1");

	/* enable thermal throttle processing */
	atomic_set(&therm_throt_en, 1);
}
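/*
 * The enable sequence above, in short: bail out if SMM/BIOS already owns
 * the thermal interrupt, install the LVT entry masked, enable the desired
 * THERM_INT/PACKAGE_THERM_INT sources, point smp_thermal_vector at
 * intel_thermal_interrupt(), set MSR_IA32_MISC_ENABLE_TM1, and only then
 * unmask the vector so no interrupt fires half-configured. This function
 * is expected to be called once per CPU from the machine-check setup path
 * as each processor is brought up.
 */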