Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
bbc_envctrl.c
Go to the documentation of this file.
1 /* bbc_envctrl.c: UltraSPARC-III environment control driver.
2  *
3  * Copyright (C) 2001, 2008 David S. Miller ([email protected])
4  */
5 
6 #include <linux/kthread.h>
7 #include <linux/delay.h>
8 #include <linux/kmod.h>
9 #include <linux/reboot.h>
10 #include <linux/of.h>
11 #include <linux/slab.h>
12 #include <linux/of_device.h>
13 #include <asm/oplib.h>
14 
15 #include "bbc_i2c.h"
16 #include "max1617.h"
17 
18 #undef ENVCTRL_TRACE
19 
20 /* WARNING: Making changes to this driver is very dangerous.
21  * If you misprogram the sensor chips they can
22  * cut the power on you instantly.
23  */
24 
25 /* Two temperature sensors exist in the SunBLADE-1000 enclosure.
26  * Both are implemented using max1617 i2c devices. Each max1617
27  * monitors 2 temperatures, one for one of the cpu dies and the other
28  * for the ambient temperature.
29  *
30  * The max1617 is capable of being programmed with power-off
31  * temperature values, one low limit and one high limit. These
32  * can be controlled independently for the cpu or ambient temperature.
33  * If a limit is violated, the power is simply shut off. The frequency
34  * with which the max1617 does temperature sampling can be controlled
35  * as well.
36  *
37  * Three fans exist inside the machine, all three are controlled with
38  * an i2c digital to analog converter. There is a fan directed at the
39  * two processor slots, another for the rest of the enclosure, and the
40  * third is for the power supply. The first two fans may be speed
41  * controlled by changing the voltage fed to them. The third fan may
42  * only be completely off or on. The third fan is meant to only be
43  * disabled/enabled when entering/exiting the lowest power-saving
44  * mode of the machine.
45  *
46  * An environmental control kernel thread periodically monitors all
47  * temperature sensors. Based upon the samples it will adjust the
48  * fan speeds to try and keep the system within a certain temperature
49  * range (the goal being to make the fans as quiet as possible without
50  * allowing the system to get too hot).
51  *
52  * If the temperature begins to rise/fall outside of the acceptable
53  * operating range, a periodic warning will be sent to the kernel log.
54  * The fans will be put on full blast to attempt to deal with this
55  * situation. After exceeding the acceptable operating range by a
56  * certain threshold, the kernel thread will shut down the system.
57  * Here, the thread is attempting to shut the machine down cleanly
58  * before the hardware based power-off event is triggered.
59  */
60 
61 /* These settings are in Celsius. We use these defaults only
62  * if we cannot interrogate the cpu-fru SEEPROM.
63  */
64 struct temp_limits {
67 };
68 
/* Default CPU-die temperature limits, in Celsius, one entry per
 * temperature sensor (indexed by the sensor's ->index).
 * NOTE(review): the values presumably follow the member order of
 * struct temp_limits -- high power-off, high shutdown, high warn,
 * low warn, low shutdown, low power-off -- confirm against the
 * struct definition.
 */
69 static struct temp_limits cpu_temp_limits[2] = {
70  { 100, 85, 80, 5, -5, -10 },
71  { 100, 85, 80, 5, -5, -10 },
72 };
73 
/* Default ambient temperature limits, in Celsius, one entry per
 * temperature sensor (indexed by the sensor's ->index).
 * NOTE(review): the values presumably follow the member order of
 * struct temp_limits -- high power-off, high shutdown, high warn,
 * low warn, low shutdown, low power-off -- confirm against the
 * struct definition.
 */
74 static struct temp_limits amb_temp_limits[2] = {
75  { 65, 55, 40, 5, -5, -10 },
76  { 65, 55, 40, 5, -5, -10 },
77 };
78 
/* Driver-wide lists of every attached temperature sensor
 * (struct bbc_cpu_temperature) and fan controller
 * (struct bbc_fan_control), linked through their ->glob_list
 * members.  The monitoring thread walks both lists.
 */
79 static LIST_HEAD(all_temps);
80 static LIST_HEAD(all_fans);
81 
/* Selectors for the three fans.
 * NOTE(review): the writes that use these are not visible in this
 * listing; presumably they are register offsets inside the
 * fan-control i2c device -- confirm.
 */
82 #define CPU_FAN_REG 0xf0
83 #define SYS_FAN_REG 0xf2
84 #define PSUPPLY_FAN_REG 0xf4
85 
/* Lower and upper bounds for the software-tracked cpu/system fan
 * speed values; the fan speed helpers compare against these before
 * changing a speed.
 */
86 #define FAN_SPEED_MIN 0x0c
87 #define FAN_SPEED_MAX 0x3f
88 
/* The power-supply fan can only be fully on or fully off. */
89 #define PSUPPLY_FAN_ON 0x1f
90 #define PSUPPLY_FAN_OFF 0x00
91 
92 static void set_fan_speeds(struct bbc_fan_control *fp)
93 {
94  /* Put the fan speeds into range so we don't mis-program
95  * the hardware.
96  */
97  if (fp->cpu_fan_speed < FAN_SPEED_MIN)
99  if (fp->cpu_fan_speed > FAN_SPEED_MAX)
105 #ifdef ENVCTRL_TRACE
106  printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n",
107  fp->index,
109 #endif
110 
114  (fp->psupply_fan_on ?
117 }
118 
119 static void get_current_temps(struct bbc_cpu_temperature *tp)
120 {
121  tp->prev_amb_temp = tp->curr_amb_temp;
122  bbc_i2c_readb(tp->client,
123  (unsigned char *) &tp->curr_amb_temp,
125  tp->prev_cpu_temp = tp->curr_cpu_temp;
126  bbc_i2c_readb(tp->client,
127  (unsigned char *) &tp->curr_cpu_temp,
129 #ifdef ENVCTRL_TRACE
130  printk("temp%d: cpu(%d C) amb(%d C)\n",
131  tp->index,
132  (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp);
133 #endif
134 }
135 
136 
137 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
138 {
139  static int shutting_down = 0;
140  char *type = "???";
141  s8 val = -1;
142 
143  if (shutting_down != 0)
144  return;
145 
146  if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
147  tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
148  type = "ambient";
149  val = tp->curr_amb_temp;
150  } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
151  tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
152  type = "CPU";
153  val = tp->curr_cpu_temp;
154  }
155 
156  printk(KERN_CRIT "temp%d: Outside of safe %s "
157  "operating temperature, %d C.\n",
158  tp->index, type, val);
159 
160  printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
161 
162  shutting_down = 1;
163  if (orderly_poweroff(true) < 0)
164  printk(KERN_CRIT "envctrl: shutdown execution failed\n");
165 }
166 
/* Minimum time, in jiffies (30 seconds), between two logged
 * temperature warnings.
 */
167 #define WARN_INTERVAL (30 * HZ)
168 
169 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
170 {
171  int ret = 0;
172 
173  if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
174  if (tp->curr_amb_temp >=
175  amb_temp_limits[tp->index].high_warn) {
176  printk(KERN_WARNING "temp%d: "
177  "Above safe ambient operating temperature, %d C.\n",
178  tp->index, (int) tp->curr_amb_temp);
179  ret = 1;
180  } else if (tp->curr_amb_temp <
181  amb_temp_limits[tp->index].low_warn) {
182  printk(KERN_WARNING "temp%d: "
183  "Below safe ambient operating temperature, %d C.\n",
184  tp->index, (int) tp->curr_amb_temp);
185  ret = 1;
186  }
187  if (ret)
188  *last_warn = jiffies;
189  } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn ||
190  tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn)
191  ret = 1;
192 
193  /* Now check the shutdown limits. */
194  if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown ||
195  tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) {
196  do_envctrl_shutdown(tp);
197  ret = 1;
198  }
199 
200  if (ret) {
202  } else if ((tick & (8 - 1)) == 0) {
203  s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10;
204  s8 amb_goal_lo;
205 
206  amb_goal_lo = amb_goal_hi - 3;
207 
208  /* We do not try to avoid 'too cold' events. Basically we
209  * only try to deal with over-heating and fan noise reduction.
210  */
211  if (tp->avg_amb_temp < amb_goal_hi) {
212  if (tp->avg_amb_temp >= amb_goal_lo)
214  else
216  } else {
218  }
219  } else {
221  }
222 }
223 
224 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick)
225 {
226  int ret = 0;
227 
228  if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) {
229  if (tp->curr_cpu_temp >=
230  cpu_temp_limits[tp->index].high_warn) {
231  printk(KERN_WARNING "temp%d: "
232  "Above safe CPU operating temperature, %d C.\n",
233  tp->index, (int) tp->curr_cpu_temp);
234  ret = 1;
235  } else if (tp->curr_cpu_temp <
236  cpu_temp_limits[tp->index].low_warn) {
237  printk(KERN_WARNING "temp%d: "
238  "Below safe CPU operating temperature, %d C.\n",
239  tp->index, (int) tp->curr_cpu_temp);
240  ret = 1;
241  }
242  if (ret)
243  *last_warn = jiffies;
244  } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn ||
245  tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn)
246  ret = 1;
247 
248  /* Now check the shutdown limits. */
249  if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown ||
250  tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) {
251  do_envctrl_shutdown(tp);
252  ret = 1;
253  }
254 
255  if (ret) {
257  } else if ((tick & (8 - 1)) == 0) {
258  s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10;
259  s8 cpu_goal_lo;
260 
261  cpu_goal_lo = cpu_goal_hi - 3;
262 
263  /* We do not try to avoid 'too cold' events. Basically we
264  * only try to deal with over-heating and fan noise reduction.
265  */
266  if (tp->avg_cpu_temp < cpu_goal_hi) {
267  if (tp->avg_cpu_temp >= cpu_goal_lo)
268  tp->fan_todo[FAN_CPU] = FAN_SAME;
269  else
270  tp->fan_todo[FAN_CPU] = FAN_SLOWER;
271  } else {
272  tp->fan_todo[FAN_CPU] = FAN_FASTER;
273  }
274  } else {
275  tp->fan_todo[FAN_CPU] = FAN_SAME;
276  }
277 }
278 
279 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn)
280 {
281  tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2);
282  tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2);
283 
284  analyze_ambient_temp(tp, last_warn, tp->sample_tick);
285  analyze_cpu_temp(tp, last_warn, tp->sample_tick);
286 
287  tp->sample_tick++;
288 }
289 
290 static enum fan_action prioritize_fan_action(int which_fan)
291 {
292  struct bbc_cpu_temperature *tp;
293  enum fan_action decision = FAN_STATE_MAX;
294 
295  /* Basically, prioritize what the temperature sensors
296  * recommend we do, and perform that action on all the
297  * fans.
298  */
299  list_for_each_entry(tp, &all_temps, glob_list) {
300  if (tp->fan_todo[which_fan] == FAN_FULLBLAST) {
301  decision = FAN_FULLBLAST;
302  break;
303  }
304  if (tp->fan_todo[which_fan] == FAN_SAME &&
305  decision != FAN_FASTER)
306  decision = FAN_SAME;
307  else if (tp->fan_todo[which_fan] == FAN_FASTER)
308  decision = FAN_FASTER;
309  else if (decision != FAN_FASTER &&
310  decision != FAN_SAME &&
311  tp->fan_todo[which_fan] == FAN_SLOWER)
312  decision = FAN_SLOWER;
313  }
314  if (decision == FAN_STATE_MAX)
315  decision = FAN_SAME;
316 
317  return decision;
318 }
319 
320 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp)
321 {
322  enum fan_action decision = prioritize_fan_action(FAN_AMBIENT);
323  int ret;
324 
325  if (decision == FAN_SAME)
326  return 0;
327 
328  ret = 1;
329  if (decision == FAN_FULLBLAST) {
330  if (fp->system_fan_speed >= FAN_SPEED_MAX)
331  ret = 0;
332  else
334  } else {
335  if (decision == FAN_FASTER) {
336  if (fp->system_fan_speed >= FAN_SPEED_MAX)
337  ret = 0;
338  else
339  fp->system_fan_speed += 2;
340  } else {
341  int orig_speed = fp->system_fan_speed;
342 
343  if (orig_speed <= FAN_SPEED_MIN ||
344  orig_speed <= (fp->cpu_fan_speed - 3))
345  ret = 0;
346  else
347  fp->system_fan_speed -= 1;
348  }
349  }
350 
351  return ret;
352 }
353 
354 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp)
355 {
356  enum fan_action decision = prioritize_fan_action(FAN_CPU);
357  int ret;
358 
359  if (decision == FAN_SAME)
360  return 0;
361 
362  ret = 1;
363  if (decision == FAN_FULLBLAST) {
364  if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
365  ret = 0;
366  else
368  } else {
369  if (decision == FAN_FASTER) {
370  if (fp->cpu_fan_speed >= FAN_SPEED_MAX)
371  ret = 0;
372  else {
373  fp->cpu_fan_speed += 2;
374  if (fp->system_fan_speed <
375  (fp->cpu_fan_speed - 3))
376  fp->system_fan_speed =
377  fp->cpu_fan_speed - 3;
378  }
379  } else {
380  if (fp->cpu_fan_speed <= FAN_SPEED_MIN)
381  ret = 0;
382  else
383  fp->cpu_fan_speed -= 1;
384  }
385  }
386 
387  return ret;
388 }
389 
/* Re-evaluate the system and cpu fan speeds for one fan controller
 * and program the hardware only if either value changed.
 *
 * The ambient (system) fan is evaluated before the cpu fan.
 */
static void maybe_new_fan_speeds(struct bbc_fan_control *fp)
{
	int ambient_changed = maybe_new_ambient_fan_speed(fp);
	int cpu_changed = maybe_new_cpu_fan_speed(fp);

	if (ambient_changed || cpu_changed)
		set_fan_speeds(fp);
}
400 
401 static void fans_full_blast(void)
402 {
403  struct bbc_fan_control *fp;
404 
405  /* Since we will not be monitoring things anymore, put
406  * the fans on full blast.
407  */
408  list_for_each_entry(fp, &all_fans, glob_list) {
411  fp->psupply_fan_on = 1;
412  set_fan_speeds(fp);
413  }
414 }
415 
/* Polling period of the monitoring thread.
 * NOTE(review): the statement that uses this is not visible in this
 * listing; presumably the unit is milliseconds (5 seconds) -- confirm.
 */
416 #define POLL_INTERVAL (5 * 1000)
/* Jiffies timestamp of the most recent temperature warning; passed
 * by the monitoring thread to the temperature analysis.
 */
417 static unsigned long last_warning_jiffies;
/* The monitoring kernel thread, or NULL when it is not running. */
418 static struct task_struct *kenvctrld_task;
419 
420 static int kenvctrld(void *__unused)
421 {
422  printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n");
423  last_warning_jiffies = jiffies - WARN_INTERVAL;
424  for (;;) {
425  struct bbc_cpu_temperature *tp;
426  struct bbc_fan_control *fp;
427 
429  if (kthread_should_stop())
430  break;
431 
432  list_for_each_entry(tp, &all_temps, glob_list) {
433  get_current_temps(tp);
434  analyze_temps(tp, &last_warning_jiffies);
435  }
436  list_for_each_entry(fp, &all_fans, glob_list)
437  maybe_new_fan_speeds(fp);
438  }
439  printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n");
440 
441  fans_full_blast();
442 
443  return 0;
444 }
445 
446 static void attach_one_temp(struct bbc_i2c_bus *bp, struct platform_device *op,
447  int temp_idx)
448 {
449  struct bbc_cpu_temperature *tp;
450 
451  tp = kzalloc(sizeof(*tp), GFP_KERNEL);
452  if (!tp)
453  return;
454 
455  tp->client = bbc_i2c_attach(bp, op);
456  if (!tp->client) {
457  kfree(tp);
458  return;
459  }
460 
461 
462  tp->index = temp_idx;
463 
464  list_add(&tp->glob_list, &all_temps);
465  list_add(&tp->bp_list, &bp->temps);
466 
467  /* Tell it to convert once every 5 seconds, clear all cfg
468  * bits.
469  */
472 
473  /* Program the hard temperature limits into the chip. */
474  bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff,
476  bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff,
478  bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff,
480  bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff,
482 
483  get_current_temps(tp);
484  tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp;
485  tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp;
486 
488  tp->fan_todo[FAN_CPU] = FAN_SAME;
489 }
490 
491 static void attach_one_fan(struct bbc_i2c_bus *bp, struct platform_device *op,
492  int fan_idx)
493 {
494  struct bbc_fan_control *fp;
495 
496  fp = kzalloc(sizeof(*fp), GFP_KERNEL);
497  if (!fp)
498  return;
499 
500  fp->client = bbc_i2c_attach(bp, op);
501  if (!fp->client) {
502  kfree(fp);
503  return;
504  }
505 
506  fp->index = fan_idx;
507 
508  list_add(&fp->glob_list, &all_fans);
509  list_add(&fp->bp_list, &bp->fans);
510 
511  /* The i2c device controlling the fans is write-only.
512  * So the only way to keep track of the current power
513  * level fed to the fans is via software. Choose half
514  * power for cpu/system and 'on' for the power supply fan
515  * and set it now.
516  */
517  fp->psupply_fan_on = 1;
522 
523  set_fan_speeds(fp);
524 }
525 
526 static void destroy_one_temp(struct bbc_cpu_temperature *tp)
527 {
528  bbc_i2c_detach(tp->client);
529  kfree(tp);
530 }
531 
532 static void destroy_all_temps(struct bbc_i2c_bus *bp)
533 {
534  struct bbc_cpu_temperature *tp, *tpos;
535 
536  list_for_each_entry_safe(tp, tpos, &bp->temps, bp_list) {
537  list_del(&tp->bp_list);
538  list_del(&tp->glob_list);
539  destroy_one_temp(tp);
540  }
541 }
542 
543 static void destroy_one_fan(struct bbc_fan_control *fp)
544 {
545  bbc_i2c_detach(fp->client);
546  kfree(fp);
547 }
548 
549 static void destroy_all_fans(struct bbc_i2c_bus *bp)
550 {
551  struct bbc_fan_control *fp, *fpos;
552 
553  list_for_each_entry_safe(fp, fpos, &bp->fans, bp_list) {
554  list_del(&fp->bp_list);
555  list_del(&fp->glob_list);
556  destroy_one_fan(fp);
557  }
558 }
559 
561 {
562  struct platform_device *op;
563  int temp_index = 0;
564  int fan_index = 0;
565  int devidx = 0;
566 
567  while ((op = bbc_i2c_getdev(bp, devidx++)) != NULL) {
568  if (!strcmp(op->dev.of_node->name, "temperature"))
569  attach_one_temp(bp, op, temp_index++);
570  if (!strcmp(op->dev.of_node->name, "fan-control"))
571  attach_one_fan(bp, op, fan_index++);
572  }
573  if (temp_index != 0 && fan_index != 0) {
574  kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld");
575  if (IS_ERR(kenvctrld_task)) {
576  int err = PTR_ERR(kenvctrld_task);
577 
578  kenvctrld_task = NULL;
579  destroy_all_temps(bp);
580  destroy_all_fans(bp);
581  return err;
582  }
583  }
584 
585  return 0;
586 }
587 
589 {
590  if (kenvctrld_task)
591  kthread_stop(kenvctrld_task);
592 
593  destroy_all_temps(bp);
594  destroy_all_fans(bp);
595 }