2 * Windfarm PowerMac thermal control.
3 * Control loops for machines with SMU and PPC970MP processors.
5 * Copyright (C) 2005 Paul Mackerras, IBM Corp. <paulus@samba.org>
6 * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
8 * Use and redistribute under the terms of the GNU GPL v2.
10 #include <linux/types.h>
11 #include <linux/errno.h>
12 #include <linux/kernel.h>
13 #include <linux/device.h>
14 #include <linux/platform_device.h>
15 #include <linux/reboot.h>
20 #include "windfarm_pid.h"
28 #define DBG(args...) printk(args)
30 #define DBG(args...) do { } while(0)
34 #define DBG_LOTS(args...) printk(args)
36 #define DBG_LOTS(args...) do { } while(0)
39 /* Define this to force the CPU overtemp threshold to 60 degrees, useful for testing
42 #undef HACKED_OVERTEMP
44 /* We currently only handle 2 chips, 4 cores... */
/*
 * Number of CPU fan control slots: three per chip.  The expansion is
 * parenthesised so the macro stays correct when used inside a larger
 * expression (e.g. as a divisor or multiplicand).
 */
#define NR_CPU_FANS	(3 * NR_CHIPS)
49 /* Controls and sensors */
50 static struct wf_sensor *sens_cpu_temp[NR_CORES];
51 static struct wf_sensor *sens_cpu_power[NR_CORES];
52 static struct wf_sensor *hd_temp;
53 static struct wf_sensor *slots_power;
54 static struct wf_sensor *u4_temp;
56 static struct wf_control *cpu_fans[NR_CPU_FANS];
57 static char *cpu_fan_names[NR_CPU_FANS] = {
65 static struct wf_control *cpufreq_clamp;
67 /* Second pump isn't required (and isn't actually present) */
68 #define CPU_FANS_REQD (NR_CPU_FANS - 2)
72 /* We keep a temperature history for average calculation of 180s */
73 #define CPU_TEMP_HIST_SIZE 180
75 /* Scale factor for fan speed, *100 */
76 static int cpu_fan_scale[NR_CPU_FANS] = {
79 97, /* inlet fans run at 97% of exhaust fan */
81 100, /* updated later */
82 100, /* updated later */
85 static struct wf_control *backside_fan;
86 static struct wf_control *slots_fan;
87 static struct wf_control *drive_bay_fan;
90 static struct wf_cpu_pid_state cpu_pid[NR_CORES];
91 static u32 cpu_thist[CPU_TEMP_HIST_SIZE];
92 static int cpu_thist_pt;
93 static s64 cpu_thist_total;
94 static s32 cpu_all_tmax = 100 << 16;
95 static int cpu_last_target;
96 static struct wf_pid_state backside_pid;
97 static int backside_tick;
98 static struct wf_pid_state slots_pid;
99 static int slots_started;
100 static struct wf_pid_state drive_bay_pid;
101 static int drive_bay_tick;
104 static int have_all_controls;
105 static int have_all_sensors;
108 static int failure_state;
109 #define FAILURE_SENSOR 1
110 #define FAILURE_FAN 2
111 #define FAILURE_PERM 4
112 #define FAILURE_LOW_OVERTEMP 8
113 #define FAILURE_HIGH_OVERTEMP 16
/*
 * Overtemp thresholds.  Each value is an offset that is added to
 * cpu_all_tmax before being compared with a temperature, so they use the
 * same "<< 16" scaling as the temperatures in this file.
 *
 *  LOW_OVER_*   thresholds raise FAILURE_LOW_OVERTEMP,
 *  HIGH_OVER_*  thresholds raise FAILURE_HIGH_OVERTEMP,
 *  *_AVERAGE    is compared with the history average,
 *  *_IMMEDIATE  is compared with the current reading,
 *  LOW_OVER_CLEAR is the level the current reading must drop below
 *               before the low overtemp condition is cleared.
 *
 * LOW_OVER_CLEAR is written as -(10 << 16) rather than (-10) << 16 because
 * left-shifting a negative value is undefined in ISO C; the value is the
 * same.  HIGH_OVER_IMMEDIATE used to be defined twice; it is defined once.
 */
#define LOW_OVER_AVERAGE	0
#define LOW_OVER_IMMEDIATE	(10 << 16)
#define LOW_OVER_CLEAR		(-(10 << 16))
#define HIGH_OVER_AVERAGE	(10 << 16)
#define HIGH_OVER_IMMEDIATE	(14 << 16)
124 /* Implementation... */
/*
 * Set up the PID control state for one CPU core (cpu_pid[cpu]).
 *
 * Tmax comes from the FVT data in SAT partition 0xC4 + core, with a
 * default of 95 degrees C; it is also compared against the global
 * cpu_all_tmax used for overtemp checks.  The PID tuning values come from
 * SAT partition 0xC8 + core.  The loop runs with a 1 second interval and
 * takes its speed limits from cpu_fans[0].
 *
 * Callers treat a negative return value as failure.
 */
125 static int create_cpu_loop(int cpu)
129 struct smu_sdbp_header *hdr;
130 struct smu_sdbp_cpupiddata *piddata;
131 struct wf_cpu_pid_param pid;
132 struct wf_control *main_fan = cpu_fans[0];
136 /* Get FVT params to get Tmax; if not found, assume default */
137 hdr = smu_sat_get_sdb_partition(chip, 0xC4 + core, NULL);
139 struct smu_sdbp_fvt *fvt = (struct smu_sdbp_fvt *)&hdr[1];
140 tmax = fvt->maxtemp << 16;
142 tmax = 95 << 16; /* default to 95 degrees C */
144 /* We keep a global tmax for overtemp calculations */
145 if (tmax < cpu_all_tmax)
150 /* Get PID params from the appropriate SAT */
151 hdr = smu_sat_get_sdb_partition(chip, 0xC8 + core, NULL);
153 printk(KERN_WARNING"windfarm: can't get CPU PID fan config\n");
156 piddata = (struct smu_sdbp_cpupiddata *)&hdr[1];
159 * Darwin has a minimum fan speed of 1000 rpm for the 4-way and
160 * 515 for the 2-way. That appears to be overkill, so for now,
161 * impose a minimum of 750 or 515.
163 fmin = (nr_cores > 2) ? 750 : 515;
165 /* Initialize PID loop */
166 pid.interval = 1; /* seconds */
167 pid.history_len = piddata->history_len;
168 pid.gd = piddata->gd;
169 pid.gp = piddata->gp;
170 pid.gr = piddata->gr / piddata->history_len;
171 pid.pmaxadj = (piddata->max_power << 16) - (piddata->power_adj << 8);
172 pid.ttarget = tmax - (piddata->target_temp_delta << 16);
174 pid.min = main_fan->ops->get_min(main_fan);
175 pid.max = main_fan->ops->get_max(main_fan);
179 wf_cpu_pid_init(&cpu_pid[cpu], &pid);
/*
 * Drive the cpufreq clamp and every entry of cpu_fans[] to its maximum
 * setting through wf_control_set_max().
 */
186 static void cpu_max_all_fans(void)
190 /* We max all CPU fans in case of a sensor error. We also do the
191 * cpufreq clamping now, even if it's supposedly done later by the
192 * generic code anyway, we do it earlier here to react faster
195 wf_control_set_max(cpufreq_clamp);
196 for (i = 0; i < NR_CPU_FANS; ++i)
198 wf_control_set_max(cpu_fans[i]);
/*
 * Update the overtemp bits of failure_state from a new temperature sample.
 *
 * @temp is compared directly against cpu_all_tmax plus the *_IMMEDIATE
 * offsets, then stored in the cpu_thist[] ring buffer so that the average
 * over CPU_TEMP_HIST_SIZE samples can be compared against the *_AVERAGE
 * offsets.  A low overtemp condition is cleared once @temp drops below
 * cpu_all_tmax + LOW_OVER_CLEAR.
 *
 * Returns failure_state masked with FAILURE_LOW_OVERTEMP and
 * FAILURE_HIGH_OVERTEMP, i.e. non-zero while an overtemp is active.
 */
201 static int cpu_check_overtemp(s32 temp)
206 /* First check for immediate overtemps */
207 if (temp >= (cpu_all_tmax + LOW_OVER_IMMEDIATE)) {
208 new_state |= FAILURE_LOW_OVERTEMP;
209 if ((failure_state & FAILURE_LOW_OVERTEMP) == 0)
210 printk(KERN_ERR "windfarm: Overtemp due to immediate CPU"
213 if (temp >= (cpu_all_tmax + HIGH_OVER_IMMEDIATE)) {
214 new_state |= FAILURE_HIGH_OVERTEMP;
215 if ((failure_state & FAILURE_HIGH_OVERTEMP) == 0)
216 printk(KERN_ERR "windfarm: Critical overtemp due to"
217 " immediate CPU temperature !\n");
220 /* We calculate a history of max temperatures and use that for the
221 * overtemp management
223 t_old = cpu_thist[cpu_thist_pt];
224 cpu_thist[cpu_thist_pt] = temp;
225 cpu_thist_pt = (cpu_thist_pt + 1) % CPU_TEMP_HIST_SIZE;
226 cpu_thist_total -= t_old;
227 cpu_thist_total += temp;
228 t_avg = cpu_thist_total / CPU_TEMP_HIST_SIZE;
230 DBG_LOTS("t_avg = %d.%03d (out: %d.%03d, in: %d.%03d)\n",
231 FIX32TOPRINT(t_avg), FIX32TOPRINT(t_old), FIX32TOPRINT(temp));
233 /* Now check for average overtemps */
234 if (t_avg >= (cpu_all_tmax + LOW_OVER_AVERAGE)) {
235 new_state |= FAILURE_LOW_OVERTEMP;
236 if ((failure_state & FAILURE_LOW_OVERTEMP) == 0)
237 printk(KERN_ERR "windfarm: Overtemp due to average CPU"
240 if (t_avg >= (cpu_all_tmax + HIGH_OVER_AVERAGE)) {
241 new_state |= FAILURE_HIGH_OVERTEMP;
242 if ((failure_state & FAILURE_HIGH_OVERTEMP) == 0)
243 printk(KERN_ERR "windfarm: Critical overtemp due to"
244 " average CPU temperature !\n");
247 /* Now handle overtemp conditions. We don't currently use the windfarm
248 * overtemp handling core as it's not fully suited to the needs of these
249 * new machines. This will be fixed later.
252 /* High overtemp -> immediate shutdown */
253 if (new_state & FAILURE_HIGH_OVERTEMP)
255 if ((failure_state & new_state) != new_state)
257 failure_state |= new_state;
258 } else if ((failure_state & FAILURE_LOW_OVERTEMP) &&
259 (temp < (cpu_all_tmax + LOW_OVER_CLEAR))) {
260 printk(KERN_ERR "windfarm: Overtemp condition cleared !\n");
261 failure_state &= ~FAILURE_LOW_OVERTEMP;
264 return failure_state & (FAILURE_LOW_OVERTEMP | FAILURE_HIGH_OVERTEMP);
/*
 * One iteration of the CPU fan control loop.
 *
 * For each of the nr_cores cores this reads the temperature and power
 * sensors, runs that core's PID state through wf_cpu_pid_run() and keeps
 * track of the highest temperature and the largest last_delta seen.
 * Sensor read problems are recorded as FAILURE_SENSOR.
 *
 * The resulting target may not drop by more than 20 per iteration; it is
 * stored in cpu_last_target and in every cpu_pid[].target.  The highest
 * temperature is handed to cpu_check_overtemp(), and each CPU fan is then
 * set to target * cpu_fan_scale[i] / 100.  Fan problems are recorded as
 * FAILURE_FAN.
 */
267 static void cpu_fans_tick(void)
270 s32 greatest_delta = 0;
271 s32 temp, power, t_max = 0;
272 int i, t, target = 0;
273 struct wf_sensor *sr;
274 struct wf_control *ct;
275 struct wf_cpu_pid_state *sp;
277 DBG_LOTS(KERN_DEBUG);
278 for (cpu = 0; cpu < nr_cores; ++cpu) {
279 /* Get CPU core temperature */
280 sr = sens_cpu_temp[cpu];
281 err = sr->ops->get_value(sr, &temp);
284 printk(KERN_WARNING "windfarm: CPU %d temperature "
285 "sensor error %d\n", cpu, err);
286 failure_state |= FAILURE_SENSOR;
291 /* Keep track of highest temp */
292 t_max = max(t_max, temp);
295 sr = sens_cpu_power[cpu];
296 err = sr->ops->get_value(sr, &power);
299 printk(KERN_WARNING "windfarm: CPU %d power "
300 "sensor error %d\n", cpu, err);
301 failure_state |= FAILURE_SENSOR;
308 t = wf_cpu_pid_run(sp, power, temp);
310 if (cpu == 0 || sp->last_delta > greatest_delta) {
311 greatest_delta = sp->last_delta;
314 DBG_LOTS("[%d] P=%d.%.3d T=%d.%.3d ",
315 cpu, FIX32TOPRINT(power), FIX32TOPRINT(temp));
317 DBG_LOTS("fans = %d, t_max = %d.%03d\n", target, FIX32TOPRINT(t_max));
319 /* Darwin limits decrease to 20 per iteration */
320 if (target < (cpu_last_target - 20))
321 target = cpu_last_target - 20;
322 cpu_last_target = target;
323 for (cpu = 0; cpu < nr_cores; ++cpu)
324 cpu_pid[cpu].target = target;
326 /* Handle possible overtemps */
327 if (cpu_check_overtemp(t_max))
331 for (i = 0; i < NR_CPU_FANS; ++i) {
335 err = ct->ops->set_value(ct, target * cpu_fan_scale[i] / 100);
337 printk(KERN_WARNING "windfarm: fan %s reports "
338 "error %d\n", ct->name, err);
339 failure_state |= FAILURE_FAN;
345 /* Backside/U4 fan */
346 static struct wf_pid_param backside_param = {
/*
 * One iteration of the backside (U4) fan control loop.
 *
 * Needs both backside_fan and u4_temp.  On the first call the fan's
 * min/max limits are copied into backside_param and the PID state is
 * initialised.  backside_tick is a countdown reloaded from
 * backside_pid.param.interval, so the PID only runs when it expires.
 *
 * The U4 temperature is fed to wf_pid_run() and the result is written to
 * the fan.  A sensor problem sets FAILURE_SENSOR and maxes the fan; a fan
 * problem sets FAILURE_FAN.
 */
356 static void backside_fan_tick(void)
362 if (!backside_fan || !u4_temp)
364 if (!backside_tick) {
365 /* first time; initialize things */
366 printk(KERN_INFO "windfarm: Backside control loop started.\n");
367 backside_param.min = backside_fan->ops->get_min(backside_fan);
368 backside_param.max = backside_fan->ops->get_max(backside_fan);
369 wf_pid_init(&backside_pid, &backside_param);
372 if (--backside_tick > 0)
374 backside_tick = backside_pid.param.interval;
376 err = u4_temp->ops->get_value(u4_temp, &temp);
378 printk(KERN_WARNING "windfarm: U4 temp sensor error %d\n",
380 failure_state |= FAILURE_SENSOR;
381 wf_control_set_max(backside_fan);
384 speed = wf_pid_run(&backside_pid, temp);
385 DBG_LOTS("backside PID temp=%d.%.3d speed=%d\n",
386 FIX32TOPRINT(temp), speed);
388 err = backside_fan->ops->set_value(backside_fan, speed);
390 printk(KERN_WARNING "windfarm: backside fan error %d\n", err);
391 failure_state |= FAILURE_FAN;
396 static struct wf_pid_param drive_bay_prm = {
/*
 * One iteration of the drive bay fan control loop.
 *
 * Needs both drive_bay_fan and hd_temp.  On the first call the fan's
 * min/max limits are copied into drive_bay_prm and the PID state is
 * initialised.  drive_bay_tick is a countdown reloaded from
 * drive_bay_pid.param.interval, so the PID only runs when it expires.
 *
 * The drive temperature is fed to wf_pid_run() and the result is written
 * to the fan.  A sensor problem sets FAILURE_SENSOR and maxes the fan; a
 * fan problem sets FAILURE_FAN.
 */
406 static void drive_bay_fan_tick(void)
412 if (!drive_bay_fan || !hd_temp)
414 if (!drive_bay_tick) {
415 /* first time; initialize things */
416 printk(KERN_INFO "windfarm: Drive bay control loop started.\n");
417 drive_bay_prm.min = drive_bay_fan->ops->get_min(drive_bay_fan);
418 drive_bay_prm.max = drive_bay_fan->ops->get_max(drive_bay_fan);
419 wf_pid_init(&drive_bay_pid, &drive_bay_prm);
422 if (--drive_bay_tick > 0)
424 drive_bay_tick = drive_bay_pid.param.interval;
426 err = hd_temp->ops->get_value(hd_temp, &temp);
428 printk(KERN_WARNING "windfarm: drive bay temp sensor "
430 failure_state |= FAILURE_SENSOR;
431 wf_control_set_max(drive_bay_fan);
434 speed = wf_pid_run(&drive_bay_pid, temp);
435 DBG_LOTS("drive_bay PID temp=%d.%.3d speed=%d\n",
436 FIX32TOPRINT(temp), speed);
438 err = drive_bay_fan->ops->set_value(drive_bay_fan, speed);
440 printk(KERN_WARNING "windfarm: drive bay fan error %d\n", err);
441 failure_state |= FAILURE_FAN;
445 /* PCI slots area fan */
446 /* This makes the fan speed proportional to the power consumed */
447 static struct wf_pid_param slots_param = {
/*
 * One iteration of the PCI slots fan control loop.
 *
 * Needs both slots_fan and slots_power.  The PID state is initialised
 * from slots_param the first time through (tracked by slots_started).
 * The slots power reading is fed to wf_pid_run() and the result is
 * written to the fan.  A sensor problem sets FAILURE_SENSOR and maxes the
 * fan; a fan problem sets FAILURE_FAN.
 */
458 static void slots_fan_tick(void)
464 if (!slots_fan || !slots_power)
466 if (!slots_started) {
467 /* first time; initialize things */
468 printk(KERN_INFO "windfarm: Slots control loop started.\n");
469 wf_pid_init(&slots_pid, &slots_param);
473 err = slots_power->ops->get_value(slots_power, &power);
475 printk(KERN_WARNING "windfarm: slots power sensor error %d\n",
477 failure_state |= FAILURE_SENSOR;
478 wf_control_set_max(slots_fan);
481 speed = wf_pid_run(&slots_pid, power);
482 DBG_LOTS("slots PID power=%d.%.3d speed=%d\n",
483 FIX32TOPRINT(power), speed);
485 err = slots_fan->ops->set_value(slots_fan, speed);
487 printk(KERN_WARNING "windfarm: slots fan error %d\n", err);
488 failure_state |= FAILURE_FAN;
/*
 * Put every control into its maximum setting: the cpufreq clamp, all
 * CPU fans, and the backside, slots and drive bay fans.
 */
492 static void set_fail_state(void)
497 wf_control_set_max(cpufreq_clamp);
498 for (i = 0; i < NR_CPU_FANS; ++i)
500 wf_control_set_max(cpu_fans[i]);
502 wf_control_set_max(backside_fan);
504 wf_control_set_max(slots_fan);
506 wf_control_set_max(drive_bay_fan);
/*
 * Periodic entry point of the PowerMac11,2 control code.
 *
 * Creates the per-core CPU control loops via create_cpu_loop(); a failure
 * there sets failure_state to FAILURE_PERM, and a permanent failure makes
 * the tick bail out.  Otherwise the previous failure_state is saved in
 * last_failure, every bit except FAILURE_LOW_OVERTEMP is cleared, and the
 * individual control loops are run.
 *
 * The cpufreq clamp is set to max when a failure newly appears and back
 * to min when all failures have gone away.
 */
509 static void pm112_tick(void)
515 printk(KERN_INFO "windfarm: CPUs control loops started.\n");
516 for (i = 0; i < nr_cores; ++i) {
517 if (create_cpu_loop(i) < 0) {
518 failure_state = FAILURE_PERM;
523 DBG_LOTS("cpu_all_tmax=%d.%03d\n", FIX32TOPRINT(cpu_all_tmax));
525 #ifdef HACKED_OVERTEMP
526 cpu_all_tmax = 60 << 16;
530 /* Permanent failure, bail out */
531 if (failure_state & FAILURE_PERM)
533 /* Clear all failure bits except low overtemp which will be eventually
534 * cleared by the control loop itself
536 last_failure = failure_state;
537 failure_state &= FAILURE_LOW_OVERTEMP;
541 drive_bay_fan_tick();
543 DBG_LOTS("last_failure: 0x%x, failure_state: %x\n",
544 last_failure, failure_state);
546 /* Check for failures. Any failure causes cpufreq clamping */
547 if (failure_state && last_failure == 0 && cpufreq_clamp)
548 wf_control_set_max(cpufreq_clamp);
549 if (failure_state == 0 && last_failure && cpufreq_clamp)
550 wf_control_set_min(cpufreq_clamp);
552 /* That's it for now, we might want to deal with other failures
553 * differently in the future though
/*
 * Handle a newly registered windfarm control.
 *
 * The control is matched by name against "cpufreq-clamp", the entries of
 * cpu_fan_names[], "backside-fan", "slots-fan" and "drive-bay-fan".  A
 * slot that is still empty is claimed with wf_get_control().
 *
 * Once the first CPU_FANS_REQD CPU fans are present, a percentage is
 * computed for each pump (FIRST_PUMP..LAST_PUMP) as its maximum relative
 * to the maximum of cpu_fans[0], and have_all_controls is set.
 */
557 static void pm112_new_control(struct wf_control *ct)
561 if (cpufreq_clamp == NULL && !strcmp(ct->name, "cpufreq-clamp")) {
562 if (wf_get_control(ct) == 0)
566 for (i = 0; i < NR_CPU_FANS; ++i) {
567 if (!strcmp(ct->name, cpu_fan_names[i])) {
568 if (cpu_fans[i] == NULL && wf_get_control(ct) == 0)
573 if (i >= NR_CPU_FANS) {
574 /* not a CPU fan, try the others */
575 if (!strcmp(ct->name, "backside-fan")) {
576 if (backside_fan == NULL && wf_get_control(ct) == 0)
578 } else if (!strcmp(ct->name, "slots-fan")) {
579 if (slots_fan == NULL && wf_get_control(ct) == 0)
581 } else if (!strcmp(ct->name, "drive-bay-fan")) {
582 if (drive_bay_fan == NULL && wf_get_control(ct) == 0)
588 for (i = 0; i < CPU_FANS_REQD; ++i)
589 if (cpu_fans[i] == NULL)
592 /* work out pump scaling factors */
593 max_exhaust = cpu_fans[0]->ops->get_max(cpu_fans[0]);
594 for (i = FIRST_PUMP; i <= LAST_PUMP; ++i)
595 if ((ct = cpu_fans[i]) != NULL)
597 ct->ops->get_max(ct) * 100 / max_exhaust;
599 have_all_controls = 1;
/*
 * Handle a newly registered windfarm sensor.
 *
 * "cpu-temp-N" and "cpu-power-N" (N a single digit below NR_CORES) fill
 * sens_cpu_temp[N] and sens_cpu_power[N]; "hd-temp", "slots-power" and
 * "backside-temp" are matched against hd_temp, slots_power and u4_temp.
 * A slot that is still empty is claimed with wf_get_sensor().
 *
 * have_all_sensors is set once every core below nr_cores has both its
 * temperature and power sensor.
 */
602 static void pm112_new_sensor(struct wf_sensor *sr)
606 if (!strncmp(sr->name, "cpu-temp-", 9)) {
607 i = sr->name[9] - '0';
608 if (sr->name[10] == 0 && i < NR_CORES &&
609 sens_cpu_temp[i] == NULL && wf_get_sensor(sr) == 0)
610 sens_cpu_temp[i] = sr;
612 } else if (!strncmp(sr->name, "cpu-power-", 10)) {
613 i = sr->name[10] - '0';
614 if (sr->name[11] == 0 && i < NR_CORES &&
615 sens_cpu_power[i] == NULL && wf_get_sensor(sr) == 0)
616 sens_cpu_power[i] = sr;
617 } else if (!strcmp(sr->name, "hd-temp")) {
618 if (hd_temp == NULL && wf_get_sensor(sr) == 0)
620 } else if (!strcmp(sr->name, "slots-power")) {
621 if (slots_power == NULL && wf_get_sensor(sr) == 0)
623 } else if (!strcmp(sr->name, "backside-temp")) {
624 if (u4_temp == NULL && wf_get_sensor(sr) == 0)
629 /* check if we have all the sensors we need */
630 for (i = 0; i < nr_cores; ++i)
631 if (sens_cpu_temp[i] == NULL || sens_cpu_power[i] == NULL)
634 have_all_sensors = 1;
/*
 * Windfarm notifier callback.
 *
 * WF_EVENT_NEW_SENSOR and WF_EVENT_NEW_CONTROL are forwarded to
 * pm112_new_sensor() and pm112_new_control() with @data as the new
 * object.  A further case only acts once both have_all_controls and
 * have_all_sensors are set.
 */
637 static int pm112_wf_notify(struct notifier_block *self,
638 unsigned long event, void *data)
641 case WF_EVENT_NEW_SENSOR:
642 pm112_new_sensor(data);
644 case WF_EVENT_NEW_CONTROL:
645 pm112_new_control(data);
648 if (have_all_controls && have_all_sensors)
654 static struct notifier_block pm112_events = {
655 .notifier_call = pm112_wf_notify,
/* Platform driver probe: register pm112_events as a windfarm client. */
658 static int wf_pm112_probe(struct platform_device *dev)
660 wf_register_client(&pm112_events);
/*
 * Platform driver remove: unregister pm112_events from windfarm.  As the
 * comment below notes, the sensors and controls claimed earlier are not
 * released here.
 */
664 static int wf_pm112_remove(struct platform_device *dev)
666 wf_unregister_client(&pm112_events);
667 /* should release all sensors and controls */
671 static struct platform_driver wf_pm112_driver = {
672 .probe = wf_pm112_probe,
673 .remove = wf_pm112_remove,
/*
 * Module init.
 *
 * Only proceeds on machines compatible with "PowerMac11,2".  Counts the
 * device-tree nodes of type "cpu", requests the windfarm sensor/control
 * helper modules and registers the platform driver.
 */
679 static int __init wf_pm112_init(void)
681 struct device_node *cpu;
683 if (!of_machine_is_compatible("PowerMac11,2"))
686 /* Count the number of CPU cores */
688 for_each_node_by_type(cpu, "cpu")
691 printk(KERN_INFO "windfarm: initializing for dual-core desktop G5\n");
694 request_module("windfarm_smu_controls");
695 request_module("windfarm_smu_sensors");
696 request_module("windfarm_smu_sat");
697 request_module("windfarm_lm75_sensor");
698 request_module("windfarm_max6690_sensor");
699 request_module("windfarm_cpufreq_clamp");
703 platform_driver_register(&wf_pm112_driver);
/* Module exit: unregister the platform driver. */
707 static void __exit wf_pm112_exit(void)
709 platform_driver_unregister(&wf_pm112_driver);
712 module_init(wf_pm112_init);
713 module_exit(wf_pm112_exit);
715 MODULE_AUTHOR("Paul Mackerras <paulus@samba.org>");
716 MODULE_DESCRIPTION("Thermal control for PowerMac11,2");
717 MODULE_LICENSE("GPL");
718 MODULE_ALIAS("platform:windfarm");