1 /* $Id: bbc_envctrl.c,v 1.4 2001/04/06 16:48:08 davem Exp $ 2 * bbc_envctrl.c: UltraSPARC-III environment control driver. 3 * 4 * Copyright (C) 2001 David S. Miller (davem@redhat.com) 5 */ 6 7 #define __KERNEL_SYSCALLS__ 8 9 #include <linux/kernel.h> 10 #include <linux/kthread.h> 11 #include <linux/sched.h> 12 #include <linux/slab.h> 13 #include <linux/delay.h> 14 #include <asm/oplib.h> 15 #include <asm/ebus.h> 16 static int errno; 17 #include <asm/unistd.h> 18 19 #include "bbc_i2c.h" 20 #include "max1617.h" 21 22 #undef ENVCTRL_TRACE 23 24 /* WARNING: Making changes to this driver is very dangerous. 25 * If you misprogram the sensor chips they can 26 * cut the power on you instantly. 27 */ 28 29 /* Two temperature sensors exist in the SunBLADE-1000 enclosure. 30 * Both are implemented using max1617 i2c devices. Each max1617 31 * monitors 2 temperatures, one for one of the cpu dies and the other 32 * for the ambient temperature. 33 * 34 * The max1617 is capable of being programmed with power-off 35 * temperature values, one low limit and one high limit. These 36 * can be controlled independently for the cpu or ambient temperature. 37 * If a limit is violated, the power is simply shut off. The frequency 38 * with which the max1617 does temperature sampling can be controlled 39 * as well. 40 * 41 * Three fans exist inside the machine, all three are controlled with 42 * an i2c digital to analog converter. There is a fan directed at the 43 * two processor slots, another for the rest of the enclosure, and the 44 * third is for the power supply. The first two fans may be speed 45 * controlled by changing the voltage fed to them. The third fan may 46 * only be completely off or on. The third fan is meant to only be 47 * disabled/enabled when entering/exiting the lowest power-saving 48 * mode of the machine. 49 * 50 * An environmental control kernel thread periodically monitors all 51 * temperature sensors. Based upon the samples it will adjust the 52 * fan speeds to try and keep the system within a certain temperature 53 * range (the goal being to make the fans as quiet as possible without 54 * allowing the system to get too hot). 55 * 56 * If the temperature begins to rise/fall outside of the acceptable 57 * operating range, a periodic warning will be sent to the kernel log. 58 * The fans will be put on full blast to attempt to deal with this 59 * situation. After exceeding the acceptable operating range by a 60 * certain threshold, the kernel thread will shut down the system. 61 * Here, the thread is attempting to shut the machine down cleanly 62 * before the hardware based power-off event is triggered. 63 */ 64 65 /* These settings are in Celsius. We use these defaults only 66 * if we cannot interrogate the cpu-fru SEEPROM. 67 */ 68 struct temp_limits { 69 s8 high_pwroff, high_shutdown, high_warn; 70 s8 low_warn, low_shutdown, low_pwroff; 71 }; 72 73 static struct temp_limits cpu_temp_limits[2] = { 74 { 100, 85, 80, 5, -5, -10 }, 75 { 100, 85, 80, 5, -5, -10 }, 76 }; 77 78 static struct temp_limits amb_temp_limits[2] = { 79 { 65, 55, 40, 5, -5, -10 }, 80 { 65, 55, 40, 5, -5, -10 }, 81 }; 82 83 enum fan_action { FAN_SLOWER, FAN_SAME, FAN_FASTER, FAN_FULLBLAST, FAN_STATE_MAX }; 84 85 struct bbc_cpu_temperature { 86 struct bbc_cpu_temperature *next; 87 88 struct bbc_i2c_client *client; 89 int index; 90 91 /* Current readings, and history. */ 92 s8 curr_cpu_temp; 93 s8 curr_amb_temp; 94 s8 prev_cpu_temp; 95 s8 prev_amb_temp; 96 s8 avg_cpu_temp; 97 s8 avg_amb_temp; 98 99 int sample_tick; 100 101 enum fan_action fan_todo[2]; 102 #define FAN_AMBIENT 0 103 #define FAN_CPU 1 104 }; 105 106 struct bbc_cpu_temperature *all_bbc_temps; 107 108 struct bbc_fan_control { 109 struct bbc_fan_control *next; 110 111 struct bbc_i2c_client *client; 112 int index; 113 114 int psupply_fan_on; 115 int cpu_fan_speed; 116 int system_fan_speed; 117 }; 118 119 struct bbc_fan_control *all_bbc_fans; 120 121 #define CPU_FAN_REG 0xf0 122 #define SYS_FAN_REG 0xf2 123 #define PSUPPLY_FAN_REG 0xf4 124 125 #define FAN_SPEED_MIN 0x0c 126 #define FAN_SPEED_MAX 0x3f 127 128 #define PSUPPLY_FAN_ON 0x1f 129 #define PSUPPLY_FAN_OFF 0x00 130 131 static void set_fan_speeds(struct bbc_fan_control *fp) 132 { 133 /* Put temperatures into range so we don't mis-program 134 * the hardware. 135 */ 136 if (fp->cpu_fan_speed < FAN_SPEED_MIN) 137 fp->cpu_fan_speed = FAN_SPEED_MIN; 138 if (fp->cpu_fan_speed > FAN_SPEED_MAX) 139 fp->cpu_fan_speed = FAN_SPEED_MAX; 140 if (fp->system_fan_speed < FAN_SPEED_MIN) 141 fp->system_fan_speed = FAN_SPEED_MIN; 142 if (fp->system_fan_speed > FAN_SPEED_MAX) 143 fp->system_fan_speed = FAN_SPEED_MAX; 144 #ifdef ENVCTRL_TRACE 145 printk("fan%d: Changed fan speed to cpu(%02x) sys(%02x)\n", 146 fp->index, 147 fp->cpu_fan_speed, fp->system_fan_speed); 148 #endif 149 150 bbc_i2c_writeb(fp->client, fp->cpu_fan_speed, CPU_FAN_REG); 151 bbc_i2c_writeb(fp->client, fp->system_fan_speed, SYS_FAN_REG); 152 bbc_i2c_writeb(fp->client, 153 (fp->psupply_fan_on ? 154 PSUPPLY_FAN_ON : PSUPPLY_FAN_OFF), 155 PSUPPLY_FAN_REG); 156 } 157 158 static void get_current_temps(struct bbc_cpu_temperature *tp) 159 { 160 tp->prev_amb_temp = tp->curr_amb_temp; 161 bbc_i2c_readb(tp->client, 162 (unsigned char *) &tp->curr_amb_temp, 163 MAX1617_AMB_TEMP); 164 tp->prev_cpu_temp = tp->curr_cpu_temp; 165 bbc_i2c_readb(tp->client, 166 (unsigned char *) &tp->curr_cpu_temp, 167 MAX1617_CPU_TEMP); 168 #ifdef ENVCTRL_TRACE 169 printk("temp%d: cpu(%d C) amb(%d C)\n", 170 tp->index, 171 (int) tp->curr_cpu_temp, (int) tp->curr_amb_temp); 172 #endif 173 } 174 175 176 static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp) 177 { 178 static int shutting_down = 0; 179 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 180 char *argv[] = { "/sbin/shutdown", "-h", "now", NULL }; 181 char *type = "???"; 182 s8 val = -1; 183 184 if (shutting_down != 0) 185 return; 186 187 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 188 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 189 type = "ambient"; 190 val = tp->curr_amb_temp; 191 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 192 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 193 type = "CPU"; 194 val = tp->curr_cpu_temp; 195 } 196 197 printk(KERN_CRIT "temp%d: Outside of safe %s " 198 "operating temperature, %d C.\n", 199 tp->index, type, val); 200 201 printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n"); 202 203 shutting_down = 1; 204 if (execve("/sbin/shutdown", argv, envp) < 0) 205 printk(KERN_CRIT "envctrl: shutdown execution failed\n"); 206 } 207 208 #define WARN_INTERVAL (30 * HZ) 209 210 static void analyze_ambient_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 211 { 212 int ret = 0; 213 214 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 215 if (tp->curr_amb_temp >= 216 amb_temp_limits[tp->index].high_warn) { 217 printk(KERN_WARNING "temp%d: " 218 "Above safe ambient operating temperature, %d C.\n", 219 tp->index, (int) tp->curr_amb_temp); 220 ret = 1; 221 } else if (tp->curr_amb_temp < 222 amb_temp_limits[tp->index].low_warn) { 223 printk(KERN_WARNING "temp%d: " 224 "Below safe ambient operating temperature, %d C.\n", 225 tp->index, (int) tp->curr_amb_temp); 226 ret = 1; 227 } 228 if (ret) 229 *last_warn = jiffies; 230 } else if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_warn || 231 tp->curr_amb_temp < amb_temp_limits[tp->index].low_warn) 232 ret = 1; 233 234 /* Now check the shutdown limits. */ 235 if (tp->curr_amb_temp >= amb_temp_limits[tp->index].high_shutdown || 236 tp->curr_amb_temp < amb_temp_limits[tp->index].low_shutdown) { 237 do_envctrl_shutdown(tp); 238 ret = 1; 239 } 240 241 if (ret) { 242 tp->fan_todo[FAN_AMBIENT] = FAN_FULLBLAST; 243 } else if ((tick & (8 - 1)) == 0) { 244 s8 amb_goal_hi = amb_temp_limits[tp->index].high_warn - 10; 245 s8 amb_goal_lo; 246 247 amb_goal_lo = amb_goal_hi - 3; 248 249 /* We do not try to avoid 'too cold' events. Basically we 250 * only try to deal with over-heating and fan noise reduction. 251 */ 252 if (tp->avg_amb_temp < amb_goal_hi) { 253 if (tp->avg_amb_temp >= amb_goal_lo) 254 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 255 else 256 tp->fan_todo[FAN_AMBIENT] = FAN_SLOWER; 257 } else { 258 tp->fan_todo[FAN_AMBIENT] = FAN_FASTER; 259 } 260 } else { 261 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 262 } 263 } 264 265 static void analyze_cpu_temp(struct bbc_cpu_temperature *tp, unsigned long *last_warn, int tick) 266 { 267 int ret = 0; 268 269 if (time_after(jiffies, (*last_warn + WARN_INTERVAL))) { 270 if (tp->curr_cpu_temp >= 271 cpu_temp_limits[tp->index].high_warn) { 272 printk(KERN_WARNING "temp%d: " 273 "Above safe CPU operating temperature, %d C.\n", 274 tp->index, (int) tp->curr_cpu_temp); 275 ret = 1; 276 } else if (tp->curr_cpu_temp < 277 cpu_temp_limits[tp->index].low_warn) { 278 printk(KERN_WARNING "temp%d: " 279 "Below safe CPU operating temperature, %d C.\n", 280 tp->index, (int) tp->curr_cpu_temp); 281 ret = 1; 282 } 283 if (ret) 284 *last_warn = jiffies; 285 } else if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_warn || 286 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_warn) 287 ret = 1; 288 289 /* Now check the shutdown limits. */ 290 if (tp->curr_cpu_temp >= cpu_temp_limits[tp->index].high_shutdown || 291 tp->curr_cpu_temp < cpu_temp_limits[tp->index].low_shutdown) { 292 do_envctrl_shutdown(tp); 293 ret = 1; 294 } 295 296 if (ret) { 297 tp->fan_todo[FAN_CPU] = FAN_FULLBLAST; 298 } else if ((tick & (8 - 1)) == 0) { 299 s8 cpu_goal_hi = cpu_temp_limits[tp->index].high_warn - 10; 300 s8 cpu_goal_lo; 301 302 cpu_goal_lo = cpu_goal_hi - 3; 303 304 /* We do not try to avoid 'too cold' events. Basically we 305 * only try to deal with over-heating and fan noise reduction. 306 */ 307 if (tp->avg_cpu_temp < cpu_goal_hi) { 308 if (tp->avg_cpu_temp >= cpu_goal_lo) 309 tp->fan_todo[FAN_CPU] = FAN_SAME; 310 else 311 tp->fan_todo[FAN_CPU] = FAN_SLOWER; 312 } else { 313 tp->fan_todo[FAN_CPU] = FAN_FASTER; 314 } 315 } else { 316 tp->fan_todo[FAN_CPU] = FAN_SAME; 317 } 318 } 319 320 static void analyze_temps(struct bbc_cpu_temperature *tp, unsigned long *last_warn) 321 { 322 tp->avg_amb_temp = (s8)((int)((int)tp->avg_amb_temp + (int)tp->curr_amb_temp) / 2); 323 tp->avg_cpu_temp = (s8)((int)((int)tp->avg_cpu_temp + (int)tp->curr_cpu_temp) / 2); 324 325 analyze_ambient_temp(tp, last_warn, tp->sample_tick); 326 analyze_cpu_temp(tp, last_warn, tp->sample_tick); 327 328 tp->sample_tick++; 329 } 330 331 static enum fan_action prioritize_fan_action(int which_fan) 332 { 333 struct bbc_cpu_temperature *tp; 334 enum fan_action decision = FAN_STATE_MAX; 335 336 /* Basically, prioritize what the temperature sensors 337 * recommend we do, and perform that action on all the 338 * fans. 339 */ 340 for (tp = all_bbc_temps; tp; tp = tp->next) { 341 if (tp->fan_todo[which_fan] == FAN_FULLBLAST) { 342 decision = FAN_FULLBLAST; 343 break; 344 } 345 if (tp->fan_todo[which_fan] == FAN_SAME && 346 decision != FAN_FASTER) 347 decision = FAN_SAME; 348 else if (tp->fan_todo[which_fan] == FAN_FASTER) 349 decision = FAN_FASTER; 350 else if (decision != FAN_FASTER && 351 decision != FAN_SAME && 352 tp->fan_todo[which_fan] == FAN_SLOWER) 353 decision = FAN_SLOWER; 354 } 355 if (decision == FAN_STATE_MAX) 356 decision = FAN_SAME; 357 358 return decision; 359 } 360 361 static int maybe_new_ambient_fan_speed(struct bbc_fan_control *fp) 362 { 363 enum fan_action decision = prioritize_fan_action(FAN_AMBIENT); 364 int ret; 365 366 if (decision == FAN_SAME) 367 return 0; 368 369 ret = 1; 370 if (decision == FAN_FULLBLAST) { 371 if (fp->system_fan_speed >= FAN_SPEED_MAX) 372 ret = 0; 373 else 374 fp->system_fan_speed = FAN_SPEED_MAX; 375 } else { 376 if (decision == FAN_FASTER) { 377 if (fp->system_fan_speed >= FAN_SPEED_MAX) 378 ret = 0; 379 else 380 fp->system_fan_speed += 2; 381 } else { 382 int orig_speed = fp->system_fan_speed; 383 384 if (orig_speed <= FAN_SPEED_MIN || 385 orig_speed <= (fp->cpu_fan_speed - 3)) 386 ret = 0; 387 else 388 fp->system_fan_speed -= 1; 389 } 390 } 391 392 return ret; 393 } 394 395 static int maybe_new_cpu_fan_speed(struct bbc_fan_control *fp) 396 { 397 enum fan_action decision = prioritize_fan_action(FAN_CPU); 398 int ret; 399 400 if (decision == FAN_SAME) 401 return 0; 402 403 ret = 1; 404 if (decision == FAN_FULLBLAST) { 405 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 406 ret = 0; 407 else 408 fp->cpu_fan_speed = FAN_SPEED_MAX; 409 } else { 410 if (decision == FAN_FASTER) { 411 if (fp->cpu_fan_speed >= FAN_SPEED_MAX) 412 ret = 0; 413 else { 414 fp->cpu_fan_speed += 2; 415 if (fp->system_fan_speed < 416 (fp->cpu_fan_speed - 3)) 417 fp->system_fan_speed = 418 fp->cpu_fan_speed - 3; 419 } 420 } else { 421 if (fp->cpu_fan_speed <= FAN_SPEED_MIN) 422 ret = 0; 423 else 424 fp->cpu_fan_speed -= 1; 425 } 426 } 427 428 return ret; 429 } 430 431 static void maybe_new_fan_speeds(struct bbc_fan_control *fp) 432 { 433 int new; 434 435 new = maybe_new_ambient_fan_speed(fp); 436 new |= maybe_new_cpu_fan_speed(fp); 437 438 if (new) 439 set_fan_speeds(fp); 440 } 441 442 static void fans_full_blast(void) 443 { 444 struct bbc_fan_control *fp; 445 446 /* Since we will not be monitoring things anymore, put 447 * the fans on full blast. 448 */ 449 for (fp = all_bbc_fans; fp; fp = fp->next) { 450 fp->cpu_fan_speed = FAN_SPEED_MAX; 451 fp->system_fan_speed = FAN_SPEED_MAX; 452 fp->psupply_fan_on = 1; 453 set_fan_speeds(fp); 454 } 455 } 456 457 #define POLL_INTERVAL (5 * 1000) 458 static unsigned long last_warning_jiffies; 459 static struct task_struct *kenvctrld_task; 460 461 static int kenvctrld(void *__unused) 462 { 463 printk(KERN_INFO "bbc_envctrl: kenvctrld starting...\n"); 464 last_warning_jiffies = jiffies - WARN_INTERVAL; 465 for (;;) { 466 struct bbc_cpu_temperature *tp; 467 struct bbc_fan_control *fp; 468 469 msleep_interruptible(POLL_INTERVAL); 470 if (kthread_should_stop()) 471 break; 472 473 for (tp = all_bbc_temps; tp; tp = tp->next) { 474 get_current_temps(tp); 475 analyze_temps(tp, &last_warning_jiffies); 476 } 477 for (fp = all_bbc_fans; fp; fp = fp->next) 478 maybe_new_fan_speeds(fp); 479 } 480 printk(KERN_INFO "bbc_envctrl: kenvctrld exiting...\n"); 481 482 fans_full_blast(); 483 484 return 0; 485 } 486 487 static void attach_one_temp(struct linux_ebus_child *echild, int temp_idx) 488 { 489 struct bbc_cpu_temperature *tp = kmalloc(sizeof(*tp), GFP_KERNEL); 490 491 if (!tp) 492 return; 493 memset(tp, 0, sizeof(*tp)); 494 tp->client = bbc_i2c_attach(echild); 495 if (!tp->client) { 496 kfree(tp); 497 return; 498 } 499 500 tp->index = temp_idx; 501 { 502 struct bbc_cpu_temperature **tpp = &all_bbc_temps; 503 while (*tpp) 504 tpp = &((*tpp)->next); 505 tp->next = NULL; 506 *tpp = tp; 507 } 508 509 /* Tell it to convert once every 5 seconds, clear all cfg 510 * bits. 511 */ 512 bbc_i2c_writeb(tp->client, 0x00, MAX1617_WR_CFG_BYTE); 513 bbc_i2c_writeb(tp->client, 0x02, MAX1617_WR_CVRATE_BYTE); 514 515 /* Program the hard temperature limits into the chip. */ 516 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].high_pwroff, 517 MAX1617_WR_AMB_HIGHLIM); 518 bbc_i2c_writeb(tp->client, amb_temp_limits[tp->index].low_pwroff, 519 MAX1617_WR_AMB_LOWLIM); 520 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].high_pwroff, 521 MAX1617_WR_CPU_HIGHLIM); 522 bbc_i2c_writeb(tp->client, cpu_temp_limits[tp->index].low_pwroff, 523 MAX1617_WR_CPU_LOWLIM); 524 525 get_current_temps(tp); 526 tp->prev_cpu_temp = tp->avg_cpu_temp = tp->curr_cpu_temp; 527 tp->prev_amb_temp = tp->avg_amb_temp = tp->curr_amb_temp; 528 529 tp->fan_todo[FAN_AMBIENT] = FAN_SAME; 530 tp->fan_todo[FAN_CPU] = FAN_SAME; 531 } 532 533 static void attach_one_fan(struct linux_ebus_child *echild, int fan_idx) 534 { 535 struct bbc_fan_control *fp = kmalloc(sizeof(*fp), GFP_KERNEL); 536 537 if (!fp) 538 return; 539 memset(fp, 0, sizeof(*fp)); 540 fp->client = bbc_i2c_attach(echild); 541 if (!fp->client) { 542 kfree(fp); 543 return; 544 } 545 546 fp->index = fan_idx; 547 548 { 549 struct bbc_fan_control **fpp = &all_bbc_fans; 550 while (*fpp) 551 fpp = &((*fpp)->next); 552 fp->next = NULL; 553 *fpp = fp; 554 } 555 556 /* The i2c device controlling the fans is write-only. 557 * So the only way to keep track of the current power 558 * level fed to the fans is via software. Choose half 559 * power for cpu/system and 'on' fo the powersupply fan 560 * and set it now. 561 */ 562 fp->psupply_fan_on = 1; 563 fp->cpu_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 564 fp->cpu_fan_speed += FAN_SPEED_MIN; 565 fp->system_fan_speed = (FAN_SPEED_MAX - FAN_SPEED_MIN) / 2; 566 fp->system_fan_speed += FAN_SPEED_MIN; 567 568 set_fan_speeds(fp); 569 } 570 571 int bbc_envctrl_init(void) 572 { 573 struct linux_ebus_child *echild; 574 int temp_index = 0; 575 int fan_index = 0; 576 int devidx = 0; 577 578 while ((echild = bbc_i2c_getdev(devidx++)) != NULL) { 579 if (!strcmp(echild->prom_name, "temperature")) 580 attach_one_temp(echild, temp_index++); 581 if (!strcmp(echild->prom_name, "fan-control")) 582 attach_one_fan(echild, fan_index++); 583 } 584 if (temp_index != 0 && fan_index != 0) { 585 kenvctrld_task = kthread_run(kenvctrld, NULL, "kenvctrld"); 586 if (IS_ERR(kenvctrld_task)) 587 return PTR_ERR(kenvctrld_task); 588 } 589 590 return 0; 591 } 592 593 static void destroy_one_temp(struct bbc_cpu_temperature *tp) 594 { 595 bbc_i2c_detach(tp->client); 596 kfree(tp); 597 } 598 599 static void destroy_one_fan(struct bbc_fan_control *fp) 600 { 601 bbc_i2c_detach(fp->client); 602 kfree(fp); 603 } 604 605 void bbc_envctrl_cleanup(void) 606 { 607 struct bbc_cpu_temperature *tp; 608 struct bbc_fan_control *fp; 609 610 kthread_stop(kenvctrld_task); 611 612 tp = all_bbc_temps; 613 while (tp != NULL) { 614 struct bbc_cpu_temperature *next = tp->next; 615 destroy_one_temp(tp); 616 tp = next; 617 } 618 all_bbc_temps = NULL; 619 620 fp = all_bbc_fans; 621 while (fp != NULL) { 622 struct bbc_fan_control *next = fp->next; 623 destroy_one_fan(fp); 624 fp = next; 625 } 626 all_bbc_fans = NULL; 627 } 628