1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * This file contains the environmental PICL plug-in module. 31 */ 32 33 /* 34 * This plugin sets up the PICLTREE for Chicago WS. 35 * It provides functionality to get/set temperatures and 36 * fan speeds. 37 * 38 * The environmental policy defaults to the auto mode 39 * as programmed by OBP at boot time. 
 */

#include <stdio.h>
#include <stdlib.h>
#include <sys/sysmacros.h>
#include <limits.h>
#include <string.h>
#include <strings.h>
#include <stdarg.h>
#include <alloca.h>
#include <unistd.h>
#include <sys/processor.h>
#include <syslog.h>
#include <errno.h>
#include <fcntl.h>
#include <picl.h>
#include <picltree.h>
#include <picldefs.h>
#include <pthread.h>
#include <signal.h>
#include <libdevinfo.h>
#include <sys/pm.h>
#include <sys/open.h>
#include <sys/time.h>
#include <sys/utsname.h>
#include <sys/systeminfo.h>
#include <note.h>
#include <sys/pic16f747.h>
#include "envd.h"
#include <sys/scsi/scsi.h>
#include <sys/scsi/generic/commands.h>

/* NOTE(review): not referenced in the portion of the file reviewed here. */
int		debug_fd;
/*
 * PICL plugin entry points
 */
static void piclenvd_register(void);
static void piclenvd_init(void);
static void piclenvd_fini(void);

/*
 * Env setup routines
 */
extern void env_picl_setup(void);
extern void env_picl_destroy(void);
extern int env_picl_setup_tuneables(void);

/* Consulted by get_fan_speed() before the tachometer is read. */
static boolean_t has_fan_failed(env_fan_t *fanp);

#pragma init(piclenvd_register)

/*
 * Plugin registration information
 */
static picld_plugin_reg_t my_reg_info = {
	PICLD_PLUGIN_VERSION,
	PICLD_PLUGIN_CRITICAL,
	"SUNW_piclenvd",
	piclenvd_init,
	piclenvd_fini,
};

#define	REGISTER_INFORMATION_STRING_LENGTH	16
static char fan_rpm_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};
static char fan_status_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};

/* SCSI helpers used for disk temperature monitoring (defined below). */
static int	scsi_log_sense(env_disk_t *diskp, uchar_t page_code,
			uchar_t *pagebuf, uint16_t pagelen);
static int	get_disk_temp(env_disk_t *);

/*
 * ES Segment stuff
 *
 * sensor_ctl[] is filled by envd_es_setup() from the environmental segment
 * of the front I/O FRU SEEPROM and is indexed by sensor id.
 */
static es_sensor_blk_t sensor_ctl[MAX_SENSORS];

/*
 * Default limits for sensors, in case ES segment is not present, or has
 * inconsistent information
 *
 * envd_es_default_setup() indexes this table by sensor id, so the row
 * order must follow the *_SENSOR_ID values.  Each row lists, in order:
 * high power-off, high shutdown, high warning, low warning, low shutdown,
 * low power-off.
 */
static es_sensor_blk_t sensor_default_ctl[MAX_SENSORS] = {
	{
	    CPU0_HIGH_POWER_OFF, CPU0_HIGH_SHUTDOWN, CPU0_HIGH_WARNING,
	    CPU0_LOW_WARNING, CPU0_LOW_SHUTDOWN, CPU0_LOW_POWER_OFF
	},
	{
	    CPU1_HIGH_POWER_OFF, CPU1_HIGH_SHUTDOWN, CPU1_HIGH_WARNING,
	    CPU1_LOW_WARNING, CPU1_LOW_SHUTDOWN, CPU1_LOW_POWER_OFF
	},
	{
	    ADT7462_HIGH_POWER_OFF, ADT7462_HIGH_SHUTDOWN, ADT7462_HIGH_WARNING,
	    ADT7462_LOW_WARNING, ADT7462_LOW_SHUTDOWN, ADT7462_LOW_POWER_OFF
	},
	{
	    MB_HIGH_POWER_OFF, MB_HIGH_SHUTDOWN, MB_HIGH_WARNING,
	    MB_LOW_WARNING, MB_LOW_SHUTDOWN, MB_LOW_POWER_OFF
	},
	{
	    LM95221_HIGH_POWER_OFF, LM95221_HIGH_SHUTDOWN, LM95221_HIGH_WARNING,
	    LM95221_LOW_WARNING, LM95221_LOW_SHUTDOWN, LM95221_LOW_POWER_OFF
	},
	{
	    FIRE_HIGH_POWER_OFF, FIRE_HIGH_SHUTDOWN, FIRE_HIGH_WARNING,
	    FIRE_LOW_WARNING, FIRE_LOW_SHUTDOWN, FIRE_LOW_POWER_OFF
	},
	{
	    LSI1064_HIGH_POWER_OFF, LSI1064_HIGH_SHUTDOWN, LSI1064_HIGH_WARNING,
	    LSI1064_LOW_WARNING, LSI1064_LOW_SHUTDOWN, LSI1064_LOW_POWER_OFF
	},
	{
	    FRONT_PANEL_HIGH_POWER_OFF, FRONT_PANEL_HIGH_SHUTDOWN,
	    FRONT_PANEL_HIGH_WARNING, FRONT_PANEL_LOW_WARNING,
	    FRONT_PANEL_LOW_SHUTDOWN, FRONT_PANEL_LOW_POWER_OFF
	}
};

/*
 * Env thread variables
 *
 * system_shutdown_started is set by system_temp_thr() so that the shutdown
 * command is issued only once.
 */
static boolean_t  system_shutdown_started = B_FALSE;
static boolean_t  system_temp_thr_created = B_FALSE;
static pthread_t  system_temp_thr_id;
static pthread_attr_t thr_attr;
static boolean_t  disk_temp_thr_created = B_FALSE;
static pthread_t  disk_temp_thr_id;
static boolean_t  fan_thr_created = B_FALSE;
static pthread_t  fan_thr_id;

/*
 * PM thread related variables
 */
static pthread_t	pmthr_tid;	/* pmthr thread ID */
static int		pm_fd = -1;	/* PM device file descriptor */
static boolean_t	pmthr_created = B_FALSE;
static int		cur_lpstate;	/* cur low power state */

/*
 * Envd plug-in verbose flag set by SUNW_PICLENVD_DEBUG environment var
 * Setting the verbose tuneable also enables debugging for better
 * control
 */
int	env_debug = 0;

/*
 * These are debug variables for keeping track of the total number
 * of Fan and Temp sensor retries over the lifetime of the plugin.
 */
static int total_fan_retries = 0;
static int total_temp_retries = 0;

/*
 * Fan devices
 *
 * NOTE(review): env_fan_t is declared in envd.h; the positional
 * initializers look like name, devfs path, id, min speed, max speed and
 * two fields preset to -1 (one of them presumably the fd, which the code
 * below compares against -1) -- confirm against the struct.
 */
static env_fan_t envd_system_fan0 = {
	ENV_SYSTEM_FAN0, ENV_SYSTEM_FAN0_DEVFS, SYSTEM_FAN0_ID,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};
static env_fan_t envd_system_fan1 = {
	ENV_SYSTEM_FAN1, ENV_SYSTEM_FAN1_DEVFS, SYSTEM_FAN1_ID,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};
static env_fan_t envd_system_fan2 = {
	ENV_SYSTEM_FAN2, ENV_SYSTEM_FAN2_DEVFS, SYSTEM_FAN2_ID,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};
static env_fan_t envd_system_fan3 = {
	ENV_SYSTEM_FAN3, ENV_SYSTEM_FAN3_DEVFS, SYSTEM_FAN3_ID,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};
static env_fan_t envd_system_fan4 = {
	ENV_SYSTEM_FAN4, ENV_SYSTEM_FAN4_DEVFS, SYSTEM_FAN4_ID,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};

/*
 * Disk devices
 *
 * NOTE(review): positional initializers for env_disk_t (see envd.h); the
 * trailing -1 is presumably the not-yet-opened fd -- confirm.
 */
static env_disk_t envd_disk0 = {
	ENV_DISK0, ENV_DISK0_DEVFS, DISK0_PHYSPATH, DISK0_NODE_PATH,
	DISK0_ID, -1,
};
static env_disk_t envd_disk1 = {
	ENV_DISK1, ENV_DISK1_DEVFS, DISK1_PHYSPATH, DISK1_NODE_PATH,
	DISK1_ID, -1,
};
static env_disk_t envd_disk2 = {
	ENV_DISK2, ENV_DISK2_DEVFS, DISK2_PHYSPATH, DISK2_NODE_PATH,
	DISK2_ID, -1,
};
static env_disk_t envd_disk3 = {
	ENV_DISK3, ENV_DISK3_DEVFS, DISK3_PHYSPATH, DISK3_NODE_PATH,
	DISK3_ID, -1,
};

/*
 * Sensors
 *
 * NOTE(review): positional initializers for env_sensor_t (see envd.h);
 * they look like name, devfs path, id, fd (-1 = closed) and the threshold
 * block pointer that envd_es_setup()/envd_es_default_setup() fill in
 * later -- confirm against the struct.
 */
static env_sensor_t envd_sensor_cpu0 = {
	SENSOR_CPU0, SENSOR_CPU0_DEVFS, CPU0_SENSOR_ID, -1, NULL,
};
static env_sensor_t envd_sensor_cpu1 = {
	SENSOR_CPU1, SENSOR_CPU1_DEVFS, CPU1_SENSOR_ID, -1, NULL,
};
242 static env_sensor_t envd_sensor_adt7462 = { 243 SENSOR_ADT7462, SENSOR_ADT7462_DEVFS, ADT7462_SENSOR_ID, -1, NULL, 244 }; 245 static env_sensor_t envd_sensor_mb = { 246 SENSOR_MB, SENSOR_MB_DEVFS, MB_SENSOR_ID, -1, NULL, 247 }; 248 static env_sensor_t envd_sensor_lm95221 = { 249 SENSOR_LM95221, SENSOR_LM95221_DEVFS, LM95221_SENSOR_ID, -1, NULL, 250 }; 251 static env_sensor_t envd_sensor_fire = { 252 SENSOR_FIRE, SENSOR_FIRE_DEVFS, FIRE_SENSOR_ID, -1, NULL, 253 }; 254 static env_sensor_t envd_sensor_lsi1064 = { 255 SENSOR_LSI1064, SENSOR_LSI1064_DEVFS, LSI1064_SENSOR_ID, -1, NULL, 256 }; 257 static env_sensor_t envd_sensor_front_panel = { 258 SENSOR_FRONT_PANEL, SENSOR_FRONT_PANEL_DEVFS, FRONT_PANEL_SENSOR_ID, 259 -1, NULL, 260 }; 261 262 /* 263 * The vendor-id and device-id are the properties associated with 264 * the SCSI controller. This is used to identify a particular controller 265 * like LSI1064. 266 */ 267 #define VENDOR_ID "vendor-id" 268 #define DEVICE_ID "device-id" 269 270 /* 271 * The implementation for SCSI disk drives to supply info. about 272 * temperature is not mandatory. Hence we first determine if the 273 * temperature page is supported. To do this we need to scan the list 274 * of pages supported. 
275 */ 276 #define SUPPORTED_LPAGES 0 277 #define TEMPERATURE_PAGE 0x0D 278 #define LOGPAGEHDRSIZE 4 279 280 /* 281 * NULL terminated array of fans 282 */ 283 static env_fan_t *envd_fans[] = { 284 &envd_system_fan0, 285 &envd_system_fan1, 286 &envd_system_fan2, 287 &envd_system_fan3, 288 &envd_system_fan4, 289 NULL 290 }; 291 292 /* 293 * NULL terminated array of disks 294 */ 295 static env_disk_t *envd_disks[] = { 296 &envd_disk0, 297 &envd_disk1, 298 &envd_disk2, 299 &envd_disk3, 300 NULL 301 }; 302 303 /* 304 * NULL terminated array of temperature sensors 305 */ 306 #define N_ENVD_SENSORS 8 307 static env_sensor_t *envd_sensors[] = { 308 &envd_sensor_cpu0, 309 &envd_sensor_cpu1, 310 &envd_sensor_adt7462, 311 &envd_sensor_mb, 312 &envd_sensor_lm95221, 313 &envd_sensor_fire, 314 &envd_sensor_lsi1064, 315 &envd_sensor_front_panel, 316 NULL 317 }; 318 319 #define NOT_AVAILABLE "NA" 320 321 /* 322 * Tuneables 323 */ 324 #define ENABLE 1 325 #define DISABLE 0 326 327 static int disk_high_warn_temperature = DISK_HIGH_WARN_TEMPERATURE; 328 static int disk_low_warn_temperature = DISK_LOW_WARN_TEMPERATURE; 329 static int disk_high_shutdown_temperature = 330 DISK_HIGH_SHUTDOWN_TEMPERATURE; 331 static int disk_low_shutdown_temperature = DISK_LOW_SHUTDOWN_TEMPERATURE; 332 333 static int disk_scan_interval = DISK_SCAN_INTERVAL; 334 static int sensor_scan_interval = SENSOR_SCAN_INTERVAL; 335 static int fan_scan_interval = FAN_SCAN_INTERVAL; 336 337 static int get_int_val(ptree_rarg_t *parg, void *buf); 338 static int set_int_val(ptree_warg_t *parg, const void *buf); 339 static int get_string_val(ptree_rarg_t *parg, void *buf); 340 static int set_string_val(ptree_warg_t *parg, const void *buf); 341 342 static int shutdown_override = 0; 343 static int sensor_warning_interval = SENSOR_WARNING_INTERVAL; 344 static int sensor_warning_duration = SENSOR_WARNING_DURATION; 345 static int sensor_shutdown_interval = SENSOR_SHUTDOWN_INTERVAL; 346 static int disk_warning_interval = 
DISK_WARNING_INTERVAL; 347 static int disk_warning_duration = DISK_WARNING_DURATION; 348 static int disk_shutdown_interval = DISK_SHUTDOWN_INTERVAL; 349 350 static int system_temp_monitor = 1; /* enabled */ 351 static int fan_monitor = 1; /* enabled */ 352 static int pm_monitor = 1; /* enabled */ 353 int disk_temp_monitor = 1; /* enabled */ 354 355 static char shutdown_cmd[] = SHUTDOWN_CMD; 356 const char *iofru_devname = I2C_DEVFS "/" IOFRU_DEV; 357 358 env_tuneable_t tuneables[] = { 359 {"system_temp-monitor", PICL_PTYPE_INT, &system_temp_monitor, 360 &get_int_val, &set_int_val, sizeof (int)}, 361 362 {"fan-monitor", PICL_PTYPE_INT, &fan_monitor, 363 &get_int_val, &set_int_val, sizeof (int)}, 364 365 {"pm-monitor", PICL_PTYPE_INT, &pm_monitor, 366 &get_int_val, &set_int_val, sizeof (int)}, 367 368 {"shutdown-override", PICL_PTYPE_INT, &shutdown_override, 369 &get_int_val, &set_int_val, sizeof (int)}, 370 371 {"sensor-warning-duration", PICL_PTYPE_INT, 372 &sensor_warning_duration, 373 &get_int_val, &set_int_val, 374 sizeof (int)}, 375 376 {"disk-scan-interval", PICL_PTYPE_INT, 377 &disk_scan_interval, 378 &get_int_val, &set_int_val, 379 sizeof (int)}, 380 381 {"fan-scan-interval", PICL_PTYPE_INT, 382 &fan_scan_interval, 383 &get_int_val, &set_int_val, 384 sizeof (int)}, 385 386 {"sensor-scan-interval", PICL_PTYPE_INT, 387 &sensor_scan_interval, 388 &get_int_val, &set_int_val, 389 sizeof (int)}, 390 391 {"sensor_warning-interval", PICL_PTYPE_INT, &sensor_warning_interval, 392 &get_int_val, &set_int_val, 393 sizeof (int)}, 394 395 {"sensor_shutdown-interval", PICL_PTYPE_INT, &sensor_shutdown_interval, 396 &get_int_val, &set_int_val, 397 sizeof (int)}, 398 399 {"disk_warning-interval", PICL_PTYPE_INT, &disk_warning_interval, 400 &get_int_val, &set_int_val, 401 sizeof (int)}, 402 403 {"disk_warning-duration", PICL_PTYPE_INT, &disk_warning_duration, 404 &get_int_val, &set_int_val, 405 sizeof (int)}, 406 407 {"disk_shutdown-interval", PICL_PTYPE_INT, 
&disk_shutdown_interval, 408 &get_int_val, &set_int_val, 409 sizeof (int)}, 410 411 {"shutdown-command", PICL_PTYPE_CHARSTRING, shutdown_cmd, 412 &get_string_val, &set_string_val, 413 sizeof (shutdown_cmd)}, 414 415 {"monitor-disk-temp", PICL_PTYPE_INT, &disk_temp_monitor, 416 &get_int_val, &set_int_val, sizeof (int)}, 417 418 {"disk-high-warn-temperature", PICL_PTYPE_INT, 419 &disk_high_warn_temperature, &get_int_val, 420 &set_int_val, sizeof (int)}, 421 422 {"disk-low-warn-temperature", PICL_PTYPE_INT, 423 &disk_low_warn_temperature, &get_int_val, 424 &set_int_val, sizeof (int)}, 425 426 {"disk-high-shutdown-temperature", PICL_PTYPE_INT, 427 &disk_high_shutdown_temperature, &get_int_val, 428 &set_int_val, sizeof (int)}, 429 430 {"disk-low-shutdown-temperature", PICL_PTYPE_INT, 431 &disk_low_shutdown_temperature, &get_int_val, 432 &set_int_val, sizeof (int)}, 433 434 {"verbose", PICL_PTYPE_INT, &env_debug, 435 &get_int_val, &set_int_val, sizeof (int)} 436 }; 437 438 /* 439 * We use this to figure out how many tuneables there are 440 * This is variable because the publishing routine needs this info 441 * in piclenvsetup.c 442 */ 443 int ntuneables = (sizeof (tuneables)/sizeof (tuneables[0])); 444 445 /* 446 * Lookup fan and return a pointer to env_fan_t data structure. 447 */ 448 env_fan_t * 449 fan_lookup(char *name) 450 { 451 int i; 452 env_fan_t *fanp; 453 454 for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { 455 if (strcmp(fanp->name, name) == 0) 456 return (fanp); 457 } 458 return (NULL); 459 } 460 461 /* 462 * Lookup sensor and return a pointer to env_sensor_t data structure. 463 */ 464 env_sensor_t * 465 sensor_lookup(char *name) 466 { 467 env_sensor_t *sensorp; 468 int i; 469 470 for (i = 0; i < N_ENVD_SENSORS; ++i) { 471 sensorp = envd_sensors[i]; 472 if (strcmp(sensorp->name, name) == 0) 473 return (sensorp); 474 } 475 return (NULL); 476 } 477 478 /* 479 * Lookup disk and return a pointer to env_disk_t data structure. 
480 */ 481 env_disk_t * 482 disk_lookup(char *name) 483 { 484 int i; 485 env_disk_t *diskp; 486 487 for (i = 0; (diskp = envd_disks[i]) != NULL; i++) { 488 if (strncmp(diskp->name, name, strlen(name)) == 0) 489 return (diskp); 490 } 491 return (NULL); 492 } 493 494 /* 495 * Get current temperature 496 * Returns -1 on error, 0 if successful 497 */ 498 int 499 get_temperature(env_sensor_t *sensorp, tempr_t *temp) 500 { 501 int fd = sensorp->fd; 502 int retval = 0; 503 504 if (fd == -1) 505 retval = -1; 506 else if (ioctl(fd, PIC_GET_TEMPERATURE, temp) != 0) { 507 508 retval = -1; 509 510 sensorp->error++; 511 512 if (sensorp->error == MAX_SENSOR_RETRIES) { 513 envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_FAIL, 514 sensorp->name, errno, strerror(errno)); 515 } 516 517 total_temp_retries++; 518 (void) sleep(1); 519 520 } else if (sensorp->error != 0) { 521 if (sensorp->error >= MAX_SENSOR_RETRIES) { 522 envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_OK, 523 sensorp->name); 524 } 525 526 sensorp->error = 0; 527 528 if (total_temp_retries && env_debug) { 529 envd_log(LOG_WARNING, 530 "Total retries for sensors = %d", 531 total_temp_retries); 532 } 533 } 534 535 return (retval); 536 } 537 538 /* 539 * Get current disk temperature 540 * Returns -1 on error, 0 if successful 541 */ 542 int 543 disk_temperature(env_disk_t *diskp, tempr_t *temp) 544 { 545 int retval = 0; 546 547 if (diskp == NULL) 548 retval = -1; 549 else 550 *temp = diskp->current_temp; 551 552 return (retval); 553 } 554 555 /* 556 * Get current fan speed 557 * This function returns a RPM value for fanspeed 558 * in fanspeedp. 
559 * Returns -1 on error, 0 if successful 560 */ 561 int 562 get_fan_speed(env_fan_t *fanp, fanspeed_t *fanspeedp) 563 { 564 uint8_t tach; 565 int real_tach; 566 int retries; 567 568 if (fanp->fd == -1) 569 return (-1); 570 571 if (has_fan_failed(fanp)) { 572 *fanspeedp = 0; 573 return (0); 574 } 575 576 /* try to read the fan information */ 577 for (retries = 0; retries <= MAX_FAN_RETRIES; retries++) { 578 if (ioctl(fanp->fd, PIC_GET_FAN_SPEED, &tach) == 0) 579 break; 580 (void) sleep(1); 581 } 582 583 total_fan_retries += retries; 584 if (retries == MAX_FAN_RETRIES) 585 return (-1); 586 587 if (total_fan_retries && env_debug) { 588 envd_log(LOG_WARNING, "total retries for fan = %d", 589 total_fan_retries); 590 } 591 592 real_tach = tach << 8; 593 *fanspeedp = TACH_TO_RPM(real_tach); 594 return (0); 595 } 596 597 /* 598 * Set fan speed 599 * This function accepts a percentage of fan speed 600 * from 0-100 and programs the HW monitor fans to the corresponding 601 * fanspeed value. 602 * Returns -1 on error, -2 on invalid args passed, 0 if successful 603 */ 604 int 605 set_fan_speed(env_fan_t *fanp, fanspeed_t fanspeed) 606 { 607 uint8_t speed; 608 609 if (fanp->fd == -1) 610 return (-1); 611 612 if (fanspeed < 0 || fanspeed > 100) 613 return (-2); 614 615 speed = fanspeed; 616 if (ioctl(fanp->fd, PIC_SET_FAN_SPEED, &speed) != 0) 617 return (-1); 618 619 return (0); 620 } 621 622 /* 623 * close all fan devices 624 */ 625 static void 626 envd_close_fans(void) 627 { 628 int i; 629 env_fan_t *fanp; 630 631 for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { 632 if (fanp->fd != -1) { 633 (void) close(fanp->fd); 634 fanp->fd = -1; 635 } 636 } 637 } 638 639 /* 640 * Close sensor devices and freeup resources 641 */ 642 static void 643 envd_close_sensors(void) 644 { 645 env_sensor_t *sensorp; 646 int i; 647 648 for (i = 0; i < N_ENVD_SENSORS; ++i) { 649 sensorp = envd_sensors[i]; 650 if (sensorp->fd != -1) { 651 (void) close(sensorp->fd); 652 sensorp->fd = -1; 653 } 654 } 
655 } 656 657 /* 658 * Open fan devices and initialize per fan data structure. 659 */ 660 static int 661 envd_setup_fans(void) 662 { 663 int i, fd; 664 env_fan_t *fanp; 665 int fancnt = 0; 666 picl_nodehdl_t tnodeh; 667 668 for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { 669 fanp->last_status = FAN_OK; 670 671 /* Make sure cpu0/1 present for validating cpu fans */ 672 if (fanp->id == CPU0_FAN_ID) { 673 if (ptree_get_node_by_path(CPU0_PATH, &tnodeh) != 674 PICL_SUCCESS) { 675 if (env_debug) { 676 envd_log(LOG_ERR, 677 "get node by path failed for %s\n", 678 CPU0_PATH); 679 } 680 fanp->present = B_FALSE; 681 continue; 682 } 683 } 684 if (fanp->id == CPU1_FAN_ID) { 685 if (ptree_get_node_by_path(CPU1_PATH, &tnodeh) != 686 PICL_SUCCESS) { 687 if (env_debug) { 688 envd_log(LOG_ERR, 689 "get node by path failed for %s\n", CPU0_PATH); 690 } 691 fanp->present = B_FALSE; 692 continue; 693 } 694 } 695 if ((fd = open(fanp->devfs_path, O_RDWR)) == -1) { 696 envd_log(LOG_CRIT, 697 ENV_FAN_OPEN_FAIL, fanp->name, 698 fanp->devfs_path, errno, strerror(errno)); 699 fanp->present = B_FALSE; 700 continue; 701 } 702 fanp->fd = fd; 703 fanp->present = B_TRUE; 704 fancnt++; 705 } 706 707 if (fancnt == 0) 708 return (-1); 709 710 return (0); 711 } 712 713 static int 714 envd_setup_disks(void) 715 { 716 int ret, i, page_index, page_len; 717 picl_nodehdl_t tnodeh; 718 env_disk_t *diskp; 719 uint_t vendor_id; 720 uint_t device_id; 721 uchar_t log_page[256]; 722 723 if (ptree_get_node_by_path(SCSI_CONTROLLER_NODE_PATH, 724 &tnodeh) != PICL_SUCCESS) { 725 if (env_debug) { 726 envd_log(LOG_ERR, "On-Board SCSI controller %s " 727 "not found in the system.\n", 728 SCSI_CONTROLLER_NODE_PATH); 729 } 730 return (-1); 731 } 732 733 if ((ret = ptree_get_propval_by_name(tnodeh, VENDOR_ID, 734 &vendor_id, sizeof (vendor_id))) != 0) { 735 if (env_debug) { 736 envd_log(LOG_ERR, "Error in getting vendor-id " 737 "for SCSI controller. 
ret = %d errno = 0x%d\n", 738 ret, errno); 739 } 740 return (-1); 741 } 742 if ((ret = ptree_get_propval_by_name(tnodeh, DEVICE_ID, 743 &device_id, sizeof (device_id))) != 0) { 744 if (env_debug) { 745 envd_log(LOG_ERR, "Error in getting device-id " 746 "for SCSI controller. ret = %d errno = 0x%d\n", 747 ret, errno); 748 } 749 return (-1); 750 } 751 752 /* 753 * We have found LSI1064 SCSi controller onboard. 754 */ 755 for (i = 0; (diskp = envd_disks[i]) != NULL; i++) { 756 if (ptree_get_node_by_path(diskp->nodepath, 757 &tnodeh) != PICL_SUCCESS) { 758 diskp->present = B_FALSE; 759 if (env_debug) { 760 envd_log(LOG_ERR, 761 "DISK %d: %s not found in the system.\n", 762 diskp->id, diskp->nodepath); 763 } 764 continue; 765 } 766 if ((diskp->fd = open(diskp->devfs_path, O_RDONLY)) == -1) { 767 diskp->present = B_FALSE; 768 if (env_debug) { 769 envd_log(LOG_ERR, 770 "Error in opening %s errno = 0x%x\n", 771 diskp->devfs_path, errno); 772 } 773 continue; 774 } 775 diskp->present = B_TRUE; 776 diskp->tpage_supported = B_FALSE; 777 diskp->warning_tstamp = 0; 778 diskp->shutdown_tstamp = 0; 779 diskp->high_warning = disk_high_warn_temperature; 780 diskp->low_warning = disk_low_warn_temperature; 781 diskp->high_shutdown = disk_high_shutdown_temperature; 782 diskp->low_shutdown = disk_low_shutdown_temperature; 783 /* 784 * Find out if the Temperature page is supported by the disk. 
785 */ 786 if (scsi_log_sense(diskp, SUPPORTED_LPAGES, log_page, 787 sizeof (log_page)) != 0) { 788 continue; 789 } 790 791 page_len = ((log_page[2] << 8) & 0xFF00) | log_page[3]; 792 793 for (page_index = LOGPAGEHDRSIZE; 794 page_index < page_len + LOGPAGEHDRSIZE; page_index++) { 795 if (log_page[page_index] != TEMPERATURE_PAGE) 796 continue; 797 798 diskp->tpage_supported = B_TRUE; 799 if (env_debug) { 800 envd_log(LOG_ERR, "tpage supported for %s\n", 801 diskp->nodepath); 802 } 803 } 804 805 if (get_disk_temp(diskp) < 0) { 806 envd_log(LOG_ERR, " error reading temperature of:%s\n", 807 diskp->name); 808 } else if (env_debug) { 809 envd_log(LOG_ERR, "%s: temperature = %d\n", 810 diskp->name, diskp->current_temp); 811 } 812 } 813 814 return (0); 815 } 816 817 static int 818 envd_es_setup(void) 819 { 820 seeprom_scn_t scn_hdr; 821 seeprom_seg_t seg_hdr; 822 es_data_t *envseg; 823 es_sensor_t *sensorp; 824 int i, fd, id; 825 int envseg_len, esd_len; 826 char *envsegp; 827 828 /* 829 * Open the front io fru 830 */ 831 if ((fd = open(iofru_devname, O_RDONLY)) == -1) { 832 envd_log(LOG_ERR, ENV_FRU_OPEN_FAIL, iofru_devname, errno); 833 return (-1); 834 } 835 836 /* 837 * Read section header from the fru SEEPROM 838 */ 839 if (lseek(fd, SSCN_OFFSET, SEEK_SET) == (off_t)-1 || 840 read(fd, &scn_hdr, sizeof (scn_hdr)) != sizeof (scn_hdr)) { 841 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 842 (void) close(fd); 843 return (-1); 844 } 845 if ((scn_hdr.sscn_tag != SSCN_TAG) || 846 (GET_UNALIGN16(&scn_hdr.sscn_ver) != SSCN_VER)) { 847 envd_log(LOG_ERR, ENV_FRU_BAD_SCNHDR, scn_hdr.sscn_tag, 848 GET_UNALIGN16(&scn_hdr.sscn_ver)); 849 (void) close(fd); 850 return (-1); 851 } 852 853 /* 854 * Locate environmental segment 855 */ 856 for (i = 0; i < scn_hdr.sscn_nsegs; i++) { 857 if (read(fd, &seg_hdr, sizeof (seg_hdr)) != sizeof (seg_hdr)) { 858 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 859 (void) close(fd); 860 return (-1); 861 } 862 863 if (env_debug) { 864 
envd_log(LOG_INFO, 865 "Seg name: %x off:%x len:%x\n", 866 GET_UNALIGN16(&seg_hdr.sseg_name), 867 GET_UNALIGN16(&seg_hdr.sseg_off), 868 GET_UNALIGN16(&seg_hdr.sseg_len)); 869 } 870 871 if (GET_UNALIGN16(&seg_hdr.sseg_name) == ENVSEG_NAME) 872 break; 873 } 874 if (i == scn_hdr.sscn_nsegs) { 875 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 876 (void) close(fd); 877 return (-1); 878 } 879 880 /* 881 * Read environmental segment 882 */ 883 envseg_len = GET_UNALIGN16(&seg_hdr.sseg_len); 884 if ((envseg = malloc(envseg_len)) == NULL) { 885 envd_log(LOG_ERR, ENV_FRU_NOMEM_FOR_SEG, envseg_len); 886 (void) close(fd); 887 return (-1); 888 } 889 890 if (lseek(fd, (off_t)GET_UNALIGN16(&seg_hdr.sseg_off), 891 SEEK_SET) == (off_t)-1 || 892 read(fd, envseg, envseg_len) != envseg_len) { 893 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 894 free(envseg); 895 (void) close(fd); 896 return (-1); 897 } 898 899 /* 900 * Check environmental segment data for consistency 901 */ 902 esd_len = sizeof (*envseg) + 903 (envseg->esd_nsensors - 1) * sizeof (envseg->esd_sensors[0]); 904 if (envseg->esd_ver != ENVSEG_VERSION || envseg_len < esd_len) { 905 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 906 free(envseg); 907 (void) close(fd); 908 return (-1); 909 } 910 911 /* 912 * Process environmental segment data 913 */ 914 if (envseg->esd_nsensors > MAX_SENSORS) { 915 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 916 free(envseg); 917 (void) close(fd); 918 return (-1); 919 } 920 921 sensorp = &(envseg->esd_sensors[0]); 922 envsegp = (char *)envseg; 923 for (i = 0; i < envseg->esd_nsensors; i++) { 924 uint32_t ess_id; 925 926 (void) memcpy(&ess_id, 927 sensorp->ess_id, sizeof (sensorp->ess_id)); 928 929 if (env_debug) { 930 envd_log(LOG_INFO, "\n Sensor Id %x offset %x", 931 ess_id, sensorp->ess_off); 932 } 933 if (ess_id >= MAX_SENSORS) { 934 envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname); 935 free(envseg); 936 (void) close(fd); 937 return (-1); 938 } 939 
(void) memcpy(&sensor_ctl[ess_id], &envsegp[sensorp->ess_off], 940 sizeof (es_sensor_blk_t)); 941 942 sensorp++; 943 } 944 945 /* 946 * Match sensor/ES id and point to correct data based on IDs 947 */ 948 for (i = 0; i < N_ENVD_SENSORS; i++) { 949 id = envd_sensors[i]->id; 950 envd_sensors[i]->es = &sensor_ctl[id]; 951 } 952 953 /* 954 * Cleanup and return 955 */ 956 free(envseg); 957 (void) close(fd); 958 959 return (0); 960 } 961 962 static void 963 envd_es_default_setup(void) 964 { 965 int i, id; 966 967 for (i = 0; i < N_ENVD_SENSORS; i++) { 968 id = envd_sensors[i]->id; 969 envd_sensors[i]->es = &sensor_default_ctl[id]; 970 } 971 } 972 973 /* 974 * Open temperature sensor devices and initialize per sensor data structure. 975 */ 976 static int 977 envd_setup_sensors(void) 978 { 979 env_sensor_t *sensorp; 980 int sensorcnt = 0; 981 int i; 982 picl_nodehdl_t tnodeh; 983 984 for (i = 0; i < N_ENVD_SENSORS; i++) { 985 if (env_debug) 986 envd_log(LOG_ERR, "scanning sensor %d\n", i); 987 988 sensorp = envd_sensors[i]; 989 990 /* Initialize sensor's initial state */ 991 sensorp->shutdown_initiated = B_FALSE; 992 sensorp->warning_tstamp = 0; 993 sensorp->shutdown_tstamp = 0; 994 sensorp->error = 0; 995 996 /* Make sure cpu0/1 sensors are present */ 997 if (sensorp->id == CPU0_SENSOR_ID) { 998 if (ptree_get_node_by_path(CPU0_PATH, &tnodeh) != 999 PICL_SUCCESS) { 1000 if (env_debug) { 1001 envd_log(LOG_ERR, 1002 "get node by path failed for %s\n", 1003 CPU0_PATH); 1004 } 1005 sensorp->present = B_FALSE; 1006 continue; 1007 } 1008 } 1009 if (sensorp->id == CPU1_SENSOR_ID) { 1010 if (ptree_get_node_by_path(CPU1_PATH, &tnodeh) != 1011 PICL_SUCCESS) { 1012 if (env_debug) { 1013 envd_log(LOG_ERR, 1014 "get node by path failed for %s\n", 1015 CPU1_PATH); 1016 } 1017 sensorp->present = B_FALSE; 1018 continue; 1019 } 1020 } 1021 1022 sensorp->fd = open(sensorp->devfs_path, O_RDWR); 1023 if (sensorp->fd == -1) { 1024 if (env_debug) { 1025 envd_log(LOG_ERR, ENV_SENSOR_OPEN_FAIL, 
1026 sensorp->name, sensorp->devfs_path, 1027 errno, strerror(errno)); 1028 } 1029 sensorp->present = B_FALSE; 1030 continue; 1031 } 1032 1033 /* 1034 * Determine if the front panel is attached, we want the 1035 * information if it exists, but should not shut down 1036 * the system if it is removed. 1037 */ 1038 if (sensorp->id == FRONT_PANEL_SENSOR_ID) { 1039 tempr_t temp; 1040 int tries; 1041 1042 for (tries = 0; tries < MAX_SENSOR_RETRIES; tries++) { 1043 if (ioctl(sensorp->fd, PIC_GET_TEMPERATURE, 1044 &temp) == 0) { 1045 break; 1046 } 1047 (void) sleep(1); 1048 } 1049 if (tries == MAX_SENSOR_RETRIES) 1050 sensorp->present = B_FALSE; 1051 } 1052 1053 sensorp->present = B_TRUE; 1054 sensorcnt++; 1055 } 1056 1057 if (sensorcnt == 0) 1058 return (-1); 1059 1060 return (0); 1061 } 1062 1063 /* ARGSUSED */ 1064 static void * 1065 pmthr(void *args) 1066 { 1067 pm_state_change_t pmstate; 1068 char physpath[PATH_MAX]; 1069 int pre_lpstate; 1070 uint8_t estar_state; 1071 int env_monitor_fd; 1072 1073 pmstate.physpath = physpath; 1074 pmstate.size = sizeof (physpath); 1075 cur_lpstate = 0; 1076 pre_lpstate = 1; 1077 1078 pm_fd = open(PM_DEVICE, O_RDWR); 1079 if (pm_fd == -1) { 1080 envd_log(LOG_ERR, PM_THREAD_EXITING, errno, strerror(errno)); 1081 return (NULL); 1082 } 1083 for (;;) { 1084 /* 1085 * Get PM state change events to check if the system 1086 * is in lowest power state and inform PIC which controls 1087 * fan speeds. 1088 * 1089 * To minimize polling, we use the blocking interface 1090 * to get the power state change event here. 
1091 */ 1092 if (ioctl(pm_fd, PM_GET_STATE_CHANGE_WAIT, &pmstate) != 0) { 1093 if (errno != EINTR) 1094 break; 1095 continue; 1096 } 1097 1098 do { 1099 if (env_debug) { 1100 envd_log(LOG_INFO, 1101 "pmstate event:0x%x flags:%x" 1102 "comp:%d oldval:%d newval:%d path:%s\n", 1103 pmstate.event, pmstate.flags, 1104 pmstate.component, 1105 pmstate.old_level, 1106 pmstate.new_level, 1107 pmstate.physpath); 1108 } 1109 cur_lpstate = 1110 (pmstate.flags & PSC_ALL_LOWEST) ? 1 : 0; 1111 } while (ioctl(pm_fd, PM_GET_STATE_CHANGE, &pmstate) == 0); 1112 1113 if (pre_lpstate != cur_lpstate) { 1114 pre_lpstate = cur_lpstate; 1115 estar_state = (cur_lpstate & 0x1); 1116 if (env_debug) 1117 envd_log(LOG_ERR, 1118 "setting PIC ESTAR SATE to %x\n", 1119 estar_state); 1120 1121 env_monitor_fd = open(ENV_MONITOR_DEVFS, O_RDWR); 1122 if (env_monitor_fd != -1) { 1123 if (ioctl(env_monitor_fd, PIC_SET_ESTAR_MODE, 1124 &estar_state) < 0) { 1125 if (env_debug) 1126 envd_log(LOG_ERR, 1127 "unable to set ESTAR_MODE in PIC\n"); 1128 } 1129 (void) close(env_monitor_fd); 1130 } else { 1131 if (env_debug) 1132 envd_log(LOG_ERR, 1133 "Failed to open %s\n", 1134 ENV_MONITOR_DEVFS); 1135 } 1136 } 1137 } 1138 1139 /*NOTREACHED*/ 1140 return (NULL); 1141 } 1142 1143 /* 1144 * This is env thread which monitors the current temperature when 1145 * warning threshold is exceeded. The job is to make sure it does 1146 * not execced/decrease shutdown threshold. If it does it will start 1147 * forced shutdown to avoid reaching hardware poweroff via THERM interrupt. 
1148 */ 1149 /*ARGSUSED*/ 1150 static void * 1151 system_temp_thr(void *args) 1152 { 1153 char syscmd[BUFSIZ]; 1154 char msgbuf[BUFSIZ]; 1155 timespec_t to; 1156 int ret, i; 1157 env_sensor_t *sensorp; 1158 pthread_mutex_t env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER; 1159 pthread_cond_t env_monitor_cv = PTHREAD_COND_INITIALIZER; 1160 time_t ct; 1161 tempr_t temp; 1162 1163 for (;;) { 1164 /* 1165 * Sleep for specified seconds before issuing IOCTL 1166 * again. 1167 */ 1168 (void) pthread_mutex_lock(&env_monitor_mutex); 1169 ret = pthread_cond_reltimedwait_np(&env_monitor_cv, 1170 &env_monitor_mutex, &to); 1171 to.tv_sec = sensor_scan_interval; 1172 to.tv_nsec = 0; 1173 if (ret != ETIMEDOUT) { 1174 (void) pthread_mutex_unlock(&env_monitor_mutex); 1175 continue; 1176 } 1177 1178 (void) pthread_mutex_unlock(&env_monitor_mutex); 1179 for (i = 0; i < N_ENVD_SENSORS; i++) { 1180 sensorp = envd_sensors[i]; 1181 if (sensorp->present == B_FALSE) 1182 continue; 1183 if (get_temperature(sensorp, &temp) == -1) 1184 continue; 1185 1186 sensorp->cur_temp = temp; 1187 if (env_debug) { 1188 envd_log(LOG_ERR, 1189 "%s temp = %d", 1190 sensorp->name, sensorp->cur_temp); 1191 } 1192 1193 /* 1194 * If this sensor already triggered system shutdown, 1195 * don't log any more shutdown/warning messages for it. 1196 */ 1197 if (sensorp->shutdown_initiated) 1198 continue; 1199 1200 /* 1201 * Check for the temperature in warning and shutdown 1202 * range and take appropriate action. 1203 */ 1204 if (SENSOR_TEMP_IN_WARNING_RANGE(sensorp->cur_temp, 1205 sensorp)) { 1206 /* 1207 * Check if the temperature has been in 1208 * warning range during last 1209 * sensor_warning_duration interval. If so, 1210 * the temperature is truly in warning range 1211 * and we need to log a warning message, but 1212 * no more than once every 1213 * sensor_warning_interval seconds. 
1214 */ 1215 time_t wtstamp = sensorp->warning_tstamp; 1216 1217 ct = (time_t)(gethrtime() / NANOSEC); 1218 if (sensorp->warning_start == 0) 1219 sensorp->warning_start = ct; 1220 if (((ct - sensorp->warning_start) >= 1221 sensor_warning_duration) && 1222 (wtstamp == 0 || (ct - wtstamp) >= 1223 sensor_warning_interval)) { 1224 envd_log(LOG_CRIT, ENV_WARNING_MSG, 1225 sensorp->name, sensorp->cur_temp, 1226 sensorp->es->esb_low_warning, 1227 sensorp->es->esb_high_warning); 1228 sensorp->warning_tstamp = ct; 1229 } 1230 } else if (sensorp->warning_start != 0) 1231 sensorp->warning_start = 0; 1232 1233 if (!shutdown_override && 1234 SENSOR_TEMP_IN_SHUTDOWN_RANGE(sensorp->cur_temp, 1235 sensorp)) { 1236 ct = (time_t)(gethrtime() / NANOSEC); 1237 if (sensorp->shutdown_tstamp == 0) 1238 sensorp->shutdown_tstamp = ct; 1239 1240 /* 1241 * Shutdown the system if the temperature 1242 * remains in the shutdown range for over 1243 * sensor_shutdown_interval seconds. 1244 */ 1245 if ((ct - sensorp->shutdown_tstamp) >= 1246 sensor_shutdown_interval) { 1247 /* 1248 * Log error 1249 */ 1250 sensorp->shutdown_initiated = B_TRUE; 1251 (void) snprintf(msgbuf, sizeof (msgbuf), 1252 ENV_SHUTDOWN_MSG, sensorp->name, 1253 sensorp->cur_temp, 1254 sensorp->es->esb_low_shutdown, 1255 sensorp->es->esb_high_shutdown); 1256 envd_log(LOG_ALERT, msgbuf); 1257 1258 /* 1259 * Shutdown the system (only once) 1260 */ 1261 if (system_shutdown_started == 1262 B_FALSE) { 1263 (void) snprintf(syscmd, 1264 sizeof (syscmd), 1265 "%s \"%s\"", shutdown_cmd, 1266 msgbuf); 1267 1268 envd_log(LOG_ALERT, syscmd); 1269 system_shutdown_started = 1270 B_TRUE; 1271 1272 (void) system(syscmd); 1273 } 1274 } 1275 } else if (sensorp->shutdown_tstamp != 0) 1276 sensorp->shutdown_tstamp = 0; 1277 } 1278 } /* end of forever loop */ 1279 1280 /*NOTREACHED*/ 1281 return (NULL); 1282 } 1283 1284 static int 1285 scsi_log_sense(env_disk_t *diskp, uchar_t page_code, uchar_t *pagebuf, 1286 uint16_t pagelen) 1287 { 1288 struct 
uscsi_cmd ucmd_buf; 1289 uchar_t cdb_buf[CDB_GROUP1]; 1290 struct scsi_extended_sense sense_buf; 1291 int ret_val; 1292 1293 bzero((void *)&cdb_buf, sizeof (cdb_buf)); 1294 bzero((void *)&ucmd_buf, sizeof (ucmd_buf)); 1295 bzero((void *)&sense_buf, sizeof (sense_buf)); 1296 1297 cdb_buf[0] = SCMD_LOG_SENSE_G1; 1298 cdb_buf[2] = (0x01 << 6) | page_code; 1299 cdb_buf[7] = (uchar_t)((pagelen & 0xFF00) >> 8); 1300 cdb_buf[8] = (uchar_t)(pagelen & 0x00FF); 1301 1302 ucmd_buf.uscsi_cdb = (char *)cdb_buf; 1303 ucmd_buf.uscsi_cdblen = sizeof (cdb_buf); 1304 ucmd_buf.uscsi_bufaddr = (caddr_t)pagebuf; 1305 ucmd_buf.uscsi_buflen = pagelen; 1306 ucmd_buf.uscsi_rqbuf = (caddr_t)&sense_buf; 1307 ucmd_buf.uscsi_rqlen = sizeof (struct scsi_extended_sense); 1308 ucmd_buf.uscsi_flags = USCSI_RQENABLE | USCSI_READ | USCSI_SILENT; 1309 ucmd_buf.uscsi_timeout = 60; 1310 1311 ret_val = ioctl(diskp->fd, USCSICMD, ucmd_buf); 1312 if ((ret_val == 0) && (ucmd_buf.uscsi_status == 0)) { 1313 if (env_debug) 1314 envd_log(LOG_ERR, 1315 "log sense command for page_code 0x%x succeeded\n", page_code); 1316 return (ret_val); 1317 } 1318 if (env_debug) { 1319 envd_log(LOG_ERR, "log sense command for %s failed. " 1320 "page_code 0x%x ret_val = 0x%x " 1321 "status = 0x%x errno = 0x%x\n", diskp->name, page_code, 1322 ret_val, ucmd_buf.uscsi_status, errno); 1323 } 1324 1325 return (1); 1326 } 1327 1328 static int 1329 get_disk_temp(env_disk_t *diskp) 1330 { 1331 int ret; 1332 uchar_t tpage[256]; 1333 1334 ret = scsi_log_sense(diskp, TEMPERATURE_PAGE, tpage, sizeof (tpage)); 1335 if (ret != 0) { 1336 diskp->current_temp = DISK_INVALID_TEMP; 1337 diskp->ref_temp = DISK_INVALID_TEMP; 1338 return (-1); 1339 } 1340 /* 1341 * For the current temperature verify that the parameter 1342 * length is 0x02 and the parameter code is 0x00 1343 * Temperature value of 255(0xFF) is considered INVALID. 
1344 */ 1345 if ((tpage[7] == 0x02) && (tpage[4] == 0x00) && 1346 (tpage[5] == 0x00)) { 1347 if (tpage[9] == 0xFF) { 1348 diskp->current_temp = DISK_INVALID_TEMP; 1349 return (-1); 1350 } else { 1351 diskp->current_temp = tpage[9]; 1352 } 1353 } 1354 1355 /* 1356 * For the reference temperature verify that the parameter 1357 * length is 0x02 and the parameter code is 0x01 1358 * Temperature value of 255(0xFF) is considered INVALID. 1359 */ 1360 if ((tpage[13] == 0x02) && (tpage[10] == 0x00) && 1361 (tpage[11] == 0x01)) { 1362 if (tpage[15] == 0xFF) { 1363 diskp->ref_temp = DISK_INVALID_TEMP; 1364 } else { 1365 diskp->ref_temp = tpage[15]; 1366 } 1367 } 1368 return (0); 1369 } 1370 1371 /* ARGSUSED */ 1372 static void * 1373 disk_temp_thr(void *args) 1374 { 1375 char syscmd[BUFSIZ]; 1376 char msgbuf[BUFSIZ]; 1377 timespec_t to; 1378 int ret, i; 1379 env_disk_t *diskp; 1380 pthread_mutex_t env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER; 1381 pthread_cond_t env_monitor_cv = PTHREAD_COND_INITIALIZER; 1382 pm_state_change_t pmstate; 1383 int idle_time; 1384 int disk_pm_fd; 1385 time_t ct; 1386 1387 if ((disk_pm_fd = open(PM_DEVICE, O_RDWR)) == -1) { 1388 envd_log(LOG_ERR, DISK_TEMP_THREAD_EXITING, 1389 errno, strerror(errno)); 1390 return (NULL); 1391 } 1392 1393 for (;;) { 1394 /* 1395 * Sleep for specified seconds before issuing IOCTL 1396 * again. 
1397 */ 1398 (void) pthread_mutex_lock(&env_monitor_mutex); 1399 ret = pthread_cond_reltimedwait_np(&env_monitor_cv, 1400 &env_monitor_mutex, &to); 1401 1402 to.tv_sec = disk_scan_interval; 1403 to.tv_nsec = 0; 1404 1405 if (ret != ETIMEDOUT) { 1406 (void) pthread_mutex_unlock( 1407 &env_monitor_mutex); 1408 continue; 1409 } 1410 (void) pthread_mutex_unlock(&env_monitor_mutex); 1411 1412 for (i = 0; (diskp = envd_disks[i]) != NULL; i++) { 1413 if (diskp->present == B_FALSE) 1414 continue; 1415 if (diskp->tpage_supported == B_FALSE) 1416 continue; 1417 /* 1418 * If the disk temperature is above the warning threshold 1419 * continue monitoring until the temperature drops below 1420 * warning threshold. 1421 * if the temperature is in the NORMAL range monitor only 1422 * when the disk is BUSY. 1423 * We do not want to read the disk temperature if the disk is 1424 * is idling. The reason for this is disk will never get into 1425 * lowest power mode if we scan the disk temperature 1426 * peridoically. To avoid this situation we first determine 1427 * the idle_time of the disk. If the disk has been IDLE since 1428 * we scanned the temperature last time we will not read the 1429 * temperature. 1430 */ 1431 if (!DISK_TEMP_IN_WARNING_RANGE(diskp->current_temp, diskp)) { 1432 pmstate.physpath = diskp->physpath; 1433 pmstate.size = strlen(diskp->physpath); 1434 pmstate.component = 0; 1435 if ((idle_time = 1436 ioctl(disk_pm_fd, PM_GET_TIME_IDLE, 1437 &pmstate)) == -1) { 1438 1439 if (errno != EINTR) { 1440 if (env_debug) 1441 envd_log(LOG_ERR, 1442 "ioctl PM_GET_TIME_IDLE failed for DISK0. errno=0x%x\n", 1443 errno); 1444 continue; 1445 } 1446 continue; 1447 } 1448 if (idle_time >= (disk_scan_interval/2)) { 1449 if (env_debug) { 1450 envd_log(LOG_ERR, "%s idle time = %d\n", 1451 diskp->name, idle_time); 1452 } 1453 continue; 1454 } 1455 } 1456 ret = get_disk_temp(diskp); 1457 if (ret != 0) 1458 continue; 1459 if (env_debug) { 1460 envd_log(LOG_ERR, "%s temp = %d ref. 
temp = %d\n", 1461 diskp->name, diskp->current_temp, diskp->ref_temp); 1462 } 1463 /* 1464 * If this disk already triggered system shutdown, don't 1465 * log any more shutdown/warning messages for it. 1466 */ 1467 if (diskp->shutdown_initiated) 1468 continue; 1469 1470 /* 1471 * Check for the temperature in warning and shutdown range 1472 * and take appropriate action. 1473 */ 1474 if (DISK_TEMP_IN_WARNING_RANGE(diskp->current_temp, diskp)) { 1475 /* 1476 * Check if the temperature has been in warning 1477 * range during last disk_warning_duration interval. 1478 * If so, the temperature is truly in warning 1479 * range and we need to log a warning message, 1480 * but no more than once every disk_warning_interval 1481 * seconds. 1482 */ 1483 time_t wtstamp = diskp->warning_tstamp; 1484 1485 ct = (time_t)(gethrtime() / NANOSEC); 1486 if (diskp->warning_start == 0) 1487 diskp->warning_start = ct; 1488 if (((ct - diskp->warning_start) >= 1489 disk_warning_duration) && (wtstamp == 0 || 1490 (ct - wtstamp) >= disk_warning_interval)) { 1491 envd_log(LOG_CRIT, ENV_WARNING_MSG, 1492 diskp->name, diskp->current_temp, 1493 diskp->low_warning, 1494 diskp->high_warning); 1495 diskp->warning_tstamp = ct; 1496 } 1497 } else if (diskp->warning_start != 0) 1498 diskp->warning_start = 0; 1499 1500 if (!shutdown_override && 1501 DISK_TEMP_IN_SHUTDOWN_RANGE(diskp->current_temp, diskp)) { 1502 ct = (time_t)(gethrtime() / NANOSEC); 1503 if (diskp->shutdown_tstamp == 0) 1504 diskp->shutdown_tstamp = ct; 1505 1506 /* 1507 * Shutdown the system if the temperature remains 1508 * in the shutdown range for over disk_shutdown_interval 1509 * seconds. 
1510 */ 1511 if ((ct - diskp->shutdown_tstamp) >= 1512 disk_shutdown_interval) { 1513 /* log error */ 1514 diskp->shutdown_initiated = B_TRUE; 1515 (void) snprintf(msgbuf, sizeof (msgbuf), 1516 ENV_SHUTDOWN_MSG, diskp->name, 1517 diskp->current_temp, diskp->low_shutdown, 1518 diskp->high_shutdown); 1519 envd_log(LOG_ALERT, msgbuf); 1520 1521 /* shutdown the system (only once) */ 1522 if (system_shutdown_started == B_FALSE) { 1523 (void) snprintf(syscmd, sizeof (syscmd), 1524 "%s \"%s\"", shutdown_cmd, msgbuf); 1525 envd_log(LOG_ALERT, syscmd); 1526 system_shutdown_started = B_TRUE; 1527 (void) system(syscmd); 1528 } 1529 } 1530 } else if (diskp->shutdown_tstamp != 0) 1531 diskp->shutdown_tstamp = 0; 1532 } 1533 } /* end of forever loop */ 1534 } 1535 1536 static void * 1537 fan_thr(void *args) 1538 { 1539 char msgbuf[BUFSIZ]; 1540 timespec_t to; 1541 int ret, i; 1542 pthread_mutex_t env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER; 1543 pthread_cond_t env_monitor_cv = PTHREAD_COND_INITIALIZER; 1544 env_fan_t *fanp; 1545 1546 #ifdef __lint 1547 args = args; 1548 #endif 1549 1550 for (;;) { 1551 /* 1552 * Sleep for specified seconds before issuing IOCTL 1553 * again. 1554 */ 1555 (void) pthread_mutex_lock(&env_monitor_mutex); 1556 ret = pthread_cond_reltimedwait_np(&env_monitor_cv, 1557 &env_monitor_mutex, &to); 1558 to.tv_sec = fan_scan_interval; 1559 to.tv_nsec = 0; 1560 if (ret != ETIMEDOUT) { 1561 (void) pthread_mutex_unlock(&env_monitor_mutex); 1562 continue; 1563 } 1564 (void) pthread_mutex_unlock(&env_monitor_mutex); 1565 1566 for (i = 0; (fanp = envd_fans[i]) != NULL; i++) { 1567 if (fanp->present == B_FALSE) 1568 continue; 1569 /* 1570 * We initiate shutdown if fan status indicates 1571 * failure. Also, don't warn repeatedly. 
1572 */ 1573 if (has_fan_failed(fanp) == B_TRUE) { 1574 if (fanp->last_status == FAN_FAILED) 1575 continue; 1576 fanp->last_status = FAN_FAILED; 1577 (void) snprintf(msgbuf, sizeof (msgbuf), 1578 ENV_FAN_FAILURE_WARNING_MSG, fanp->name, 1579 fan_rpm_string, fan_status_string); 1580 envd_log(LOG_ALERT, msgbuf); 1581 } else { 1582 if (fanp->last_status == FAN_OK) 1583 continue; 1584 fanp->last_status = FAN_OK; 1585 (void) snprintf(msgbuf, sizeof (msgbuf), 1586 ENV_FAN_OK_MSG, fanp->name); 1587 envd_log(LOG_ALERT, msgbuf); 1588 } 1589 } 1590 } 1591 1592 /*NOTREACHED*/ 1593 return (NULL); 1594 } 1595 1596 /* 1597 * Setup envrionmental monitor state and start threads to monitor 1598 * temperature, fan, disk and power management state. 1599 * Returns -1 on error, 0 if successful. 1600 */ 1601 static int 1602 envd_setup(void) 1603 { 1604 1605 if (getenv("SUNW_piclenvd_debug") != NULL) 1606 env_debug = 1; 1607 1608 if (pthread_attr_init(&thr_attr) != 0 || 1609 pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM) != 0) { 1610 return (-1); 1611 } 1612 1613 /* 1614 * If ES segment is not present or has inconsistent information, we 1615 * use default values for sensor limits. For the sake of simplicity, 1616 * we still store these limits internally in the 'es' member in the 1617 * structure. 
1618 */ 1619 if (envd_es_setup() < 0) { 1620 envd_log(LOG_WARNING, ENV_DEFAULT_LIMITS); 1621 envd_es_default_setup(); 1622 } 1623 1624 if (envd_setup_sensors() < 0) { 1625 if (env_debug) 1626 envd_log(LOG_ERR, "Failed to setup sensors\n"); 1627 system_temp_monitor = 0; 1628 } 1629 1630 if (envd_setup_fans() < 0) { 1631 if (env_debug) 1632 envd_log(LOG_ERR, "Failed to setup fans\n"); 1633 fan_monitor = 0; 1634 pm_monitor = 0; 1635 } 1636 1637 if (envd_setup_disks() < 0) { 1638 if (env_debug) 1639 envd_log(LOG_ERR, "Failed to setup disks\n"); 1640 disk_temp_monitor = 0; 1641 } 1642 1643 /* 1644 * Create a thread to monitor system temperatures 1645 */ 1646 if ((system_temp_monitor) && (system_temp_thr_created == B_FALSE)) { 1647 if (pthread_create(&system_temp_thr_id, &thr_attr, 1648 system_temp_thr, NULL) != 0) { 1649 envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED); 1650 } else { 1651 system_temp_thr_created = B_TRUE; 1652 if (env_debug) 1653 envd_log(LOG_ERR, 1654 "Created thread to monitor system temperatures\n"); 1655 } 1656 } 1657 1658 /* 1659 * Create a thread to monitor fans 1660 */ 1661 if ((fan_monitor) && (fan_thr_created == B_FALSE)) { 1662 if (pthread_create(&fan_thr_id, &thr_attr, fan_thr, NULL) != 0) 1663 envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED); 1664 else { 1665 fan_thr_created = B_TRUE; 1666 if (env_debug) { 1667 envd_log(LOG_ERR, 1668 "Created thread to monitor system fans\n"); 1669 } 1670 } 1671 } 1672 1673 /* 1674 * Create a thread to monitor PM state 1675 */ 1676 if ((pm_monitor) && (pmthr_created == B_FALSE)) { 1677 if (pthread_create(&pmthr_tid, &thr_attr, pmthr, NULL) != 0) 1678 envd_log(LOG_CRIT, PM_THREAD_CREATE_FAILED); 1679 else { 1680 pmthr_created = B_TRUE; 1681 if (env_debug) 1682 envd_log(LOG_ERR, 1683 "Created thread to monitor system power state\n"); 1684 } 1685 } 1686 1687 /* 1688 * Create a thread to monitor disk temperature 1689 */ 1690 if ((disk_temp_monitor) && (disk_temp_thr_created == B_FALSE)) { 1691 if 
(pthread_create(&disk_temp_thr_id, &thr_attr, 1692 disk_temp_thr, NULL) != 0) { 1693 envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED); 1694 } else { 1695 disk_temp_thr_created = B_TRUE; 1696 if (env_debug) 1697 envd_log(LOG_ERR, 1698 "Created thread for disk temperatures\n"); 1699 } 1700 } 1701 1702 return (0); 1703 } 1704 1705 static void 1706 piclenvd_register(void) 1707 { 1708 picld_plugin_register(&my_reg_info); 1709 } 1710 1711 static void 1712 piclenvd_init(void) 1713 { 1714 1715 (void) env_picl_setup_tuneables(); 1716 1717 /* 1718 * Setup the environmental data structures 1719 */ 1720 if (envd_setup() != 0) { 1721 envd_log(LOG_CRIT, ENVD_PLUGIN_INIT_FAILED); 1722 return; 1723 } 1724 1725 /* 1726 * Now setup/populate PICL tree 1727 */ 1728 env_picl_setup(); 1729 } 1730 1731 static void 1732 piclenvd_fini(void) 1733 { 1734 1735 /* 1736 * Invoke env_picl_destroy() to remove any PICL nodes/properties 1737 * (including volatile properties) we created. Once this call 1738 * returns, there can't be any more calls from the PICL framework 1739 * to get current temperature or fan speed. 1740 */ 1741 env_picl_destroy(); 1742 envd_close_sensors(); 1743 envd_close_fans(); 1744 } 1745 1746 /*VARARGS2*/ 1747 void 1748 envd_log(int pri, const char *fmt, ...) 
1749 { 1750 va_list ap; 1751 1752 va_start(ap, fmt); 1753 vsyslog(pri, fmt, ap); 1754 va_end(ap); 1755 } 1756 1757 /* 1758 * Tunables support functions 1759 */ 1760 static env_tuneable_t * 1761 tuneable_lookup(picl_prophdl_t proph) 1762 { 1763 int i; 1764 env_tuneable_t *tuneablep = NULL; 1765 1766 for (i = 0; i < ntuneables; i++) { 1767 tuneablep = &tuneables[i]; 1768 if (tuneablep->proph == proph) 1769 return (tuneablep); 1770 } 1771 1772 return (NULL); 1773 } 1774 1775 static int 1776 get_string_val(ptree_rarg_t *parg, void *buf) 1777 { 1778 picl_prophdl_t proph; 1779 env_tuneable_t *tuneablep; 1780 1781 proph = parg->proph; 1782 1783 tuneablep = tuneable_lookup(proph); 1784 1785 if (tuneablep == NULL) 1786 return (PICL_FAILURE); 1787 1788 (void) memcpy(buf, tuneablep->value, tuneablep->nbytes); 1789 1790 return (PICL_SUCCESS); 1791 } 1792 1793 static int 1794 set_string_val(ptree_warg_t *parg, const void *buf) 1795 { 1796 picl_prophdl_t proph; 1797 env_tuneable_t *tuneablep; 1798 1799 if (parg->cred.dc_euid != 0) 1800 return (PICL_PERMDENIED); 1801 1802 proph = parg->proph; 1803 1804 tuneablep = tuneable_lookup(proph); 1805 1806 if (tuneablep == NULL) 1807 return (PICL_FAILURE); 1808 1809 (void) memcpy(tuneables->value, buf, tuneables->nbytes); 1810 1811 1812 return (PICL_SUCCESS); 1813 } 1814 1815 static int 1816 get_int_val(ptree_rarg_t *parg, void *buf) 1817 { 1818 picl_prophdl_t proph; 1819 env_tuneable_t *tuneablep; 1820 1821 proph = parg->proph; 1822 1823 tuneablep = tuneable_lookup(proph); 1824 1825 if (tuneablep == NULL) 1826 return (PICL_FAILURE); 1827 1828 (void) memcpy(buf, tuneablep->value, tuneablep->nbytes); 1829 1830 return (PICL_SUCCESS); 1831 } 1832 1833 static int 1834 set_int_val(ptree_warg_t *parg, const void *buf) 1835 { 1836 picl_prophdl_t proph; 1837 env_tuneable_t *tuneablep; 1838 1839 if (parg->cred.dc_euid != 0) 1840 return (PICL_PERMDENIED); 1841 1842 proph = parg->proph; 1843 1844 tuneablep = tuneable_lookup(proph); 1845 1846 if 
(tuneablep == NULL) 1847 return (PICL_FAILURE); 1848 1849 (void) memcpy(tuneablep->value, buf, tuneablep->nbytes); 1850 1851 return (PICL_SUCCESS); 1852 } 1853 1854 boolean_t 1855 has_fan_failed(env_fan_t *fanp) 1856 { 1857 fanspeed_t fan_speed; 1858 uchar_t status; 1859 uint8_t tach; 1860 int real_tach; 1861 int ret, ntries; 1862 1863 if (fanp->fd == -1) 1864 return (B_TRUE); 1865 1866 /* 1867 * Read RF_FAN_STATUS bit of the fan fault register, retry if 1868 * the PIC is busy, with a 1 second delay to allow it to update. 1869 */ 1870 for (ntries = 0; ntries < MAX_RETRIES_FOR_FAN_FAULT; ntries++) { 1871 ret = ioctl(fanp->fd, PIC_GET_FAN_STATUS, &status); 1872 if ((ret == 0) && ((status & 0x1) == 0)) 1873 break; 1874 (void) sleep(1); 1875 } 1876 1877 if (ntries > 0) { 1878 if (env_debug) { 1879 envd_log(LOG_ERR, 1880 "%d retries attempted in reading fan status.\n", 1881 ntries); 1882 } 1883 } 1884 1885 if (ntries == MAX_RETRIES_FOR_FAN_FAULT) { 1886 (void) strncpy(fan_status_string, NOT_AVAILABLE, 1887 sizeof (fan_status_string)); 1888 (void) strncpy(fan_rpm_string, NOT_AVAILABLE, 1889 sizeof (fan_rpm_string)); 1890 return (B_TRUE); 1891 } 1892 1893 if (env_debug) 1894 envd_log(LOG_ERR, "fan status = 0x%x\n", status); 1895 1896 /* 1897 * ST_FFAULT bit isn't implemented yet and we're reading only 1898 * individual fan status 1899 */ 1900 if (status & 0x1) { 1901 (void) snprintf(fan_status_string, sizeof (fan_status_string), 1902 "0x%x", status); 1903 if (ioctl(fanp->fd, PIC_GET_FAN_SPEED, &tach) != 0) { 1904 (void) strncpy(fan_rpm_string, NOT_AVAILABLE, 1905 sizeof (fan_rpm_string)); 1906 } else { 1907 real_tach = tach << 8; 1908 fan_speed = TACH_TO_RPM(real_tach); 1909 (void) snprintf(fan_rpm_string, sizeof (fan_rpm_string), 1910 "%d", fan_speed); 1911 } 1912 return (B_TRUE); 1913 } 1914 1915 return (B_FALSE); 1916 } 1917