xref: /illumos-gate/usr/src/cmd/picl/plugins/sun4u/chicago/envd/piclenvd.c (revision 47b4653e9ff2a8aebb64f9e357713fd04108674b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * This file contains the environmental PICL plug-in module.
28  */
29 
30 /*
31  * This plugin sets up the PICLTREE for Chicago WS.
32  * It provides functionality to get/set temperatures and
33  * fan speeds.
34  *
35  * The environmental policy defaults to the auto mode
36  * as programmed by OBP at boot time.
37  */
38 
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <sys/sysmacros.h>
42 #include <limits.h>
43 #include <string.h>
44 #include <strings.h>
45 #include <stdarg.h>
46 #include <alloca.h>
47 #include <unistd.h>
48 #include <sys/processor.h>
49 #include <syslog.h>
50 #include <errno.h>
51 #include <fcntl.h>
52 #include <picl.h>
53 #include <picltree.h>
54 #include <picldefs.h>
55 #include <pthread.h>
56 #include <signal.h>
57 #include <libdevinfo.h>
58 #include <sys/pm.h>
59 #include <sys/open.h>
60 #include <sys/time.h>
61 #include <sys/utsname.h>
62 #include <sys/systeminfo.h>
63 #include <note.h>
64 #include <sys/pic16f747.h>
65 #include "envd.h"
66 #include <sys/scsi/scsi.h>
67 #include <sys/scsi/generic/commands.h>
68 
69 int	debug_fd;
70 /*
71  * PICL plugin entry points
72  */
73 static void piclenvd_register(void);
74 static void piclenvd_init(void);
75 static void piclenvd_fini(void);
76 
77 /*
78  * Env setup routines
79  */
80 extern void env_picl_setup(void);
81 extern void env_picl_destroy(void);
82 extern int env_picl_setup_tuneables(void);
83 
84 static boolean_t has_fan_failed(env_fan_t *fanp);
85 
86 /*
87  * PSU fan fault handling
88  */
89 static boolean_t has_psufan_failed(void);
90 static int psufan_last_status = FAN_OK;
91 
92 #pragma init(piclenvd_register)
93 
94 /*
95  * Plugin registration information
96  */
97 static picld_plugin_reg_t my_reg_info = {
98 	PICLD_PLUGIN_VERSION,
99 	PICLD_PLUGIN_CRITICAL,
100 	"SUNW_piclenvd",
101 	piclenvd_init,
102 	piclenvd_fini,
103 };
104 
105 #define	REGISTER_INFORMATION_STRING_LENGTH	16
106 static char fan_rpm_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};
107 static char fan_status_string[REGISTER_INFORMATION_STRING_LENGTH] = {0};
108 
109 static int	scsi_log_sense(env_disk_t *diskp, uchar_t page_code,
110 			void *pagebuf, uint16_t pagelen, int page_control);
111 static int scsi_mode_select(env_disk_t *diskp, uchar_t page_code,
112 			uchar_t *pagebuf, uint16_t pagelen);
113 
114 static int	get_disk_temp(env_disk_t *);
115 
116 /*
117  * ES Segment stuff
118  */
119 static es_sensor_blk_t sensor_ctl[MAX_SENSORS];
120 
121 /*
122  * Default limits for sensors, in case ES segment is not present, or has
123  * inconsistent information
124  */
125 static es_sensor_blk_t sensor_default_ctl[MAX_SENSORS] = {
126 	{
127 	    CPU0_HIGH_POWER_OFF, CPU0_HIGH_SHUTDOWN, CPU0_HIGH_WARNING,
128 	    CPU0_LOW_WARNING, CPU0_LOW_SHUTDOWN, CPU0_LOW_POWER_OFF
129 	},
130 	{
131 	    CPU1_HIGH_POWER_OFF, CPU1_HIGH_SHUTDOWN, CPU1_HIGH_WARNING,
132 	    CPU1_LOW_WARNING, CPU1_LOW_SHUTDOWN, CPU1_LOW_POWER_OFF
133 	},
134 	{
135 	    ADT7462_HIGH_POWER_OFF, ADT7462_HIGH_SHUTDOWN, ADT7462_HIGH_WARNING,
136 	    ADT7462_LOW_WARNING, ADT7462_LOW_SHUTDOWN, ADT7462_LOW_POWER_OFF
137 	},
138 	{
139 	    MB_HIGH_POWER_OFF, MB_HIGH_SHUTDOWN, MB_HIGH_WARNING,
140 	    MB_LOW_WARNING, MB_LOW_SHUTDOWN, MB_LOW_POWER_OFF
141 	},
142 	{
143 	    LM95221_HIGH_POWER_OFF, LM95221_HIGH_SHUTDOWN, LM95221_HIGH_WARNING,
144 	    LM95221_LOW_WARNING, LM95221_LOW_SHUTDOWN, LM95221_LOW_POWER_OFF
145 	},
146 	{
147 	    FIRE_HIGH_POWER_OFF, FIRE_HIGH_SHUTDOWN, FIRE_HIGH_WARNING,
148 	    FIRE_LOW_WARNING, FIRE_LOW_SHUTDOWN, FIRE_LOW_POWER_OFF
149 	},
150 	{
151 	    LSI1064_HIGH_POWER_OFF, LSI1064_HIGH_SHUTDOWN, LSI1064_HIGH_WARNING,
152 	    LSI1064_LOW_WARNING, LSI1064_LOW_SHUTDOWN, LSI1064_LOW_POWER_OFF
153 	},
154 	{
155 	    FRONT_PANEL_HIGH_POWER_OFF, FRONT_PANEL_HIGH_SHUTDOWN,
156 	    FRONT_PANEL_HIGH_WARNING, FRONT_PANEL_LOW_WARNING,
157 	    FRONT_PANEL_LOW_SHUTDOWN, FRONT_PANEL_LOW_POWER_OFF
158 	},
159 	{
160 	    PSU_HIGH_POWER_OFF, PSU_HIGH_SHUTDOWN, PSU_HIGH_WARNING,
161 	    PSU_LOW_WARNING, PSU_LOW_SHUTDOWN, PSU_LOW_POWER_OFF
162 	}
163 };
164 
165 /*
166  * Env thread variables
167  */
168 static boolean_t  system_shutdown_started = B_FALSE;
169 static boolean_t  system_temp_thr_created = B_FALSE;
170 static pthread_t  system_temp_thr_id;
171 static pthread_attr_t thr_attr;
172 static boolean_t  disk_temp_thr_created = B_FALSE;
173 static pthread_t  disk_temp_thr_id;
174 static boolean_t  fan_thr_created = B_FALSE;
175 static pthread_t  fan_thr_id;
176 
177 /*
178  * PM thread related variables
179  */
180 static pthread_t	pmthr_tid;	/* pmthr thread ID */
181 static int		pm_fd = -1;	/* PM device file descriptor */
182 static boolean_t	pmthr_created = B_FALSE;
183 static int		cur_lpstate;	/* cur low power state */
184 
185 /*
186  * Envd plug-in verbose flag set by SUNW_PICLENVD_DEBUG environment var
187  * Setting the verbose tuneable also enables debugging for better
188  * control
189  */
190 int	env_debug = 0;
191 
192 /*
193  * These are debug variables for keeping track of the total number
194  * of Fan and Temp sensor retries over the lifetime of the plugin.
195  */
196 static int total_fan_retries = 0;
197 static int total_temp_retries = 0;
198 
199 /*
200  * Fan devices
201  */
202 static env_fan_t envd_system_fan0 = {
203 	ENV_SYSTEM_FAN0, ENV_SYSTEM_FAN0_DEVFS, SYSTEM_FAN0_ID,
204 	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
205 };
206 static env_fan_t envd_system_fan1 = {
207 	ENV_SYSTEM_FAN1, ENV_SYSTEM_FAN1_DEVFS, SYSTEM_FAN1_ID,
208 	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
209 };
210 static env_fan_t envd_system_fan2 = {
211 	ENV_SYSTEM_FAN2, ENV_SYSTEM_FAN2_DEVFS, SYSTEM_FAN2_ID,
212 	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
213 };
214 static env_fan_t envd_system_fan3 = {
215 	ENV_SYSTEM_FAN3, ENV_SYSTEM_FAN3_DEVFS, SYSTEM_FAN3_ID,
216 	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
217 };
218 static env_fan_t envd_system_fan4 = {
219 	ENV_SYSTEM_FAN4, ENV_SYSTEM_FAN4_DEVFS, SYSTEM_FAN4_ID,
220 	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
221 };
222 
223 /*
224  * Disk devices
225  */
226 static env_disk_t envd_disk0 = {
227 	ENV_DISK0, ENV_DISK0_DEVFS, DISK0_PHYSPATH, DISK0_NODE_PATH,
228 	DISK0_ID, -1,
229 };
230 static env_disk_t envd_disk1 = {
231 	ENV_DISK1, ENV_DISK1_DEVFS, DISK1_PHYSPATH, DISK1_NODE_PATH,
232 	DISK1_ID, -1,
233 };
234 static env_disk_t envd_disk2 = {
235 	ENV_DISK2, ENV_DISK2_DEVFS, DISK2_PHYSPATH, DISK2_NODE_PATH,
236 	DISK2_ID, -1,
237 };
238 static env_disk_t envd_disk3 = {
239 	ENV_DISK3, ENV_DISK3_DEVFS, DISK3_PHYSPATH, DISK3_NODE_PATH,
240 	DISK3_ID, -1,
241 };
242 
243 /*
244  * Sensors
245  */
246 static env_sensor_t envd_sensor_cpu0 = {
247 	SENSOR_CPU0, SENSOR_CPU0_DEVFS, CPU0_SENSOR_ID, -1, NULL,
248 };
249 static env_sensor_t envd_sensor_cpu1 = {
250 	SENSOR_CPU1, SENSOR_CPU1_DEVFS, CPU1_SENSOR_ID, -1, NULL,
251 };
252 static env_sensor_t envd_sensor_adt7462 = {
253 	SENSOR_ADT7462, SENSOR_ADT7462_DEVFS, ADT7462_SENSOR_ID, -1, NULL,
254 };
255 static env_sensor_t envd_sensor_mb = {
256 	SENSOR_MB, SENSOR_MB_DEVFS, MB_SENSOR_ID, -1, NULL,
257 };
258 static env_sensor_t envd_sensor_lm95221 = {
259 	SENSOR_LM95221, SENSOR_LM95221_DEVFS, LM95221_SENSOR_ID, -1, NULL,
260 };
261 static env_sensor_t envd_sensor_fire = {
262 	SENSOR_FIRE, SENSOR_FIRE_DEVFS, FIRE_SENSOR_ID, -1, NULL,
263 };
264 static env_sensor_t envd_sensor_lsi1064 = {
265 	SENSOR_LSI1064, SENSOR_LSI1064_DEVFS, LSI1064_SENSOR_ID, -1, NULL,
266 };
267 static env_sensor_t envd_sensor_front_panel = {
268 	SENSOR_FRONT_PANEL, SENSOR_FRONT_PANEL_DEVFS, FRONT_PANEL_SENSOR_ID,
269 	-1, NULL,
270 };
271 static env_sensor_t envd_sensor_psu = {
272 	SENSOR_PSU, SENSOR_PSU_DEVFS, PSU_SENSOR_ID, -1, NULL,
273 };
274 
/*
 * Names of the vendor-id and device-id properties of the SCSI controller
 * node.  They identify a particular controller, such as the LSI1064.
 */
#define	VENDOR_ID	"vendor-id"
#define	DEVICE_ID	"device-id"

/*
 * SCSI disk drives are not required to report their temperature, so the
 * list of supported log pages is scanned first to find out whether the
 * temperature page is available.
 */
#define	SUPPORTED_LPAGES	0	/* log page listing supported pages */
#define	TEMPERATURE_PAGE	0x0D	/* temperature log page code */
#define	LOGPAGEHDRSIZE	4		/* bytes in a log page header */
292 
293 /*
294  * NULL terminated array of fans
295  */
296 static env_fan_t *envd_fans[] = {
297 	&envd_system_fan0,
298 	&envd_system_fan1,
299 	&envd_system_fan2,
300 	&envd_system_fan3,
301 	&envd_system_fan4,
302 	NULL
303 };
304 
305 /*
306  * NULL terminated array of disks
307  */
308 static env_disk_t *envd_disks[] = {
309 	&envd_disk0,
310 	&envd_disk1,
311 	&envd_disk2,
312 	&envd_disk3,
313 	NULL
314 };
315 
316 /*
317  * NULL terminated array of temperature sensors
318  */
319 #define	N_ENVD_SENSORS	9
320 static env_sensor_t *envd_sensors[] = {
321 	&envd_sensor_cpu0,
322 	&envd_sensor_cpu1,
323 	&envd_sensor_adt7462,
324 	&envd_sensor_mb,
325 	&envd_sensor_lm95221,
326 	&envd_sensor_fire,
327 	&envd_sensor_lsi1064,
328 	&envd_sensor_front_panel,
329 	&envd_sensor_psu,
330 	NULL
331 };
332 
333 #define	NOT_AVAILABLE	"NA"
334 
335 /*
336  * Tuneables
337  */
338 #define	ENABLE	1
339 #define	DISABLE	0
340 
341 static	int	disk_high_warn_temperature	= DISK_HIGH_WARN_TEMPERATURE;
342 static	int	disk_low_warn_temperature	= DISK_LOW_WARN_TEMPERATURE;
343 static	int	disk_high_shutdown_temperature	=
344 						DISK_HIGH_SHUTDOWN_TEMPERATURE;
345 static	int	disk_low_shutdown_temperature	= DISK_LOW_SHUTDOWN_TEMPERATURE;
346 
347 static	int	disk_scan_interval		= DISK_SCAN_INTERVAL;
348 static	int	sensor_scan_interval		= SENSOR_SCAN_INTERVAL;
349 static	int	fan_scan_interval		= FAN_SCAN_INTERVAL;
350 
351 static int get_int_val(ptree_rarg_t *parg, void *buf);
352 static int set_int_val(ptree_warg_t *parg, const void *buf);
353 static int get_string_val(ptree_rarg_t *parg, void *buf);
354 static int set_string_val(ptree_warg_t *parg, const void *buf);
355 
356 static int 	shutdown_override	= 0;
357 static int	sensor_warning_interval	= SENSOR_WARNING_INTERVAL;
358 static int	sensor_warning_duration	= SENSOR_WARNING_DURATION;
359 static int	sensor_shutdown_interval = SENSOR_SHUTDOWN_INTERVAL;
360 static int	disk_warning_interval	= DISK_WARNING_INTERVAL;
361 static int	disk_warning_duration	= DISK_WARNING_DURATION;
362 static int 	disk_shutdown_interval	= DISK_SHUTDOWN_INTERVAL;
363 
364 static int	system_temp_monitor	= 1;	/* enabled */
365 static int	fan_monitor		= 1;	/* enabled */
366 static int	pm_monitor		= 1;	/* enabled */
367 
368 /* Disable disk temperature monitoring until we have LSI fw support */
369 int		disk_temp_monitor	= 0;
370 
371 static char	shutdown_cmd[] = SHUTDOWN_CMD;
372 const char	*iofru_devname = I2C_DEVFS "/" IOFRU_DEV;
373 
374 env_tuneable_t tuneables[] = {
375 	{"system_temp-monitor", PICL_PTYPE_INT, &system_temp_monitor,
376 	    &get_int_val, &set_int_val, sizeof (int)},
377 
378 	{"fan-monitor", PICL_PTYPE_INT, &fan_monitor,
379 	    &get_int_val, &set_int_val, sizeof (int)},
380 
381 	{"pm-monitor", PICL_PTYPE_INT, &pm_monitor,
382 	    &get_int_val, &set_int_val, sizeof (int)},
383 
384 	{"shutdown-override", PICL_PTYPE_INT, &shutdown_override,
385 	    &get_int_val, &set_int_val, sizeof (int)},
386 
387 	{"sensor-warning-duration", PICL_PTYPE_INT,
388 	    &sensor_warning_duration,
389 	    &get_int_val, &set_int_val,
390 	    sizeof (int)},
391 
392 	{"disk-scan-interval", PICL_PTYPE_INT,
393 	    &disk_scan_interval,
394 	    &get_int_val, &set_int_val,
395 	    sizeof (int)},
396 
397 	{"fan-scan-interval", PICL_PTYPE_INT,
398 	    &fan_scan_interval,
399 	    &get_int_val, &set_int_val,
400 	    sizeof (int)},
401 
402 	{"sensor-scan-interval", PICL_PTYPE_INT,
403 	    &sensor_scan_interval,
404 	    &get_int_val, &set_int_val,
405 	    sizeof (int)},
406 
407 	{"sensor_warning-interval", PICL_PTYPE_INT, &sensor_warning_interval,
408 	    &get_int_val, &set_int_val,
409 	    sizeof (int)},
410 
411 	{"sensor_shutdown-interval", PICL_PTYPE_INT, &sensor_shutdown_interval,
412 	    &get_int_val, &set_int_val,
413 	    sizeof (int)},
414 
415 	{"disk_warning-interval", PICL_PTYPE_INT, &disk_warning_interval,
416 	    &get_int_val, &set_int_val,
417 	    sizeof (int)},
418 
419 	{"disk_warning-duration", PICL_PTYPE_INT, &disk_warning_duration,
420 	    &get_int_val, &set_int_val,
421 	    sizeof (int)},
422 
423 	{"disk_shutdown-interval", PICL_PTYPE_INT, &disk_shutdown_interval,
424 	    &get_int_val, &set_int_val,
425 	    sizeof (int)},
426 
427 	{"shutdown-command", PICL_PTYPE_CHARSTRING, shutdown_cmd,
428 	    &get_string_val, &set_string_val,
429 	    sizeof (shutdown_cmd)},
430 
431 	{"monitor-disk-temp", PICL_PTYPE_INT, &disk_temp_monitor,
432 	    &get_int_val, &set_int_val, sizeof (int)},
433 
434 	{"disk-high-warn-temperature", PICL_PTYPE_INT,
435 	    &disk_high_warn_temperature, &get_int_val,
436 	    &set_int_val, sizeof (int)},
437 
438 	{"disk-low-warn-temperature", PICL_PTYPE_INT,
439 	    &disk_low_warn_temperature, &get_int_val,
440 	    &set_int_val, sizeof (int)},
441 
442 	{"disk-high-shutdown-temperature", PICL_PTYPE_INT,
443 	    &disk_high_shutdown_temperature, &get_int_val,
444 	    &set_int_val, sizeof (int)},
445 
446 	{"disk-low-shutdown-temperature", PICL_PTYPE_INT,
447 	    &disk_low_shutdown_temperature, &get_int_val,
448 	    &set_int_val, sizeof (int)},
449 
450 	{"verbose", PICL_PTYPE_INT, &env_debug,
451 	    &get_int_val, &set_int_val, sizeof (int)}
452 };
453 
454 /*
455  * We use this to figure out how many tuneables there are
456  * This is variable because the publishing routine needs this info
457  * in piclenvsetup.c
458  */
459 int	ntuneables = (sizeof (tuneables)/sizeof (tuneables[0]));
460 
461 /*
462  * Lookup fan and return a pointer to env_fan_t data structure.
463  */
464 env_fan_t *
fan_lookup(char * name)465 fan_lookup(char *name)
466 {
467 	int		i;
468 	env_fan_t	*fanp;
469 
470 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
471 		if (strcmp(fanp->name, name) == 0)
472 			return (fanp);
473 	}
474 	return (NULL);
475 }
476 
477 /*
478  * Lookup sensor and return a pointer to env_sensor_t data structure.
479  */
480 env_sensor_t *
sensor_lookup(char * name)481 sensor_lookup(char *name)
482 {
483 	env_sensor_t	*sensorp;
484 	int		i;
485 
486 	for (i = 0; i < N_ENVD_SENSORS; ++i) {
487 		sensorp = envd_sensors[i];
488 		if (strcmp(sensorp->name, name) == 0)
489 			return (sensorp);
490 	}
491 	return (NULL);
492 }
493 
494 /*
495  * Lookup disk and return a pointer to env_disk_t data structure.
496  */
497 env_disk_t *
disk_lookup(char * name)498 disk_lookup(char *name)
499 {
500 	int		i;
501 	env_disk_t	*diskp;
502 
503 	for (i = 0; (diskp = envd_disks[i]) != NULL; i++) {
504 		if (strncmp(diskp->name, name, strlen(name)) == 0)
505 			return (diskp);
506 	}
507 	return (NULL);
508 }
509 
510 /*
511  * Get current temperature
512  * Returns -1 on error, 0 if successful
513  */
514 int
get_temperature(env_sensor_t * sensorp,tempr_t * temp)515 get_temperature(env_sensor_t *sensorp, tempr_t *temp)
516 {
517 	int	fd = sensorp->fd;
518 	int	retval = 0;
519 
520 	if (fd == -1)
521 		retval = -1;
522 	else if (ioctl(fd, PIC_GET_TEMPERATURE, temp) != 0) {
523 
524 		retval = -1;
525 
526 		sensorp->error++;
527 
528 		if (sensorp->error == MAX_SENSOR_RETRIES) {
529 			envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_FAIL,
530 			    sensorp->name, errno, strerror(errno));
531 		}
532 
533 		total_temp_retries++;
534 		(void) sleep(1);
535 
536 	} else if (sensorp->error != 0) {
537 		if (sensorp->error >= MAX_SENSOR_RETRIES) {
538 			envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_OK,
539 			    sensorp->name);
540 		}
541 
542 		sensorp->error = 0;
543 
544 		if (total_temp_retries && env_debug) {
545 			envd_log(LOG_WARNING,
546 			    "Total retries for sensors = %d",
547 			    total_temp_retries);
548 		}
549 	}
550 
551 	return (retval);
552 }
553 
554 /*
555  * Get current disk temperature
556  * Returns -1 on error, 0 if successful
557  */
558 int
disk_temperature(env_disk_t * diskp,tempr_t * temp)559 disk_temperature(env_disk_t *diskp, tempr_t *temp)
560 {
561 	int	retval = 0;
562 
563 	if (diskp == NULL)
564 		retval = -1;
565 	else
566 		*temp = diskp->current_temp;
567 
568 	return (retval);
569 }
570 
571 /*
572  * Get current fan speed
573  * This function returns a RPM value for fanspeed
574  * in fanspeedp.
575  * Returns -1 on error, 0 if successful
576  */
577 int
get_fan_speed(env_fan_t * fanp,fanspeed_t * fanspeedp)578 get_fan_speed(env_fan_t *fanp, fanspeed_t *fanspeedp)
579 {
580 	uint8_t tach;
581 	int	real_tach;
582 	int	retries;
583 
584 	if (fanp->fd == -1)
585 		return (-1);
586 
587 	if (has_fan_failed(fanp)) {
588 		*fanspeedp = 0;
589 		return (0);
590 	}
591 
592 	/* try to read the fan information */
593 	for (retries = 0; retries < MAX_FAN_RETRIES; retries++) {
594 		if (ioctl(fanp->fd, PIC_GET_FAN_SPEED, &tach) == 0)
595 			break;
596 		(void) sleep(1);
597 	}
598 
599 	total_fan_retries += retries;
600 	if (retries >= MAX_FAN_RETRIES)
601 		return (-1);
602 
603 	if (total_fan_retries && env_debug) {
604 		envd_log(LOG_WARNING, "total retries for fan = %d",
605 		    total_fan_retries);
606 	}
607 
608 	real_tach = tach << 8;
609 	*fanspeedp = TACH_TO_RPM(real_tach);
610 	return (0);
611 }
612 
613 /*
614  * Set fan speed
615  * This function accepts a percentage of fan speed
616  * from 0-100 and programs the HW monitor fans to the corresponding
617  * fanspeed value.
618  * Returns -1 on error, -2 on invalid args passed, 0 if successful
619  */
620 int
set_fan_speed(env_fan_t * fanp,fanspeed_t fanspeed)621 set_fan_speed(env_fan_t *fanp, fanspeed_t fanspeed)
622 {
623 	uint8_t	speed;
624 
625 	if (fanp->fd == -1)
626 		return (-1);
627 
628 	if (fanspeed < 0 || fanspeed > 100)
629 		return (-2);
630 
631 	speed = fanspeed;
632 	if (ioctl(fanp->fd, PIC_SET_FAN_SPEED, &speed) != 0)
633 		return (-1);
634 
635 	return (0);
636 }
637 
638 /*
639  * close all fan devices
640  */
641 static void
envd_close_fans(void)642 envd_close_fans(void)
643 {
644 	int		i;
645 	env_fan_t	*fanp;
646 
647 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
648 		if (fanp->fd != -1) {
649 			(void) close(fanp->fd);
650 			fanp->fd = -1;
651 		}
652 	}
653 }
654 
655 /*
656  * Close sensor devices and freeup resources
657  */
658 static void
envd_close_sensors(void)659 envd_close_sensors(void)
660 {
661 	env_sensor_t	*sensorp;
662 	int		i;
663 
664 	for (i = 0; i < N_ENVD_SENSORS; ++i) {
665 		sensorp = envd_sensors[i];
666 		if (sensorp->fd != -1) {
667 			(void) close(sensorp->fd);
668 			sensorp->fd = -1;
669 		}
670 	}
671 }
672 
673 /*
674  * Open fan devices and initialize per fan data structure.
675  */
676 static int
envd_setup_fans(void)677 envd_setup_fans(void)
678 {
679 	int		i, fd;
680 	env_fan_t	*fanp;
681 	int		fancnt = 0;
682 	picl_nodehdl_t tnodeh;
683 
684 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
685 		fanp->last_status = FAN_OK;
686 
687 		/* Make sure cpu0/1 present for validating cpu fans */
688 		if (fanp->id == CPU0_FAN_ID) {
689 			if (ptree_get_node_by_path(CPU0_PATH, &tnodeh) !=
690 			    PICL_SUCCESS) {
691 					if (env_debug) {
692 						envd_log(LOG_ERR,
693 					"get node by path failed for %s\n",
694 						    CPU0_PATH);
695 					}
696 					fanp->present = B_FALSE;
697 					continue;
698 			}
699 		}
700 		if (fanp->id == CPU1_FAN_ID) {
701 			if (ptree_get_node_by_path(CPU1_PATH, &tnodeh) !=
702 			    PICL_SUCCESS) {
703 					if (env_debug) {
704 						envd_log(LOG_ERR,
705 				"get node by path failed for %s\n", CPU0_PATH);
706 					}
707 					fanp->present = B_FALSE;
708 					continue;
709 			}
710 		}
711 		if ((fd = open(fanp->devfs_path, O_RDWR)) == -1) {
712 			envd_log(LOG_CRIT,
713 			    ENV_FAN_OPEN_FAIL, fanp->name,
714 			    fanp->devfs_path, errno, strerror(errno));
715 			fanp->present = B_FALSE;
716 			continue;
717 		}
718 		fanp->fd = fd;
719 		fanp->present = B_TRUE;
720 		fancnt++;
721 	}
722 
723 	if (fancnt == 0)
724 		return (-1);
725 
726 	return (0);
727 }
728 
729 static int
envd_setup_disks(void)730 envd_setup_disks(void)
731 {
732 	int	ret, i, page_index, page_len;
733 	picl_nodehdl_t tnodeh;
734 	env_disk_t	*diskp;
735 	uint_t	vendor_id;
736 	uint_t	device_id;
737 	uchar_t	log_page[256];
738 
739 	if (ptree_get_node_by_path(SCSI_CONTROLLER_NODE_PATH,
740 	    &tnodeh) != PICL_SUCCESS) {
741 		if (env_debug) {
742 			envd_log(LOG_ERR, "On-Board SCSI controller %s "
743 			    "not found in the system.\n",
744 			    SCSI_CONTROLLER_NODE_PATH);
745 		}
746 		return (-1);
747 	}
748 
749 	if ((ret = ptree_get_propval_by_name(tnodeh, VENDOR_ID,
750 	    &vendor_id, sizeof (vendor_id))) != 0) {
751 		if (env_debug) {
752 			envd_log(LOG_ERR, "Error in getting vendor-id "
753 			    "for SCSI controller. ret = %d errno = 0x%d\n",
754 			    ret, errno);
755 		}
756 		return (-1);
757 	}
758 	if ((ret = ptree_get_propval_by_name(tnodeh, DEVICE_ID,
759 	    &device_id, sizeof (device_id))) != 0) {
760 		if (env_debug) {
761 			envd_log(LOG_ERR, "Error in getting device-id "
762 			    "for SCSI controller. ret = %d errno = 0x%d\n",
763 			    ret, errno);
764 		}
765 		return (-1);
766 	}
767 
768 	/*
769 	 * We have found LSI1064 SCSi controller onboard.
770 	 */
771 	for (i = 0; (diskp = envd_disks[i]) != NULL; i++) {
772 		if (ptree_get_node_by_path(diskp->nodepath,
773 		    &tnodeh) != PICL_SUCCESS) {
774 			diskp->present = B_FALSE;
775 			if (env_debug) {
776 				envd_log(LOG_ERR,
777 				    "DISK %d: %s not found in the system.\n",
778 				    diskp->id, diskp->nodepath);
779 			}
780 			continue;
781 		}
782 		if ((diskp->fd = open(diskp->devfs_path, O_RDONLY)) == -1) {
783 			diskp->present = B_FALSE;
784 			if (env_debug) {
785 				envd_log(LOG_ERR,
786 				    "Error in opening %s errno = 0x%x\n",
787 				    diskp->devfs_path, errno);
788 			}
789 			continue;
790 		}
791 		diskp->present = B_TRUE;
792 		diskp->tpage_supported = B_FALSE;
793 		diskp->smart_supported = B_FALSE;
794 		diskp->warning_tstamp = 0;
795 		diskp->shutdown_tstamp = 0;
796 		diskp->high_warning = disk_high_warn_temperature;
797 		diskp->low_warning = disk_low_warn_temperature;
798 		diskp->high_shutdown = disk_high_shutdown_temperature;
799 		diskp->low_shutdown = disk_low_shutdown_temperature;
800 		/*
801 		 * Find out if the Temperature page is supported by the disk.
802 		 */
803 		if (scsi_log_sense(diskp, SUPPORTED_LPAGES, log_page,
804 		    sizeof (log_page), 1) == 0) {
805 
806 			page_len = ((log_page[2] << 8) & 0xFF00) | log_page[3];
807 
808 			for (page_index = LOGPAGEHDRSIZE;
809 			    page_index < page_len + LOGPAGEHDRSIZE;
810 			    page_index++) {
811 				if (log_page[page_index] != TEMPERATURE_PAGE)
812 					continue;
813 
814 				diskp->tpage_supported = B_TRUE;
815 				if (env_debug) {
816 					envd_log(LOG_ERR,
817 					    "tpage supported for %s\n",
818 					    diskp->nodepath);
819 				}
820 			}
821 		}
822 		/*
823 		 * If the temp log page failed, we can check if this is
824 		 * a SATA drive and attempt to read the temperature
825 		 * using the SMART interface.
826 		 */
827 		if (diskp->tpage_supported != B_TRUE) {
828 			uchar_t iec_page[IEC_PAGE_SIZE];
829 
830 			if (env_debug)
831 				envd_log(LOG_ERR, "Turning on SMART\n");
832 
833 			(void) memset(iec_page, 0, sizeof (iec_page));
834 			iec_page[0] = IEC_PAGE;	/* SMART PAGE */
835 			iec_page[1] = 0xa;	/* length */
836 			/* Notification, only when requested */
837 			iec_page[3] = REPORT_ON_REQUEST;
838 
839 			ret = scsi_mode_select(diskp, IEC_PAGE,
840 			    iec_page, sizeof (iec_page));
841 
842 			/*
843 			 * Since we know this is a SMART capable
844 			 * drive, we will try to set the page and
845 			 * determine if the drive is not capable
846 			 * of reading the TEMP page when we
847 			 * try to read the temperature and disable
848 			 * it then. We do not fail when reading
849 			 * or writing this page because we will
850 			 * determine the SMART capabilities
851 			 * when reading the temperature.
852 			 */
853 			if ((ret != 0) && (env_debug)) {
854 				envd_log(LOG_ERR,
855 				    "Failed to set mode page");
856 			}
857 
858 			diskp->smart_supported = B_TRUE;
859 			diskp->tpage_supported = B_TRUE;
860 		}
861 
862 		if (get_disk_temp(diskp) < 0) {
863 			envd_log(LOG_ERR, " error reading temperature of:%s\n",
864 			    diskp->name);
865 		} else if (env_debug) {
866 			envd_log(LOG_ERR, "%s: temperature = %d\n",
867 			    diskp->name, diskp->current_temp);
868 		}
869 
870 	}
871 
872 	return (0);
873 }
874 
875 static int
envd_es_setup(void)876 envd_es_setup(void)
877 {
878 	seeprom_scn_t	scn_hdr;
879 	seeprom_seg_t	seg_hdr;
880 	es_data_t	*envseg;
881 	es_sensor_t	*sensorp;
882 	int		i, fd, id;
883 	int		envseg_len, esd_len;
884 	char		*envsegp;
885 
886 	/*
887 	 * Open the front io fru
888 	 */
889 	if ((fd = open(iofru_devname, O_RDONLY)) == -1) {
890 		envd_log(LOG_ERR, ENV_FRU_OPEN_FAIL, iofru_devname, errno);
891 		return (-1);
892 	}
893 
894 	/*
895 	 * Read section header from the fru SEEPROM
896 	 */
897 	if (lseek(fd, SSCN_OFFSET, SEEK_SET) == (off_t)-1 ||
898 	    read(fd, &scn_hdr, sizeof (scn_hdr)) != sizeof (scn_hdr)) {
899 		envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
900 		(void) close(fd);
901 		return (-1);
902 	}
903 	if ((scn_hdr.sscn_tag != SSCN_TAG) ||
904 	    (GET_UNALIGN16(&scn_hdr.sscn_ver) != SSCN_VER)) {
905 		envd_log(LOG_ERR, ENV_FRU_BAD_SCNHDR, scn_hdr.sscn_tag,
906 		    GET_UNALIGN16(&scn_hdr.sscn_ver));
907 		(void) close(fd);
908 		return (-1);
909 	}
910 
911 	/*
912 	 * Locate environmental segment
913 	 */
914 	for (i = 0; i < scn_hdr.sscn_nsegs; i++) {
915 		if (read(fd, &seg_hdr, sizeof (seg_hdr)) != sizeof (seg_hdr)) {
916 			envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
917 			(void) close(fd);
918 			return (-1);
919 		}
920 
921 		if (env_debug) {
922 			envd_log(LOG_INFO,
923 			    "Seg name: %x off:%x len:%x\n",
924 			    GET_UNALIGN16(&seg_hdr.sseg_name),
925 			    GET_UNALIGN16(&seg_hdr.sseg_off),
926 			    GET_UNALIGN16(&seg_hdr.sseg_len));
927 		}
928 
929 		if (GET_UNALIGN16(&seg_hdr.sseg_name) == ENVSEG_NAME)
930 			break;
931 	}
932 	if (i == scn_hdr.sscn_nsegs) {
933 		envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
934 		(void) close(fd);
935 		return (-1);
936 	}
937 
938 	/*
939 	 * Read environmental segment
940 	 */
941 	envseg_len = GET_UNALIGN16(&seg_hdr.sseg_len);
942 	if ((envseg = malloc(envseg_len)) == NULL) {
943 		envd_log(LOG_ERR, ENV_FRU_NOMEM_FOR_SEG, envseg_len);
944 		(void) close(fd);
945 		return (-1);
946 	}
947 
948 	if (lseek(fd, (off_t)GET_UNALIGN16(&seg_hdr.sseg_off),
949 	    SEEK_SET) == (off_t)-1 ||
950 	    read(fd, envseg, envseg_len) != envseg_len) {
951 		envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
952 		free(envseg);
953 		(void) close(fd);
954 		return (-1);
955 	}
956 
957 	/*
958 	 * Check environmental segment data for consistency
959 	 */
960 	esd_len = sizeof (*envseg) +
961 	    (envseg->esd_nsensors - 1) * sizeof (envseg->esd_sensors[0]);
962 	if (envseg->esd_ver != ENVSEG_VERSION || envseg_len < esd_len) {
963 		envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
964 		free(envseg);
965 		(void) close(fd);
966 		return (-1);
967 	}
968 
969 	/*
970 	 * Process environmental segment data
971 	 */
972 	if (envseg->esd_nsensors > MAX_SENSORS) {
973 		envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
974 		free(envseg);
975 		(void) close(fd);
976 		return (-1);
977 	}
978 
979 	sensorp = &(envseg->esd_sensors[0]);
980 	envsegp = (char *)envseg;
981 	for (i = 0; i < envseg->esd_nsensors; i++) {
982 		uint32_t ess_id;
983 
984 		(void) memcpy(&ess_id,
985 		    sensorp->ess_id, sizeof (sensorp->ess_id));
986 
987 		if (env_debug) {
988 			envd_log(LOG_INFO, "\n Sensor Id %x offset %x",
989 			    ess_id, sensorp->ess_off);
990 		}
991 		if (ess_id >= MAX_SENSORS) {
992 			envd_log(LOG_ERR, ENV_FRU_BAD_ENVSEG, iofru_devname);
993 			free(envseg);
994 			(void) close(fd);
995 			return (-1);
996 		}
997 		(void) memcpy(&sensor_ctl[ess_id], &envsegp[sensorp->ess_off],
998 		    sizeof (es_sensor_blk_t));
999 
1000 		sensorp++;
1001 	}
1002 
1003 	/*
1004 	 * Match sensor/ES id and point to correct data based on IDs
1005 	 */
1006 	for (i = 0; i < N_ENVD_SENSORS; i++) {
1007 		id = envd_sensors[i]->id;
1008 		envd_sensors[i]->es = &sensor_ctl[id];
1009 	}
1010 
1011 	/*
1012 	 * Cleanup and return
1013 	 */
1014 	free(envseg);
1015 	(void) close(fd);
1016 
1017 	return (0);
1018 }
1019 
1020 static void
envd_es_default_setup(void)1021 envd_es_default_setup(void)
1022 {
1023 	int	i, id;
1024 
1025 	for (i = 0; i < N_ENVD_SENSORS; i++) {
1026 		id = envd_sensors[i]->id;
1027 		envd_sensors[i]->es = &sensor_default_ctl[id];
1028 	}
1029 }
1030 
1031 /*
1032  * Open temperature sensor devices and initialize per sensor data structure.
1033  */
1034 static int
envd_setup_sensors(void)1035 envd_setup_sensors(void)
1036 {
1037 	env_sensor_t	*sensorp;
1038 	int		sensorcnt = 0;
1039 	int		i;
1040 	picl_nodehdl_t	tnodeh;
1041 
1042 	for (i = 0; i < N_ENVD_SENSORS; i++) {
1043 		if (env_debug)
1044 			envd_log(LOG_ERR, "scanning sensor %d\n", i);
1045 
1046 		sensorp = envd_sensors[i];
1047 
1048 		/* Initialize sensor's initial state */
1049 		sensorp->shutdown_initiated = B_FALSE;
1050 		sensorp->warning_tstamp = 0;
1051 		sensorp->shutdown_tstamp = 0;
1052 		sensorp->error = 0;
1053 
1054 		/* Make sure cpu0/1 sensors are present */
1055 		if (sensorp->id == CPU0_SENSOR_ID) {
1056 			if (ptree_get_node_by_path(CPU0_PATH, &tnodeh) !=
1057 			    PICL_SUCCESS) {
1058 				if (env_debug) {
1059 					envd_log(LOG_ERR,
1060 					    "get node by path failed for %s\n",
1061 					    CPU0_PATH);
1062 				}
1063 				sensorp->present = B_FALSE;
1064 				continue;
1065 			}
1066 		}
1067 		if (sensorp->id == CPU1_SENSOR_ID) {
1068 			if (ptree_get_node_by_path(CPU1_PATH, &tnodeh) !=
1069 			    PICL_SUCCESS) {
1070 				if (env_debug) {
1071 					envd_log(LOG_ERR,
1072 					    "get node by path failed for %s\n",
1073 					    CPU1_PATH);
1074 				}
1075 				sensorp->present = B_FALSE;
1076 				continue;
1077 			}
1078 		}
1079 
1080 		sensorp->fd = open(sensorp->devfs_path, O_RDWR);
1081 		if (sensorp->fd == -1) {
1082 			if (env_debug) {
1083 				envd_log(LOG_ERR, ENV_SENSOR_OPEN_FAIL,
1084 				    sensorp->name, sensorp->devfs_path,
1085 				    errno, strerror(errno));
1086 			}
1087 			sensorp->present = B_FALSE;
1088 			continue;
1089 		}
1090 
1091 		/*
1092 		 * Determine if the front panel is attached, we want the
1093 		 * information if it exists, but should not shut down
1094 		 * the system if it is removed.
1095 		 */
1096 		if (sensorp->id == FRONT_PANEL_SENSOR_ID) {
1097 			tempr_t temp;
1098 			int	tries;
1099 
1100 			for (tries = 0; tries < MAX_SENSOR_RETRIES; tries++) {
1101 				if (ioctl(sensorp->fd, PIC_GET_TEMPERATURE,
1102 				    &temp) == 0) {
1103 					break;
1104 				}
1105 				(void) sleep(1);
1106 			}
1107 			if (tries == MAX_SENSOR_RETRIES)
1108 				sensorp->present = B_FALSE;
1109 		}
1110 
1111 		sensorp->present = B_TRUE;
1112 		sensorcnt++;
1113 	}
1114 
1115 	if (sensorcnt == 0)
1116 		return (-1);
1117 
1118 	return (0);
1119 }
1120 
1121 /* ARGSUSED */
1122 static void *
pmthr(void * args)1123 pmthr(void *args)
1124 {
1125 	pm_state_change_t	pmstate;
1126 	char			physpath[PATH_MAX];
1127 	int			pre_lpstate;
1128 	uint8_t			estar_state;
1129 	int			env_monitor_fd;
1130 
1131 	pmstate.physpath = physpath;
1132 	pmstate.size = sizeof (physpath);
1133 	cur_lpstate = 0;
1134 	pre_lpstate = 1;
1135 
1136 	pm_fd = open(PM_DEVICE, O_RDWR);
1137 	if (pm_fd == -1) {
1138 		envd_log(LOG_ERR, PM_THREAD_EXITING, errno, strerror(errno));
1139 		return (NULL);
1140 	}
1141 	for (;;) {
1142 		/*
1143 		 * Get PM state change events to check if the system
1144 		 * is in lowest power state and inform PIC which controls
1145 		 * fan speeds.
1146 		 *
1147 		 * To minimize polling, we use the blocking interface
1148 		 * to get the power state change event here.
1149 		 */
1150 		if (ioctl(pm_fd, PM_GET_STATE_CHANGE_WAIT, &pmstate) != 0) {
1151 			if (errno != EINTR)
1152 				break;
1153 			continue;
1154 		}
1155 
1156 		do {
1157 			if (env_debug)  {
1158 				envd_log(LOG_INFO,
1159 				"pmstate event:0x%x flags:%x"
1160 				"comp:%d oldval:%d newval:%d path:%s\n",
1161 				    pmstate.event, pmstate.flags,
1162 				    pmstate.component,
1163 				    pmstate.old_level,
1164 				    pmstate.new_level,
1165 				    pmstate.physpath);
1166 			}
1167 			cur_lpstate =
1168 			    (pmstate.flags & PSC_ALL_LOWEST) ? 1 : 0;
1169 		} while (ioctl(pm_fd, PM_GET_STATE_CHANGE, &pmstate) == 0);
1170 
1171 		if (pre_lpstate != cur_lpstate) {
1172 			pre_lpstate = cur_lpstate;
1173 			estar_state = (cur_lpstate & 0x1);
1174 			if (env_debug)
1175 				envd_log(LOG_ERR,
1176 				    "setting PIC ESTAR SATE to %x\n",
1177 				    estar_state);
1178 
1179 			env_monitor_fd = open(ENV_MONITOR_DEVFS, O_RDWR);
1180 			if (env_monitor_fd != -1) {
1181 				if (ioctl(env_monitor_fd, PIC_SET_ESTAR_MODE,
1182 				    &estar_state) < 0) {
1183 					if (env_debug)
1184 						envd_log(LOG_ERR,
1185 					"unable to set ESTAR_MODE in PIC\n");
1186 				}
1187 				(void) close(env_monitor_fd);
1188 			} else {
1189 				if (env_debug)
1190 					envd_log(LOG_ERR,
1191 				"Failed to open %s\n",
1192 					    ENV_MONITOR_DEVFS);
1193 			}
1194 		}
1195 	}
1196 
1197 	/*NOTREACHED*/
1198 	return (NULL);
1199 }
1200 
1201 /*
1202  * This is env thread which monitors the current temperature when
1203  * warning threshold is exceeded. The job is to make sure it does
1204  * not execced/decrease shutdown threshold. If it does it will start
1205  * forced shutdown to avoid reaching hardware poweroff via THERM interrupt.
1206  */
1207 /*ARGSUSED*/
1208 static void *
system_temp_thr(void * args)1209 system_temp_thr(void *args)
1210 {
1211 	char syscmd[BUFSIZ];
1212 	char msgbuf[BUFSIZ];
1213 	timespec_t	to;
1214 	int	ret, i;
1215 	env_sensor_t	*sensorp;
1216 	pthread_mutex_t	env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER;
1217 	pthread_cond_t	env_monitor_cv = PTHREAD_COND_INITIALIZER;
1218 	time_t	ct;
1219 	tempr_t  temp;
1220 
1221 	for (;;) {
1222 		/*
1223 		 * Sleep for specified seconds before issuing IOCTL
1224 		 * again.
1225 		 */
1226 		(void) pthread_mutex_lock(&env_monitor_mutex);
1227 		ret = pthread_cond_reltimedwait_np(&env_monitor_cv,
1228 		    &env_monitor_mutex, &to);
1229 		to.tv_sec = sensor_scan_interval;
1230 		to.tv_nsec = 0;
1231 		if (ret != ETIMEDOUT) {
1232 			(void) pthread_mutex_unlock(&env_monitor_mutex);
1233 			continue;
1234 		}
1235 
1236 		(void) pthread_mutex_unlock(&env_monitor_mutex);
1237 		for (i = 0; i < N_ENVD_SENSORS; i++) {
1238 			sensorp = envd_sensors[i];
1239 			if (sensorp->present == B_FALSE)
1240 				continue;
1241 			if (get_temperature(sensorp, &temp) == -1)
1242 				continue;
1243 
1244 			sensorp->cur_temp = temp;
1245 			if (env_debug) {
1246 				envd_log(LOG_ERR,
1247 				"%s temp = %d",
1248 				    sensorp->name, sensorp->cur_temp);
1249 			}
1250 
1251 			/*
1252 			 * If this sensor already triggered system shutdown,
1253 			 * don't log any more shutdown/warning messages for it.
1254 			 */
1255 			if (sensorp->shutdown_initiated)
1256 				continue;
1257 
1258 			/*
1259 			 * Check for the temperature in warning and shutdown
1260 			 * range and take appropriate action.
1261 			 */
1262 			if (SENSOR_TEMP_IN_WARNING_RANGE(sensorp->cur_temp,
1263 			    sensorp)) {
1264 				/*
1265 				 * Check if the temperature has been in
1266 				 * warning range during last
1267 				 * sensor_warning_duration interval. If so,
1268 				 * the temperature is truly in warning range
1269 				 * and we need to log a warning message, but
1270 				 * no more than once every
1271 				 * sensor_warning_interval seconds.
1272 				 */
1273 				time_t	wtstamp = sensorp->warning_tstamp;
1274 
1275 				ct = (time_t)(gethrtime() / NANOSEC);
1276 				if (sensorp->warning_start == 0)
1277 					sensorp->warning_start = ct;
1278 				if (((ct - sensorp->warning_start) >=
1279 				    sensor_warning_duration) &&
1280 				    (wtstamp == 0 || (ct - wtstamp) >=
1281 				    sensor_warning_interval)) {
1282 					envd_log(LOG_CRIT, ENV_WARNING_MSG,
1283 					    sensorp->name, sensorp->cur_temp,
1284 					    (int8_t)
1285 					    sensorp->es->esb_low_warning,
1286 					    (int8_t)
1287 					    sensorp->es->esb_high_warning);
1288 
1289 					sensorp->warning_tstamp = ct;
1290 				}
1291 			} else if (sensorp->warning_start != 0)
1292 				sensorp->warning_start = 0;
1293 
1294 			if (!shutdown_override &&
1295 			    SENSOR_TEMP_IN_SHUTDOWN_RANGE(sensorp->cur_temp,
1296 			    sensorp)) {
1297 				ct = (time_t)(gethrtime() / NANOSEC);
1298 				if (sensorp->shutdown_tstamp == 0)
1299 					sensorp->shutdown_tstamp = ct;
1300 
1301 				/*
1302 				 * Shutdown the system if the temperature
1303 				 * remains in the shutdown range for over
1304 				 * sensor_shutdown_interval seconds.
1305 				 */
1306 				if ((ct - sensorp->shutdown_tstamp) >=
1307 				    sensor_shutdown_interval) {
1308 					/*
1309 					 * Log error
1310 					 */
1311 					sensorp->shutdown_initiated = B_TRUE;
1312 
1313 					(void) snprintf(msgbuf, sizeof (msgbuf),
1314 					    ENV_SHUTDOWN_MSG, sensorp->name,
1315 					    sensorp->cur_temp,
1316 					    (int8_t)
1317 					    sensorp->es->esb_low_shutdown,
1318 					    (int8_t)
1319 					    sensorp->es->esb_high_shutdown);
1320 
1321 					envd_log(LOG_ALERT, msgbuf);
1322 
1323 					/*
1324 					 * Shutdown the system (only once)
1325 					 */
1326 					if (system_shutdown_started ==
1327 					    B_FALSE) {
1328 						(void) snprintf(syscmd,
1329 						    sizeof (syscmd),
1330 						    "%s \"%s\"", shutdown_cmd,
1331 						    msgbuf);
1332 
1333 						envd_log(LOG_ALERT, syscmd);
1334 						system_shutdown_started =
1335 						    B_TRUE;
1336 
1337 						(void) system(syscmd);
1338 					}
1339 				}
1340 			} else if (sensorp->shutdown_tstamp != 0)
1341 				sensorp->shutdown_tstamp = 0;
1342 		}
1343 	}	/* end of forever loop */
1344 
1345 	/*NOTREACHED*/
1346 	return (NULL);
1347 }
1348 
1349 static int
scsi_log_sense(env_disk_t * diskp,uchar_t page_code,void * pagebuf,uint16_t pagelen,int page_control)1350 scsi_log_sense(env_disk_t *diskp, uchar_t page_code, void *pagebuf,
1351 		uint16_t pagelen, int page_control)
1352 {
1353 	struct uscsi_cmd	ucmd_buf;
1354 	uchar_t		cdb_buf[CDB_GROUP1];
1355 	struct	scsi_extended_sense	sense_buf;
1356 	int	ret_val;
1357 
1358 	bzero(&cdb_buf, sizeof (cdb_buf));
1359 	bzero(&ucmd_buf, sizeof (ucmd_buf));
1360 	bzero(&sense_buf, sizeof (sense_buf));
1361 
1362 	cdb_buf[0] = SCMD_LOG_SENSE_G1;
1363 
1364 	/*
1365 	 * For SATA we need to have the current threshold value set.
1366 	 * For SAS drives we can use the current cumulative value.
1367 	 * This is set for non-SMART drives, by passing a non-zero
1368 	 * page_control.
1369 	 */
1370 	if (page_control)
1371 		cdb_buf[2] = (0x01 << 6) | page_code;
1372 	else
1373 		cdb_buf[2] = page_code;
1374 
1375 	cdb_buf[7] = (uchar_t)((pagelen & 0xFF00) >> 8);
1376 	cdb_buf[8] = (uchar_t)(pagelen  & 0x00FF);
1377 
1378 	ucmd_buf.uscsi_cdb = (char *)cdb_buf;
1379 	ucmd_buf.uscsi_cdblen = sizeof (cdb_buf);
1380 	ucmd_buf.uscsi_bufaddr = (caddr_t)pagebuf;
1381 	ucmd_buf.uscsi_buflen = pagelen;
1382 	ucmd_buf.uscsi_rqbuf = (caddr_t)&sense_buf;
1383 	ucmd_buf.uscsi_rqlen = sizeof (struct scsi_extended_sense);
1384 	ucmd_buf.uscsi_flags = USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
1385 	ucmd_buf.uscsi_timeout = DEFAULT_SCSI_TIMEOUT;
1386 
1387 	ret_val = ioctl(diskp->fd, USCSICMD, ucmd_buf);
1388 	if ((ret_val == 0) && (ucmd_buf.uscsi_status == 0)) {
1389 		if (env_debug)
1390 			envd_log(LOG_ERR,
1391 		"log sense command for page_code 0x%x succeeded\n", page_code);
1392 		return (ret_val);
1393 	}
1394 	if (env_debug)
1395 		envd_log(LOG_ERR, "log sense command for %s failed. "
1396 		    "page_code 0x%x ret_val = 0x%x "
1397 		    "status = 0x%x errno = 0x%x\n", diskp->name, page_code,
1398 		    ret_val, ucmd_buf.uscsi_status, errno);
1399 
1400 	return (1);
1401 }
1402 
1403 
1404 static int
get_disk_temp(env_disk_t * diskp)1405 get_disk_temp(env_disk_t *diskp)
1406 {
1407 	int	ret;
1408 	uchar_t	tpage[256];
1409 
1410 	if (diskp->smart_supported == B_TRUE) {
1411 		smart_structure	smartpage;
1412 		smart_attribute	*temp_attrib = NULL;
1413 		uint8_t		checksum;
1414 		uint8_t		*index;
1415 		int		i;
1416 
1417 		bzero(&smartpage, sizeof (smartpage));
1418 
1419 		ret = scsi_log_sense(diskp, GET_SMART_INFO,
1420 		    &smartpage, sizeof (smartpage), 0);
1421 
1422 		if (ret != 0) {
1423 			diskp->current_temp = DISK_INVALID_TEMP;
1424 			diskp->ref_temp = DISK_INVALID_TEMP;
1425 			return (-1);
1426 		}
1427 
1428 		/*
1429 		 * verify the checksum of the data. A 2's compliment
1430 		 * of the result addition of the is stored in the
1431 		 * last byte. The sum of all the checksum should be
1432 		 * 0. If the checksum is bad, return an error for
1433 		 * this iteration.
1434 		 */
1435 		index = (uint8_t *)&smartpage;
1436 
1437 		for (i = checksum = 0; i < 512; i++)
1438 			checksum += index[i];
1439 
1440 		if ((checksum != 0) && env_debug) {
1441 			envd_log(LOG_ERR,
1442 			    "SMART checksum error! 0x%x\n", checksum);
1443 
1444 			/*
1445 			 * We got bad data back from the drive, fail this
1446 			 * time around and picl will retry again. If this
1447 			 * continues to fail picl will give this drive a
1448 			 * failed status.
1449 			 */
1450 			diskp->current_temp = DISK_INVALID_TEMP;
1451 			diskp->ref_temp = DISK_INVALID_TEMP;
1452 
1453 			return (-1);
1454 		}
1455 
1456 		/*
1457 		 * Scan through the various SMART data and look for
1458 		 * the complete drive temp.
1459 		 */
1460 
1461 		for (i = 0; (i < SMART_FIELDS) &&
1462 		    (smartpage.attribute[i].id != 0) &&
1463 		    (temp_attrib == NULL); i++) {
1464 
1465 			if (smartpage.attribute[i].id == HDA_TEMP) {
1466 				temp_attrib = &smartpage.attribute[i];
1467 			}
1468 		}
1469 
1470 		/*
1471 		 * If we dont find any temp SMART attributes, this drive
1472 		 * does not support this page, disable temp checking
1473 		 * for this drive.
1474 		 */
1475 		if (temp_attrib == NULL) {
1476 
1477 			/*
1478 			 * If the checksum is valid, the temp. attributes are
1479 			 * not supported, disable this drive from temp.
1480 			 * checking.
1481 			 */
1482 			if (env_debug)
1483 				envd_log(LOG_ERR,
1484 				    "Temp ATTRIBUTE not supported\n");
1485 			diskp->smart_supported = B_FALSE;
1486 			diskp->tpage_supported = B_FALSE;
1487 			diskp->current_temp = DISK_INVALID_TEMP;
1488 			diskp->ref_temp = DISK_INVALID_TEMP;
1489 
1490 			return (-1);
1491 		}
1492 
1493 		if (env_debug) {
1494 			envd_log(LOG_ERR, "flags = 0x%x%x,curr = 0x%x,"
1495 			    "data = 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1496 			    temp_attrib->flags[0], temp_attrib->flags[1],
1497 			    temp_attrib->raw_data[0], temp_attrib->raw_data[1],
1498 			    temp_attrib->raw_data[2], temp_attrib->raw_data[3],
1499 			    temp_attrib->raw_data[4], temp_attrib->raw_data[5],
1500 			    temp_attrib->raw_data[6], temp_attrib->raw_data[7]);
1501 		}
1502 		if (temp_attrib->raw_data[1] != 0xFF) {
1503 			diskp->current_temp = temp_attrib->raw_data[2];
1504 			diskp->ref_temp	= temp_attrib->raw_data[2];
1505 		} else {
1506 			diskp->ref_temp = DISK_INVALID_TEMP;
1507 			diskp->current_temp = DISK_INVALID_TEMP;
1508 
1509 			return (-1);
1510 		}
1511 
1512 	} else {
1513 		ret = scsi_log_sense(diskp, TEMPERATURE_PAGE, tpage,
1514 		    sizeof (tpage), 1);
1515 
1516 		if (ret != 0) {
1517 			diskp->current_temp = DISK_INVALID_TEMP;
1518 			diskp->ref_temp = DISK_INVALID_TEMP;
1519 			return (-1);
1520 		}
1521 		/*
1522 		 * For the current temperature verify that the parameter
1523 		 * length is 0x02 and the parameter code is 0x00
1524 		 * Temperature value of 255(0xFF) is considered INVALID.
1525 		 */
1526 		if ((tpage[7] == 0x02) && (tpage[4] == 0x00) &&
1527 		    (tpage[5] == 0x00)) {
1528 			if (tpage[9] == 0xFF) {
1529 				diskp->current_temp = DISK_INVALID_TEMP;
1530 				return (-1);
1531 			} else {
1532 				diskp->current_temp = tpage[9];
1533 			}
1534 		}
1535 
1536 		/*
1537 		 * For the reference temperature verify that the parameter
1538 		 * length is 0x02 and the parameter code is 0x01
1539 		 * Temperature value of 255(0xFF) is considered INVALID.
1540 		 */
1541 		if ((tpage[13] == 0x02) && (tpage[10] == 0x00) &&
1542 		    (tpage[11] == 0x01)) {
1543 			if (tpage[15] == 0xFF) {
1544 				diskp->ref_temp = DISK_INVALID_TEMP;
1545 			} else {
1546 				diskp->ref_temp = tpage[15];
1547 			}
1548 		}
1549 	}
1550 	return (0);
1551 }
1552 
1553 /* ARGSUSED */
1554 static void *
disk_temp_thr(void * args)1555 disk_temp_thr(void *args)
1556 {
1557 	char syscmd[BUFSIZ];
1558 	char msgbuf[BUFSIZ];
1559 	timespec_t	to;
1560 	int	ret, i;
1561 	env_disk_t	*diskp;
1562 	pthread_mutex_t	env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER;
1563 	pthread_cond_t	env_monitor_cv = PTHREAD_COND_INITIALIZER;
1564 	pm_state_change_t	pmstate;
1565 	int	idle_time;
1566 	int	disk_pm_fd;
1567 	time_t	ct;
1568 
1569 	if ((disk_pm_fd = open(PM_DEVICE, O_RDWR)) == -1) {
1570 		envd_log(LOG_ERR, DISK_TEMP_THREAD_EXITING,
1571 		    errno, strerror(errno));
1572 		return (NULL);
1573 	}
1574 
1575 	for (;;) {
1576 		/*
1577 		 * Sleep for specified seconds before issuing IOCTL
1578 		 * again.
1579 		 */
1580 		(void) pthread_mutex_lock(&env_monitor_mutex);
1581 		ret = pthread_cond_reltimedwait_np(&env_monitor_cv,
1582 		    &env_monitor_mutex, &to);
1583 
1584 		to.tv_sec = disk_scan_interval;
1585 		to.tv_nsec = 0;
1586 
1587 		if (ret != ETIMEDOUT) {
1588 			(void) pthread_mutex_unlock(
1589 			    &env_monitor_mutex);
1590 			continue;
1591 		}
1592 		(void) pthread_mutex_unlock(&env_monitor_mutex);
1593 
1594 		for (i = 0; (diskp = envd_disks[i]) != NULL; i++) {
1595 			if (diskp->present == B_FALSE)
1596 				continue;
1597 			if (diskp->tpage_supported == B_FALSE)
1598 				continue;
1599 		/*
1600 		 * If the disk temperature is above the warning threshold
1601 		 * continue monitoring until the temperature drops below
1602 		 * warning threshold.
1603 		 * if the temperature is in the NORMAL range monitor only
1604 		 * when the disk is BUSY.
1605 		 * We do not want to read the disk temperature if the disk is
1606 		 * is idling. The reason for this is disk will never get into
1607 		 * lowest power mode if we scan the disk temperature
1608 		 * peridoically. To avoid this situation we first determine
1609 		 * the idle_time of the disk. If the disk has been IDLE since
1610 		 * we scanned the temperature last time we will not read the
1611 		 * temperature.
1612 		 */
1613 		if (!DISK_TEMP_IN_WARNING_RANGE(diskp->current_temp, diskp)) {
1614 			pmstate.physpath = diskp->physpath;
1615 			pmstate.size = strlen(diskp->physpath);
1616 			pmstate.component = 0;
1617 			if ((idle_time =
1618 			    ioctl(disk_pm_fd, PM_GET_TIME_IDLE,
1619 			    &pmstate)) == -1) {
1620 
1621 				if (errno != EINTR) {
1622 					if (env_debug)
1623 						envd_log(LOG_ERR,
1624 			"ioctl PM_GET_TIME_IDLE failed for DISK0. errno=0x%x\n",
1625 						    errno);
1626 					continue;
1627 				}
1628 				continue;
1629 			}
1630 			if (idle_time >= (disk_scan_interval/2)) {
1631 				if (env_debug) {
1632 					envd_log(LOG_ERR, "%s idle time = %d\n",
1633 					    diskp->name, idle_time);
1634 				}
1635 				continue;
1636 			}
1637 		}
1638 		ret = get_disk_temp(diskp);
1639 		if (ret != 0)
1640 			continue;
1641 		if (env_debug) {
1642 			envd_log(LOG_ERR, "%s temp = %d ref. temp = %d\n",
1643 			    diskp->name, diskp->current_temp, diskp->ref_temp);
1644 		}
1645 		/*
1646 		 * If this disk already triggered system shutdown, don't
1647 		 * log any more shutdown/warning messages for it.
1648 		 */
1649 		if (diskp->shutdown_initiated)
1650 			continue;
1651 
1652 		/*
1653 		 * Check for the temperature in warning and shutdown range
1654 		 * and take appropriate action.
1655 		 */
1656 		if (DISK_TEMP_IN_WARNING_RANGE(diskp->current_temp, diskp)) {
1657 			/*
1658 			 * Check if the temperature has been in warning
1659 			 * range during last disk_warning_duration interval.
1660 			 * If so, the temperature is truly in warning
1661 			 * range and we need to log a warning message,
1662 			 * but no more than once every disk_warning_interval
1663 			 * seconds.
1664 			 */
1665 			time_t	wtstamp = diskp->warning_tstamp;
1666 
1667 			ct = (time_t)(gethrtime() / NANOSEC);
1668 			if (diskp->warning_start == 0)
1669 				diskp->warning_start = ct;
1670 			if (((ct - diskp->warning_start) >=
1671 			    disk_warning_duration) && (wtstamp == 0 ||
1672 			    (ct - wtstamp) >= disk_warning_interval)) {
1673 				envd_log(LOG_CRIT, ENV_WARNING_MSG,
1674 				    diskp->name, diskp->current_temp,
1675 				    diskp->low_warning,
1676 				    diskp->high_warning);
1677 				diskp->warning_tstamp = ct;
1678 			}
1679 		} else if (diskp->warning_start != 0)
1680 			diskp->warning_start = 0;
1681 
1682 		if (!shutdown_override &&
1683 		    DISK_TEMP_IN_SHUTDOWN_RANGE(diskp->current_temp, diskp)) {
1684 			ct = (time_t)(gethrtime() / NANOSEC);
1685 			if (diskp->shutdown_tstamp == 0)
1686 				diskp->shutdown_tstamp = ct;
1687 
1688 			/*
1689 			 * Shutdown the system if the temperature remains
1690 			 * in the shutdown range for over disk_shutdown_interval
1691 			 * seconds.
1692 			 */
1693 			if ((ct - diskp->shutdown_tstamp) >=
1694 			    disk_shutdown_interval) {
1695 				/* log error */
1696 				diskp->shutdown_initiated = B_TRUE;
1697 				(void) snprintf(msgbuf, sizeof (msgbuf),
1698 				    ENV_SHUTDOWN_MSG, diskp->name,
1699 				    diskp->current_temp, diskp->low_shutdown,
1700 				    diskp->high_shutdown);
1701 				envd_log(LOG_ALERT, msgbuf);
1702 
1703 				/* shutdown the system (only once) */
1704 				if (system_shutdown_started == B_FALSE) {
1705 					(void) snprintf(syscmd, sizeof (syscmd),
1706 					    "%s \"%s\"", shutdown_cmd, msgbuf);
1707 					envd_log(LOG_ALERT, syscmd);
1708 					system_shutdown_started = B_TRUE;
1709 					(void) system(syscmd);
1710 				}
1711 			}
1712 		} else if (diskp->shutdown_tstamp != 0)
1713 			diskp->shutdown_tstamp = 0;
1714 		}
1715 	} /* end of forever loop */
1716 }
1717 
1718 static void *
fan_thr(void * args)1719 fan_thr(void *args)
1720 {
1721 	char msgbuf[BUFSIZ];
1722 	timespec_t	to;
1723 	int	ret, i;
1724 	pthread_mutex_t	env_monitor_mutex = PTHREAD_MUTEX_INITIALIZER;
1725 	pthread_cond_t	env_monitor_cv = PTHREAD_COND_INITIALIZER;
1726 	env_fan_t	*fanp;
1727 
1728 #ifdef	__lint
1729 	args = args;
1730 #endif
1731 
1732 	for (;;) {
1733 		/*
1734 		 * Sleep for specified seconds before issuing IOCTL
1735 		 * again.
1736 		 */
1737 		(void) pthread_mutex_lock(&env_monitor_mutex);
1738 		ret = pthread_cond_reltimedwait_np(&env_monitor_cv,
1739 		    &env_monitor_mutex, &to);
1740 		to.tv_sec = fan_scan_interval;
1741 		to.tv_nsec = 0;
1742 		if (ret != ETIMEDOUT) {
1743 			(void) pthread_mutex_unlock(&env_monitor_mutex);
1744 			continue;
1745 		}
1746 		(void) pthread_mutex_unlock(&env_monitor_mutex);
1747 
1748 		for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1749 			if (fanp->present == B_FALSE)
1750 				continue;
1751 
1752 			if (has_fan_failed(fanp) == B_TRUE) {
1753 				if (fanp->last_status == FAN_FAILED)
1754 					continue;
1755 				fanp->last_status = FAN_FAILED;
1756 				(void) snprintf(msgbuf, sizeof (msgbuf),
1757 				    ENV_FAN_FAILURE_WARNING_MSG, fanp->name,
1758 				    fan_rpm_string, fan_status_string);
1759 				envd_log(LOG_ALERT, msgbuf);
1760 			} else {
1761 				if (fanp->last_status == FAN_OK)
1762 					continue;
1763 				fanp->last_status = FAN_OK;
1764 				(void) snprintf(msgbuf, sizeof (msgbuf),
1765 				    ENV_FAN_OK_MSG, fanp->name);
1766 				envd_log(LOG_ALERT, msgbuf);
1767 			}
1768 		}
1769 
1770 		if (has_psufan_failed() == B_TRUE) {
1771 			if (psufan_last_status == FAN_FAILED)
1772 				continue;
1773 			psufan_last_status = FAN_FAILED;
1774 			(void) snprintf(msgbuf, sizeof (msgbuf),
1775 			    ENV_FAN_FAILURE_WARNING_MSG, SENSOR_PSU,
1776 			    fan_rpm_string, fan_status_string);
1777 			envd_log(LOG_ALERT, msgbuf);
1778 		} else {
1779 			if (psufan_last_status == FAN_OK)
1780 				continue;
1781 			psufan_last_status = FAN_OK;
1782 			(void) snprintf(msgbuf, sizeof (msgbuf),
1783 			    ENV_FAN_OK_MSG, SENSOR_PSU);
1784 			envd_log(LOG_ALERT, msgbuf);
1785 		}
1786 	}
1787 
1788 	/*NOTREACHED*/
1789 	return (NULL);
1790 }
1791 
1792 /*
1793  * Setup envrionmental monitor state and start threads to monitor
1794  * temperature, fan, disk and power management state.
1795  * Returns -1 on error, 0 if successful.
1796  */
1797 static int
envd_setup(void)1798 envd_setup(void)
1799 {
1800 
1801 	if (getenv("SUNW_piclenvd_debug") != NULL)
1802 		env_debug = 1;
1803 
1804 	if (pthread_attr_init(&thr_attr) != 0 ||
1805 	    pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM) != 0) {
1806 		return (-1);
1807 	}
1808 
1809 	/*
1810 	 * If ES segment is not present or has inconsistent information, we
1811 	 * use default values for sensor limits. For the sake of simplicity,
1812 	 * we still store these limits internally in the 'es' member in the
1813 	 * structure.
1814 	 */
1815 	if (envd_es_setup() < 0) {
1816 		envd_log(LOG_WARNING, ENV_DEFAULT_LIMITS);
1817 		envd_es_default_setup();
1818 	}
1819 
1820 	if (envd_setup_sensors() < 0) {
1821 		if (env_debug)
1822 			envd_log(LOG_ERR, "Failed to setup sensors\n");
1823 		system_temp_monitor = 0;
1824 	}
1825 
1826 	if (envd_setup_fans() < 0) {
1827 		if (env_debug)
1828 			envd_log(LOG_ERR, "Failed to setup fans\n");
1829 		fan_monitor = 0;
1830 		pm_monitor = 0;
1831 	}
1832 
1833 	/*
1834 	 * Disable disk temperature monitoring until we have
1835 	 * LSI fw support to read SATA disk temperature
1836 	 */
1837 	if (disk_temp_monitor) {
1838 		if (envd_setup_disks() < 0) {
1839 			if (env_debug)
1840 				envd_log(LOG_ERR, "Failed to setup disks\n");
1841 			disk_temp_monitor = 0;
1842 		}
1843 	}
1844 
1845 	/*
1846 	 * Create a thread to monitor system temperatures
1847 	 */
1848 	if ((system_temp_monitor) && (system_temp_thr_created == B_FALSE)) {
1849 		if (pthread_create(&system_temp_thr_id, &thr_attr,
1850 		    system_temp_thr, NULL) != 0) {
1851 			envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED);
1852 		} else {
1853 			system_temp_thr_created = B_TRUE;
1854 			if (env_debug)
1855 				envd_log(LOG_ERR,
1856 			"Created thread to monitor system temperatures\n");
1857 		}
1858 	}
1859 
1860 	/*
1861 	 * Create a thread to monitor fans
1862 	 */
1863 	if ((fan_monitor) && (fan_thr_created == B_FALSE)) {
1864 		if (pthread_create(&fan_thr_id, &thr_attr, fan_thr, NULL) != 0)
1865 			envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED);
1866 		else {
1867 			fan_thr_created = B_TRUE;
1868 			if (env_debug) {
1869 				envd_log(LOG_ERR,
1870 				    "Created thread to monitor system fans\n");
1871 			}
1872 		}
1873 	}
1874 
1875 	/*
1876 	 * Create a thread to monitor PM state
1877 	 */
1878 	if ((pm_monitor) && (pmthr_created == B_FALSE)) {
1879 		if (pthread_create(&pmthr_tid, &thr_attr, pmthr, NULL) != 0)
1880 			envd_log(LOG_CRIT, PM_THREAD_CREATE_FAILED);
1881 		else {
1882 			pmthr_created = B_TRUE;
1883 			if (env_debug)
1884 				envd_log(LOG_ERR,
1885 			"Created thread to monitor system power state\n");
1886 		}
1887 	}
1888 
1889 	/*
1890 	 * Create a thread to monitor disk temperature
1891 	 */
1892 	if ((disk_temp_monitor) && (disk_temp_thr_created == B_FALSE)) {
1893 		if (pthread_create(&disk_temp_thr_id, &thr_attr,
1894 		    disk_temp_thr, NULL) != 0) {
1895 			envd_log(LOG_ERR, ENVTHR_THREAD_CREATE_FAILED);
1896 		} else {
1897 			disk_temp_thr_created = B_TRUE;
1898 			if (env_debug)
1899 				envd_log(LOG_ERR,
1900 			"Created thread for disk temperatures\n");
1901 		}
1902 	}
1903 
1904 	return (0);
1905 }
1906 
1907 static void
piclenvd_register(void)1908 piclenvd_register(void)
1909 {
1910 	picld_plugin_register(&my_reg_info);
1911 }
1912 
1913 static void
piclenvd_init(void)1914 piclenvd_init(void)
1915 {
1916 
1917 	(void) env_picl_setup_tuneables();
1918 
1919 	/*
1920 	 * Do not allow disk temperature monitoring to be enabled
1921 	 * via tuneables. Disk temperature monitoring is disabled
1922 	 * until we have LSI fw support to read the temperature of
1923 	 * SATA disks
1924 	 */
1925 	disk_temp_monitor = 0;
1926 
1927 	/*
1928 	 * Setup the environmental data structures
1929 	 */
1930 	if (envd_setup() != 0) {
1931 		envd_log(LOG_CRIT, ENVD_PLUGIN_INIT_FAILED);
1932 		return;
1933 	}
1934 
1935 	/*
1936 	 * Now setup/populate PICL tree
1937 	 */
1938 	env_picl_setup();
1939 }
1940 
/*
 * Plug-in teardown.
 */
static void
piclenvd_fini(void)
{
	/*
	 * Remove the PICL nodes/properties (volatile ones included) that
	 * this plug-in created first.  After env_picl_destroy() returns,
	 * the PICL framework can no longer call in to read a temperature
	 * or fan speed, so the devices can then be closed.
	 */
	env_picl_destroy();

	envd_close_sensors();
	envd_close_fans();
}
1955 
/*
 * printf-style logging helper: forwards the priority, format and
 * variable arguments to vsyslog().
 */
/*VARARGS2*/
void
envd_log(int pri, const char *fmt, ...)
{
	va_list	args;

	va_start(args, fmt);
	vsyslog(pri, fmt, args);
	va_end(args);
}
1966 
1967 /*
1968  * Tunables support functions
1969  */
1970 static env_tuneable_t *
tuneable_lookup(picl_prophdl_t proph)1971 tuneable_lookup(picl_prophdl_t proph)
1972 {
1973 	int i;
1974 	env_tuneable_t	*tuneablep = NULL;
1975 
1976 	for (i = 0; i < ntuneables; i++) {
1977 		tuneablep = &tuneables[i];
1978 		if (tuneablep->proph == proph)
1979 			return (tuneablep);
1980 	}
1981 
1982 	return (NULL);
1983 }
1984 
1985 static int
get_string_val(ptree_rarg_t * parg,void * buf)1986 get_string_val(ptree_rarg_t *parg, void *buf)
1987 {
1988 	picl_prophdl_t	proph;
1989 	env_tuneable_t	*tuneablep;
1990 
1991 	proph = parg->proph;
1992 
1993 	tuneablep = tuneable_lookup(proph);
1994 
1995 	if (tuneablep == NULL)
1996 		return (PICL_FAILURE);
1997 
1998 	(void) memcpy(buf, tuneablep->value, tuneablep->nbytes);
1999 
2000 	return (PICL_SUCCESS);
2001 }
2002 
2003 static int
set_string_val(ptree_warg_t * parg,const void * buf)2004 set_string_val(ptree_warg_t *parg, const void *buf)
2005 {
2006 	picl_prophdl_t	proph;
2007 	env_tuneable_t	*tuneablep;
2008 
2009 	if (parg->cred.dc_euid != 0)
2010 		return (PICL_PERMDENIED);
2011 
2012 	proph = parg->proph;
2013 
2014 	tuneablep = tuneable_lookup(proph);
2015 
2016 	if (tuneablep == NULL)
2017 		return (PICL_FAILURE);
2018 
2019 	(void) memcpy(tuneables->value, buf, tuneables->nbytes);
2020 
2021 
2022 	return (PICL_SUCCESS);
2023 }
2024 
2025 static int
get_int_val(ptree_rarg_t * parg,void * buf)2026 get_int_val(ptree_rarg_t *parg, void *buf)
2027 {
2028 	picl_prophdl_t	proph;
2029 	env_tuneable_t	*tuneablep;
2030 
2031 	proph = parg->proph;
2032 
2033 	tuneablep = tuneable_lookup(proph);
2034 
2035 	if (tuneablep == NULL)
2036 		return (PICL_FAILURE);
2037 
2038 	(void) memcpy(buf, tuneablep->value, tuneablep->nbytes);
2039 
2040 	return (PICL_SUCCESS);
2041 }
2042 
2043 static int
set_int_val(ptree_warg_t * parg,const void * buf)2044 set_int_val(ptree_warg_t *parg, const void *buf)
2045 {
2046 	picl_prophdl_t	proph;
2047 	env_tuneable_t	*tuneablep;
2048 
2049 	if (parg->cred.dc_euid != 0)
2050 		return (PICL_PERMDENIED);
2051 
2052 	proph = parg->proph;
2053 
2054 	tuneablep = tuneable_lookup(proph);
2055 
2056 	if (tuneablep == NULL)
2057 		return (PICL_FAILURE);
2058 
2059 	(void) memcpy(tuneablep->value, buf, tuneablep->nbytes);
2060 
2061 	return (PICL_SUCCESS);
2062 }
2063 
2064 boolean_t
has_fan_failed(env_fan_t * fanp)2065 has_fan_failed(env_fan_t *fanp)
2066 {
2067 	fanspeed_t	fan_speed;
2068 	uchar_t		status;
2069 	uint8_t		tach;
2070 	int		real_tach;
2071 	int		ret, ntries;
2072 
2073 	if (fanp->fd == -1)
2074 		return (B_TRUE);
2075 
2076 	/*
2077 	 * Read RF_FAN_STATUS bit of the fan fault register, retry if
2078 	 * the PIC is busy, with a 1 second delay to allow it to update.
2079 	 */
2080 	for (ntries = 0; ntries < MAX_RETRIES_FOR_FAN_FAULT; ntries++) {
2081 		ret = ioctl(fanp->fd, PIC_GET_FAN_STATUS, &status);
2082 		if ((ret == 0) && ((status & 0x1) == 0))
2083 			break;
2084 		(void) sleep(1);
2085 	}
2086 
2087 	if (ntries > 0) {
2088 		if (env_debug) {
2089 			envd_log(LOG_ERR,
2090 			    "%d retries attempted in reading fan status.\n",
2091 			    ntries);
2092 		}
2093 	}
2094 
2095 	if (ntries == MAX_RETRIES_FOR_FAN_FAULT) {
2096 		(void) strncpy(fan_status_string, NOT_AVAILABLE,
2097 		    sizeof (fan_status_string));
2098 		(void) strncpy(fan_rpm_string, NOT_AVAILABLE,
2099 		    sizeof (fan_rpm_string));
2100 		return (B_TRUE);
2101 	}
2102 
2103 	if (env_debug)
2104 		envd_log(LOG_ERR, "fan status = 0x%x\n", status);
2105 
2106 	/*
2107 	 * ST_FFAULT bit isn't implemented yet and we're reading only
2108 	 * individual fan status
2109 	 */
2110 	if (status & 0x1) {
2111 		(void) snprintf(fan_status_string, sizeof (fan_status_string),
2112 		    "0x%x", status);
2113 		if (ioctl(fanp->fd, PIC_GET_FAN_SPEED, &tach) != 0) {
2114 			(void) strncpy(fan_rpm_string, NOT_AVAILABLE,
2115 			    sizeof (fan_rpm_string));
2116 		} else {
2117 			real_tach = tach << 8;
2118 			fan_speed = TACH_TO_RPM(real_tach);
2119 			(void) snprintf(fan_rpm_string, sizeof (fan_rpm_string),
2120 			    "%d", fan_speed);
2121 		}
2122 		return (B_TRUE);
2123 	}
2124 
2125 	return (B_FALSE);
2126 }
2127 
2128 boolean_t
has_psufan_failed(void)2129 has_psufan_failed(void)
2130 {
2131 	uchar_t		status;
2132 	int		ret, ntries;
2133 
2134 	if (envd_sensor_psu.fd == -1)
2135 		return (B_FALSE);
2136 
2137 	/*
2138 	 * For psu, only fan fault is visible, no fan speed
2139 	 */
2140 	(void) strncpy(fan_rpm_string, NOT_AVAILABLE, sizeof (fan_rpm_string));
2141 
2142 	/*
2143 	 * Read RF_FAN_STATUS bit of the fan fault register, retry if
2144 	 * the PIC is busy, with a 1 second delay to allow it to update.
2145 	 */
2146 	for (ntries = 0; ntries < MAX_RETRIES_FOR_FAN_FAULT; ntries++) {
2147 		ret = ioctl(envd_sensor_psu.fd, PIC_GET_FAN_STATUS, &status);
2148 		if ((ret == 0) && ((status & 0x1) == 0))
2149 			break;
2150 		(void) sleep(1);
2151 	}
2152 
2153 	if (ntries > 0) {
2154 		if (env_debug) {
2155 			envd_log(LOG_ERR,
2156 			    "%d retries attempted in reading fan status.\n",
2157 			    ntries);
2158 		}
2159 	}
2160 
2161 	if (ntries == MAX_RETRIES_FOR_FAN_FAULT) {
2162 		(void) strncpy(fan_status_string, NOT_AVAILABLE,
2163 		    sizeof (fan_status_string));
2164 		return (B_TRUE);
2165 	}
2166 
2167 	if (env_debug)
2168 		envd_log(LOG_ERR, "fan status = 0x%x\n", status);
2169 
2170 	if (status & 0x1) {
2171 		(void) snprintf(fan_status_string, sizeof (fan_status_string),
2172 		    "0x%x", status);
2173 		return (B_TRUE);
2174 	}
2175 
2176 	return (B_FALSE);
2177 }
2178 
2179 static int
scsi_mode_select(env_disk_t * diskp,uchar_t page_code,uchar_t * pagebuf,uint16_t pagelen)2180 scsi_mode_select(env_disk_t *diskp, uchar_t page_code, uchar_t *pagebuf,
2181     uint16_t pagelen)
2182 {
2183 	struct uscsi_cmd		ucmd_buf;
2184 	uchar_t				cdb_buf[CDB_GROUP1];
2185 	struct scsi_extended_sense	sense_buf;
2186 	int				ret_val;
2187 
2188 	bzero(&cdb_buf, sizeof (cdb_buf));
2189 	bzero(&ucmd_buf, sizeof (ucmd_buf));
2190 	bzero(&sense_buf, sizeof (sense_buf));
2191 
2192 	cdb_buf[0] = SCMD_MODE_SELECT_G1;
2193 	cdb_buf[1] = 1<<PAGE_FMT;
2194 
2195 	cdb_buf[7] = (uchar_t)((pagelen & 0xFF00) >> 8);
2196 	cdb_buf[8] = (uchar_t)(pagelen  & 0x00FF);
2197 
2198 	ucmd_buf.uscsi_cdb = (char *)cdb_buf;
2199 	ucmd_buf.uscsi_cdblen = sizeof (cdb_buf);
2200 	ucmd_buf.uscsi_bufaddr = (caddr_t)pagebuf;
2201 	ucmd_buf.uscsi_buflen = pagelen;
2202 	ucmd_buf.uscsi_rqbuf = (caddr_t)&sense_buf;
2203 	ucmd_buf.uscsi_rqlen = sizeof (struct scsi_extended_sense);
2204 	ucmd_buf.uscsi_flags = USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
2205 	ucmd_buf.uscsi_timeout = DEFAULT_SCSI_TIMEOUT;
2206 
2207 	ret_val = ioctl(diskp->fd, USCSICMD, ucmd_buf);
2208 
2209 	if (ret_val == 0 && ucmd_buf.uscsi_status == 0) {
2210 		return (ret_val);
2211 	}
2212 	if (env_debug)
2213 		envd_log(LOG_ERR, "mode select command for %s failed. "
2214 		    "page_code 0x%x ret_val = 0x%x "
2215 		    "status = 0x%x errno = 0x%x\n", diskp->name, page_code,
2216 		    ret_val, ucmd_buf.uscsi_status, errno);
2217 
2218 	return (1);
2219 
2220 }
2221