xref: /titanic_41/usr/src/cmd/picl/plugins/sun4u/excalibur/envd/piclenvd.c (revision 360e6f5e7a29d5950aa1985f56811731715da7e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file contains the environmental PICL plug-in module.
31  */
32 
33 
34 /*
35  * Excalibur system contains up to two CPU and two PCI MAX1617 temperature
36  * devices, each consisting of two sensors: die and ambient. Each sensor is
37  * represented as a different minor device and the current temperature is read
38  * via an I2C_GET_TEMPERATURE ioctl call to the max1617 driver. Additionally,
39  * the MAX1617 device supports both a low and high temperature limit, which
40  * can trigger an alert condition, causing power supply to turn off.
41  *
42  * The environmental monitor defines the following thresholds per sensor:
43  *
44  *	high_power_off		high hard shutdown limit
45  *	high_shutdown		high soft shutdown limit
46  *	high_warning		high warning limit
47  *	low_warning		low warning limit
48  *	low_shutdown		low soft shutdown limit
49  *	low_power_off		low hard shutdown limit
50  *
51  * Above mentioned threshold values can be changed via "piclenvd.conf"
52  * configuration file.
53  *
54  * Environmental monitoring is done by the "envthr" thread. It periodically
55  * monitors both CPU die and CPU ambient temperatures and takes appropriate
56  * action depending upon the current temperature and threshold values for
57  * that sensor. If the temperature reaches the high_shutdown limit or the
58  * low_shutdown limit, and remains there for over shutdown_interval seconds,
59  * it forces a graceful system shutdown via tuneable shutdown_cmd string
60  * variable. Otherwise, if the temperature reaches the high_warning limit
61  * or the low_warning limit, it logs and prints a message on the console.
62  * This message will be printed at most at "warning_interval" seconds
63  * interval, which is also a tuneable variable.
64  *
65  * Excalibur system contains three fans: cpu, system and power supply. The
66  * cpu and system fans are under software control and their speed can be
67  * set to a value in the range 0 through 63. However, the software has no
68  * control over the power supply fan's speed (it's automatically controlled
69  * by the hardware), but it can turn it ON or OFF. When in EStar mode (i.e.
70  * the lowest power state), the environmental monitor turns off the power
71  * supply fan.
72  *
73  * Each fan is represented as a different minor device and the fan speed
74  * can be controlled by writing to the TDA8444 device driver. Note that
75  * these devices are read only and the driver caches the last speed set
76  * for each fan, thus allowing an interface to read the current fan speed
77  * also.
78  *
79  * The policy to control fan speed depends upon the sensor. For CPU die
80  * sensor, different policy is used depending upon whether the temperature
81  * is rising, falling or steady state. In case of CPU ambient sensor, only
82  * one policy (speed proportional to the current temperature) is used.
83  *
84  * The power state monitoring is done by the "pmthr" thread. It uses the
85  * PM_GET_STATE_CHANGE and PM_GET_STATE_CHANGE_WAIT ioctl commands to pick
86  * up any power state change events. It processes all queued power state
87  * change events and determines the current lowest power state and saves it
88  * in cur_lpstate variable.
89  *
90  * Once the "envthr" and "pmthr" threads have been started, they are never
91  * killed. This is desirable so that we can do environmental monitoring
92  * during reinit process.  The "envd_rwlock" reader/writer lock is used
93  * to protect initialization of global state during reinit process against
94  * the "envthr" and "pmthr" trying to reference that state.
95  */
96 
97 #include <stdio.h>
98 #include <stdlib.h>
99 #include <sys/sysmacros.h>
100 #include <limits.h>
101 #include <string.h>
102 #include <stdarg.h>
103 #include <alloca.h>
104 #include <unistd.h>
105 #include <sys/processor.h>
106 #include <syslog.h>
107 #include <errno.h>
108 #include <fcntl.h>
109 #include <picl.h>
110 #include <picltree.h>
111 #include <picldefs.h>
112 #include <pthread.h>
113 #include <signal.h>
114 #include <libdevinfo.h>
115 #include <sys/pm.h>
116 #include <sys/open.h>
117 #include <sys/time.h>
118 #include <sys/utsname.h>
119 #include <sys/systeminfo.h>
120 #include <sys/i2c/clients/max1617.h>
121 #include <sys/i2c/clients/i2c_client.h>
122 #include <sys/xcalwd.h>
123 #include "envd.h"
124 
/*
 * Reader/writer lock guarding the plug-in's global state; taken as a
 * writer by delete_lpm_devices() while the lpm device list is torn down.
 */
static pthread_rwlock_t	envd_rwlock = PTHREAD_RWLOCK_INITIALIZER;
126 
127 /*
128  * PICL plugin
129  */
/* Plug-in entry points defined in this file */
static void piclenvd_register(void);
static void piclenvd_init(void);
static void piclenvd_fini(void);
/* PICL tree setup/teardown routines implemented outside this file */
extern void env_picl_setup(void);
extern void env_picl_destroy(void);

/* Run piclenvd_register() automatically when the module is loaded */
#pragma	init(piclenvd_register)

/*
 * Registration record for this plug-in: interface version, criticality
 * flag, plug-in name and the init/fini entry points.
 */
static picld_plugin_reg_t my_reg_info = {
	PICLD_PLUGIN_VERSION_1,
	PICLD_PLUGIN_CRITICAL,
	"SUNW_piclenvd",
	piclenvd_init,
	piclenvd_fini,
};
145 
146 
/*
 * Default threshold values for CPU junction/die and ambient sensors.
 *
 * Both initializers list the low/high power-off, shutdown and warning
 * constants, then the MAX1617 min/max temperatures, then a policy
 * selector followed by a count and two policy values. The die sensor
 * uses POLICY_TARGET_TEMP with its normal/other targets; the ambient
 * sensor uses POLICY_LINEAR with its low/high nominal values.
 * NOTE(review): the exact field layout of sensor_thresh_t is declared
 * outside this file -- confirm against envd.h before reordering.
 */
static sensor_thresh_t cpu_die_thresh_default = {
	CPU_DIE_LOW_POWER_OFF, CPU_DIE_HIGH_POWER_OFF,
	CPU_DIE_LOW_SHUTDOWN, CPU_DIE_HIGH_SHUTDOWN,
	CPU_DIE_LOW_WARNING, CPU_DIE_HIGH_WARNING,
	MAX1617_MIN_TEMP, MAX1617_MAX_TEMP,
	POLICY_TARGET_TEMP, 2,
	CPU_DIE_NORMAL_TARGET, CPU_DIE_OTHER_TARGET,
	0, 0, 0, 0
};

static sensor_thresh_t cpu_amb_thresh_default = {
	CPU_AMB_LOW_POWER_OFF, CPU_AMB_HIGH_POWER_OFF,
	CPU_AMB_LOW_SHUTDOWN, CPU_AMB_HIGH_SHUTDOWN,
	CPU_AMB_LOW_WARNING, CPU_AMB_HIGH_WARNING,
	MAX1617_MIN_TEMP, MAX1617_MAX_TEMP,
	POLICY_LINEAR, 2,
	CPU_AMB_LOW_NOMINAL, CPU_AMB_HIGH_NOMINAL,
	0, 0, 0, 0
};
169 
170 
/*
 * Dummy sensor threshold data structure for processing threshold tuneables.
 * Entries in env_tuneables[] take the address of a member of this
 * structure as their arg1 value.
 */
static sensor_thresh_t	dummy_thresh;
175 
/*
 * Temperature related constants for fan speed adjustment
 */
#define	AVG_TEMP_HYSTERESIS	0.25
#define	RISING_TEMP_MARGIN	6
#define	FALLING_TEMP_MARGIN	3

/*
 * tuneable variables
 *
 * NOTE(review): these look like the built-in defaults for the
 * like-named tuneable variables and command strings declared below;
 * the code that applies them is not in this part of the file -- confirm.
 */
#define	FAN_SLOW_ADJUSTMENT	20		/* in percentage */
#define	FAN_INCREMENT_LIMIT	6		/* absolute value */
#define	FAN_DECREMENT_LIMIT	1		/* absolute value */
#define	DEVFSADM_CMD 		"/usr/sbin/devfsadm -i max1617"
#define	FRU_DEVFSADM_CMD 	"/usr/sbin/devfsadm -i seeprom"
191 
/*
 * Run-time tuneables. Each of the int and string variables below is
 * registered by name in env_tuneables[].
 */
int		env_debug;		/* non-zero enables debug logging */
static int	sensor_poll_interval;
static int	warning_interval;
static int	warning_duration;
static int	shutdown_interval;
static int	fan_slow_adjustment;
static int	fan_incr_limit;
static int	fan_decr_limit;
static int	disable_piclenvd;
static int	disable_warning;
static int	disable_power_off;
static int	disable_shutdown;

static char	shutdown_cmd[128];
static char	devfsadm_cmd[128];
static char	fru_devfsadm_cmd[128];	/* run by get_fru_envsegs() */
/* Per-sensor threshold storage referenced from envd_sensors[] */
static sensor_thresh_t cpu0_die_thresh, cpu0_amb_thresh;
static sensor_thresh_t cpu1_die_thresh, cpu1_amb_thresh;
210 
/*
 * Temperature sensors
 *
 * One entry per sensor: sensor name, sensor device path, pointer to its
 * threshold storage, FRU device path, FRU sensor id, sensor flags and a
 * trailing -1. The array is terminated by an entry whose name is NULL,
 * which is what the loops over envd_sensors[] test for.
 */

static env_sensor_t envd_sensors[] = {
	{ SENSOR_CPU0_DIE, CPU0_DIE_SENSOR_DEVFS, &cpu0_die_thresh,
	    CPU0_FRU_DEVFS, CPU_FRU_DIE_SENSOR,
	    SFLAG_TARGET_TEMP | SFLAG_CPU_DIE_SENSOR, -1},
	{ SENSOR_CPU0_AMB, CPU0_AMB_SENSOR_DEVFS, &cpu0_amb_thresh,
	    CPU0_FRU_DEVFS, CPU_FRU_AMB_SENSOR, SFLAG_CPU_AMB_SENSOR, -1},
	{ SENSOR_CPU1_DIE, CPU1_DIE_SENSOR_DEVFS, &cpu1_die_thresh,
	    CPU1_FRU_DEVFS, CPU_FRU_DIE_SENSOR,
	    SFLAG_TARGET_TEMP | SFLAG_CPU_DIE_SENSOR, -1},
	{ SENSOR_CPU1_AMB, CPU1_AMB_SENSOR_DEVFS, &cpu1_amb_thresh,
	    CPU1_FRU_DEVFS, CPU_FRU_AMB_SENSOR, SFLAG_CPU_AMB_SENSOR, -1},
	{ NULL, NULL, NULL, NULL, 0, 0, -1}
};
228 
229 
/*
 * Fan devices
 *
 * Each descriptor gives the fan name, its device path, the minimum and
 * maximum speed constants and two fields initialized to -1.
 */
static env_fan_t envd_system_fan = {
	ENV_SYSTEM_FAN, ENV_SYSTEM_FAN_DEVFS,
	SYSTEM_FAN_SPEED_MIN, SYSTEM_FAN_SPEED_MAX, -1, -1,
};

static env_fan_t envd_cpu_fan = {
	ENV_CPU_FAN, ENV_CPU_FAN_DEVFS,
	CPU_FAN_SPEED_MIN, CPU_FAN_SPEED_MAX, -1, -1,
};

static env_fan_t envd_psupply_fan = {
	ENV_PSUPPLY_FAN, ENV_PSUPPLY_FAN_DEVFS,
	PSUPPLY_FAN_SPEED_MIN, PSUPPLY_FAN_SPEED_MAX, -1, -1,
};

/* NULL-terminated list of all fan descriptors */
static env_fan_t *envd_fans[] = {
	&envd_system_fan,
	&envd_cpu_fan,
	&envd_psupply_fan,
	NULL
};
254 
/*
 * Linked list of devices advertising lpm-ranges
 * (built by setup_lpm_devices(), released by delete_lpm_devices())
 */
static lpm_dev_t	*lpm_devices = NULL;

/*
 * Excalibur lpm to system-fan speed
 * lpm values must be monotonically increasing (avoid divide-by-zero)
 *
 * y_of_x() interpolates linearly between neighbouring points and
 * returns the first or last fan speed for lpm values outside the table.
 */
static point_t	excal_lpm_system_fan_tbl[] = {
	/* {lpm, fspeed} */
	{18, 12},
	{25, 20},
	{33, 26},
	{44, 32},
	{51, 39},
	{63, 52},
	{64, 63}
};

/* Table wrapper (entry count, points) handed to y_of_x() */
static table_t	lpm_fspeed = {
	sizeof (excal_lpm_system_fan_tbl)/ sizeof (point_t),
	excal_lpm_system_fan_tbl
};
279 
/*
 * Sensor to fan map
 *
 * Pairs a sensor name with the name of a fan. Both CPU die sensors map
 * to the CPU fan and both CPU ambient sensors map to the system fan.
 * The table ends with a {NULL, NULL} entry.
 */
typedef struct {
	char	*sensor_name;
	char	*fan_name;
} sensor_fan_map_t;

static sensor_fan_map_t sensor_fan_map[] = {
	{SENSOR_CPU0_DIE, ENV_CPU_FAN},
	{SENSOR_CPU1_DIE, ENV_CPU_FAN},
	{SENSOR_CPU0_AMB, ENV_SYSTEM_FAN},
	{SENSOR_CPU1_AMB, ENV_SYSTEM_FAN},
	{NULL, NULL}
};
295 
/*
 * Sensor to PM device map
 *
 * The static table below fills in only the first four members of each
 * entry (sensor id, sensor name, a NULL PM device name and the speed
 * component name); the remaining members start out zeroed. The table
 * ends with an entry whose sensor id is -1 and whose names are NULL.
 */
struct sensor_pmdev {
	int		sensor_id;
	char		*sensor_name;
	char		*pmdev_name;
	char		*speed_comp_name;
	int		speed_comp;
	int		full_power;
	int		cur_power;
	env_sensor_t	*sensorp;
	sensor_pmdev_t	*next;
};

#define	SPEED_COMPONENT_NAME	"CPU Speed"

static sensor_pmdev_t sensor_pmdevs[] = {
	{SENSOR_CPU0_ID, SENSOR_CPU0_DIE, NULL, SPEED_COMPONENT_NAME},
	{SENSOR_CPU1_ID, SENSOR_CPU1_DIE, NULL, SPEED_COMPONENT_NAME},
	{-1, NULL, NULL, NULL}
};
318 
/*
 * Environmental thread variables
 */
static boolean_t	system_shutdown_started = B_FALSE;
static boolean_t	envthr_created = B_FALSE;	/* envthr created */
static pthread_t	envthr_tid;		/* envthr thread ID */
static pthread_attr_t	thr_attr;

/*
 * Power management thread (pmthr) variables
 */
static boolean_t	pmdev_names_init = B_FALSE;
static pthread_t	pmthr_tid;		/* pmthr thread ID */
static int		pmthr_exists = B_FALSE;	/* pmthr exists */
static int		pm_fd = -1;		/* PM device file descriptor */
static int		cur_lpstate;		/* cur low power state */

/*
 * Miscellaneous variables and declarations
 */
/* set once get_fru_envsegs() has run fru_devfsadm_cmd; prevents reruns */
static int	fru_devfsadm_invoked = 0;
static int	devfsadm_invoked = 0;
static char	tokdel[] = " \t\n\r";	/* space, tab, newline, CR */
static uint_t	envd_sleep(uint_t);
343 
/*
 * Tuneable data structure/array and processing functions
 *
 * Each env_tuneables[] entry binds a keyword to the function that
 * processes it, plus the address (arg1) and size or flags (arg2) passed
 * to that function. The array ends with an all-NULL entry.
 */

typedef struct {
	char		*name;		/* keyword */
	int		(*func)(char *, char *, void *, int, char *, int);
					/* tuneable processing function */
	void		*arg1;		/* tuneable arg1 (memory address) */
	int		arg2;		/* tuneable arg2 (size or flags) */
} env_tuneable_t;

static int process_int_tuneable(char *keyword, char *buf, void *addr,
    int size, char *fname, int line);
static int process_string_tuneable(char *keyword, char *buf, void *addr,
    int size, char *fname, int line);
static int process_threshold_tuneable(char *keyword, char *buf, void *addr,
    int flags, char *fname, int line);
static void process_env_conf_file(void);

static env_tuneable_t env_tuneables[] = {
	/*
	 * Threshold keywords without a sensor prefix: arg1 points at a
	 * member of dummy_thresh and the flags argument is 0.
	 */
	{"low_power_off", process_threshold_tuneable,
	    &dummy_thresh.low_power_off, 0},
	{"low_shutdown", process_threshold_tuneable,
	    &dummy_thresh.low_shutdown, 0},
	{"low_warning", process_threshold_tuneable,
	    &dummy_thresh.low_warning, 0},
	{"high_power_off", process_threshold_tuneable,
	    &dummy_thresh.high_power_off, 0},
	{"high_shutdown", process_threshold_tuneable,
	    &dummy_thresh.high_shutdown, 0},
	{"high_warning", process_threshold_tuneable,
	    &dummy_thresh.high_warning, 0},
	/* Forced fan speeds are stored directly in the fan descriptors */
	{"force_cpu_fan", process_int_tuneable, &envd_cpu_fan.forced_speed,
	    sizeof (envd_cpu_fan.forced_speed)},
	{"force_system_fan", process_int_tuneable,
	    &envd_system_fan.forced_speed,
	    sizeof (envd_system_fan.forced_speed)},

	/* CPU ambient sensor thresholds (flags: SFLAG_CPU_AMB_SENSOR) */
	{"cpu_amb_low_power_off", process_threshold_tuneable,
	    &dummy_thresh.low_power_off, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_low_shutdown", process_threshold_tuneable,
	    &dummy_thresh.low_shutdown, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_low_warning", process_threshold_tuneable,
	    &dummy_thresh.low_warning, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_low_nominal", process_threshold_tuneable,
	    &dummy_thresh.policy_data[LOW_NOMINAL_LOC], SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_power_off", process_threshold_tuneable,
	    &dummy_thresh.high_power_off, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_shutdown", process_threshold_tuneable,
	    &dummy_thresh.high_shutdown, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_warning", process_threshold_tuneable,
	    &dummy_thresh.high_warning, SFLAG_CPU_AMB_SENSOR},
	{"cpu_amb_high_nominal", process_threshold_tuneable,
	    &dummy_thresh.policy_data[HIGH_NOMINAL_LOC], SFLAG_CPU_AMB_SENSOR},

	/*
	 * CPU die sensor thresholds (flags: SFLAG_CPU_DIE_SENSOR). The two
	 * target temperatures live in policy_data[0] and policy_data[1].
	 */
	{"cpu_die_low_power_off", process_threshold_tuneable,
	    &dummy_thresh.low_power_off, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_low_shutdown", process_threshold_tuneable,
	    &dummy_thresh.low_shutdown, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_low_warning", process_threshold_tuneable,
	    &dummy_thresh.low_warning, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_normal_target", process_threshold_tuneable,
	    &dummy_thresh.policy_data[0], SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_high_power_off", process_threshold_tuneable,
	    &dummy_thresh.high_power_off, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_high_shutdown", process_threshold_tuneable,
	    &dummy_thresh.high_shutdown, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_high_warning", process_threshold_tuneable,
	    &dummy_thresh.high_warning, SFLAG_CPU_DIE_SENSOR},
	{"cpu_die_other_target", process_threshold_tuneable,
	    &dummy_thresh.policy_data[1], SFLAG_CPU_DIE_SENSOR},

	/* Plain integer and string tuneables: arg2 is the storage size */
	{"sensor_poll_interval", process_int_tuneable, &sensor_poll_interval,
	    sizeof (sensor_poll_interval)},
	{"warning_interval", process_int_tuneable, &warning_interval,
	    sizeof (warning_interval)},
	{"warning_duration", process_int_tuneable, &warning_duration,
	    sizeof (warning_duration)},
	{"disable_piclenvd", process_int_tuneable, &disable_piclenvd,
	    sizeof (disable_piclenvd)},
	{"disable_power_off", process_int_tuneable, &disable_power_off,
	    sizeof (disable_power_off)},
	{"disable_warning", process_int_tuneable, &disable_warning,
	    sizeof (disable_warning)},
	{"disable_shutdown", process_int_tuneable, &disable_shutdown,
	    sizeof (disable_shutdown)},
	{"shutdown_interval", process_int_tuneable, &shutdown_interval,
	    sizeof (shutdown_interval)},
	{"shutdown_cmd", process_string_tuneable, &shutdown_cmd[0],
	    sizeof (shutdown_cmd)},
	{"devfsadm_cmd", process_string_tuneable, &devfsadm_cmd[0],
	    sizeof (devfsadm_cmd)},
	{"fru_devfsadm_cmd", process_string_tuneable, &fru_devfsadm_cmd[0],
	    sizeof (fru_devfsadm_cmd)},
	{"fan_slow_adjustment", process_int_tuneable, &fan_slow_adjustment,
	    sizeof (fan_slow_adjustment)},
	{"fan_incr_limit", process_int_tuneable, &fan_incr_limit,
	    sizeof (fan_incr_limit)},
	{"fan_decr_limit", process_int_tuneable, &fan_decr_limit,
	    sizeof (fan_decr_limit)},
	{"env_debug", process_int_tuneable, &env_debug, sizeof (env_debug)},
	{ NULL, NULL, NULL, 0}
};
448 
449 static void
fini_table(table_t * tblp)450 fini_table(table_t *tblp)
451 {
452 	if (tblp == NULL)
453 		return;
454 	free(tblp->xymap);
455 	free(tblp);
456 }
457 
458 static table_t *
init_table(int npoints)459 init_table(int npoints)
460 {
461 	table_t		*tblp;
462 	point_t		*xy;
463 
464 	if (npoints == 0)
465 		return (NULL);
466 
467 	if ((tblp = malloc(sizeof (*tblp))) == NULL)
468 		return (NULL);
469 
470 	if ((xy = malloc(sizeof (*xy) * npoints)) == NULL) {
471 		free(tblp);
472 		return (NULL);
473 	}
474 
475 	tblp->nentries = npoints;
476 	tblp->xymap = xy;
477 
478 	return (tblp);
479 }
480 
481 /*
482  * Temp-LPM Table format:
483  * temp, lpm, temp, lpm, ...
484  */
485 static table_t *
parse_lpm_ranges(uint32_t * bufp,size_t nbytes)486 parse_lpm_ranges(uint32_t *bufp, size_t nbytes)
487 {
488 	int	nentries;
489 	table_t	*tblp = NULL;
490 	int	i;
491 
492 	if (bufp == NULL)
493 		return (NULL);
494 
495 	/*
496 	 * Table should have at least 2 points
497 	 * and all points should have x and y values
498 	 */
499 	if ((nbytes < (2 * sizeof (point_t))) ||
500 	    (nbytes & (sizeof (point_t) - 1))) {
501 		if (env_debug)
502 			envd_log(LOG_ERR, ENV_INVALID_PROPERTY_FORMAT,
503 			    LPM_RANGES_PROPERTY);
504 		return (NULL);
505 	}
506 
507 	/* number of entries in the temp-lpm table */
508 	nentries = nbytes/sizeof (point_t);
509 
510 	tblp = init_table(nentries);
511 	if (tblp == NULL)
512 		return (tblp);
513 
514 	/* copy the tuples */
515 	tblp->xymap[0].x = (int)*bufp++;
516 	tblp->xymap[0].y = (int)*bufp++;
517 	for (i = 1; i < nentries; ++i) {
518 		tblp->xymap[i].x = (int)*bufp++;
519 		tblp->xymap[i].y = (int)*bufp++;
520 		if (tblp->xymap[i].x <= tblp->xymap[i - 1].x) {
521 			fini_table(tblp);
522 			if (env_debug)
523 				envd_log(LOG_ERR, ENV_INVALID_PROPERTY_FORMAT,
524 				    LPM_RANGES_PROPERTY);
525 			return (NULL);
526 		}
527 	}
528 
529 	return (tblp);
530 }
531 
532 /*
533  * function: calculates y for a given x based on a table of points
534  * for monotonically increasing x values.
535  * 'tbl' specifies the table to use, 'val' specifies the 'x', returns 'y'
536  */
537 static int
y_of_x(table_t * tbl,int xval)538 y_of_x(table_t *tbl, int xval)
539 {
540 	int		i;
541 	int		entries;
542 	point_t		*xymap;
543 	float		newval;
544 	float		dy, dx, slope;
545 
546 	entries = tbl->nentries;
547 	xymap = tbl->xymap;
548 	if (xval <= xymap[0].x)
549 		return (xymap[0].y);
550 	else if (xval >= xymap[entries - 1].x)
551 		return (xymap[entries - 1].y);
552 
553 	for (i = 1; i < entries - 1; i++) {
554 		if (xval == xymap[i].x)
555 			return (xymap[i].y);
556 		if (xval < xymap[i].x)
557 			break;
558 	}
559 
560 	/*
561 	 * Use linear interpolation
562 	 */
563 	dy = (float)(xymap[i].y - xymap[i-1].y);
564 	dx = (float)(xymap[i].x - xymap[i-1].x);
565 	slope = dy/dx;
566 	newval = xymap[i - 1].y + slope * (xval - xymap[i - 1].x);
567 	return ((int)(newval + (newval >= 0 ? 0.5 : -0.5)));
568 }
569 
570 static int
get_lpm_speed(lpm_dev_t * lpmdevs,int temp)571 get_lpm_speed(lpm_dev_t *lpmdevs, int temp)
572 {
573 	lpm_dev_t	*devp;
574 	int		lpm;
575 	int		speed;
576 	int		maxspeed;
577 
578 	if (lpmdevs == NULL)
579 		return (0);
580 	maxspeed = 0;
581 	for (devp = lpmdevs; devp != NULL; devp = devp->next) {
582 		if (devp->temp_lpm_tbl == NULL)
583 			continue;
584 		lpm = y_of_x(devp->temp_lpm_tbl, temp);
585 		if (env_debug)
586 			envd_log(LOG_INFO, "ambient %d lpm %d\n", temp, lpm);
587 		speed = y_of_x(&lpm_fspeed, lpm);
588 		maxspeed = maxspeed > speed ? maxspeed : speed;
589 		if (env_debug)
590 			envd_log(LOG_INFO, "lpm %d fanspeed %d\n", lpm, speed);
591 	}
592 	return (maxspeed);
593 }
594 
595 /*
596  * Callback function used by ptree_walk_tree_by_class
597  */
598 static int
cb_lpm(picl_nodehdl_t nodeh,void * args)599 cb_lpm(picl_nodehdl_t nodeh, void *args)
600 {
601 	lpm_dev_t	**retp = (lpm_dev_t **)args;
602 	int		err;
603 	ptree_propinfo_t	pinfo;
604 	picl_prophdl_t		proph;
605 	size_t			psize;
606 	void			*bufp;
607 	table_t			*temp_lpm_tbl;
608 	lpm_dev_t		*newdev;
609 
610 	err = ptree_get_prop_by_name(nodeh, LPM_RANGES_PROPERTY, &proph);
611 	if (err != PICL_SUCCESS)
612 		return (PICL_WALK_CONTINUE);
613 
614 	err = ptree_get_propinfo(proph, &pinfo);
615 	if ((err != PICL_SUCCESS) ||
616 	    (pinfo.piclinfo.type != PICL_PTYPE_BYTEARRAY))
617 		return (PICL_WALK_CONTINUE);
618 	psize = pinfo.piclinfo.size;
619 	bufp = alloca(psize);
620 
621 	err = ptree_get_propval(proph, bufp, psize);
622 	if (err != PICL_SUCCESS)
623 		return (PICL_WALK_CONTINUE);
624 
625 	temp_lpm_tbl = parse_lpm_ranges(bufp, psize);
626 	if (temp_lpm_tbl == NULL) {
627 		return (PICL_WALK_CONTINUE);
628 	}
629 
630 	newdev = malloc(sizeof (*newdev));
631 	if (newdev == NULL) {
632 		fini_table(temp_lpm_tbl);
633 		return (PICL_WALK_TERMINATE);
634 	}
635 
636 	memset(newdev, 0, sizeof (*newdev));
637 
638 	newdev->nodeh = nodeh;
639 	newdev->temp_lpm_tbl = temp_lpm_tbl;
640 
641 	/* add newdev to the list */
642 	newdev->next = *retp;
643 	*retp = newdev;
644 
645 	return (PICL_WALK_CONTINUE);
646 }
647 
648 /*
649  * Find all devices advertising "lpm-ranges" property, parse and store
650  * the lpm tables for each device
651  */
652 static int
setup_lpm_devices(lpm_dev_t ** devpp)653 setup_lpm_devices(lpm_dev_t **devpp)
654 {
655 	picl_nodehdl_t	plath;
656 	int		err;
657 	lpm_dev_t	*lpmp;
658 
659 	err = ptree_get_node_by_path("/platform", &plath);
660 	if (err != PICL_SUCCESS)
661 		return (err);
662 
663 	lpmp = NULL;
664 	err = ptree_walk_tree_by_class(plath, NULL, (void *)&lpmp, cb_lpm);
665 	if (err == PICL_SUCCESS)
666 		*devpp = lpmp;
667 	return (err);
668 }
669 
670 /*
671  * Remove all lpm_devices and their tables.
672  */
673 static void
delete_lpm_devices(void)674 delete_lpm_devices(void)
675 {
676 	lpm_dev_t	*devp, *next;
677 
678 	(void) pthread_rwlock_wrlock(&envd_rwlock);
679 
680 	if (lpm_devices == NULL) {
681 		(void) pthread_rwlock_unlock(&envd_rwlock);
682 		return;
683 	}
684 
685 	devp = lpm_devices;
686 
687 	while (devp != NULL) {
688 		fini_table(devp->temp_lpm_tbl);
689 		next = devp->next;
690 		free(devp);
691 		devp = next;
692 	}
693 
694 	lpm_devices = NULL;
695 
696 	(void) pthread_rwlock_unlock(&envd_rwlock);
697 }
698 
699 /*
700  * Translate observed (measured) temperature into expected (correct)
701  * temperature
702  */
703 static int
xlate_obs2exp(env_sensor_t * sensorp,tempr_t temp)704 xlate_obs2exp(env_sensor_t *sensorp, tempr_t temp)
705 {
706 	int		i, entries, new_temp, denominator;
707 	tempr_map_t	*map;
708 	float		ftemp;
709 
710 	entries = sensorp->obs2exp_cnt;
711 	map = sensorp->obs2exp_map;
712 	if (entries < 2 || map == NULL)  {
713 		/* no map or can't map it */
714 		new_temp = temp;
715 	} else {
716 		/*
717 		 * Any point beyond the range specified by the map is
718 		 * extrapolated using either the first two or the last
719 		 * two entries in the map.
720 		 */
721 		for (i = 1; i < entries-1; i++)
722 			if (temp < map[i].observed)
723 				break;
724 		/*
725 		 * Interpolate/extrapolate the temperature using linear
726 		 * equation with map[i-1] and map[i] being the two ends
727 		 * of the line segment.
728 		 */
729 		denominator = map[i].observed - map[i-1].observed;
730 		if (denominator == 0) {
731 			/*
732 			 * Infinite slope. Since the temperature reading
733 			 * resolution is 1C, force denominator to 1 to
734 			 * avoid divide by zero.
735 			 */
736 			denominator = 1;
737 		}
738 		ftemp = map[i-1].expected +  (temp - map[i-1].observed) *
739 		    (float)(map[i].expected - map[i-1].expected)/denominator;
740 		new_temp = (int)(ftemp + (ftemp >= 0 ? 0.5 : -0.5));
741 	}
742 
743 	return (new_temp);
744 }
745 
746 
747 /*
748  * Translate expected (correct) temperature into observed (measured)
749  * temperature
750  */
751 static int
xlate_exp2obs(env_sensor_t * sensorp,tempr_t temp)752 xlate_exp2obs(env_sensor_t *sensorp, tempr_t temp)
753 {
754 	int		i, entries, new_temp, denominator;
755 	tempr_map_t	*map;
756 	float		ftemp;
757 	sensor_thresh_t	*threshp = sensorp->temp_thresh;
758 
759 	entries = sensorp->obs2exp_cnt;
760 	map = sensorp->obs2exp_map;
761 	if (entries < 2 || map == NULL)
762 		/* no map or can't map it */
763 		new_temp = temp;
764 	else {
765 		/*
766 		 * Any point beyond the range specified by the map is
767 		 * extrapolated using either the first two or the last
768 		 * two entries in the map.
769 		 */
770 		for (i = 1; i < entries-1; i++)
771 			if (temp < map[i].expected)
772 				break;
773 
774 		/*
775 		 * Interpolate/extrapolate the temperature using linear
776 		 * equation with map[i-1] and map[i] being the two ends
777 		 * of the line segment.
778 		 */
779 		denominator = map[i].expected - map[i-1].expected;
780 		if (denominator == 0) {
781 			/*
782 			 * Infinite slope. Since the temperature reading
783 			 * resolution is 1C, force denominator to 1 to
784 			 * avoid divide by zero.
785 			 */
786 			denominator = 1;
787 		}
788 		ftemp = map[i-1].observed + (temp - map[i-1].expected) *
789 		    (float)(map[i].observed - map[i-1].observed)/denominator;
790 		new_temp = (int)(ftemp + (ftemp >= 0 ? 0.5 : -0.5));
791 	}
792 
793 	if (threshp) {
794 		if (new_temp > threshp->max_limit)
795 			new_temp = threshp->max_limit;
796 		else if (new_temp < threshp->min_limit)
797 			new_temp = threshp->min_limit;
798 	}
799 
800 	return (new_temp);
801 }
802 
803 
804 /*
805  * Check if the specified FRU is present.
806  * Returns 1 if present; 0 otherwise.
807  */
808 static int
fru_present(char * path)809 fru_present(char *path)
810 {
811 	char		*p, physpath[PATH_MAX];
812 	di_node_t	root_node;
813 	int		fru_present = 0;
814 
815 	/*
816 	 * Construct FRU device path by stripping minor
817 	 * node name from the path and use di_init() to
818 	 * see if the node exists.
819 	 */
820 	(void) strlcpy(physpath, path, sizeof (physpath));
821 	p = strrchr(physpath, ':');
822 	if (p != NULL)
823 		*p = '\0';
824 	if ((root_node = di_init(physpath, DINFOMINOR)) != DI_NODE_NIL) {
825 		di_fini(root_node);
826 		fru_present = 1;
827 	}
828 	return (fru_present);
829 }
830 
831 
832 /*
833  * Get environmental segment from the specified FRU SEEPROM
834  */
835 static int
get_envseg(int fd,void ** envsegp,int * envseglenp)836 get_envseg(int fd, void **envsegp, int *envseglenp)
837 {
838 	int			i, segcnt, envseglen;
839 	section_layout_t	section;
840 	segment_layout_t	segment;
841 	uint8_t			*envseg;
842 
843 	if (lseek(fd, (long)SECTION_HDR_OFFSET, 0) == -1L ||
844 	    read(fd, &section, sizeof (section)) != sizeof (section)) {
845 		return (EINVAL);
846 	}
847 
848 	/*
849 	 * Verify we have the correct section and contents are valid
850 	 * For now, we don't verify the CRC.
851 	 */
852 	if (section.header_tag != SECTION_HDR_TAG ||
853 	    GET_UNALIGN16(&section.header_version[0]) != SECTION_HDR_VER) {
854 		if (env_debug)
855 			envd_log(LOG_INFO,
856 			    "Invalid section header tag:%x  version:%x\n",
857 			    section.header_tag,
858 			    GET_UNALIGN16(&section.header_version));
859 		return (EINVAL);
860 	}
861 
862 	/*
863 	 * Locate our environmental segment
864 	 */
865 	segcnt = section.segment_count;
866 	for (i = 0; i < segcnt; i++) {
867 		if (read(fd, &segment, sizeof (segment)) != sizeof (segment)) {
868 			return (errno);
869 		}
870 		if (env_debug > 1)
871 			envd_log(LOG_INFO,
872 			    "Seg name: %x  desc:%x off:%x  len:%x\n",
873 			    GET_UNALIGN16(&segment.name),
874 			    GET_UNALIGN32(&segment.descriptor[0]),
875 			    GET_UNALIGN16(&segment.offset),
876 			    GET_UNALIGN16(&segment.length));
877 
878 		if (GET_UNALIGN16(&segment.name) == ENVSEG_NAME)
879 			break;
880 	}
881 
882 	if (i >= segcnt) {
883 		return (ENOENT);
884 	}
885 
886 	/*
887 	 * Allocate memory to hold the environmental segment data.
888 	 */
889 	envseglen = GET_UNALIGN16(&segment.length);
890 	if ((envseg = malloc(envseglen)) == NULL) {
891 		return (ENOMEM);
892 	}
893 
894 	if (lseek(fd, (long)GET_UNALIGN16(&segment.offset), 0) == -1L ||
895 	    read(fd, envseg, envseglen) != envseglen) {
896 		(void) free(envseg);
897 		return (EIO);
898 	}
899 
900 	*envsegp = envseg;
901 	*envseglenp = envseglen;
902 
903 	if (env_debug > 1) {
904 		char	msgbuf[256];
905 		for (i = 0; i < envseglen; i++) {
906 			(void) sprintf(&msgbuf[3*(i&0xf)], "%2x ", envseg[i]);
907 			if ((i & 0xf) == 0xf || i == (envseglen-1))
908 				envd_log(LOG_INFO, "envseg[%2x]: %s\n",
909 				    (i & ~0xf), msgbuf);
910 		}
911 	}
912 
913 	return (0);
914 }
915 
916 
917 /*
918  * Get all environmental segments
919  */
920 static fruenvseg_t *
get_fru_envsegs(void)921 get_fru_envsegs(void)
922 {
923 	env_sensor_t		*sensorp;
924 	fruenvseg_t		*frup, *fruenvsegs;
925 	envseg_layout_t		*envsegp;
926 	void			*envsegbufp;
927 	int			fd, envseglen, hdrlen;
928 	char			path[PATH_MAX];
929 
930 	fruenvsegs = NULL;
931 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
932 		if (sensorp->fru == NULL)
933 			continue;
934 
935 		for (frup = fruenvsegs; frup != NULL; frup = frup->next)
936 			if (strcmp(frup->fru, sensorp->fru) == 0)
937 				break;
938 
939 		if (frup != NULL)
940 			continue;
941 
942 		frup = (fruenvseg_t *)malloc(sizeof (fruenvseg_t));
943 		if (frup == NULL)
944 			continue;
945 
946 		/* add this FRU to our list */
947 		frup->fru = sensorp->fru;
948 		frup->envsegbufp = NULL;
949 		frup->envseglen = 0;
950 		frup->next = fruenvsegs;
951 		fruenvsegs = frup;
952 
953 		/*
954 		 * Now get the environmental segment from this FRU
955 		 */
956 		(void) strcpy(path, "/devices");
957 		(void) strlcat(path, sensorp->fru, sizeof (path));
958 	retry:
959 		errno = 0;
960 		fd = open(path, O_RDONLY);
961 		if (env_debug > 1)
962 			envd_log(LOG_INFO,
963 			    "fru SEEPROM: %s fd: %d  errno:%d\n",
964 			    path, fd, errno);
965 		if (fd == -1 && errno == ENOENT && fru_present(frup->fru)) {
966 			if (fru_devfsadm_invoked ||
967 			    fru_devfsadm_cmd[0] == '\0') {
968 				envd_log(LOG_CRIT, ENV_FRU_OPEN_FAIL,
969 				    sensorp->fru, errno, strerror(errno));
970 				continue;
971 
972 			}
973 			/*
974 			 * FRU is present but no path exists as
975 			 * someone rebooted the system without
976 			 * "-r" option. Let's invoke "devfsadm"
977 			 * once to create seeprom nodes and try
978 			 * again so that we can monitor all
979 			 * accessible sensors properly and prevent
980 			 * any CPU overheating.
981 			 */
982 			if (env_debug)
983 				envd_log(LOG_INFO,
984 				    "Invoking '%s' to create FRU nodes\n",
985 				    fru_devfsadm_cmd);
986 			fru_devfsadm_invoked = 1;
987 			(void) system(fru_devfsadm_cmd);
988 			goto retry;
989 		}
990 
991 		/*
992 		 * Read environmental segment from this FRU SEEPROM
993 		 */
994 		if (get_envseg(fd, &envsegbufp, &envseglen) == 0) {
995 			/*
996 			 * Validate envseg version number and header length
997 			 */
998 			envsegp = (envseg_layout_t *)envsegbufp;
999 			hdrlen = sizeof (envseg_layout_t) -
1000 			    sizeof (envseg_sensor_t) +
1001 			    (envsegp->sensor_count) * sizeof (envseg_sensor_t);
1002 
1003 			if (envsegp->version != ENVSEG_VERSION ||
1004 			    envseglen < hdrlen) {
1005 				/*
1006 				 * version mismatch or header not big enough
1007 				 */
1008 				envd_log(LOG_CRIT, ENV_FRU_BAD_ENVSEG,
1009 				    sensorp->fru, errno, strerror(errno));
1010 				if (envsegbufp != NULL)
1011 					(void) free(envsegbufp);
1012 			} else {
1013 				frup->envseglen = envseglen;
1014 				frup->envsegbufp = envsegbufp;
1015 			}
1016 		}
1017 		(void) close(fd);
1018 	}
1019 	return (fruenvsegs);
1020 }
1021 
1022 /*
1023  * Process environmental segment for all FRUs.
1024  */
1025 static void
process_fru_envseg()1026 process_fru_envseg()
1027 {
1028 	env_sensor_t		*sensorp;
1029 	sensor_thresh_t		*threshp;
1030 	envseg_layout_t		*envsegp;
1031 	envseg_sensor_data_t	*datap;
1032 	fruenvseg_t		*frup, *fruenvsegs;
1033 	int			i, envseglen, sensorcnt;
1034 	uint_t			offset, length, mapentries;
1035 
1036 	/*
1037 	 * Lookup/read environmental segments from FRU SEEPROMs and
1038 	 * process it. Note that we read each SEEPROM once as it's
1039 	 * a slow device.
1040 	 */
1041 	fruenvsegs = get_fru_envsegs();
1042 
1043 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1044 		if (sensorp->fru == NULL)
1045 			continue;
1046 
1047 		/*
1048 		 * Locate our FRU environmental segment
1049 		 */
1050 		for (frup = fruenvsegs; frup != NULL; frup = frup->next)
1051 			if (strcmp(frup->fru, sensorp->fru) == 0)
1052 				break;
1053 		if (frup == NULL || frup->envsegbufp == NULL)
1054 			continue;
1055 
1056 		envsegp = (envseg_layout_t *)frup->envsegbufp;
1057 		envseglen = frup->envseglen;
1058 		sensorcnt = envsegp->sensor_count;
1059 
1060 		/*
1061 		 * Locate our sensor data record entry
1062 		 */
1063 		for (i = 0; i < sensorcnt; i++) {
1064 			uint32_t	id;
1065 
1066 			id = GET_UNALIGN32(&envsegp->sensors[i].sensor_id[0]);
1067 			if (env_debug > 1)
1068 				envd_log(LOG_INFO, " sensor[%d]: id:%x\n",
1069 				    i, id);
1070 			if (id == sensorp->fru_sensor)
1071 				break;
1072 		}
1073 
1074 		if (i >= sensorcnt)
1075 			continue;
1076 
1077 		/*
1078 		 * Validate offset/length of our sensor data record
1079 		 */
1080 		offset = (uint_t)GET_UNALIGN16(&envsegp->sensors[i].offset);
1081 		datap =  (envseg_sensor_data_t *)((intptr_t)frup->envsegbufp +
1082 		    offset);
1083 		mapentries =  GET_UNALIGN16(&datap->obs2exp_cnt);
1084 		length = sizeof (envseg_sensor_data_t) - sizeof (envseg_map_t) +
1085 		    mapentries * sizeof (envseg_map_t);
1086 
1087 		if (env_debug > 1)
1088 			envd_log(LOG_INFO, "Found sensor_id:%x idx:%x "
1089 			"off:%x #maps:%x expected length:%x\n",
1090 				sensorp->fru_sensor, i, offset,
1091 				mapentries, length);
1092 
1093 		if (offset >= envseglen || (offset+length) > envseglen) {
1094 			/* corrupted sensor record */
1095 			envd_log(LOG_CRIT, ENV_FRU_BAD_SENSOR_ENTRY,
1096 			    sensorp->fru_sensor, sensorp->name, sensorp->fru);
1097 			continue;
1098 		}
1099 
1100 		if (env_debug > 1) {
1101 			/* print threshold values */
1102 			envd_log(LOG_INFO,
1103 			    "Thresholds: HPwrOff %d  HShutDn %d  HWarn %d\n",
1104 			    datap->high_power_off, datap->high_shutdown,
1105 			    datap->high_warning);
1106 			envd_log(LOG_INFO,
1107 			    "Thresholds: LWarn %d  LShutDn %d  LPwrOff %d\n",
1108 			    datap->low_warning, datap->low_shutdown,
1109 			    datap->low_power_off);
1110 
1111 			/* print policy data */
1112 			envd_log(LOG_INFO,
1113 			    " Policy type: %d #%d data: %x %x %x %x %x %x\n",
1114 			    datap->policy_type, datap->policy_entries,
1115 			    datap->policy_data[0], datap->policy_data[1],
1116 			    datap->policy_data[2], datap->policy_data[3],
1117 			    datap->policy_data[4], datap->policy_data[5]);
1118 
1119 			/* print map table */
1120 			for (i = 0; i < mapentries; i++) {
1121 				envd_log(LOG_INFO, " Map pair# %d: %d %d\n",
1122 				    i, datap->obs2exp_map[i].observed,
1123 				    datap->obs2exp_map[i].expected);
1124 			}
1125 		}
1126 
1127 
1128 		/*
1129 		 * Copy threshold values
1130 		 */
1131 		threshp = sensorp->temp_thresh;
1132 		threshp->high_power_off = datap->high_power_off;
1133 		threshp->high_shutdown = datap->high_shutdown;
1134 		threshp->high_warning = datap->high_warning;
1135 		threshp->low_warning = datap->low_warning;
1136 		threshp->low_shutdown = datap->low_shutdown;
1137 		threshp->low_power_off = datap->low_power_off;
1138 
1139 		/*
1140 		 * Copy policy data
1141 		 */
1142 		threshp->policy_type = datap->policy_type;
1143 		threshp->policy_entries = datap->policy_entries;
1144 		for (i = 0; i < MAX_POLICY_ENTRIES; i++)
1145 			threshp->policy_data[i] =
1146 			    (tempr_t)datap->policy_data[i];
1147 
1148 		/*
1149 		 * Copy temperature mapping info (discard duplicate entries)
1150 		 */
1151 		if (sensorp->obs2exp_map) {
1152 			(void) free(sensorp->obs2exp_map);
1153 			sensorp->obs2exp_map = NULL;
1154 			sensorp->obs2exp_cnt = 0;
1155 		}
1156 		if (mapentries > 0) {
1157 			tempr_map_t	*map;
1158 			int		cnt;
1159 			tempr_t		observed, expected;
1160 
1161 			map = (tempr_map_t *)malloc(mapentries *
1162 			    sizeof (tempr_map_t));
1163 
1164 			if (map == NULL) {
1165 				envd_log(LOG_CRIT, ENV_FRU_SENSOR_MAP_NOMEM,
1166 				    sensorp->fru_sensor, sensorp->name,
1167 				    sensorp->fru);
1168 				continue;
1169 			}
1170 
1171 			for (i = 0, cnt = 0; i < mapentries; i++) {
1172 
1173 				observed = (tempr_t)
1174 				    datap->obs2exp_map[i].observed;
1175 				expected = (tempr_t)
1176 				    datap->obs2exp_map[i].expected;
1177 
1178 				/* ignore if duplicate entry */
1179 				if (cnt > 0 &&
1180 				    observed == map[cnt-1].observed &&
1181 				    expected == map[cnt-1].expected) {
1182 					continue;
1183 				}
1184 				map[cnt].observed = observed;
1185 				map[cnt].expected = expected;
1186 				cnt++;
1187 			}
1188 			sensorp->obs2exp_cnt = cnt;
1189 			sensorp->obs2exp_map = map;
1190 		}
1191 
1192 		if (env_debug > 2 && sensorp->obs2exp_cnt > 1) {
1193 			char	msgbuf[256];
1194 
1195 			envd_log(LOG_INFO,
1196 			    "Measured --> Correct temperature table "
1197 			    "for sensor: %s\n", sensorp->name);
1198 			for (i = -128; i < 128; i++) {
1199 				(void) sprintf(&msgbuf[6*(i&0x7)], "%6d",
1200 				    xlate_obs2exp(sensorp, i));
1201 				if ((i &0x7) == 0x7)
1202 					envd_log(LOG_INFO,
1203 					    "%8d: %s\n", (i & ~0x7), msgbuf);
1204 			}
1205 			if ((i & 0x7) != 0)
1206 				(void) printf("%8d: %s\n", (i & ~0x7), msgbuf);
1207 
1208 			envd_log(LOG_INFO,
1209 			    "Correct --> Measured temperature table "
1210 			    "for sensor: %s\n", sensorp->name);
1211 			for (i = -128; i < 128; i++) {
1212 				(void) sprintf(&msgbuf[6*(i&0x7)], "%6d",
1213 				    xlate_exp2obs(sensorp, i));
1214 				if ((i &0x7) == 0x7)
1215 					envd_log(LOG_INFO,
1216 					    "%8d: %s\n", (i & ~0x7), msgbuf);
1217 			}
1218 			if ((i & 0x7) != 0)
1219 				envd_log(LOG_INFO,
1220 				    "%8d: %s\n", (i & ~0x7), msgbuf);
1221 		}
1222 	}
1223 
1224 	/*
1225 	 * Deallocate environmental segment list
1226 	 */
1227 	while (fruenvsegs) {
1228 		frup = fruenvsegs;
1229 		fruenvsegs = frup->next;
1230 		if (frup->envsegbufp != NULL)
1231 			(void) free(frup->envsegbufp);
1232 		(void) free(frup);
1233 	}
1234 }
1235 
1236 /*
1237  * Lookup fan and return a pointer to env_fan_t data structure.
1238  */
1239 env_fan_t *
fan_lookup(char * name)1240 fan_lookup(char *name)
1241 {
1242 	int		i;
1243 	env_fan_t	*fanp;
1244 
1245 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1246 		if (strcmp(fanp->name, name) == 0)
1247 			return (fanp);
1248 	}
1249 	return (NULL);
1250 }
1251 
1252 /*
1253  * Lookup sensor and return a pointer to env_sensor_t data structure.
1254  */
1255 env_sensor_t *
sensor_lookup(char * name)1256 sensor_lookup(char *name)
1257 {
1258 	env_sensor_t	*sensorp;
1259 
1260 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1261 		if (strcmp(sensorp->name, name) == 0)
1262 			return (sensorp);
1263 	}
1264 	return (NULL);
1265 }
1266 
1267 /*
1268  * Get current temperature
1269  * Returns -1 on error, 0 if successful
1270  */
1271 int
get_temperature(env_sensor_t * sensorp,tempr_t * temp)1272 get_temperature(env_sensor_t *sensorp, tempr_t *temp)
1273 {
1274 	int	fd = sensorp->fd;
1275 	int	retval = 0;
1276 	int	expected_temp;
1277 
1278 	if (fd == -1)
1279 		retval = -1;
1280 	else if (ioctl(fd, I2C_GET_TEMPERATURE, temp) == -1) {
1281 		retval = -1;
1282 		if (sensorp->error == 0) {
1283 			sensorp->error = 1;
1284 			envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_FAIL,
1285 			    sensorp->name, errno, strerror(errno));
1286 		}
1287 	} else if (sensorp->error != 0) {
1288 		sensorp->error = 0;
1289 		envd_log(LOG_WARNING, ENV_SENSOR_ACCESS_OK, sensorp->name);
1290 	} else if (sensorp->obs2exp_map != NULL) {
1291 		expected_temp = xlate_obs2exp(sensorp, (tempr_t)*temp);
1292 		if (env_debug > 1)
1293 			envd_log(LOG_INFO,
1294 			    "sensor: %-13s temp:%d  CORRECED to %d\n",
1295 			    sensorp->name, *temp, (tempr_t)expected_temp);
1296 		*temp = (tempr_t)expected_temp;
1297 	}
1298 
1299 	return (retval);
1300 }
1301 
1302 /*
1303  * Get current fan speed
1304  * Returns -1 on error, 0 if successful
1305  */
1306 int
get_fan_speed(env_fan_t * fanp,fanspeed_t * fanspeedp)1307 get_fan_speed(env_fan_t *fanp, fanspeed_t *fanspeedp)
1308 {
1309 	int	fan_fd;
1310 	int	retval = 0;
1311 
1312 	fan_fd = fanp->fd;
1313 	if (fan_fd == -1 || read(fan_fd, fanspeedp, sizeof (fanspeed_t)) !=
1314 	    sizeof (fanspeed_t))
1315 		retval = -1;
1316 	return (retval);
1317 }
1318 
1319 /*
1320  * Set fan speed
1321  * Returns -1 on error, 0 if successful
1322  */
1323 static int
set_fan_speed(env_fan_t * fanp,fanspeed_t fanspeed)1324 set_fan_speed(env_fan_t *fanp, fanspeed_t fanspeed)
1325 {
1326 	int	fan_fd;
1327 	int	retval = 0;
1328 
1329 	fan_fd = fanp->fd;
1330 	if (fan_fd == -1 || write(fan_fd, &fanspeed, sizeof (fanspeed)) !=
1331 	    sizeof (fanspeed_t))
1332 		retval = -1;
1333 	return (retval);
1334 }
1335 
1336 
1337 /*
1338  * close all fan devices
1339  */
1340 static void
envd_close_fans(void)1341 envd_close_fans(void)
1342 {
1343 	int		i;
1344 	env_fan_t	*fanp;
1345 
1346 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1347 		if (fanp->fd != -1) {
1348 			(void) close(fanp->fd);
1349 			fanp->fd = -1;
1350 		}
1351 	}
1352 }
1353 
1354 /*
1355  * Close sensor devices
1356  */
1357 static void
envd_close_sensors(void)1358 envd_close_sensors(void)
1359 {
1360 	env_sensor_t	*sensorp;
1361 
1362 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1363 		if (sensorp->fd != -1) {
1364 			(void) close(sensorp->fd);
1365 			sensorp->fd = -1;
1366 		}
1367 	}
1368 }
1369 
1370 /*
1371  * Open PM device
1372  */
1373 static void
envd_open_pm(void)1374 envd_open_pm(void)
1375 {
1376 	pm_fd = open(PM_DEVICE, O_RDONLY);
1377 	if (pm_fd != -1)
1378 		(void) fcntl(pm_fd, F_SETFD, FD_CLOEXEC);
1379 }
1380 
1381 /*
1382  * Close PM device
1383  */
1384 static void
envd_close_pm(void)1385 envd_close_pm(void)
1386 {
1387 	if (pm_fd != -1) {
1388 		(void) close(pm_fd);
1389 		pm_fd = -1;
1390 	}
1391 }
1392 
1393 /*
1394  * Open fan devices and initialize per fan data structure.
1395  * Returns #fans found.
1396  */
1397 static int
envd_setup_fans(void)1398 envd_setup_fans(void)
1399 {
1400 	int		i, fd;
1401 	fanspeed_t	speed;
1402 	env_fan_t	*fanp;
1403 	char		path[PATH_MAX];
1404 	int		fancnt = 0;
1405 	char		*fan_name;
1406 	sensor_fan_map_t *sfmap;
1407 	env_sensor_t	*sensorp;
1408 	int		sensor_cnt;
1409 
1410 	for (i = 0; (fanp = envd_fans[i]) != NULL; i++) {
1411 		if (fanp->fd == -1) {
1412 			fanp->sensor_cnt = 0;
1413 			fanp->cur_speed = 0;
1414 			fanp->prev_speed = 0;
1415 
1416 			(void) strcpy(path, "/devices");
1417 			(void) strlcat(path, fanp->devfs_path, sizeof (path));
1418 			fd = open(path, O_RDWR);
1419 			if (fd == -1) {
1420 				envd_log(LOG_CRIT,
1421 				    ENV_FAN_OPEN_FAIL, fanp->name,
1422 				    fanp->devfs_path, errno, strerror(errno));
1423 				fanp->present = B_FALSE;
1424 				continue;
1425 			}
1426 			(void) fcntl(fd, F_SETFD, FD_CLOEXEC);
1427 			fanp->fd = fd;
1428 			fanp->present = B_TRUE;
1429 		}
1430 		fancnt++;
1431 
1432 		/*
1433 		 * Set initial speed and update cur_speed/prev_speed
1434 		 */
1435 		if (fanp->forced_speed >= 0) {
1436 			speed = (fanspeed_t)fanp->forced_speed;
1437 			if (speed > fanp->speed_max)
1438 				speed = fanp->speed_max;
1439 			if (!disable_piclenvd)
1440 				(void) set_fan_speed(fanp, speed);
1441 		} else if (get_fan_speed(fanp, &speed) == -1) {
1442 			/*
1443 			 * The Fan driver does not know the current fan speed.
1444 			 * Initialize all ON/OFF fans to ON state and all
1445 			 * variable speed fans under software control to 50%
1446 			 * of the max speed and reread the fan to get the
1447 			 * current speed.
1448 			 */
1449 			speed = (fanp == &envd_psupply_fan) ?
1450 				fanp->speed_max : fanp->speed_max/2;
1451 			if (!disable_piclenvd) {
1452 				(void) set_fan_speed(fanp, speed);
1453 				if (get_fan_speed(fanp, &speed) == -1)
1454 					continue;
1455 			}
1456 		}
1457 		fanp->cur_speed = speed;
1458 		fanp->prev_speed = speed;
1459 
1460 		/*
1461 		 * Process sensor_fan_map[] table and initialize sensors[]
1462 		 * array for this fan.
1463 		 */
1464 		fan_name = fanp->name;
1465 		for (sensor_cnt = 0, sfmap = &sensor_fan_map[0];
1466 		    sfmap->sensor_name != NULL; sfmap++) {
1467 			if (strcmp(sfmap->fan_name, fan_name) != 0)
1468 				continue;
1469 			sensorp = sensor_lookup(sfmap->sensor_name);
1470 			if (sensorp != NULL && sensor_cnt < SENSORS_PER_FAN) {
1471 				fanp->sensors[sensor_cnt] = sensorp;
1472 				sensor_cnt++;
1473 			}
1474 		}
1475 		fanp->sensor_cnt = sensor_cnt;
1476 	}
1477 
1478 	return (fancnt);
1479 }
1480 
1481 
1482 /*
1483  * Adjust specified sensor target temperature and fan adjustment rate
1484  */
1485 
1486 static void
adjust_sensor_target(env_sensor_t * sensorp)1487 adjust_sensor_target(env_sensor_t *sensorp)
1488 {
1489 	int		target, index;
1490 	sensor_pmdev_t	*pmdevp;
1491 	sensor_thresh_t	*threshp;
1492 	float		rate;
1493 
1494 	/*
1495 	 * Look at current power state of all power managed devices
1496 	 * associated with this sensor and look up the desired target
1497 	 * temperature and pick the lowest one of those values. Also,
1498 	 * calculate the rate of change based upon whether one or more
1499 	 * of the associated power managed devices are not running at
1500 	 * full power mode.
1501 	 */
1502 
1503 	if (sensorp == NULL || (threshp = sensorp->temp_thresh) == NULL ||
1504 	    threshp->policy_type != POLICY_TARGET_TEMP)
1505 		return;
1506 
1507 	target = threshp->policy_data[0];
1508 	rate = 1.0;
1509 	for (pmdevp = sensorp->pmdevp; pmdevp != NULL; pmdevp = pmdevp->next) {
1510 		index = pmdevp->full_power - pmdevp->cur_power;
1511 		if (index <= 0)
1512 			continue;
1513 
1514 		/* not running at full power */
1515 		if (index >= threshp->policy_entries)
1516 			index = threshp->policy_entries - 1;
1517 		if (target > threshp->policy_data[index])
1518 			target = threshp->policy_data[index];
1519 		if (rate > (float)fan_slow_adjustment/100)
1520 			rate = (float)fan_slow_adjustment/100;
1521 		if (env_debug > 1)
1522 			envd_log(LOG_INFO,
1523 			    "pmdev: %-13s new_target:%d  cur:%d power:%d/%d\n",
1524 			    pmdevp->pmdev_name, target, sensorp->target_temp,
1525 			    pmdevp->cur_power, pmdevp->full_power);
1526 	}
1527 
1528 	if (env_debug)
1529 		envd_log(LOG_INFO,
1530 		    "sensor: %-13s new_target:%d  cur:%d power:%d/%d\n",
1531 		    sensorp->name, target, sensorp->target_temp,
1532 		    ((sensorp->pmdevp) ? sensorp->pmdevp->cur_power : -1),
1533 		    ((sensorp->pmdevp) ? sensorp->pmdevp->full_power : -1));
1534 
1535 	sensorp->fan_adjustment_rate = rate;
1536 	sensorp->target_temp = target;
1537 }
1538 
1539 /*
1540  * Update current power level of all PM devices we are tracking and adjust
1541  * the target temperature associated with the corresponding sensor.
1542  *
1543  * Returns 1 if one or more pmdev power level was adjusted; 0 otherwise.
1544  */
1545 static int
update_pmdev_power()1546 update_pmdev_power()
1547 {
1548 	sensor_pmdev_t	*pmdevp;
1549 	pm_req_t	pmreq;
1550 	int		cur_power;
1551 	int		updated = 0;
1552 
1553 	for (pmdevp = sensor_pmdevs; pmdevp->pmdev_name != NULL; pmdevp++) {
1554 		pmreq.physpath = pmdevp->pmdev_name;
1555 		pmreq.data = NULL;
1556 		pmreq.datasize = 0;
1557 		pmreq.component = pmdevp->speed_comp;
1558 		cur_power = ioctl(pm_fd, PM_GET_CURRENT_POWER, &pmreq);
1559 		if (pmdevp->cur_power != cur_power) {
1560 			pmdevp->cur_power = cur_power;
1561 			if (pmdevp->sensorp) {
1562 				adjust_sensor_target(pmdevp->sensorp);
1563 				updated = 1;
1564 			}
1565 		}
1566 	}
1567 	return (updated);
1568 }
1569 
1570 /*
1571  * Check if the specified sensor is present.
1572  * Returns 1 if present; 0 otherwise.
1573  *
1574  * Note that we don't use ptree_get_node_by_path() here to detect
1575  * if a temperature device is present as we don't want to make
1576  * "devtree" a critical plugin.
1577  */
1578 static int
envd_sensor_present(env_sensor_t * sensorp)1579 envd_sensor_present(env_sensor_t *sensorp)
1580 {
1581 	char		*p, physpath[PATH_MAX];
1582 	di_node_t	root_node;
1583 	int		sensor_present = 0;
1584 
1585 	/*
1586 	 * Construct temperature device path by stripping minor
1587 	 * node name from the devfs_path and use di_init() to
1588 	 * see if the node exists.
1589 	 */
1590 	(void) strcpy(physpath, sensorp->devfs_path);
1591 	p = strrchr(physpath, ':');
1592 	if (p != NULL)
1593 		*p = '\0';
1594 	if ((root_node = di_init(physpath, DINFOMINOR)) != DI_NODE_NIL) {
1595 		di_fini(root_node);
1596 		sensor_present = 1;
1597 	}
1598 	return (sensor_present);
1599 }
1600 
1601 /*
1602  * Open temperature sensor devices and initialize per sensor data structure.
1603  * Returns #sensors found.
1604  */
1605 static int
envd_setup_sensors(void)1606 envd_setup_sensors(void)
1607 {
1608 	tempr_t		temp;
1609 	env_sensor_t	*sensorp;
1610 	char		path[PATH_MAX];
1611 	int		sensorcnt = 0;
1612 	int		sensor_present;
1613 	sensor_thresh_t	*threshp;
1614 	sensor_pmdev_t	*pmdevp;
1615 
1616 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1617 		if (sensorp->fd != -1) {
1618 			/* Don't reinitialize opened sensor */
1619 			threshp = sensorp->temp_thresh;
1620 			sensorp->pmdevp = NULL;
1621 		} else {
1622 			/* Initialize sensor's initial state */
1623 			sensorp->shutdown_initiated = B_FALSE;
1624 			sensorp->warning_tstamp = 0;
1625 			sensorp->warning_start = 0;
1626 			sensorp->shutdown_tstamp = 0;
1627 			sensorp->pmdevp = NULL;
1628 			sensorp->fan_adjustment_rate = 1.0;
1629 
1630 			threshp = sensorp->temp_thresh;
1631 			temp = (threshp && threshp->policy_entries > 0) ?
1632 			    threshp->policy_data[0] : 0;
1633 			sensorp->target_temp = temp;
1634 			sensorp->cur_temp = temp;
1635 			sensorp->avg_temp = temp;
1636 			sensorp->prev_avg_temp = temp;
1637 			sensorp->error = 0;
1638 
1639 			(void) strcpy(path, "/devices");
1640 			(void) strlcat(path, sensorp->devfs_path,
1641 			    sizeof (path));
1642 		retry:
1643 			sensorp->fd = open(path, O_RDWR);
1644 			if (sensorp->fd == -1) {
1645 				sensor_present = envd_sensor_present(sensorp);
1646 				if (sensor_present && !devfsadm_invoked &&
1647 				    devfsadm_cmd[0] != '\0') {
1648 					/*
1649 					 * Sensor is present but no path
1650 					 * exists as someone rebooted the
1651 					 * system without "-r" option. Let's
1652 					 * invoke "devfsadm" once to create
1653 					 * max1617 sensors paths in /devices
1654 					 * subtree and try again so that we
1655 					 * can monitor all accessible sensors
1656 					 * and prevent any CPU overheating.
1657 					 *
1658 					 * Note that this routine is always
1659 					 * called in main thread context and
1660 					 * serialized with respect to other
1661 					 * plugins' initialization. Hence, it's
1662 					 * safe to use system(3C) call here.
1663 					 */
1664 					devfsadm_invoked = 1;
1665 					(void) system(devfsadm_cmd);
1666 					goto retry;
1667 				}
1668 				if (sensor_present)
1669 					envd_log(LOG_CRIT,
1670 					    ENV_SENSOR_OPEN_FAIL,
1671 					    sensorp->name,
1672 					    sensorp->devfs_path, errno,
1673 					    strerror(errno));
1674 				sensorp->present = B_FALSE;
1675 				continue;
1676 			}
1677 			(void) fcntl(sensorp->fd, F_SETFD, FD_CLOEXEC);
1678 			sensorp->present = B_TRUE;
1679 
1680 			/*
1681 			 * Set cur_temp field to the current temperature value
1682 			 */
1683 			if (get_temperature(sensorp, &temp) == 0) {
1684 				sensorp->cur_temp = temp;
1685 				sensorp->avg_temp = temp;
1686 			}
1687 		}
1688 		sensorcnt++;
1689 
1690 		/*
1691 		 * Set low_power_off and high_power_off limits
1692 		 */
1693 		if (threshp && !disable_power_off) {
1694 			temp = xlate_exp2obs(sensorp, threshp->low_power_off);
1695 			if (env_debug > 1)
1696 				envd_log(LOG_INFO, "sensor: %-13s low_power_"
1697 				"off set to %d (real %d)\n", sensorp->name,
1698 				    (int)temp, threshp->low_power_off);
1699 			(void) ioctl(sensorp->fd, MAX1617_SET_LOW_LIMIT, &temp);
1700 
1701 			temp = xlate_exp2obs(sensorp, threshp->high_power_off);
1702 			if (env_debug > 1)
1703 				envd_log(LOG_INFO, "sensor: %-13s high_power_"
1704 				"off set to %d (real %d)\n", sensorp->name,
1705 				    (int)temp, threshp->high_power_off);
1706 			(void) ioctl(sensorp->fd, MAX1617_SET_HIGH_LIMIT,
1707 			    &temp);
1708 		}
1709 	}
1710 
1711 	/*
1712 	 * Locate "CPU Speed" component for any PM devices associated with
1713 	 * the sensors.
1714 	 */
1715 	for (pmdevp = sensor_pmdevs; pmdevp->sensor_name; pmdevp++) {
1716 		int		i, ncomp;
1717 		char		physpath[PATH_MAX];
1718 		pm_req_t	pmreq;
1719 
1720 		pmdevp->speed_comp = -1;
1721 		pmdevp->full_power = -1;
1722 		pmdevp->cur_power = -1;
1723 		pmdevp->next = NULL;
1724 		pmdevp->sensorp = sensorp = sensor_lookup(pmdevp->sensor_name);
1725 
1726 		/*
1727 		 * Lookup speed component and get full and current power
1728 		 * level for that component.
1729 		 */
1730 		pmreq.physpath = pmdevp->pmdev_name;
1731 		pmreq.data = physpath;
1732 		pmreq.datasize = sizeof (physpath);
1733 
1734 		ncomp = ioctl(pm_fd, PM_GET_NUM_COMPONENTS, &pmreq);
1735 		for (i = 0; i < ncomp; i++) {
1736 			pmreq.component = i;
1737 			physpath[0] = '\0';
1738 			if (ioctl(pm_fd, PM_GET_COMPONENT_NAME, &pmreq) <= 0)
1739 				continue;
1740 			if (strcasecmp(pmreq.data, pmdevp->speed_comp_name))
1741 				continue;
1742 			pmdevp->speed_comp = i;
1743 
1744 
1745 			/*
1746 			 * Get full power and current power level
1747 			 */
1748 			pmdevp->full_power = ioctl(pm_fd, PM_GET_FULL_POWER,
1749 			    &pmreq);
1750 
1751 			pmdevp->cur_power = ioctl(pm_fd, PM_GET_CURRENT_POWER,
1752 			    &pmreq);
1753 
1754 			if (sensorp) {
1755 				pmdevp->next = sensorp->pmdevp;
1756 				sensorp->pmdevp = pmdevp;
1757 				adjust_sensor_target(sensorp);
1758 			}
1759 			break;
1760 		}
1761 		if (env_debug > 1)
1762 			envd_log(LOG_INFO,
1763 			    "sensor:%s %p pmdev:%s comp:%s %d power:%d/%d\n",
1764 			    pmdevp->sensor_name, pmdevp->sensorp,
1765 			    pmdevp->pmdev_name, pmdevp->speed_comp_name,
1766 			    pmdevp->speed_comp, pmdevp->cur_power,
1767 			    pmdevp->full_power);
1768 	}
1769 	return (sensorcnt);
1770 }
1771 
1772 /*
1773  * Read all temperature sensors and take appropriate action based
1774  * upon temperature threshols associated with each sensor. Possible
1775  * actions are:
1776  *
1777  *	temperature > high_shutdown
1778  *	temperature < low_shutdown
1779  *		Gracefully shutdown the system and log/print a message
1780  *		on the system console provided the temperature has been
1781  *		in shutdown range for "shutdown_interval" seconds.
1782  *
1783  *	high_warning < temperature <= high_shutdown
1784  *	low_warning  > temperature >= low_shutdown
1785  *		Log/print a warning message on the system console at most
1786  *		once every "warning_interval" seconds.
1787  *
1788  * Note that the current temperature is recorded in the "cur_temp" field
1789  * within each env_sensor_t structure.
1790  */
1791 static void
monitor_sensors(void)1792 monitor_sensors(void)
1793 {
1794 	tempr_t 	temp;
1795 	env_sensor_t	*sensorp;
1796 	sensor_thresh_t	*threshp;
1797 	time_t		ct;
1798 	char		msgbuf[BUFSIZ];
1799 	char		syscmd[BUFSIZ];
1800 
1801 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL; sensorp++) {
1802 		if (get_temperature(sensorp, &temp) < 0)
1803 			continue;
1804 
1805 		sensorp->prev_avg_temp = sensorp->avg_temp;
1806 		sensorp->cur_temp = temp;
1807 		sensorp->avg_temp = (sensorp->avg_temp + temp)/2;
1808 		threshp = sensorp->temp_thresh;
1809 
1810 		if (env_debug)
1811 			envd_log(LOG_INFO,
1812 			"sensor: %-13s temp  prev_avg:%6.2f  "
1813 			"cur:%d avg_temp:%6.2f power:%d/%d target:%d\n",
1814 			    sensorp->name, sensorp->prev_avg_temp,
1815 			    temp, sensorp->avg_temp, ((sensorp->pmdevp) ?
1816 			    sensorp->pmdevp->cur_power : -1),
1817 			    ((sensorp->pmdevp) ? sensorp->pmdevp->full_power :
1818 			    -1), sensorp->target_temp);
1819 
1820 
1821 		/*
1822 		 * If this sensor already triggered system shutdown, don't
1823 		 * log any more shutdown/warning messages for it.
1824 		 */
1825 		if (sensorp->shutdown_initiated || threshp == NULL)
1826 			continue;
1827 
1828 		/*
1829 		 * Check for the temperature in warning and shutdown range
1830 		 * and take appropriate action.
1831 		 */
1832 		if (TEMP_IN_WARNING_RANGE(temp, threshp) && !disable_warning) {
1833 			/*
1834 			 * Check if the temperature has been in warning
1835 			 * range during last warning_duration interval.
1836 			 * If so, the temperature is truly in warning
1837 			 * range and we need to log a warning message,
1838 			 * but no more than once every warning_interval
1839 			 * seconds.
1840 			 */
1841 			time_t	wtstamp = sensorp->warning_tstamp;
1842 
1843 			ct = (time_t)(gethrtime() / NANOSEC);
1844 			if (sensorp->warning_start == 0)
1845 				sensorp->warning_start = ct;
1846 			if (((ct - sensorp->warning_start) >=
1847 			    warning_duration) && (wtstamp == 0 ||
1848 			    (ct - wtstamp) >= warning_interval)) {
1849 				envd_log(LOG_CRIT, ENV_WARNING_MSG,
1850 				    sensorp->name, temp,
1851 				    threshp->low_warning,
1852 				    threshp->high_warning);
1853 				sensorp->warning_tstamp = ct;
1854 			}
1855 		} else if (sensorp->warning_start != 0)
1856 			sensorp->warning_start = 0;
1857 
1858 		if (TEMP_IN_SHUTDOWN_RANGE(temp, threshp) &&
1859 		    !disable_shutdown) {
1860 			ct = (time_t)(gethrtime() / NANOSEC);
1861 			if (sensorp->shutdown_tstamp == 0)
1862 				sensorp->shutdown_tstamp = ct;
1863 
1864 			/*
1865 			 * Shutdown the system if the temperature remains
1866 			 * in the shutdown range for over shutdown_interval
1867 			 * seconds.
1868 			 */
1869 			if ((ct - sensorp->shutdown_tstamp) >=
1870 			    shutdown_interval) {
1871 				/* log error */
1872 				sensorp->shutdown_initiated = B_TRUE;
1873 				(void) snprintf(msgbuf, sizeof (msgbuf),
1874 				    ENV_SHUTDOWN_MSG, sensorp->name,
1875 				    temp, threshp->low_shutdown,
1876 				    threshp->high_shutdown);
1877 				envd_log(LOG_ALERT, msgbuf);
1878 
1879 				/* shutdown the system (only once) */
1880 				if (system_shutdown_started == B_FALSE) {
1881 					(void) snprintf(syscmd, sizeof (syscmd),
1882 					    "%s \"%s\"", shutdown_cmd, msgbuf);
1883 					envd_log(LOG_ALERT, syscmd);
1884 					system_shutdown_started = B_TRUE;
1885 					(void) system(syscmd);
1886 				}
1887 			}
1888 		} else if (sensorp->shutdown_tstamp != 0)
1889 			sensorp->shutdown_tstamp = 0;
1890 	}
1891 }
1892 
1893 
1894 /*
1895  * Adjust fan speed based upon the current temperature value of various
1896  * sensors affected by the specified fan.
1897  */
1898 static int
adjust_fan_speed(env_fan_t * fanp,lpm_dev_t * devp)1899 adjust_fan_speed(env_fan_t *fanp, lpm_dev_t *devp)
1900 {
1901 	int		i;
1902 	fanspeed_t	fanspeed;
1903 	float		speed, cur_speed, new_speed, max_speed, min_speed;
1904 	env_sensor_t	*sensorp;
1905 	sensor_thresh_t	*threshp;
1906 	tempr_t		temp;
1907 	float		avg_temp, tempdiff, targetdiff;
1908 	int		av_ambient;
1909 	int		amb_cnt;
1910 
1911 
1912 	/*
1913 	 * Get current fan speed
1914 	 */
1915 	if (get_fan_speed(fanp, &fanspeed) < 0)
1916 		return (-1);
1917 	cur_speed = fanp->cur_speed;
1918 	if (fanspeed != (int)cur_speed)
1919 		cur_speed = (float)fanspeed;
1920 
1921 	/*
1922 	 * Calculate new fan speed for each sensor and pick the largest one.
1923 	 */
1924 	min_speed = fanp->speed_min;
1925 	max_speed = fanp->speed_max;
1926 	speed = 0;
1927 	av_ambient = 0;
1928 	amb_cnt = 0;
1929 
1930 	for (i = 0; i < fanp->sensor_cnt; i++) {
1931 		sensorp = fanp->sensors[i];
1932 		if (sensorp == NULL || sensorp->fd == -1 ||
1933 		    sensorp->temp_thresh == NULL)
1934 			continue;
1935 
1936 		temp = sensorp->cur_temp;
1937 		avg_temp = sensorp->avg_temp;
1938 		threshp = sensorp->temp_thresh;
1939 
1940 		/*
1941 		 * Note ambient temperatures to determine lpm for system fan
1942 		 */
1943 		if ((devp != NULL) &&
1944 		    (sensorp->flags & SFLAG_CPU_AMB_SENSOR)) {
1945 			av_ambient += temp;
1946 			amb_cnt++;
1947 		}
1948 
1949 		/*
1950 		 * If the current temperature is above the warning
1951 		 * threshold, use max fan speed.
1952 		 */
1953 		if (temp >= threshp->high_warning) {
1954 			speed = max_speed;
1955 			break;
1956 		} else if (temp <= threshp->low_warning) {
1957 			speed = min_speed;
1958 			break;
1959 		}
1960 
1961 		if (threshp->policy_type == POLICY_TARGET_TEMP) {
1962 			/*
1963 			 * Try to achieve the desired target temperature.
1964 			 * Calculate new fan speed based upon whether the
1965 			 * temperature is rising, falling or steady state.
1966 			 * Also take into consideration the current fan
1967 			 * speed as well as the desired target temperature.
1968 			 */
1969 			float	delta, speed_change;
1970 			float	multiplier;
1971 
1972 			targetdiff = avg_temp - sensorp->target_temp;
1973 			tempdiff = avg_temp - sensorp->prev_avg_temp;
1974 
1975 			if (tempdiff > AVG_TEMP_HYSTERESIS) {
1976 				/*
1977 				 * Temperature is rising. Increase fan
1978 				 * speed 0.5% for every 1C above the
1979 				 * (target - RISING_TEMP_MARGIN) limit.
1980 				 * Also take into consideration temperature
1981 				 * rising rate and the current fan speed.
1982 				 */
1983 				delta = max_speed * .005 *
1984 				    (RISING_TEMP_MARGIN + targetdiff);
1985 				if (delta <= 0)
1986 					multiplier = 0;
1987 				else
1988 					multiplier = tempdiff/4 +
1989 					    ((cur_speed < max_speed/2) ?
1990 					    2 : 1);
1991 			} else if (tempdiff < -AVG_TEMP_HYSTERESIS) {
1992 				/*
1993 				 * Temperature is falling. Decrease fan
1994 				 * speed 0.5% for every 1C below the
1995 				 * (target + FALLING_TEMP_MARGIN) limit.
1996 				 * Also take into consideration temperature
1997 				 * falling rate and the current fan speed.
1998 				 */
1999 				delta = -max_speed * .005 *
2000 				    (FALLING_TEMP_MARGIN - targetdiff);
2001 				if (delta >= 0)
2002 					multiplier = 0;
2003 				else
2004 					multiplier = -tempdiff/4 +
2005 					    ((cur_speed > max_speed/2) ?
2006 					    2 : 1);
2007 			} else {
2008 				/*
2009 				 * Temperature is changing very slowly.
2010 				 * Adjust fan speed by 0.4% for every 1C
2011 				 * below/above the target temperature.
2012 				 */
2013 				delta = max_speed * .004 * targetdiff;
2014 				multiplier = 1.0;
2015 			}
2016 
2017 
2018 			/*
2019 			 * Enforece some bounds on multiplier and the
2020 			 * speed change.
2021 			 */
2022 			multiplier = MIN(multiplier, 3.0);
2023 			speed_change = delta * multiplier *
2024 			    sensorp->fan_adjustment_rate;
2025 			speed_change = MIN(speed_change, fan_incr_limit);
2026 			speed_change = MAX(speed_change, -fan_decr_limit);
2027 			new_speed = cur_speed + speed_change;
2028 
2029 			if (env_debug > 1)
2030 				envd_log(LOG_INFO,
2031 				"sensor: %-8s temp/diff:%d/%3.1f  "
2032 				"target/diff:%d/%3.1f  change:%4.2f x "
2033 				"%4.2f x %4.2f speed %5.2f -> %5.2f\n",
2034 				    sensorp->name, temp, tempdiff,
2035 				    sensorp->target_temp, targetdiff, delta,
2036 				    multiplier, sensorp->fan_adjustment_rate,
2037 				    cur_speed, new_speed);
2038 		} else if (threshp->policy_type == POLICY_LINEAR) {
2039 			/*
2040 			 * Set fan speed linearly within the operating
2041 			 * range specified by the policy_data[LOW_NOMINAL_LOC]
2042 			 * and policy_data[HIGH_NOMINAL_LOC] threshold values.
2043 			 * Fan speed is set to minimum value at LOW_NOMINAL
2044 			 * and to maximum value at HIGH_NOMINAL value.
2045 			 */
2046 			new_speed = min_speed + (max_speed - min_speed) *
2047 			    (avg_temp - threshp->policy_data[LOW_NOMINAL_LOC])/
2048 			    (threshp->policy_data[HIGH_NOMINAL_LOC] -
2049 			    threshp->policy_data[LOW_NOMINAL_LOC]);
2050 			if (env_debug > 1)
2051 				envd_log(LOG_INFO,
2052 				"sensor: %-8s policy: linear, cur_speed %5.2f"\
2053 				" new_speed: %5.2f\n", sensorp->name, cur_speed,
2054 				    new_speed);
2055 		} else {
2056 			new_speed = cur_speed;
2057 		}
2058 		speed = MAX(speed, new_speed);
2059 	}
2060 
2061 	/*
2062 	 * Adjust speed using lpm tables
2063 	 */
2064 	if (amb_cnt > 0) {
2065 		av_ambient = (av_ambient >= 0 ?
2066 			(int)(0.5 + (float)av_ambient/(float)amb_cnt):
2067 			(int)(-0.5 + (float)av_ambient/(float)amb_cnt));
2068 		speed = MAX(speed, (fanspeed_t)get_lpm_speed(devp, av_ambient));
2069 	}
2070 
2071 	speed = MIN(speed, max_speed);
2072 	speed = MAX(speed, min_speed);
2073 
2074 	/*
2075 	 * Record and update fan speed, if different.
2076 	 */
2077 	fanp->prev_speed = fanp->cur_speed;
2078 	fanp->cur_speed = speed;
2079 	if ((fanspeed_t)speed != fanspeed) {
2080 		fanspeed = (fanspeed_t)speed;
2081 		(void) set_fan_speed(fanp, fanspeed);
2082 	}
2083 	if (env_debug)
2084 		envd_log(LOG_INFO,
2085 		    "fan: %-16s speed cur:%6.2f  new:%6.2f\n",
2086 		    fanp->name, fanp->prev_speed, fanp->cur_speed);
2087 
2088 	return (0);
2089 }
2090 /*
2091  * This is the environment thread, which monitors the current temperature
2092  * and power managed state and controls system fan speed.  Temperature is
2093  * polled every sensor-poll_interval seconds duration.
2094  */
2095 /*ARGSUSED*/
2096 static void *
envthr(void * args)2097 envthr(void *args)
2098 {
2099 	env_sensor_t	*sensorp;
2100 	fanspeed_t 	fan_speed;
2101 	env_fan_t	*pmfanp = &envd_psupply_fan;
2102 	int		to;
2103 	int		xwd = -1;
2104 
2105 	for (sensorp = &envd_sensors[0]; sensorp->name != NULL;
2106 	    sensorp++) {
2107 		if (sensorp->obs2exp_map)
2108 			(void) free(sensorp->obs2exp_map);
2109 		sensorp->obs2exp_map = NULL;
2110 		sensorp->obs2exp_cnt = 0;
2111 	}
2112 
2113 	/*
2114 	 * Process environmental segment data, if present,
2115 	 * in the FRU SEEPROM.
2116 	 */
2117 	process_fru_envseg();
2118 
2119 	/*
2120 	 * Process tuneable parameters
2121 	 */
2122 	process_env_conf_file();
2123 
2124 	/*
2125 	 * Setup temperature sensors and fail if we can't open
2126 	 * at least one sensor.
2127 	 */
2128 	if (envd_setup_sensors() <= 0) {
2129 		envd_close_pm();
2130 		return (NULL);
2131 	}
2132 
2133 	to = 3 * sensor_poll_interval + 1;
2134 	xwd = open(XCALWD_DEVFS, O_RDONLY);
2135 	if (xwd < 0) {
2136 		envd_log(LOG_CRIT, ENV_WATCHDOG_INIT_FAIL, errno,
2137 		    strerror(errno));
2138 	} else if (ioctl(xwd, XCALWD_STOPWATCHDOG) < 0 ||
2139 	    ioctl(xwd, XCALWD_STARTWATCHDOG, &to) < 0) {
2140 		envd_log(LOG_CRIT, ENV_WATCHDOG_INIT_FAIL, errno,
2141 		    strerror(errno));
2142 		(void) close(xwd);
2143 		xwd = -1;
2144 	}
2145 
2146 	/*
2147 	 * Setup fan device (don't fail even if we can't access
2148 	 * the fan as we can still monitor temeperature.
2149 	 */
2150 	(void) envd_setup_fans();
2151 
2152 	for (;;) {
2153 		(void) pthread_rwlock_rdlock(&envd_rwlock);
2154 
2155 		/*
2156 		 * If no "pmthr" thread, then we need to update the
2157 		 * current power level for all power managed deviecs
2158 		 * so that we can determine correct target temperature.
2159 		 */
2160 		if (pmthr_exists == B_FALSE)
2161 			(void) update_pmdev_power();
2162 
2163 		if (xwd >= 0)
2164 			(void) ioctl(xwd, XCALWD_KEEPALIVE);
2165 
2166 		if (!disable_piclenvd) {
2167 			/*
2168 			 * Monitor current temperature for all sensors
2169 			 * (current temperature is recorded in the "cur_temp"
2170 			 * field within each sensor data structure)
2171 			 */
2172 			monitor_sensors();
2173 
2174 			/*
2175 			 * Adjust CPU and system fan speed
2176 			 */
2177 			if (envd_cpu_fan.forced_speed < 0)
2178 				(void) adjust_fan_speed(&envd_cpu_fan, NULL);
2179 			if (envd_system_fan.forced_speed < 0)
2180 				(void) adjust_fan_speed(&envd_system_fan,
2181 					lpm_devices);
2182 
2183 			/*
2184 			 * Turn off power supply fan if in lowest power state.
2185 			 */
2186 			fan_speed = (cur_lpstate) ? pmfanp->speed_min :
2187 			    pmfanp->speed_max;
2188 
2189 			if (env_debug)
2190 				envd_log(LOG_INFO,
2191 				"fan: %-16s speed cur:%6.2f  new:%6.2f "
2192 				"low-power:%d\n", pmfanp->name,
2193 				    (float)pmfanp->cur_speed,
2194 				    (float)fan_speed, cur_lpstate);
2195 
2196 			if (fan_speed != (fanspeed_t)pmfanp->cur_speed &&
2197 			    set_fan_speed(pmfanp, fan_speed) == 0)
2198 				pmfanp->cur_speed = fan_speed;
2199 		}
2200 		(void) pthread_rwlock_unlock(&envd_rwlock);
2201 
2202 		/*
2203 		 * Wait for sensor_poll_interval seconds before polling
2204 		 * again. Note that we use our own envd_sleep() routine
2205 		 * as sleep() in POSIX thread library gets affected by
2206 		 * the wall clock time being set back.
2207 		 */
2208 		(void) envd_sleep(sensor_poll_interval);
2209 	}
2210 	/*NOTREACHED*/
2211 	return (NULL);
2212 }
2213 
2214 /*
2215  * This is the power management thread, which monitors all power state
2216  * change events and wakes up the "envthr" thread when the system enters
2217  * or exits the lowest power state.
2218  */
2219 /*ARGSUSED*/
2220 static void *
pmthr(void * args)2221 pmthr(void *args)
2222 {
2223 	pm_state_change_t	pmstate;
2224 	char			physpath[PATH_MAX];
2225 
2226 	pmstate.physpath = physpath;
2227 	pmstate.size = sizeof (physpath);
2228 	cur_lpstate = 0;
2229 
2230 	for (;;) {
2231 		/*
2232 		 * Get PM state change events to check if the system
2233 		 * is in lowest power state and wake up the "envthr"
2234 		 * thread when the power state changes.
2235 		 *
2236 		 * To minimize polling, we use the blocking interface
2237 		 * to get the power state change event here.
2238 		 */
2239 		if (ioctl(pm_fd, PM_GET_STATE_CHANGE_WAIT, &pmstate) != 0) {
2240 			if (errno != EINTR)
2241 				break;
2242 			continue;
2243 		}
2244 
2245 		/*
2246 		 * Extract the lowest power state from the last queued
2247 		 * state change events. We pick up queued state change
2248 		 * events using the non-blocking interface and wake up
2249 		 * the "envthr" thread only after consuming all the
2250 		 * state change events queued at that time.
2251 		 */
2252 		do {
2253 			if (env_debug > 1)  {
2254 				envd_log(LOG_INFO,
2255 				"pmstate event:0x%x flags:%x comp:%d "
2256 				"oldval:%d newval:%d path:%s\n",
2257 				    pmstate.event, pmstate.flags,
2258 				    pmstate.component, pmstate.old_level,
2259 				    pmstate.new_level, pmstate.physpath);
2260 			}
2261 			cur_lpstate =
2262 			    (pmstate.flags & PSC_ALL_LOWEST) ? 1 : 0;
2263 		} while (ioctl(pm_fd, PM_GET_STATE_CHANGE, &pmstate) == 0);
2264 
2265 		/*
2266 		 * Update current PM state for the components we are
2267 		 * tracking. In case of CPU devices, PM state change
2268 		 * event can be generated even before the state change
2269 		 * takes effect, hence we need to get the current state
2270 		 * for all CPU devices every time and recalculate the
2271 		 * target temperature. We do this once after consuming
2272 		 * all the queued events.
2273 		 */
2274 
2275 		(void) pthread_rwlock_rdlock(&envd_rwlock);
2276 		(void) update_pmdev_power();
2277 		(void) pthread_rwlock_unlock(&envd_rwlock);
2278 	}
2279 
2280 	/*
2281 	 * We won't be able to monitor lowest power state any longer,
2282 	 * hence reset it.
2283 	 */
2284 	cur_lpstate = 0;
2285 	envd_log(LOG_ERR, PM_THREAD_EXITING, errno, strerror(errno));
2286 	pmthr_exists = B_FALSE;
2287 	return (NULL);
2288 }
2289 
2290 
2291 /*
2292  * Process sensor threshold related tuneables
2293  */
2294 static int
process_threshold_tuneable(char * keyword,char * buf,void * dummy_thresh_addr,int flags,char * fname,int line)2295 process_threshold_tuneable(char *keyword, char *buf, void *dummy_thresh_addr,
2296     int flags, char *fname, int line)
2297 {
2298 	int		retval = 0;
2299 	long		val;
2300 	void		*addr;
2301 	char		*endp, *sname;
2302 	env_sensor_t	*sensorp;
2303 
2304 	/*
2305 	 * Tuneable entry can be in one of the following formats:
2306 	 *
2307 	 *	threshold-keyword <int-value>
2308 	 *	threshold-keyword <int-value> <sensor-name> ...
2309 	 *
2310 	 * Convert threshold value into integer value and check for
2311 	 * optional sensor name. If no sensor name is specified, then
2312 	 * the tuneable applies to all sensors specified by the "flags".
2313 	 * Otherwise, it is applicable to the specified sensors.
2314 	 *
2315 	 * Note that the dummy_thresh_addr is the address of the threshold
2316 	 * to be changed and is converted into offset by subtracting the
2317 	 * base dummy_thresh address. This offset is added to the base
2318 	 * address of the threshold structure to be update to determine
2319 	 * the final memory address to be modified.
2320 	 */
2321 
2322 	errno = 0;
2323 	val = strtol(buf, &endp, 0);
2324 	sname = strtok(endp, tokdel);
2325 
2326 	if (errno != 0 || val != (tempr_t)val) {
2327 		retval = -1;
2328 		envd_log(LOG_INFO, ENV_CONF_INT_EXPECTED, fname, line, keyword);
2329 	} else if (flags == 0 && sname == NULL) {
2330 		envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d SKIPPED"
2331 		    " as no sensor specified.\n", fname, line, keyword);
2332 		retval = -1;
2333 	} else if (sname == NULL) {
2334 		int	cnt = 0;
2335 
2336 		for (sensorp = &envd_sensors[0]; sensorp->name; sensorp++) {
2337 			if (sensorp->temp_thresh == NULL ||
2338 			    (sensorp->flags & flags) == 0)
2339 				continue;
2340 
2341 			/*
2342 			 * Convert dummy_thresh_addr into memory address
2343 			 * for this sensor threshold values.
2344 			 */
2345 			addr = (char *)sensorp->temp_thresh +
2346 			    (int)((char *)dummy_thresh_addr -
2347 			    (char *)&dummy_thresh);
2348 
2349 			*(tempr_t *)addr = (tempr_t)val;
2350 			cnt++;
2351 			if (env_debug)
2352 				envd_log(LOG_INFO, "SUNW_piclenvd: file:%s "
2353 				"line:%d %s = %d for sensor: '%s'\n",
2354 				    fname, line, keyword, val, sensorp->name);
2355 		}
2356 		if (cnt == 0)
2357 			envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d "
2358 			"%s SKIPPED as no matching sensor found.\n",
2359 			    fname, line, keyword);
2360 	} else {
2361 		/* apply threshold value to the specified sensors */
2362 		do {
2363 			sensorp = sensor_lookup(sname);
2364 			if (sensorp == NULL || sensorp->temp_thresh == NULL ||
2365 			    (flags && (sensorp->flags & flags) == 0)) {
2366 				envd_log(LOG_INFO,
2367 				"SUNW_piclenvd: file:%s line:%d %s SKIPPED"
2368 				" for '%s' as not a valid sensor.\n",
2369 				    fname, line, keyword, sname);
2370 				continue;
2371 			}
2372 			/*
2373 			 * Convert dummy_thresh_addr into memory address
2374 			 * for this sensor threshold values.
2375 			 */
2376 			addr = (char *)sensorp->temp_thresh +
2377 			    (int)((char *)dummy_thresh_addr -
2378 			    (char *)&dummy_thresh);
2379 
2380 			*(tempr_t *)addr = (tempr_t)val;
2381 			if (env_debug)
2382 				envd_log(LOG_INFO, "SUNW_piclenvd: file:%s "
2383 				"line:%d %s = %d for sensor: '%s'\n",
2384 				    fname, line, keyword, val, sensorp->name);
2385 		} while ((sname = strtok(NULL, tokdel)) != NULL);
2386 	}
2387 	return (retval);
2388 }
2389 
2390 
2391 /*
2392  * Process integer tuneables
2393  */
2394 static int
process_int_tuneable(char * keyword,char * buf,void * addr,int size,char * fname,int line)2395 process_int_tuneable(char *keyword, char *buf, void *addr, int size,
2396     char *fname, int line)
2397 {
2398 	int	retval = 0;
2399 	char	*endp;
2400 	long	val;
2401 
2402 	/*
2403 	 * Convert input into integer value and ensure that there is
2404 	 * no other token in the buffer.
2405 	 */
2406 	errno = 0;
2407 	val = strtol(buf, &endp, 0);
2408 	if (errno != 0 || strtok(endp, tokdel) != NULL)
2409 		retval = -1;
2410 	else {
2411 		switch (size) {
2412 		case 1:
2413 			if (val != (int8_t)val)
2414 				retval = -1;
2415 			else
2416 				*(int8_t *)addr = (int8_t)val;
2417 			break;
2418 		case 2:
2419 			if (val != (short)val)
2420 				retval = -1;
2421 			else
2422 				*(short *)addr = (short)val;
2423 			break;
2424 		case 4:
2425 			*(int *)addr = (int)val;
2426 			break;
2427 		default:
2428 			retval = -1;
2429 		}
2430 	}
2431 
2432 	if (retval == -1)
2433 		envd_log(LOG_INFO, ENV_CONF_INT_EXPECTED,
2434 		    fname, line, keyword);
2435 	else if (env_debug)
2436 		envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d %s = %d\n",
2437 		    fname, line, keyword, val);
2438 
2439 	return (retval);
2440 }
2441 
2442 
2443 /*
2444  * Process string tuneables
2445  *
2446  * String value must be within double quotes.  Skip over initial white
2447  * spaces before looking for string value.
2448  */
2449 static int
process_string_tuneable(char * keyword,char * buf,void * addr,int size,char * fname,int line)2450 process_string_tuneable(char *keyword, char *buf, void *addr, int size,
2451     char *fname, int line)
2452 {
2453 	int	retval = 0;
2454 	char	c, *p, *strend;
2455 
2456 	/* Skip over white spaces */
2457 	buf += strspn(buf, tokdel);
2458 
2459 	/*
2460 	 * Parse srting and locate string end (handling escaped double quotes
2461 	 * and other characters)
2462 	 */
2463 	if (buf[0] != '"')
2464 		strend = NULL;
2465 	else {
2466 		for (p = buf+1; (c = *p) != '\0'; p++)
2467 			if (c == '"' || (c == '\\' && *++p == '\0'))
2468 				break;
2469 		strend = (*p == '"') ? p : NULL;
2470 	}
2471 
2472 	if (strend == NULL || (strend-buf) > size ||
2473 	    strtok(strend+1, tokdel) != NULL) {
2474 		envd_log(LOG_WARNING, ENV_CONF_STRING_EXPECTED,
2475 		    fname, line, keyword, size);
2476 		retval = -1;
2477 	} else {
2478 		*strend = '\0';
2479 		(void) strcpy(addr, (caddr_t)buf+1);
2480 		if (env_debug)
2481 			envd_log(LOG_INFO, "SUNW_piclenvd: file:%s line:%d "
2482 			    "%s = \"%s\"\n", fname, line, keyword, buf+1);
2483 	}
2484 
2485 	return (retval);
2486 }
2487 
2488 
2489 /*
2490  * Process configuration file
2491  */
2492 static void
process_env_conf_file(void)2493 process_env_conf_file(void)
2494 {
2495 	int		line, len, toklen;
2496 	char		buf[BUFSIZ];
2497 	FILE		*fp;
2498 	env_tuneable_t	*tunep;
2499 	char		nmbuf[SYS_NMLN];
2500 	char		fname[PATH_MAX];
2501 	char		*tok, *valuep;
2502 	int		skip_line = 0;
2503 
2504 	if (sysinfo(SI_PLATFORM, nmbuf, sizeof (nmbuf)) == -1)
2505 		return;
2506 
2507 	(void) snprintf(fname, sizeof (fname), PICLD_PLAT_PLUGIN_DIRF, nmbuf);
2508 	(void) strlcat(fname, ENV_CONF_FILE, sizeof (fname));
2509 	fp = fopen(fname, "r");
2510 	if (fp == NULL)
2511 		return;
2512 
2513 	/*
2514 	 * Blank lines or lines starting with "#" or "*" in the first
2515 	 * column are ignored. All other lines are assumed to contain
2516 	 * input in the following format:
2517 	 *
2518 	 *	keyword value
2519 	 *
2520 	 * where the "value" can be a signed integer or string (in
2521 	 * double quotes) depending upon the keyword.
2522 	 */
2523 
2524 	for (line = 1; fgets(buf, sizeof (buf), fp) != NULL; line++) {
2525 		len = strlen(buf);
2526 		if (len <= 0)
2527 			continue;
2528 
2529 		/* skip long lines */
2530 		if (buf[len-1] != '\n') {
2531 			skip_line = 1;
2532 			continue;
2533 		} else if (skip_line) {
2534 			skip_line = 0;
2535 			continue;
2536 		} else
2537 			buf[len-1] = '\0';
2538 
2539 		/* skip comments */
2540 		if (buf[0] == '*' || buf[0] == '#')
2541 			continue;
2542 
2543 		/*
2544 		 * Skip over white space to get the keyword
2545 		 */
2546 		tok = buf + strspn(buf, tokdel);
2547 		if (*tok == '\0')
2548 			continue;			/* blank line */
2549 
2550 		toklen = strcspn(tok, tokdel);
2551 		tok[toklen] = '\0';
2552 
2553 		/* Get possible location for value (within current line) */
2554 		valuep = tok + toklen + 1;
2555 		if (valuep > buf+len)
2556 			valuep = buf + len;
2557 
2558 		/*
2559 		 * Lookup the keyword and process value accordingly
2560 		 */
2561 		for (tunep = &env_tuneables[0]; tunep->name != NULL; tunep++) {
2562 			if (strcasecmp(tunep->name, tok) == 0) {
2563 				(void) (*tunep->func)(tok, valuep,
2564 				    tunep->arg1, tunep->arg2, fname, line);
2565 				break;
2566 			}
2567 		}
2568 
2569 		if (tunep->name == NULL)
2570 			envd_log(LOG_INFO, ENV_CONF_UNSUPPORTED_KEYWORD,
2571 			    fname, line, tok);
2572 	}
2573 	(void) fclose(fp);
2574 }
2575 
2576 /*
2577  * Setup envrionmental monitor state and start threads to monitor
2578  * temperature and power management state.
2579  * Returns -1 on error, 0 if successful.
2580  */
2581 
2582 static int
envd_setup(void)2583 envd_setup(void)
2584 {
2585 	char		*valp, *endp;
2586 	int		val;
2587 	int		err;
2588 
2589 	if (pthread_attr_init(&thr_attr) != 0 ||
2590 	    pthread_attr_setscope(&thr_attr, PTHREAD_SCOPE_SYSTEM) != 0)
2591 		return (-1);
2592 
2593 	if (pm_fd == -1)
2594 		envd_open_pm();
2595 
2596 	/*
2597 	 * Setup lpm devices
2598 	 */
2599 	lpm_devices = NULL;
2600 	if ((err = setup_lpm_devices(&lpm_devices)) != PICL_SUCCESS) {
2601 		if (env_debug)
2602 			envd_log(LOG_ERR, "setup_lpm_devices failed err = %d\n",
2603 				err);
2604 	}
2605 
2606 	/*
2607 	 * Initialize global state to initial startup values
2608 	 */
2609 	sensor_poll_interval = SENSOR_POLL_INTERVAL;
2610 	fan_slow_adjustment = FAN_SLOW_ADJUSTMENT;
2611 	fan_incr_limit = FAN_INCREMENT_LIMIT;
2612 	fan_decr_limit = FAN_DECREMENT_LIMIT;
2613 	warning_interval = WARNING_INTERVAL;
2614 	warning_duration = WARNING_DURATION;
2615 	shutdown_interval = SHUTDOWN_INTERVAL;
2616 	disable_piclenvd = 0;
2617 	disable_power_off = 0;
2618 	disable_shutdown = 0;
2619 	disable_warning = 0;
2620 
2621 	(void) strlcpy(shutdown_cmd, SHUTDOWN_CMD, sizeof (shutdown_cmd));
2622 	(void) strlcpy(devfsadm_cmd, DEVFSADM_CMD, sizeof (devfsadm_cmd));
2623 	(void) strlcpy(fru_devfsadm_cmd, FRU_DEVFSADM_CMD,
2624 	    sizeof (fru_devfsadm_cmd));
2625 	envd_cpu_fan.forced_speed = -1;
2626 	envd_system_fan.forced_speed = -1;
2627 
2628 	(void) memcpy(&cpu0_die_thresh, &cpu_die_thresh_default,
2629 	    sizeof (cpu_die_thresh_default));
2630 	(void) memcpy(&cpu0_amb_thresh, &cpu_amb_thresh_default,
2631 	    sizeof (cpu_amb_thresh_default));
2632 	(void) memcpy(&cpu1_die_thresh, &cpu_die_thresh_default,
2633 	    sizeof (cpu_die_thresh_default));
2634 	(void) memcpy(&cpu1_amb_thresh, &cpu_amb_thresh_default,
2635 	    sizeof (cpu_amb_thresh_default));
2636 
2637 	if ((valp = getenv("SUNW_piclenvd_debug")) != NULL) {
2638 		val = strtol(valp, &endp, 0);
2639 		if (strtok(endp, tokdel) == NULL)
2640 			env_debug = val;
2641 	}
2642 
2643 	/*
2644 	 * Create a thread to monitor temperature and control fan
2645 	 * speed.
2646 	 */
2647 	if (envthr_created == B_FALSE && pthread_create(&envthr_tid,
2648 	    &thr_attr, envthr, (void *)NULL) != 0) {
2649 		envd_close_fans();
2650 		envd_close_sensors();
2651 		envd_close_pm();
2652 		envd_log(LOG_CRIT, ENV_THREAD_CREATE_FAILED);
2653 		return (-1);
2654 	}
2655 	envthr_created = B_TRUE;
2656 
2657 	/*
2658 	 * Create a thread to monitor PM state
2659 	 */
2660 	if (pmthr_exists == B_FALSE) {
2661 		if (pm_fd == -1 || pthread_create(&pmthr_tid, &thr_attr,
2662 		    pmthr, (void *)NULL) != 0) {
2663 			envd_log(LOG_CRIT, PM_THREAD_CREATE_FAILED);
2664 		} else
2665 			pmthr_exists = B_TRUE;
2666 	}
2667 	return (0);
2668 }
2669 
2670 /*
2671  * Callback function used by ptree_walk_tree_by_class for the cpu class
2672  */
2673 static int
cb_cpu(picl_nodehdl_t nodeh,void * args)2674 cb_cpu(picl_nodehdl_t nodeh, void *args)
2675 {
2676 	sensor_pmdev_t		*pmdevp;
2677 	int			err;
2678 	ptree_propinfo_t	pinfo;
2679 	picl_prophdl_t		proph;
2680 	size_t			psize;
2681 	int			id;
2682 
2683 	/* Get CPU's ID, it is an int */
2684 	err = ptree_get_propval_by_name(nodeh, PICL_PROP_ID, &id, sizeof (int));
2685 	if (err != PICL_SUCCESS)
2686 		return (PICL_WALK_CONTINUE);
2687 
2688 	/* Get the pmdevp for the CPU */
2689 	pmdevp = sensor_pmdevs;
2690 	while (pmdevp->sensor_id != -1) {
2691 		if (id == pmdevp->sensor_id)
2692 			break;
2693 		pmdevp++;
2694 	}
2695 
2696 	/* Return if didn't find the pmdevp for the cpu id */
2697 	if (pmdevp->sensor_id == -1)
2698 		return (PICL_WALK_CONTINUE);
2699 
2700 	/* Get the devfs-path property */
2701 	err = ptree_get_prop_by_name(nodeh, PICL_PROP_DEVFS_PATH, &proph);
2702 	if (err != PICL_SUCCESS)
2703 		return (PICL_WALK_CONTINUE);
2704 
2705 	err = ptree_get_propinfo(proph, &pinfo);
2706 	if ((err != PICL_SUCCESS) ||
2707 	    (pinfo.piclinfo.type != PICL_PTYPE_CHARSTRING))
2708 		return (PICL_WALK_CONTINUE);
2709 
2710 	psize = pinfo.piclinfo.size;
2711 	pmdevp->pmdev_name = malloc(psize);
2712 	if (pmdevp->pmdev_name == NULL)
2713 		return (PICL_WALK_CONTINUE);
2714 
2715 	err = ptree_get_propval(proph, pmdevp->pmdev_name, psize);
2716 	if (err != PICL_SUCCESS)
2717 		return (PICL_WALK_CONTINUE);
2718 
2719 	return (PICL_WALK_CONTINUE);
2720 }
2721 
2722 /*
2723  * Find the CPU's in the picl tree, set the devfs-path for pmdev_name
2724  */
2725 static void
setup_pmdev_names()2726 setup_pmdev_names()
2727 {
2728 	picl_nodehdl_t	plath;
2729 	int		err;
2730 
2731 	err = ptree_get_node_by_path(PLATFORM_PATH, &plath);
2732 	if (err != PICL_SUCCESS)
2733 		return;
2734 
2735 	err = ptree_walk_tree_by_class(plath, PICL_CLASS_CPU, NULL, cb_cpu);
2736 }
2737 
2738 
/*
 * Hand the registration record of this plug-in (my_reg_info) to
 * picld_plugin_register().
 */
static void
piclenvd_register(void)
{
	picld_plugin_register(&my_reg_info);
}
2744 
2745 static void
piclenvd_init(void)2746 piclenvd_init(void)
2747 {
2748 	/*
2749 	 * Setup the names for the pm sensors, we do it just the first time
2750 	 */
2751 	if (pmdev_names_init == B_FALSE) {
2752 		(void) setup_pmdev_names();
2753 		pmdev_names_init = B_TRUE;
2754 	}
2755 
2756 	/*
2757 	 * Start environmental monitor/threads
2758 	 */
2759 	(void) pthread_rwlock_wrlock(&envd_rwlock);
2760 	if (envd_setup() != 0) {
2761 		(void) pthread_rwlock_unlock(&envd_rwlock);
2762 		envd_log(LOG_CRIT, ENVD_PLUGIN_INIT_FAILED);
2763 		return;
2764 	}
2765 	(void) pthread_rwlock_unlock(&envd_rwlock);
2766 
2767 	/*
2768 	 * Now setup/populate PICL tree
2769 	 */
2770 	env_picl_setup();
2771 }
2772 
/*
 * Plug-in fini routine.
 *
 * Deletes the lpm device list and removes the PICL nodes/properties
 * created by this plug-in. The monitoring threads are not stopped here.
 */
static void
piclenvd_fini(void)
{
	/*
	 * Delete the lpm device list. After this the lpm information
	 * will not be used in determining the fan speed, till the lpm
	 * device information is initialized by setup_lpm_devices called
	 * by envd_setup.
	 */
	delete_lpm_devices();

	/*
	 * Invoke env_picl_destroy() to remove any PICL nodes/properties
	 * (including volatile properties) we created. Once this call
	 * returns, there can't be any more calls from the PICL framework
	 * to get current temperature or fan speed.
	 */
	env_picl_destroy();

	/*
	 * Since this is a critical plug-in, we know that it won't be
	 * unloaded and will be reinited again unless picld process is
	 * going away. Therefore, it's okay to let "envthr" and "pmthr"
	 * continue so that we can monitor the environment during SIGHUP
	 * handling also.
	 */
}
2800 
/*
 * Log a message through syslog.
 *
 * pri	priority, passed to vsyslog() unchanged
 * fmt	printf-style format string, followed by its arguments
 */
/*VARARGS2*/
void
envd_log(int pri, const char *fmt, ...)
{
	va_list	ap;

	va_start(ap, fmt);
	vsyslog(pri, fmt, ap);
	va_end(ap);
}
2811 
2812 #ifdef __lint
2813 /*
2814  * Redefine sigwait to posix style external declaration so that LINT
2815  * does not check against libc version of sigwait() and complain as
2816  * it uses different number of arguments.
2817  */
2818 #define	sigwait	my_posix_sigwait
2819 extern int my_posix_sigwait(const sigset_t *set, int *sig);
2820 #endif
2821 
/*
 * Private replacement for sleep(), built on alarm() and sigwait().
 *
 * sleep() in libpthread gets affected by time being set back, which
 * can keep the "envthr" from waking up for an extended duration. This
 * routine schedules a SIGALRM and waits for it instead. It relies on
 * SIGALRM being masked off in every thread (it is masked off in the
 * main thread, hence in all threads, including the "envthr" calling
 * this routine), and on no other thread using SIGALRM or alarm() in
 * this manner.
 *
 * Returns 0 right away if sleep_tm is 0; otherwise returns the value
 * of alarm(0) called after the wait.
 */

static unsigned int
envd_sleep(unsigned int sleep_tm)
{
	sigset_t	wait_set;
	int		signo;

	if (sleep_tm != 0) {
		(void) sigemptyset(&wait_set);
		(void) sigaddset(&wait_set, SIGALRM);

		(void) alarm(sleep_tm);
		(void) sigwait(&wait_set, &signo);

		/* cancel whatever may be left of the alarm */
		return (alarm(0));
	}

	return (0);
}
2853