xref: /titanic_44/usr/src/uts/sun4u/lw8/io/sgenv.c (revision 683b29499b14fddf042df3e4ecb71a1d5bebe3a8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 /*
29  * Serengeti Environmental Information driver (sgenv)
30  *
31  * This driver requests the environmental properties from the SC. These
32  * request-response transactions are transferred through the SBBC mailbox,
33  * between the Domain and the SC.
34  *
35  * All sensors have the same sort of properties: Low and high limits, warning
36  * thresholds, last measured value, time of measurement, units (e.g., degrees
37  * Celsius, volts, etc.), and so on.
38  *
39  * Each sensor is named by a unique Tag. The Tag identifies the geographical
40  * location of the sensor in the Serengeti, and what it is the sensor measures.
41  *
42  * Requestable sensor properties are broken into two types:  Those which are
43  * quasi-constant (infrequently change) - e.g., tolerance-defining low and high
44  * limits; and those which are volatile (typically change) - e.g., the current
45  * measurement.
46  *
47  * Unfortunately, property sets are too large to comprise a single mailbox
48  * message, so the sets are further subdivided into notionally arbitrary
49  * collections. NOTE: The SC-mailbox framework now supports fragmented messages
50  * which could allow us to request the data in larger chunks in the future.
51  *
52  * Each collection is fetched by a separate transaction.
53  *
 * Firstly there is a transaction to obtain a list of all collections. Each
 * non-zero key in this list is associated with one of the collections of
 * sensors. (This sparse list of keys is then used as an index to obtain all
 * the sensor data for each collection).
58  *
59  * For each collection, there is one request-reply transaction to obtain a list
60  * of all sensors in that collection and the limits that apply to each; and a
61  * separate request-reply transaction to obtain the measurements from the
62  * sensors in the collection.
63  *
64  * The sgenv driver assembles each property set from the constituent
65  * collections, and caches the assembled property sets into the appropriate
66  * cache (env_cache, board_cache). The caches are created at startup and are
67  * updated on receipt of events from the SC. These events (which include DR
68  * events and ENV events) notify sgenv of configuration changes and
69  * environmental state changes (such as a sensor state change, Fan speed
70  * change).
71  *
72  * The SC-APP maintains a pseudo-sensor in each collection "measuring" changes
73  * to the quasi-constants in that collection. By monitoring these pseudo-sensor
74  * measurements, the kstat driver avoids redundant or speculative re-fetches of
75  * the quasi-constant properties.
76  */
77 
78 #include <sys/time.h>
79 #include <sys/errno.h>
80 #include <sys/kmem.h>
81 #include <sys/stat.h>
82 #include <sys/cmn_err.h>
83 #include <sys/disp.h>
84 
85 #include <sys/conf.h>
86 #include <sys/modctl.h>
87 #include <sys/devops.h>
88 #include <sys/ddi.h>
89 #include <sys/sunddi.h>
90 
91 #include <sys/sgevents.h>
92 #include <sys/sysevent.h>
93 #include <sys/sysevent/eventdefs.h>
94 #include <sys/sysevent/domain.h>
95 #include <sys/sysevent/env.h>
96 
97 #include <sys/serengeti.h>
98 #include <sys/sgfrutypes.h>
99 
100 #include <sys/sgsbbc.h>
101 #include <sys/sgsbbc_iosram.h>
102 #include <sys/sgsbbc_mailbox.h>
103 
104 #include <sys/sbd_ioctl.h>	/* sbd header files needed for board support */
105 #include <sys/sbdp_priv.h>
106 #include <sys/sbd.h>
107 
108 #include <sys/sgenv_impl.h>
109 
110 
111 /*
112  * Global Variables - can be patched from Solaris
113  * ==============================================
114  */
115 
/*
 * Upper bound on how long this driver will wait for a mailbox reply before
 * treating the request as timed out. _init() seeds it from the global
 * Serengeti variable <sbbc_mbox_default_timeout>; it can be patched after
 * boot to tune the timeout for SGENV only.
 */
int	sgenv_max_mbox_wait_time = 0;

#ifdef DEBUG
/*
 * Selects how much debug output the driver produces.
 */
uint_t		sgenv_debug = SGENV_DEBUG_NONE;
#endif
130 
131 
132 /*
133  * Module Variables
134  * ================
135  */
136 
137 /*
138  * Driver entry points
139  */
140 static struct cb_ops sgenv_cb_ops = {
141 	nodev,		/* open() */
142 	nodev,		/* close() */
143 	nodev,		/* strategy() */
144 	nodev,		/* print() */
145 	nodev,		/* dump() */
146 	nodev,		/* read() */
147 	nodev,		/* write() */
148 	nodev,		/* ioctl() */
149 	nodev,		/* devmap() */
150 	nodev,		/* mmap() */
151 	ddi_segmap,	/* segmap() */
152 	nochpoll,	/* poll() */
153 	ddi_prop_op,    /* prop_op() */
154 	NULL,		/* cb_str */
155 	D_NEW | D_MP	/* cb_flag */
156 };
157 
158 
159 static struct dev_ops sgenv_ops = {
160 	DEVO_REV,
161 	0,			/* ref count */
162 	ddi_getinfo_1to1,	/* getinfo() */
163 	nulldev,		/* identify() */
164 	nulldev,		/* probe() */
165 	sgenv_attach,		/* attach() */
166 	sgenv_detach,		/* detach */
167 	nodev,			/* reset */
168 	&sgenv_cb_ops,		/* pointer to cb_ops structure */
169 	(struct bus_ops *)NULL,
170 	nulldev,		/* power() */
171 	ddi_quiesce_not_needed,		/* quiesce() */
172 };
173 
174 /*
175  * Loadable module support.
176  */
177 extern struct mod_ops mod_driverops;
178 
179 static struct modldrv modldrv = {
180 	&mod_driverops,			/* Type of module. This is a driver */
181 	"Environmental Driver",		/* Name of the module */
182 	&sgenv_ops			/* pointer to the dev_ops structure */
183 };
184 
185 static struct modlinkage modlinkage = {
186 	MODREV_1,
187 	&modldrv,
188 	NULL
189 };
190 
191 /* Opaque state structure pointer */
192 static void		*sgenv_statep;
193 
194 /*
195  * <env_cache> is a cache of all the sensor readings which is persistent
196  * between kstat reads. It is created at init and gets updated upon receipt
197  * of events from the SC.
198  *
199  * The kstat_update function takes a copy of the non-zero entries in this
200  * cache and creates a temp buffer called env_cache_snapshot. The
201  * kstat_snapshot function then bcopies the env_cache_snapshot into the
202  * kstat buffer. This is done because there is no way to ensure that the
203  * env_cache won't change between the kstat_update and the kstat_snapshot
204  * which will cause problems as the update sets the ks_data_size.
205  */
206 static env_sensor_t	*env_cache[SGENV_MAX_HPU_KEYS] = {NULL};
207 static void		*env_cache_snapshot = NULL;
208 static size_t		env_cache_snapshot_size = 0;
209 
210 /*
211  * This is set to TRUE the first time env data is stored in the cache
212  * so that at least from then on, old data can be returned if a call to
213  * the mailbox fails.
214  */
215 static int		env_cache_updated = FALSE;
216 
217 /*
218  * This lock is needed by the variable-sized kstat which returns
219  * environmental info. It prevents data-size races with kstat clients.
220  */
221 static kmutex_t		env_kstat_lock;
222 
223 /*
224  * The <env_cache> can be accessed asynchronously by the polling function
225  * and the kstat_read framework. This mutex ensures that access to the data
226  * is controlled correctly.
227  */
228 static kmutex_t		env_cache_lock;
229 
230 /*
231  * We need to store the last time we asked the SC for environmental information
232  * so that we do not send too many requests in a short period of time.
233  */
234 static hrtime_t		last_env_read_time = 0;
235 
236 /*
237  * Variables to coordinate between the handlers which are triggered when
238  * the env cache needs to be updated and the thread which does the work.
239  */
240 static volatile int	env_thread_run = 0;
241 static kthread_t	*env_thread = NULL;
242 static kt_did_t		env_thread_tid;
243 
244 static kcondvar_t	env_flag_cond;
245 static kmutex_t		env_flag_lock;
246 static boolean_t	env_cache_updating = B_FALSE;
247 static boolean_t	env_cache_update_needed = B_TRUE;
248 
249 /*
250  * <board_cache> is a cache of all the board status info and it is persistent
251  * between kstat reads.
252  *
253  * The kstat_update function takes a copy of the non-zero entries in this
254  * cache and copies them into the board_cache_snapshot buffer. The
255  * kstat_snapshot function then bcopies the board_cache_snapshot into the
256  * kstat buffer. This is done because there is no way to ensure that the
257  * board_cache won't change between the kstat_update and the kstat_snapshot
258  * which will cause problems as the update sets the ks_data_size.
259  */
260 static sg_board_info_t	board_cache[SG_MAX_BDS] = {NULL};
261 static sg_board_info_t	board_cache_snapshot[SG_MAX_BDS] = {NULL};
262 static int		board_cache_updated = FALSE;
263 
264 /*
265  * This mutex ensures the <board_cache> is not destroyed while the board data
266  * is being collected.
267  */
268 static kmutex_t		board_cache_lock;
269 
270 /*
271  * This lock is needed by the variable-sized kstat which returns
272  * board status info. It prevents data-size races with kstat clients.
273  */
274 static kmutex_t		board_kstat_lock;
275 
276 /*
277  * This is a count of the number of board readings were stored by
278  * the kstat_update routine - this is needed by the kstat_snapshot routine.
279  */
280 static int		board_count = 0;
281 static int		board_count_snapshot = 0;
282 
283 /*
284  * We need to store the last time we asked the SC for board information
285  * so that we do not send too many requests in a short period of time.
286  */
287 static hrtime_t		last_board_read_time = 0;
288 
289 /*
290  * Variables to coordinate between the handlers which are triggered when
291  * the board cache needs to be updated and the thread which does the work.
292  */
293 static volatile int	board_thread_run = 0;
294 static kthread_t	*board_thread = NULL;
295 static kt_did_t		board_thread_tid;
296 static kcondvar_t	board_flag_cond;
297 
298 static kmutex_t		board_flag_lock;
299 static boolean_t	board_cache_updating = B_FALSE;
300 static boolean_t	board_cache_update_needed = B_TRUE;
301 
302 /*
303  * Used to keep track of the number of sensors associated with each key.
304  * The sum of all the values in this array is used to set ks_data_size.
305  */
306 static int		vol_sensor_count[SGENV_MAX_HPU_KEYS] = {0};
307 
308 /*
309  * This variable keeps a count of the number of errors that have occurred
310  * when we make calls to the mailbox for Env or Board data.
311  */
312 static int		sgenv_mbox_error_count = 0;
313 
314 /*
315  * mutex which protects the keyswitch interrupt handler.
316  */
317 static kmutex_t		keysw_hdlr_lock;
318 
319 /*
320  * mutex which protects the env interrupt handler.
321  */
322 static kmutex_t		env_hdlr_lock;
323 
324 /*
325  * mutex which protects the DR handler interrupt handler.
326  */
327 static kmutex_t		dr_hdlr_lock;
328 
329 /*
330  * Payloads of the event handlers.
331  */
332 static sg_event_key_position_t	keysw_payload;
333 static sbbc_msg_t		keysw_payload_msg;
334 
335 static sg_event_env_changed_t	env_payload;
336 static sbbc_msg_t		env_payload_msg;
337 
338 static sg_event_fan_status_t	fan_payload;
339 static sbbc_msg_t		fan_payload_msg;
340 
341 static sg_system_fru_descriptor_t	dr_payload;
342 static sbbc_msg_t			dr_payload_msg;
343 
344 /*
345  * The following 3 arrays list all possible HPUs, Parts and Device types
346  */
347 
348 /*
349  * ensure that all possible HPUs exported, as described in the main comment
350  * in <sys/sensor_tag.h>, are accounted for here.
351  */
352 static const hpu_value_t hpus[] = {
353 	HPU_ENTRY(SG_HPU_TYPE_UNKNOWN),
354 	HPU_ENTRY(SG_HPU_TYPE_CPU_BOARD),
355 	HPU_ENTRY(SG_HPU_TYPE_PCI_IO_BOARD),
356 	HPU_ENTRY(SG_HPU_TYPE_CPCI_IO_BOARD),
357 	HPU_ENTRY(SG_HPU_TYPE_SP_CPCI_IO_BOARD),
358 	HPU_ENTRY(SG_HPU_TYPE_REPEATER_BOARD),
359 	HPU_ENTRY(SG_HPU_TYPE_L2_REPEATER_BOARD),
360 	HPU_ENTRY(SG_HPU_TYPE_SYSTEM_CONTROLLER_BOARD),
361 	HPU_ENTRY(SG_HPU_TYPE_SP_SYSTEM_CONTROLLER_BOARD),
362 	HPU_ENTRY(SG_HPU_TYPE_A123_POWER_SUPPLY),
363 	HPU_ENTRY(SG_HPU_TYPE_A138_POWER_SUPPLY),
364 	HPU_ENTRY(SG_HPU_TYPE_A145_POWER_SUPPLY),
365 	HPU_ENTRY(SG_HPU_TYPE_A152_POWER_SUPPLY),
366 	HPU_ENTRY(SG_HPU_TYPE_A153_POWER_SUPPLY),
367 	HPU_ENTRY(SG_HPU_TYPE_RACK_FAN_TRAY),
368 	HPU_ENTRY(SG_HPU_TYPE_SP_FAN_TRAY),
369 	HPU_ENTRY(SG_HPU_TYPE_MD_TOP_IO_FAN_TRAY),
370 	HPU_ENTRY(SG_HPU_TYPE_MD_BOTTOM_IO_FAN_TRAY),
371 	HPU_ENTRY(SG_HPU_TYPE_R12_THREE_FAN_TRAY),
372 	HPU_ENTRY(SG_HPU_TYPE_K12_IO_ONE_FAN_TRAY),
373 	HPU_ENTRY(SG_HPU_TYPE_K12_CPU_THREE_FAN_TRAY),
374 	HPU_ENTRY(SG_HPU_TYPE_R24_IO_FOUR_FAN_TRAY),
375 	HPU_ENTRY(SG_HPU_TYPE_R24_CPU_SIX_FAN_TRAY),
376 	0,	(char *)NULL
377 };
378 
379 static const struct part_value parts[] = {
380 	PART_VALUE(SG_SENSOR_PART_SBBC),
381 	PART_VALUE(SG_SENSOR_PART_SDC),
382 	PART_VALUE(SG_SENSOR_PART_AR),
383 	PART_VALUE(SG_SENSOR_PART_CBH),
384 	PART_VALUE(SG_SENSOR_PART_DX),
385 	PART_VALUE(SG_SENSOR_PART_CHEETAH),
386 	PART_VALUE(SG_SENSOR_PART_1_5_VDC),
387 	PART_VALUE(SG_SENSOR_PART_3_3_VDC),
388 	PART_VALUE(SG_SENSOR_PART_5_VDC),
389 	PART_VALUE(SG_SENSOR_PART_12_VDC),
390 	PART_VALUE(SG_SENSOR_PART_48_VDC),
391 	PART_VALUE(SG_SENSOR_PART_CURRENT),
392 	PART_VALUE(SG_SENSOR_PART_BOARD),
393 	PART_VALUE(SG_SENSOR_PART_SCAPP),
394 	PART_VALUE(SG_SENSOR_PART_SCHIZO),
395 	PART_VALUE(SG_SENSOR_PART_FAN),
396 	0,	(char *)NULL
397 };
398 
399 static const struct type_value types[] = {
400 	TYPE_VALUE(SG_SENSOR_TYPE_CURRENT, SG_CURRENT_SCALE),
401 	TYPE_VALUE(SG_SENSOR_TYPE_TEMPERATURE, SG_TEMPERATURE_SCALE),
402 	TYPE_VALUE(SG_SENSOR_TYPE_1_5_VDC, SG_1_5_VDC_SCALE),
403 	TYPE_VALUE(SG_SENSOR_TYPE_1_8_VDC, SG_1_8_VDC_SCALE),
404 	TYPE_VALUE(SG_SENSOR_TYPE_3_3_VDC, SG_3_3_VDC_SCALE),
405 	TYPE_VALUE(SG_SENSOR_TYPE_5_VDC, SG_5_VDC_SCALE),
406 	TYPE_VALUE(SG_SENSOR_TYPE_12_VDC, SG_12_VDC_SCALE),
407 	TYPE_VALUE(SG_SENSOR_TYPE_48_VDC, SG_48_VDC_SCALE),
408 	TYPE_VALUE(SG_SENSOR_TYPE_ENVDB, 1),
409 	TYPE_VALUE(SG_SENSOR_TYPE_COOLING, 1),
410 	0,	(char *)NULL
411 };
412 
413 int
_init(void)414 _init(void)
415 {
416 	int	error = 0;
417 
418 	error = ddi_soft_state_init(&sgenv_statep,
419 	    sizeof (sgenv_soft_state_t), 1);
420 
421 	if (error)
422 		return (error);
423 
424 	error = mod_install(&modlinkage);
425 	if (error) {
426 		ddi_soft_state_fini(&sgenv_statep);
427 		return (error);
428 	}
429 
430 	mutex_init(&env_kstat_lock, NULL, MUTEX_DEFAULT, NULL);
431 	mutex_init(&env_cache_lock, NULL, MUTEX_DEFAULT, NULL);
432 	mutex_init(&env_flag_lock, NULL, MUTEX_DEFAULT, NULL);
433 	cv_init(&env_flag_cond, NULL, CV_DEFAULT, NULL);
434 
435 	mutex_init(&board_cache_lock, NULL, MUTEX_DEFAULT, NULL);
436 	mutex_init(&board_kstat_lock, NULL, MUTEX_DEFAULT, NULL);
437 	mutex_init(&board_flag_lock, NULL, MUTEX_DEFAULT, NULL);
438 	cv_init(&board_flag_cond, NULL, CV_DEFAULT, NULL);
439 
440 	mutex_init(&keysw_hdlr_lock, NULL, MUTEX_DEFAULT, NULL);
441 	mutex_init(&env_hdlr_lock, NULL, MUTEX_DEFAULT, NULL);
442 	mutex_init(&dr_hdlr_lock, NULL, MUTEX_DEFAULT, NULL);
443 
444 	/* set the default timeout value */
445 	sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout;
446 
447 	return (error);
448 }
449 
450 
451 int
_info(struct modinfo * modinfop)452 _info(struct modinfo *modinfop)
453 {
454 	return (mod_info(&modlinkage, modinfop));
455 }
456 
457 
458 int
_fini(void)459 _fini(void)
460 {
461 	int	error = 0;
462 
463 	error = mod_remove(&modlinkage);
464 	if (error)
465 		return (error);
466 
467 	mutex_destroy(&env_kstat_lock);
468 	mutex_destroy(&env_cache_lock);
469 
470 	mutex_destroy(&board_cache_lock);
471 	mutex_destroy(&board_kstat_lock);
472 
473 	mutex_destroy(&keysw_hdlr_lock);
474 	mutex_destroy(&env_hdlr_lock);
475 	mutex_destroy(&dr_hdlr_lock);
476 
477 	ddi_soft_state_fini(&sgenv_statep);
478 
479 	return (error);
480 }
481 
482 
483 static int
sgenv_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)484 sgenv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
485 {
486 	sgenv_soft_state_t	*softsp;
487 
488 	int			instance;
489 	int			err;
490 
491 	switch (cmd) {
492 	case DDI_ATTACH:
493 
494 		instance = ddi_get_instance(dip);
495 
496 		/* allocate a global sgenv_soft_state structure */
497 		err = ddi_soft_state_zalloc(sgenv_statep, instance);
498 		if (err != DDI_SUCCESS) {
499 			cmn_err(CE_WARN, "attach: could not allocate state "
500 			    "structure for inst %d.", instance);
501 			return (DDI_FAILURE);
502 		}
503 
504 		softsp = ddi_get_soft_state(sgenv_statep, instance);
505 		if (softsp == NULL) {
506 			ddi_soft_state_free(sgenv_statep, instance);
507 			cmn_err(CE_WARN, "attach: could not get state "
508 			    "structure for inst %d.", instance);
509 			return (DDI_FAILURE);
510 		}
511 
512 		softsp->dip = dip;
513 		softsp->instance = instance;
514 
515 		err = sgenv_add_kstats(softsp);
516 		if (err != 0) {
517 			/*
518 			 * Some of the kstats may have been created before the
519 			 * error occurred in sgenv_add_kstats(), so we call
520 			 * sgenv_remove_kstats() which removes any kstats
521 			 * already created.
522 			 */
523 			sgenv_remove_kstats(softsp);
524 			ddi_soft_state_free(sgenv_statep, instance);
525 			return (DDI_FAILURE);
526 		}
527 
528 		/*
529 		 * Before we setup the framework to read the data from the SC
530 		 * we need to ensure the caches are initialized correctly.
531 		 */
532 		sgenv_init_board_cache();
533 		sgenv_init_env_cache();
534 
535 		/*
536 		 * Add the threads which will update the env and board caches
537 		 * and post events to Sysevent Framework in the background
538 		 * when the interrupt handlers watching for ENV/DR events
539 		 * indicate to the threads that they need to do so.
540 		 */
541 		err = sgenv_create_cache_update_threads();
542 		if (err != DDI_SUCCESS) {
543 			sgenv_remove_kstats(softsp);
544 			ddi_soft_state_free(sgenv_statep, instance);
545 			return (DDI_FAILURE);
546 		}
547 
548 		err = ddi_create_minor_node(dip, SGENV_DRV_NAME, S_IFCHR,
549 		    instance, DDI_PSEUDO, NULL);
550 		if (err != DDI_SUCCESS) {
551 			sgenv_remove_kstats(softsp);
552 			(void) sgenv_remove_cache_update_threads();
553 			ddi_soft_state_free(sgenv_statep, instance);
554 			return (DDI_FAILURE);
555 		}
556 
557 		/*
558 		 * Add the handlers which watch for unsolicited messages
559 		 * and post event to Sysevent Framework.
560 		 */
561 		err = sgenv_add_intr_handlers();
562 		if (err != DDI_SUCCESS) {
563 			cmn_err(CE_WARN, "Failed to add event handlers");
564 			(void) sgenv_remove_intr_handlers();
565 			sgenv_remove_kstats(softsp);
566 			(void) sgenv_remove_cache_update_threads();
567 			ddi_soft_state_free(sgenv_statep, instance);
568 			return (DDI_FAILURE);
569 		}
570 
571 		ddi_report_dev(dip);
572 
573 		return (DDI_SUCCESS);
574 
575 	case DDI_RESUME:
576 		return (DDI_SUCCESS);
577 
578 	default:
579 		return (DDI_FAILURE);
580 	}
581 }
582 
583 
584 static int
sgenv_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)585 sgenv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
586 {
587 	sgenv_soft_state_t	*softsp;
588 
589 	int	instance;
590 	int	err;
591 
592 	switch (cmd) {
593 	case DDI_DETACH:
594 
595 		instance = ddi_get_instance(dip);
596 
597 		softsp = ddi_get_soft_state(sgenv_statep, instance);
598 		if (softsp == NULL) {
599 			cmn_err(CE_WARN, "detach: could not get state "
600 			    "structure for inst %d.", instance);
601 			return (DDI_FAILURE);
602 		}
603 
604 		err = sgenv_remove_cache_update_threads();
605 		if (err != DDI_SUCCESS) {
606 			cmn_err(CE_WARN, "Failed to remove update threads");
607 		}
608 
609 		/*
610 		 * Remove the handlers which watch for unsolicited messages
611 		 * and post event to Sysevent Framework.
612 		 */
613 		err = sgenv_remove_intr_handlers();
614 		if (err != DDI_SUCCESS) {
615 			cmn_err(CE_WARN, "Failed to remove event handlers");
616 		}
617 
618 		sgenv_remove_kstats(softsp);
619 
620 		ddi_soft_state_free(sgenv_statep, instance);
621 
622 		ddi_remove_minor_node(dip, NULL);
623 
624 		return (DDI_SUCCESS);
625 
626 	case DDI_SUSPEND:
627 		return (DDI_SUCCESS);
628 
629 	default:
630 		return (DDI_FAILURE);
631 	}
632 }
633 
634 
635 static int
sgenv_add_kstats(sgenv_soft_state_t * softsp)636 sgenv_add_kstats(sgenv_soft_state_t *softsp)
637 {
638 	kstat_t		*ksp;
639 	kstat_named_t	*keyswitch_named_data;
640 
641 	int		inst = softsp->instance;
642 
643 	/*
644 	 * Create the 'keyswitch position' named kstat.
645 	 */
646 	ksp = kstat_create(SGENV_DRV_NAME, inst, SG_KEYSWITCH_KSTAT_NAME,
647 	    "misc", KSTAT_TYPE_NAMED, 1, NULL);
648 
649 	if (ksp != NULL) {
650 		/* initialize the named kstat */
651 		keyswitch_named_data = (struct kstat_named *)(ksp->ks_data);
652 
653 		kstat_named_init(&keyswitch_named_data[0],
654 		    POSITION_KSTAT_NAME,
655 		    KSTAT_DATA_INT32);
656 
657 		ksp->ks_update = sgenv_keyswitch_kstat_update;
658 		kstat_install(ksp);
659 
660 		/* update the soft state */
661 		softsp->keyswitch_ksp = ksp;
662 
663 	} else {
664 		cmn_err(CE_WARN, "Keyswitch: kstat_create failed");
665 		return (-1);
666 	}
667 
668 
669 	/*
670 	 * Environmental Information.
671 	 */
672 	ksp = kstat_create(SGENV_DRV_NAME, inst, SG_ENV_INFO_KSTAT_NAME,
673 	    "misc", KSTAT_TYPE_RAW, 0,
674 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
675 
676 	if (ksp != NULL) {
677 		ksp->ks_data = NULL;
678 		ksp->ks_data_size = 0;
679 		ksp->ks_snaptime = 0;
680 		ksp->ks_update = sgenv_env_info_kstat_update;
681 		ksp->ks_snapshot = sgenv_env_info_kstat_snapshot;
682 		ksp->ks_lock = &env_kstat_lock;
683 		kstat_install(ksp);
684 
685 		/* update the soft state */
686 		softsp->env_info_ksp = ksp;
687 
688 	} else {
689 		cmn_err(CE_WARN, "Environmental Info: kstat_create failed");
690 		return (-1);
691 	}
692 
693 
694 	/*
695 	 * Board Status Information.
696 	 */
697 	ksp = kstat_create(SGENV_DRV_NAME, inst, SG_BOARD_STATUS_KSTAT_NAME,
698 	    "misc", KSTAT_TYPE_RAW, 0,
699 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_VAR_SIZE);
700 
701 	if (ksp != NULL) {
702 		ksp->ks_data = NULL;
703 		ksp->ks_data_size = 0;
704 		ksp->ks_snaptime = 0;
705 		ksp->ks_update = sgenv_board_info_kstat_update;
706 		ksp->ks_snapshot = sgenv_board_info_kstat_snapshot;
707 		ksp->ks_lock = &board_kstat_lock;
708 		kstat_install(ksp);
709 
710 		/* update the soft state */
711 		softsp->board_info_ksp = ksp;
712 
713 	} else {
714 		cmn_err(CE_WARN, "Board Status Info: kstat_create failed");
715 		return (-1);
716 	}
717 
718 	return (0);
719 }
720 
721 
722 static void
sgenv_remove_kstats(sgenv_soft_state_t * softsp)723 sgenv_remove_kstats(sgenv_soft_state_t *softsp)
724 {
725 	kstat_t	*ksp;
726 
727 	ksp = softsp->keyswitch_ksp;
728 	if (ksp != NULL) {
729 		softsp->keyswitch_ksp = NULL;
730 		kstat_delete(ksp);
731 	}
732 
733 	ksp = softsp->env_info_ksp;
734 	if (ksp != NULL) {
735 		sgenv_destroy_env_cache();
736 		softsp->env_info_ksp = NULL;
737 		ksp->ks_lock = NULL;
738 		kstat_delete(ksp);
739 	}
740 
741 	ksp = softsp->board_info_ksp;
742 	if (ksp != NULL) {
743 		softsp->board_info_ksp = NULL;
744 		ksp->ks_lock = NULL;
745 		kstat_delete(ksp);
746 	}
747 }
748 
749 
750 /*
751  * This function registers mailbox interrupt handlers to watch for certain
752  * unsolicited mailbox messages, which indicate that some event has occurred.
753  *
754  * Currently only the following events are handled:
755  *	MBOX_EVENT_KEY_SWITCH
756  *	MBOX_EVENT_ENV
757  *		- Thresholds/Limits Exceeded
758  *		- Fan Status changed
759  *
760  * ERRORS:
761  *	We return DDI_FAILURE if we fail to register any one of the
762  *	interrupt handlers.
763  */
764 static int
sgenv_add_intr_handlers(void)765 sgenv_add_intr_handlers(void)
766 {
767 	int	err;
768 
769 	/*
770 	 * Register an interrupt handler with the sgsbbc driver for the
771 	 * MBOX_EVENT_KEY_SWITCH events.
772 	 *	- The virtual keyswitch has changed, we generate a sysevent.
773 	 */
774 	keysw_payload_msg.msg_buf = (caddr_t)&keysw_payload;
775 	keysw_payload_msg.msg_len = sizeof (keysw_payload);
776 
777 	err = sbbc_mbox_reg_intr(MBOX_EVENT_KEY_SWITCH, sgenv_keyswitch_handler,
778 	    &keysw_payload_msg, NULL, &keysw_hdlr_lock);
779 	if (err != 0) {
780 		cmn_err(CE_WARN, "Failed to register MBOX_EVENT_KEY_SWITCH "
781 		    "handler. Err=%d", err);
782 		return (DDI_FAILURE);
783 	}
784 
785 	/*
786 	 * Register an interrupt handler with the sgsbbc driver for the
787 	 * MBOX_EVENT_ENV events.
788 	 *	- Thresholds/Limits Exceeded, we generate a sysevent
789 	 *	and we update our caches.
790 	 */
791 	env_payload_msg.msg_buf = (caddr_t)&env_payload;
792 	env_payload_msg.msg_len = sizeof (env_payload);
793 
794 	err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler,
795 	    &env_payload_msg, NULL, &env_hdlr_lock);
796 	if (err != 0) {
797 		cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV "
798 		    "(env) handler. Err=%d", err);
799 		return (DDI_FAILURE);
800 	}
801 
802 	/*
803 	 * Register an interrupt handler with the sgsbbc driver for the
804 	 * MBOX_EVENT_ENV events.
805 	 *	- Fan Status changed, we generate a sysevent, and
806 	 *	we update the env cache only.
807 	 */
808 	fan_payload_msg.msg_buf = (caddr_t)&fan_payload;
809 	fan_payload_msg.msg_len = sizeof (fan_payload);
810 
811 	err = sbbc_mbox_reg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler,
812 	    &fan_payload_msg, NULL, &env_hdlr_lock);
813 	if (err != 0) {
814 		cmn_err(CE_WARN, "Failed to register MBOX_EVENT_ENV (fan)"
815 		    "handler. Err=%d", err);
816 		return (DDI_FAILURE);
817 	}
818 
819 	/*
820 	 * Register an interrupt handler with the sgsbbc driver for the
821 	 * MBOX_EVENT_GENERIC events.
822 	 *	- DR state change, we update our caches.
823 	 */
824 	dr_payload_msg.msg_buf = (caddr_t)&dr_payload;
825 	dr_payload_msg.msg_len = sizeof (dr_payload);
826 
827 	err = sbbc_mbox_reg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler,
828 	    &dr_payload_msg, NULL, &dr_hdlr_lock);
829 	if (err != 0) {
830 		cmn_err(CE_WARN, "Failed to register MBOX_EVENT_GENERIC (DR)"
831 		    "handler. Err=%d", err);
832 		return (DDI_FAILURE);
833 	}
834 
835 	return (DDI_SUCCESS);
836 }
837 
838 /*
839  * This function unregisters the mailbox interrupt handlers.
840  *
841  * ERRORS:
842  *	We return DDI_FAILURE if we fail to register any one of the
843  *	interrupt handlers.
844  */
845 static int
sgenv_remove_intr_handlers(void)846 sgenv_remove_intr_handlers(void)
847 {
848 	int	rv = DDI_SUCCESS;
849 	int	err;
850 
851 	err = sbbc_mbox_unreg_intr(MBOX_EVENT_KEY_SWITCH,
852 	    sgenv_keyswitch_handler);
853 	if (err != 0) {
854 		cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_KEY_SWITCH "
855 		    "handler. Err=%d", err);
856 		rv = DDI_FAILURE;
857 	}
858 
859 	err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_env_data_handler);
860 	if (err != 0) {
861 		cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (env)"
862 		    "handler. Err=%d", err);
863 		rv = DDI_FAILURE;
864 	}
865 
866 	err = sbbc_mbox_unreg_intr(MBOX_EVENT_ENV, sgenv_fan_status_handler);
867 	if (err != 0) {
868 		cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_ENV (fan)"
869 		    "handler. Err=%d", err);
870 		rv = DDI_FAILURE;
871 	}
872 
873 	err = sbbc_mbox_unreg_intr(MBOX_EVENT_GENERIC, sgenv_dr_event_handler);
874 	if (err != 0) {
875 		cmn_err(CE_WARN, "Failed to unregister MBOX_EVENT_GENERIC (DR) "
876 		    "handler. Err=%d", err);
877 		rv = DDI_FAILURE;
878 	}
879 
880 	return (rv);
881 }
882 
883 
884 static int
sgenv_create_cache_update_threads(void)885 sgenv_create_cache_update_threads(void)
886 {
887 	DCMN_ERR_S(f, "sgenv_create_cache_update_threads()");
888 
889 	DCMN_ERR_THREAD(CE_NOTE, "Entering %s", f);
890 
891 	/* Create thread to ensure env_cache is updated */
892 	env_thread_run = 1;
893 
894 	env_thread = thread_create(NULL, 0, sgenv_update_env_cache,
895 	    NULL, 0, &p0, TS_RUN, minclsyspri);
896 	env_thread_tid = env_thread->t_did;
897 
898 	/* Create thread to ensure board_cache is updated */
899 	board_thread_run = 1;
900 
901 	board_thread = thread_create(NULL, 0, sgenv_update_board_cache,
902 	    NULL, 0, &p0, TS_RUN, minclsyspri);
903 	board_thread_tid = board_thread->t_did;
904 
905 	DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f);
906 
907 	return (DDI_SUCCESS);
908 }
909 
910 
911 static int
sgenv_remove_cache_update_threads(void)912 sgenv_remove_cache_update_threads(void)
913 {
914 	DCMN_ERR_S(f, "sgenv_remove_cache_update_threads()");
915 
916 	DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for cache update threads", f);
917 
918 	/* Cause the env_cache thread to terminate. */
919 	mutex_enter(&env_flag_lock);
920 	env_thread_run = 0;
921 	cv_signal(&env_flag_cond);
922 	mutex_exit(&env_flag_lock);
923 
924 	thread_join(env_thread_tid);
925 
926 	/* Cause the board_cache thread to terminate. */
927 	mutex_enter(&board_flag_lock);
928 	board_thread_run = 0;
929 	cv_signal(&board_flag_cond);
930 	mutex_exit(&board_flag_lock);
931 
932 	thread_join(board_thread_tid);
933 
934 	DCMN_ERR_THREAD(CE_NOTE, "%s: cache update threads finished", f);
935 
936 	return (DDI_SUCCESS);
937 }
938 
939 
940 static int
sgenv_keyswitch_kstat_update(kstat_t * ksp,int rw)941 sgenv_keyswitch_kstat_update(kstat_t *ksp, int rw)
942 {
943 	sg_keyswitch_kstat_t	*keysw_data;
944 
945 	int8_t	posn;	/* keysw posn read from IO-SRAM */
946 	int	size;	/* size of IO-SRAM chunk */
947 	int	rv = 0;	/* return value of iosram_read() */
948 
949 	keysw_data	= (sg_keyswitch_kstat_t *)ksp->ks_data;
950 
951 	switch (rw) {
952 	case KSTAT_WRITE:
953 		/*
954 		 * Write not permitted
955 		 */
956 		return (EACCES);
957 
958 	case KSTAT_READ:
959 		/*
960 		 * Get the size of the keyswitch IO-SRAM chunk.
961 		 * This should be one byte.
962 		 *
963 		 * If the size is not 1 byte we set the position to UNKNOWN
964 		 *
965 		 * Otherwise we read the keyswitch position from IO-SRAM.
966 		 * Then check that this is a valid keyswitch position.
967 		 * If it is not valid then something is corrupt and set
968 		 * the position to UNKNOWN.
969 		 */
970 		size = iosram_size(SBBC_KEYSWITCH_KEY);
971 		if (size != 1) {
972 			posn = SG_KEYSWITCH_POSN_UNKNOWN;
973 			rv = -1;
974 
975 		} else if ((rv = iosram_read(SBBC_KEYSWITCH_KEY, 0,
976 		    (char *)&posn, size)) != 0) {
977 			posn = SG_KEYSWITCH_POSN_UNKNOWN;
978 
979 		} else {
980 			/* Check posn is not corrupt */
981 			switch (posn) {
982 				case SG_KEYSWITCH_POSN_ON:
983 				case SG_KEYSWITCH_POSN_DIAG:
984 				case SG_KEYSWITCH_POSN_SECURE:
985 					/* value read from kstat is OK */
986 					break;
987 
988 				default:
989 					/* value read from kstat is corrupt */
990 					posn = SG_KEYSWITCH_POSN_UNKNOWN;
991 					break;
992 			}
993 		}
994 
995 		/* Write position to kstat. */
996 		keysw_data->keyswitch_position.value.i32 = posn;
997 
998 		return (rv);
999 
1000 	default:
1001 		return (EINVAL);
1002 	}
1003 }
1004 
1005 static void
sgenv_init_env_cache(void)1006 sgenv_init_env_cache(void)
1007 {
1008 	ASSERT(env_thread_run == 0);
1009 	ASSERT(env_thread == NULL);
1010 }
1011 
1012 
1013 /*
1014  * This thread runs in the background and waits for an interrupt handler
1015  * registered to wait for ENV/DR events from the SC to signal/flag that we
1016  * need to update our Env Cache.
1017  */
1018 static void
sgenv_update_env_cache(void)1019 sgenv_update_env_cache(void)
1020 {
1021 	DCMN_ERR_S(f, "sgenv_update_env_cache()");
1022 
1023 	mutex_enter(&env_flag_lock);
1024 
1025 	while (env_thread_run == 1) {
1026 
1027 		/*
1028 		 * We check to see if the update needed flag is set.
1029 		 * If it is then this means that:
1030 		 *	1) This is the first time through the while loop
1031 		 *	   and we need to initialize the cache.
1032 		 *	2) An interrupt handler was triggered while we
1033 		 *	   we were updating the env cache during the previous
1034 		 *	   iteration of the while loop and we need to refresh
1035 		 *	   the env data to ensure we are completely up to date.
1036 		 *
1037 		 * Otherwise we wait until we get a signal from one of the
1038 		 * interrupt handlers.
1039 		 */
1040 		if (env_cache_update_needed) {
1041 			DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f);
1042 
1043 			env_cache_update_needed = B_FALSE;
1044 
1045 		} else {
1046 			DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f);
1047 
1048 			cv_wait(&env_flag_cond, &env_flag_lock);
1049 
1050 			/* Check if we are being asked to terminate */
1051 			if (env_thread_run == 0) {
1052 				break;
1053 			}
1054 
1055 			env_cache_updating = B_TRUE;
1056 		}
1057 
1058 		mutex_exit(&env_flag_lock);
1059 		(void) sgenv_get_env_info_data();
1060 
1061 		(void) sgenv_check_sensor_thresholds();
1062 		mutex_enter(&env_flag_lock);
1063 
1064 		if (env_cache_update_needed == B_FALSE)
1065 			env_cache_updating = B_FALSE;
1066 	}
1067 
1068 	mutex_exit(&env_flag_lock);
1069 
1070 	DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f);
1071 
1072 	env_thread_run = -1;
1073 	thread_exit();
1074 }
1075 
1076 
1077 /*
1078  * We always return what is in the env_cache. It is up to the SC to ensure
1079  * that the env_cache is current by sending events to us when something
1080  * changes. The cache will then be updated by going to the SC to get the
1081  * new data. That way the kstat_update code can always be sure that it gets
1082  * current data without having to wait while the SC responds (slowly) to our
1083  * request for data.
1084  *
1085  * The way the update and snapshot code works, we cannot be guaranteed that
1086  * someone won't grab the env_cache_lock between the update and snapshot
1087  * calls so we use a temporary snapshot of the env_cache. We cannot hold
1088  * any locks across the calls from the update to the snapshot as we are
1089  * not guaranteed that the snapshot function will be called. So we create
1090  * the snapshot of the env_cache in the update routine and dump this to the
1091  * kstat user buffer in the snapshot routine. (There are error conditions in
1092  * which the snapshot will not be called by the kstat framework so we need
1093  * to handle these appropriately.)
1094  */
1095 static int
sgenv_env_info_kstat_update(kstat_t * ksp,int rw)1096 sgenv_env_info_kstat_update(kstat_t *ksp, int rw)
1097 {
1098 	DCMN_ERR_S(f, "sgenv_env_info_kstat_update()");
1099 
1100 	int		err = 0;
1101 	int		key_posn;
1102 	env_sensor_t	*ptr;
1103 
1104 	switch (rw) {
1105 	case KSTAT_WRITE:
1106 		/*
1107 		 * Write not permitted
1108 		 */
1109 		return (EACCES);
1110 
1111 	case KSTAT_READ:
1112 
1113 		mutex_enter(&env_cache_lock);
1114 		/*
1115 		 * We now need to ensure that there is enough room allocated
1116 		 * by the kstat framework to return the data via ks_data.
1117 		 * It is possible there may be no data in the cache but
1118 		 * we still return zero sized kstats to ensure no client breaks
1119 		 */
1120 		sgenv_update_env_kstat_size(ksp);
1121 
1122 		/*
1123 		 * If the snapshot still has data (this could be because the
1124 		 * kstat framework discovered an error and did not call the
1125 		 * snapshot code which should have freed this buffer) we free
1126 		 * it here.
1127 		 */
1128 		if ((env_cache_snapshot != NULL) &&
1129 		    (env_cache_snapshot_size > 0)) {
1130 			DCMN_ERR_CACHE(CE_NOTE, "%s freeing "
1131 			    "env_cache_snapshot buf", f);
1132 			kmem_free(env_cache_snapshot, env_cache_snapshot_size);
1133 		}
1134 
1135 		/*
1136 		 * Create a new snapshot buffer based on ks_data_size
1137 		 */
1138 		env_cache_snapshot_size = ksp->ks_data_size;
1139 		env_cache_snapshot = kmem_zalloc(
1140 		    env_cache_snapshot_size, KM_SLEEP);
1141 
1142 		/*
1143 		 * We need to take a fresh snapshot of the env_cache here.
1144 		 * For each sensor collection, we check to see if there is
1145 		 * data in the cache (ie. != NULL). If there is, we copy it
1146 		 * into the snapshot.
1147 		 */
1148 		ptr = env_cache_snapshot;
1149 		for (key_posn = 0; key_posn < SGENV_MAX_HPU_KEYS; key_posn++) {
1150 			if (vol_sensor_count[key_posn] <= 0)
1151 				continue;
1152 
1153 			ASSERT(vol_sensor_count[key_posn] <=
1154 			    SGENV_MAX_SENSORS_PER_KEY);
1155 
1156 			/*
1157 			 * <env_cache> entry should have been allocated
1158 			 * in the kstat_update function already.
1159 			 *
1160 			 * If this <env_cache> entry is NULL, then
1161 			 * it has already been destroyed or cleared
1162 			 * and the sensor readings have disappeared.
1163 			 */
1164 			if (env_cache[key_posn] == NULL) {
1165 				DCMN_ERR(CE_NOTE, "!Cache entry %d has "
1166 				    "disappeared", key_posn);
1167 				vol_sensor_count[key_posn] = 0;
1168 				continue;
1169 			}
1170 
1171 			bcopy(&env_cache[key_posn][0], ptr,
1172 			    sizeof (env_sensor_t) *
1173 			    vol_sensor_count[key_posn]);
1174 			ptr += vol_sensor_count[key_posn];
1175 		}
1176 		mutex_exit(&env_cache_lock);
1177 
1178 		return (err);
1179 
1180 	default:
1181 		return (EINVAL);
1182 	}
1183 }
1184 
1185 static int
sgenv_env_info_kstat_snapshot(kstat_t * ksp,void * buf,int rw)1186 sgenv_env_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
1187 {
1188 	DCMN_ERR_S(f, "sgenv_env_info_kstat_snapshot()");
1189 
1190 	switch (rw) {
1191 	case KSTAT_WRITE:
1192 		/*
1193 		 * Write not permitted
1194 		 */
1195 		return (EACCES);
1196 
1197 	case KSTAT_READ:
1198 
1199 		/*
1200 		 * We have taken a snapshot of the env_cache in the
1201 		 * update routine so we simply bcopy this into the
1202 		 * kstat buf. No locks needed here.
1203 		 */
1204 		if (env_cache_snapshot_size > 0)
1205 			bcopy(env_cache_snapshot, buf, env_cache_snapshot_size);
1206 
1207 		ksp->ks_snaptime = last_env_read_time;
1208 
1209 		/*
1210 		 * Free the memory used by the snapshot. If for some reason
1211 		 * the kstat framework does not call this snapshot routine,
1212 		 * we also have a check in the update routine so the next
1213 		 * time it is called it checks for this condition and frees
1214 		 * the snapshot buffer there.
1215 		 */
1216 		DCMN_ERR_CACHE(CE_NOTE, "%s freeing env_cache_snapshot buf", f);
1217 		kmem_free(env_cache_snapshot, env_cache_snapshot_size);
1218 		env_cache_snapshot = NULL;
1219 		env_cache_snapshot_size = 0;
1220 
1221 		return (0);
1222 
1223 	default:
1224 		return (EINVAL);
1225 	}
1226 }
1227 
1228 static void
sgenv_init_board_cache(void)1229 sgenv_init_board_cache(void)
1230 {
1231 	int	i;
1232 
1233 	ASSERT(board_thread_run == 0);
1234 	ASSERT(board_thread == NULL);
1235 
1236 	/*
1237 	 * Init all node-ids to be -1.
1238 	 */
1239 	mutex_enter(&board_cache_lock);
1240 	for (i = 0; i < SG_MAX_BDS; i++)
1241 		board_cache[i].node_id = (-1);
1242 	mutex_exit(&board_cache_lock);
1243 }
1244 
1245 
1246 /*
1247  * This thread runs in the background and waits for an interrupt handler
1248  * registered to wait for DR events from the SC to signal/flag that we
1249  * need to update our Board Cache.
1250  */
1251 static void
sgenv_update_board_cache(void)1252 sgenv_update_board_cache(void)
1253 {
1254 	DCMN_ERR_S(f, "sgenv_update_board_cache()");
1255 
1256 	mutex_enter(&board_flag_lock);
1257 
1258 	while (board_thread_run == 1) {
1259 
1260 		/*
1261 		 * We check to see if the update needed flag is set.
1262 		 * If it is then this means that:
1263 		 *	1) This is the first time through the while loop
1264 		 *	   and we need to initialize the cache.
1265 		 *	2) An interrupt handler was triggered while we
1266 		 *	   we were updating the cache during the previous
1267 		 *	   iteration of the while loop and we need to refresh
1268 		 *	   the env data to ensure we are completely up to date.
1269 		 *
1270 		 * Otherwise we wait until we get a signal from one of the
1271 		 * interrupt handlers.
1272 		 */
1273 		if (board_cache_update_needed) {
1274 			DCMN_ERR_THREAD(CE_NOTE, "%s: update needed", f);
1275 			board_cache_update_needed = B_FALSE;
1276 
1277 		} else {
1278 			DCMN_ERR_THREAD(CE_NOTE, "%s: Waiting for signal", f);
1279 
1280 			cv_wait(&board_flag_cond, &board_flag_lock);
1281 
1282 			/* Check if we are being asked to terminate */
1283 			if (board_thread_run == 0) {
1284 				break;
1285 			}
1286 
1287 			board_cache_updating = B_TRUE;
1288 		}
1289 
1290 		mutex_exit(&board_flag_lock);
1291 		(void) sgenv_get_board_info_data();
1292 		mutex_enter(&board_flag_lock);
1293 
1294 		if (board_cache_update_needed == B_FALSE)
1295 			board_cache_updating = B_FALSE;
1296 	}
1297 
1298 	mutex_exit(&board_flag_lock);
1299 
1300 	DCMN_ERR_THREAD(CE_NOTE, "Exiting %s", f);
1301 
1302 	board_thread_run = -1;
1303 	thread_exit();
1304 }
1305 
1306 
1307 /*
1308  * We always return what is in the board_cache. It is up to the SC to ensure
1309  * that the board_cache is current by sending events to us when something
1310  * changes. The cache will then be updated by going to the SC to get the
1311  * new data. That way the kstat_update code can always be sure that it gets
1312  * current data without having to wait while the SC responds (slowly) to our
1313  * request for data.
1314  *
1315  * The way the update and snapshot code works, we cannot be guaranteed that
1316  * someone won't grab the board_cache_lock between the update and snapshot
1317  * calls so we use a snapshot buffer of the board_cache. We cannot hold
1318  * any locks across the calls from the update to the snapshot as we are
1319  * not guaranteed that the snapshot function will be called. So we create
1320  * the snapshot of the board_cache in the update routine and dump this to the
1321  * kstat user buffer in the snapshot routine. (There are error conditions in
1322  * which the snapshot will not be called by the kstat framework so we need
1323  * to handle these appropriately.)
1324  */
1325 static int
sgenv_board_info_kstat_update(kstat_t * ksp,int rw)1326 sgenv_board_info_kstat_update(kstat_t *ksp, int rw)
1327 {
1328 	int		i;
1329 
1330 	switch (rw) {
1331 	case KSTAT_WRITE:
1332 		/*
1333 		 * Write not permitted
1334 		 */
1335 		return (EACCES);
1336 
1337 	case KSTAT_READ:
1338 		/*
1339 		 * The board_cache is created during startup, and so should be
1340 		 * available before a user can log in and trigger a kstat read,
1341 		 * but we check just in case.
1342 		 */
1343 		if (board_cache_updated == FALSE)
1344 			return (ENXIO);
1345 
1346 		mutex_enter(&board_cache_lock);
1347 
1348 		/*
1349 		 * Set <ks_data_size> to the new number of board readings so
1350 		 * that the snapshot routine can allocate the correctly sized
1351 		 * kstat.
1352 		 */
1353 		ksp->ks_data_size = board_count * sizeof (sg_board_info_t);
1354 
1355 		board_count_snapshot = board_count;
1356 
1357 		/*
1358 		 * We are now guaranteed that that board_cache is not in flux
1359 		 * (as we have the lock) so we take a copy of the board_cache
1360 		 * into the board_cache_snapshot so that the snapshot routine
1361 		 * can copy it from the board_cache_snapshot into the user kstat
1362 		 * buffer.
1363 		 */
1364 		for (i = 0; i < SG_MAX_BDS; i++) {
1365 			board_cache_snapshot[i] = board_cache[i];
1366 		}
1367 
1368 		mutex_exit(&board_cache_lock);
1369 
1370 		return (0);
1371 
1372 	default:
1373 		return (EINVAL);
1374 	}
1375 }
1376 
1377 static int
sgenv_board_info_kstat_snapshot(kstat_t * ksp,void * buf,int rw)1378 sgenv_board_info_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
1379 {
1380 	DCMN_ERR_S(f, "sgenv_board_info_kstat_snapshot()");
1381 
1382 	sg_board_info_t	*bdp;
1383 	int		i, num_bds = 0;
1384 
1385 	switch (rw) {
1386 	case KSTAT_WRITE:
1387 		/*
1388 		 * Write not permitted
1389 		 */
1390 		return (EACCES);
1391 
1392 	case KSTAT_READ:
1393 
1394 		if (board_cache_updated == FALSE) {
1395 			ksp->ks_data_size = 0;
1396 			ksp->ks_data = NULL;
1397 			return (ENOMEM);
1398 		}
1399 
1400 		/*
1401 		 * Update the snap_time with the last time we got fresh data
1402 		 * from the SC.
1403 		 */
1404 		ksp->ks_snaptime = last_board_read_time;
1405 
1406 		ASSERT(board_count_snapshot <= SG_MAX_BDS);
1407 		/*
1408 		 * For each entry in the board_cache_snapshot we check to see
1409 		 * if the node_id is != NULL before we copy it into
1410 		 * the kstat buf.
1411 		 */
1412 		for (i = 0; i < SG_MAX_BDS; i++) {
1413 			bdp = &board_cache_snapshot[i];
1414 			DCMN_ERR_CACHE(CE_NOTE, "%s: looking at "
1415 			    "cache_snapshot entry[%d], node=%d",
1416 			    f, i, bdp->node_id);
1417 			if (bdp->node_id >= 0) {
1418 				/*
1419 				 * Need a check to ensure that the buf
1420 				 * is still within the allocated size.
1421 				 * We check how many boards are already
1422 				 * in the user buf before adding one.
1423 				 */
1424 				num_bds++;
1425 				if (num_bds > board_count_snapshot) {
1426 					ksp->ks_data_size = 0;
1427 					ksp->ks_data = NULL;
1428 					DCMN_ERR(CE_WARN, "%s: buf overflow."
1429 					    " %d >= %d.",
1430 					    f, num_bds, board_count_snapshot);
1431 					return (EIO);
1432 				}
1433 
1434 				DCMN_ERR_CACHE(CE_NOTE, "%s: about to bcopy"
1435 				    " cache_snapshot entry[%d], node=%d,"
1436 				    " board=%d", f, i, bdp->node_id,
1437 				    bdp->board_num);
1438 				bcopy(bdp, buf, sizeof (sg_board_info_t));
1439 				buf = ((sg_board_info_t *)buf) + 1;
1440 			}
1441 		}
1442 		return (0);
1443 
1444 	default:
1445 		return (EINVAL);
1446 	}
1447 }
1448 
1449 
1450 /*
1451  * This function coordinates reading the env data from the SC.
1452  *
1453  * ERROR:
1454  * 	If an error occurs while making a call to the mailbox and we have data
1455  *	in the cache from a previous call to the SC, we return an error of 0.
1456  *	That way the kstat framework will return the old data instead of
1457  *	returning an error and an empty kstat.
1458  */
1459 static int
sgenv_get_env_info_data(void)1460 sgenv_get_env_info_data(void)
1461 {
1462 	DCMN_ERR_S(f, "sgenv_get_env_info_data()");
1463 
1464 	envresp_key_t	new_keys[SGENV_MAX_HPU_KEYS] = {0};
1465 	envresp_key_t	old_key;
1466 	envresp_key_t	key;
1467 
1468 	int	i;
1469 
1470 	int	err = 0;	/* return value of func's which get env data */
1471 	int	status = 0;	/* reason why env data func returned an error */
1472 
1473 	DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f);
1474 
1475 	err = sgenv_get_hpu_keys(new_keys, &status);
1476 
1477 	if (err != 0) {
1478 		/*
1479 		 * If we get an error getting the key values, then we return
1480 		 * as we cannot proceed any farther. If there is old env data
1481 		 * in the cache, then we return zero so that the kstat
1482 		 * framework will export the old data.
1483 		 */
1484 		if (env_cache_updated == FALSE) {
1485 			sgenv_mbox_error_msg("HPU Keys", err, status);
1486 			return (err);
1487 		} else {
1488 			sgenv_mbox_error_msg("HPU Keys", err, status);
1489 			return (0);
1490 		}
1491 	}
1492 
1493 
1494 	for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) {
1495 
1496 		if (vol_sensor_count[i] == 0) {
1497 			/* empty collection */
1498 			old_key = 0;
1499 		} else {
1500 			/*
1501 			 * populated collection:
1502 			 * (assert size is OK, and 1st sensor is pseudo-sensor)
1503 			 */
1504 			ASSERT(env_cache[i] != NULL);
1505 			ASSERT(env_cache[i][0].sd_id.id.sensor_part ==
1506 			    SG_SENSOR_PART_SCAPP);
1507 			ASSERT(env_cache[i][0].sd_id.id.sensor_type ==
1508 			    SG_SENSOR_TYPE_ENVDB);
1509 			ASSERT(SG_INFO_VALUESTATUS(env_cache[i][0].sd_infostamp)
1510 			    == SG_INFO_VALUE_OK);
1511 
1512 			old_key = env_cache[i][0].sd_value;
1513 		}
1514 
1515 		key = new_keys[i];
1516 
1517 		/*
1518 		 * No data is associated with this key position and there was
1519 		 * no data on the previous read either so we simply continue
1520 		 * to the next key position.
1521 		 */
1522 		if ((key == 0) && (old_key == 0)) {
1523 			ASSERT(env_cache[i] == NULL);
1524 			continue;
1525 		}
1526 
1527 
1528 		/*
1529 		 * We need to grab this lock every time we are going to
1530 		 * update a HPU. However, a kstat_read can grab
1531 		 * the env_cache_lock when it wants to get a snapshot of
1532 		 * the env_cache. This has the affect of stopping the
1533 		 * active env_cache writer after they have updated the
1534 		 * active HPU, allowing the kstat_read to get a dump of
1535 		 * the env_cache, then the env_cache writer can resume
1536 		 * updating the cache. For performance it is more important
1537 		 * that the kstat_read completes quickly so we allow the
1538 		 * kstat_read to interrupt the updating of the env_cache.
1539 		 * The updating can take anything from a few seconds to
1540 		 * several minutes to complete.
1541 		 */
1542 		mutex_enter(&env_cache_lock);
1543 
1544 		/*
1545 		 * If the key just read is zero, then the
1546 		 * group of sensors have been removed by
1547 		 * some means and we need to zero out
1548 		 * the env_cache. (this ensures that data
1549 		 * belonging to a removed board is not
1550 		 * returned)
1551 		 */
1552 		if (key == 0) {
1553 			ASSERT(old_key != 0);
1554 			(void) sgenv_clear_env_cache_entry(i);
1555 			mutex_exit(&env_cache_lock);
1556 			continue;
1557 		}
1558 
1559 		/*
1560 		 * Check to see if this key has changed since
1561 		 * the last read.
1562 		 *
1563 		 * If it has changed, we need to update everything.
1564 		 *
1565 		 * If it hasn't we simply read the volatiles
1566 		 * and check to see if the constants have changed.
1567 		 */
1568 		if (key != old_key) {
1569 			/*
1570 			 * If the key is non-zero, then a new HPU has
1571 			 * been added to the system or it has changed
1572 			 * somehow and we need to re-read everything.
1573 			 * (we also need to zero out the env_cache as
1574 			 * there may be less sensors returned now and
1575 			 * the old ones may not be overwritten)
1576 			 */
1577 
1578 			/*
1579 			 * If the <env_cache> has not already been
1580 			 * allocated for this key position then we
1581 			 * go ahead and allocate it.
1582 			 */
1583 			if (env_cache[i] == NULL) {
1584 				err = sgenv_create_env_cache_entry(i);
1585 				if (err == DDI_FAILURE) {
1586 					mutex_exit(&env_cache_lock);
1587 					continue;
1588 				}
1589 			}
1590 
1591 			err = sgenv_get_env_data(new_keys[i], i,
1592 			    SG_GET_ENV_CONSTANTS, &status);
1593 			if (err) {
1594 				err = sgenv_handle_env_data_error(err, status,
1595 				    i, old_key, "Constant Data");
1596 				mutex_exit(&env_cache_lock);
1597 				if (err != DDI_FAILURE) {
1598 					continue;
1599 				} else if (env_cache_updated == TRUE) {
1600 					return (0);
1601 				} else {
1602 					return (DDI_FAILURE);
1603 				}
1604 			}
1605 
1606 			err = sgenv_get_env_data(new_keys[i], i,
1607 			    SG_GET_ENV_THRESHOLDS, &status);
1608 			if (err) {
1609 				err = sgenv_handle_env_data_error(err, status,
1610 				    i, old_key, "Threshold Data");
1611 				mutex_exit(&env_cache_lock);
1612 				if (err != DDI_FAILURE) {
1613 					continue;
1614 				} else if (env_cache_updated == TRUE) {
1615 					return (0);
1616 				} else {
1617 					return (DDI_FAILURE);
1618 				}
1619 			}
1620 
1621 			err = sgenv_get_env_data(new_keys[i], i,
1622 			    SG_GET_ENV_VOLATILES, &status);
1623 			if (err) {
1624 				err = sgenv_handle_env_data_error(err, status,
1625 				    i, old_key, "Volatile Data (fresh)");
1626 				mutex_exit(&env_cache_lock);
1627 				if (err != DDI_FAILURE) {
1628 					continue;
1629 				} else if (env_cache_updated == TRUE) {
1630 					return (0);
1631 				} else {
1632 					return (DDI_FAILURE);
1633 				}
1634 			}
1635 
1636 			/*
1637 			 * As we have successfully got env data for a HPU,
1638 			 * we ensure <env_cache_updated> is set to TRUE so that
1639 			 * in the future, if an error occurs during the mailbox
1640 			 * transfer, we know that there is old data for at
1641 			 * least one HPU in the <env_cache> which could be
1642 			 * returned instead of returning an error to the kstat
1643 			 * framework indicating that we have no data to return.
1644 			 */
1645 			env_cache_updated = TRUE;
1646 			last_env_read_time = gethrtime();
1647 
1648 		} else {
1649 			/*
1650 			 * key == old_key
1651 			 *
1652 			 * Handle the case when the value of the old key and
1653 			 * the new key are identical.
1654 			 */
1655 			ASSERT(env_cache[i] != NULL);
1656 
1657 			/*
1658 			 * If the keys are identical, then the quasi-constants
1659 			 * should not have changed (and so don't need updating).
1660 			 * Similarly for the threshold readings.
1661 			 */
1662 
1663 			/* Update the volatile data */
1664 			err = sgenv_get_env_data(new_keys[i], i,
1665 			    SG_GET_ENV_VOLATILES, &status);
1666 			if (err) {
1667 				err = sgenv_handle_env_data_error(err, status,
1668 				    i, old_key, "Volatile Data (update)");
1669 				mutex_exit(&env_cache_lock);
1670 				if (err == DDI_FAILURE) {
1671 					return (0);
1672 				} else {
1673 					continue;
1674 				}
1675 			}
1676 
1677 		}
1678 		mutex_exit(&env_cache_lock);
1679 	}
1680 
1681 	return (0);
1682 }
1683 
1684 
1685 static int
sgenv_get_board_info_data(void)1686 sgenv_get_board_info_data(void)
1687 {
1688 	/*
1689 	 * This array keeps track of the valid nodes in a system. A call is
1690 	 * made to OBP to get the "nodeid" property from all the ssm nodes,
1691 	 * and for each nodeid found, that position in the array is set to
1692 	 * TRUE. For a Serengeti only one position in the array will be TRUE.
1693 	 */
1694 	static uint_t node_present[SSM_MAX_INSTANCES] = {SGENV_NO_NODE_EXISTS};
1695 
1696 	static fn_t	f = "sgenv_get_board_info_data()";
1697 	static int	first_time = TRUE;
1698 
1699 	sbbc_msg_t	req;
1700 	sbbc_msg_t	resp;
1701 	int		node;	/* loop index */
1702 	int		board;	/* loop index */
1703 	show_board_t	show_bd, *shbp = &show_bd;
1704 	info_t		inform;
1705 	int		status;	/* msg_status returned by response */
1706 	int		rv = 0;	/* return value of call to mailbox */
1707 	sg_board_info_t	*ptr;
1708 
1709 	DCMN_ERR_EVENT(CE_NOTE, "%s: entered.", f);
1710 
1711 	if (first_time) {
1712 		sgenv_set_valid_node_positions(node_present);
1713 		first_time = FALSE;
1714 	}
1715 
1716 	for (node = 0; node < SSM_MAX_INSTANCES; node++) {
1717 
1718 		if (node_present[node] == SGENV_NO_NODE_EXISTS)
1719 			continue;
1720 
1721 		for (board = 0; board < SG_MAX_BDS; board++) {
1722 
1723 			/*
1724 			 * If we have discovered in a previous call to the SC
1725 			 * that there is no board in this slot on this type of
1726 			 * chassis then we don't waste resources asking the SC
1727 			 * for nonexistent data.
1728 			 */
1729 			if ((node_present[node] & (1 << board)) == 0)
1730 				continue;
1731 
1732 			inform.board = board;
1733 			inform.node = node;
1734 			inform.revision = 0xdead;
1735 
1736 			req.msg_type.type = DR_MBOX;
1737 			req.msg_type.sub_type = DR_MBOX_SHOW_BOARD;
1738 			req.msg_status = SG_MBOX_STATUS_SUCCESS;
1739 			req.msg_len = sizeof (info_t);
1740 			req.msg_bytes = sizeof (info_t);
1741 			req.msg_buf = (caddr_t)&inform;
1742 
1743 			bzero(shbp, sizeof (show_board_t));
1744 			shbp->s_cond = -1;
1745 			shbp->s_power = -1;
1746 			shbp->s_assigned = -1;
1747 			shbp->s_claimed = -1;
1748 			shbp->s_present = -1;
1749 
1750 			resp.msg_type.type = DR_MBOX;
1751 			resp.msg_type.sub_type = DR_MBOX_SHOW_BOARD;
1752 			resp.msg_bytes = sizeof (show_board_t);
1753 			resp.msg_status = SG_MBOX_STATUS_SUCCESS;
1754 			resp.msg_len = sizeof (show_board_t);
1755 			resp.msg_buf = (caddr_t)shbp;
1756 
1757 
1758 			/*
1759 			 * We want to avoid the case where an invalid time
1760 			 * is specified by a user (by patching the
1761 			 * global variable <sgenv_max_mbox_wait_time>).
1762 			 *
1763 			 * Any incorrect values are reset to the default time.
1764 			 */
1765 			if (sgenv_max_mbox_wait_time <=
1766 			    max(sbbc_mbox_min_timeout, 0))
1767 				sgenv_max_mbox_wait_time =
1768 				    sbbc_mbox_default_timeout;
1769 
1770 			rv = sbbc_mbox_request_response(&req, &resp,
1771 			    sgenv_max_mbox_wait_time);
1772 			status = resp.msg_status;
1773 
1774 			if ((rv) || (status != SG_MBOX_STATUS_SUCCESS)) {
1775 				/*
1776 				 * errors from Solaris sgsbbc driver
1777 				 */
1778 				if (status > SG_MBOX_STATUS_SUCCESS) {
1779 					sgenv_mbox_error_msg("Board Info", rv,
1780 					    resp.msg_status);
1781 					return (rv);
1782 				}
1783 
1784 				/*
1785 				 * errors from SCAPP
1786 				 */
1787 				if (status == SG_MBOX_STATUS_ILLEGAL_NODE) {
1788 					sgenv_mbox_error_msg("Board Info", rv,
1789 					    resp.msg_status);
1790 					node_present[node] =
1791 					    SGENV_NO_NODE_EXISTS;
1792 
1793 					/*
1794 					 * No point looping through the rest of
1795 					 * the boards associated with this node.
1796 					 */
1797 					break;
1798 
1799 				} else if (status ==
1800 				    SG_MBOX_STATUS_ILLEGAL_SLOT) {
1801 
1802 					/*
1803 					 * We clear the bit representing <board>
1804 					 * in <node> to indicate that this slot
1805 					 * cannot exist on this chassis.
1806 					 */
1807 					node_present[node] &= (~(1 << board) &
1808 					    SGENV_NODE_TYPE_DS);
1809 					continue;
1810 
1811 				} else if (status ==
1812 				    SG_MBOX_STATUS_BOARD_ACCESS_DENIED) {
1813 					/*
1814 					 * We cannot access data for this slot,
1815 					 * however we may be able to do so in
1816 					 * the future. We do nothing.
1817 					 */
1818 					rv = rv;
1819 				} else {
1820 					char	err_msg[40];
1821 
1822 					(void) sprintf(err_msg,
1823 					    "Board data for "
1824 					    "Node%d/Slot%d", node, board);
1825 					sgenv_mbox_error_msg(err_msg, rv,
1826 					    resp.msg_status);
1827 
1828 					if (rv == 0)
1829 						rv = status;
1830 
1831 					continue;
1832 				}
1833 			}
1834 
1835 			mutex_enter(&board_cache_lock);
1836 			ptr = &board_cache[board];
1837 
1838 			/*
1839 			 * Check if the SC returns data for this board.
1840 			 */
1841 			if (shbp->s_assigned == -1) {
1842 				/*
1843 				 * If this cache entry used to have data and
1844 				 * now doesn't we decrement the board_count
1845 				 * clear the env_cache. The board must have
1846 				 * been removed.
1847 				 */
1848 				if (ptr->node_id != -1) {
1849 					board_count--;
1850 
1851 					/*
1852 					 * clear board_cache entry by
1853 					 * setting node_id to -1;
1854 					 */
1855 					ptr->node_id = -1;
1856 					DCMN_ERR_CACHE(CE_NOTE, "%s: "
1857 					    "Clearing cache line %d [%p]",
1858 					    f, board, (void *)ptr);
1859 				}
1860 			} else {
1861 				/*
1862 				 * If this cache entry was previously empty
1863 				 * and we now have data for it we increment
1864 				 * the board_count. A new board must have
1865 				 * been added.
1866 				 */
1867 				if (ptr->node_id == -1)
1868 					board_count++;
1869 				/*
1870 				 * update the board_cache entry
1871 				 */
1872 				DCMN_ERR_CACHE(CE_NOTE, "%s: "
1873 				    "Writing data for bd=%d into "
1874 				    " the board_cache at [%p]",
1875 				    f, board, (void *)ptr);
1876 				ptr->node_id = node;
1877 				ptr->board_num = board;
1878 				ptr->condition = shbp->s_cond;
1879 				ptr->assigned = shbp->s_assigned;
1880 				ptr->claimed = shbp->s_claimed;
1881 				ptr->present = shbp->s_present;
1882 				ptr->led.led_status =
1883 				    shbp->s_ledstatus;
1884 				last_board_read_time = gethrtime();
1885 			}
1886 			mutex_exit(&board_cache_lock);
1887 		} /* board */
1888 	} /* node */
1889 
1890 	/*
1891 	 * Indicate that have managed to store valid data in the <board_cache>
1892 	 * at least once.
1893 	 */
1894 	if (board_count > 0)
1895 		board_cache_updated = TRUE;
1896 
1897 
1898 	return (rv);
1899 }
1900 
1901 
1902 static int
sgenv_get_hpu_keys(envresp_key_t * new,int * status)1903 sgenv_get_hpu_keys(envresp_key_t *new, int *status)
1904 {
1905 	sbbc_msg_t	req;	/* request */
1906 	sbbc_msg_t	resp;	/* response */
1907 
1908 	int	rv;	/* return value from call to mbox */
1909 
1910 	req.msg_type.type = SG_ENV;
1911 	req.msg_type.sub_type = SG_GET_ENV_HPU_KEYS;
1912 	req.msg_status = SG_MBOX_STATUS_SUCCESS;
1913 	req.msg_len = 0;
1914 	req.msg_bytes = 0;
1915 
1916 	resp.msg_type.type = SG_ENV;
1917 	resp.msg_type.sub_type = SG_GET_ENV_HPU_KEYS;
1918 	resp.msg_status = SG_MBOX_STATUS_SUCCESS;
1919 	resp.msg_len = sizeof (envresp_key_t) * SGENV_MAX_HPU_KEYS;
1920 	resp.msg_bytes = 0;
1921 	resp.msg_buf = (caddr_t)new;
1922 
1923 	/*
1924 	 * We want to avoid the case where an invalid time
1925 	 * is specified by a user (by patching the
1926 	 * global variable <sgenv_max_mbox_wait_time>).
1927 	 *
1928 	 * Any incorrect values are reset to the default time.
1929 	 */
1930 	if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0))
1931 		sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout;
1932 
1933 	rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time);
1934 
1935 	*status = resp.msg_status;
1936 
1937 	return (rv);
1938 }
1939 
1940 
1941 static int
sgenv_get_env_data(envresp_key_t key,int key_posn,uint16_t flag,int * status)1942 sgenv_get_env_data(envresp_key_t key, int key_posn, uint16_t flag, int *status)
1943 {
1944 	/*
1945 	 * Only one of these buffers is ever going to be used in a call
1946 	 * so to save kernel stack space we use a union.
1947 	 */
1948 	union {
1949 		envresp_constants_t	con[SGENV_MAX_SENSORS_PER_KEY];
1950 		envresp_volatiles_t	vol[SGENV_MAX_SENSORS_PER_KEY];
1951 		envresp_thresholds_t	thr[SGENV_MAX_SENSORS_PER_KEY];
1952 	} buf;
1953 
1954 	sbbc_msg_t	req;	/* request */
1955 	sbbc_msg_t	resp;	/* response */
1956 
1957 	int	i;	/* loop variable for mbox msg_buf */
1958 	int	rv;	/* return value from call to mbox */
1959 
1960 	ASSERT(MUTEX_HELD(&env_cache_lock));
1961 	ASSERT(env_cache[key_posn] != NULL);
1962 
1963 	if (flag == SG_GET_ENV_CONSTANTS) {
1964 		resp.msg_len = sizeof (buf.con);
1965 		resp.msg_buf = (caddr_t)buf.con;
1966 
1967 	} else if (flag == SG_GET_ENV_VOLATILES) {
1968 		resp.msg_len = sizeof (buf.vol);
1969 		resp.msg_buf = (caddr_t)buf.vol;
1970 
1971 	} else if (flag == SG_GET_ENV_THRESHOLDS) {
1972 		resp.msg_len = sizeof (buf.thr);
1973 		resp.msg_buf = (caddr_t)buf.thr;
1974 
1975 	} else {
1976 		*status = EINVAL;
1977 		return (-1);
1978 	}
1979 
1980 	req.msg_type.type = SG_ENV;
1981 	req.msg_type.sub_type = flag;
1982 	req.msg_status = SG_MBOX_STATUS_SUCCESS;
1983 	req.msg_len = 0;
1984 	req.msg_bytes = 0;
1985 	req.msg_data[0] = key;
1986 
1987 	resp.msg_type.type = SG_ENV;
1988 	resp.msg_type.sub_type = flag;
1989 	resp.msg_status = SG_MBOX_STATUS_SUCCESS;
1990 	resp.msg_bytes = 0;
1991 
1992 	/*
1993 	 * We want to avoid the case where an invalid time
1994 	 * is specified by a user (by patching the
1995 	 * global variable <sgenv_max_mbox_wait_time>).
1996 	 *
1997 	 * Any incorrect values are reset to the default time.
1998 	 */
1999 	if (sgenv_max_mbox_wait_time <= max(sbbc_mbox_min_timeout, 0))
2000 		sgenv_max_mbox_wait_time = sbbc_mbox_default_timeout;
2001 
2002 
2003 	rv = sbbc_mbox_request_response(&req, &resp, sgenv_max_mbox_wait_time);
2004 
2005 	*status = resp.msg_status;
2006 
2007 	/*
2008 	 * We now check that the data returned is valid.
2009 	 */
2010 	if (rv != 0) {
2011 		/*
2012 		 * The SBBC driver encountered an error.
2013 		 */
2014 		return (rv);
2015 
2016 	} else {
2017 		/*
2018 		 * The SC encountered an error.
2019 		 */
2020 		switch (*status) {
2021 		case SG_MBOX_STATUS_SUCCESS:
2022 			/*
2023 			 * No problems encountered - continue and return the
2024 			 * new data.
2025 			 */
2026 			break;
2027 
2028 		case ETIMEDOUT:
2029 			/*
2030 			 * For some reason the mailbox failed to return data
2031 			 * and instead timed out so we return ETIMEDOUT
2032 			 */
2033 			return (ETIMEDOUT);
2034 
2035 		case ENXIO:
2036 			/*
2037 			 * no sensors associated with this key, this may have
2038 			 * changed since we read the keys.
2039 			 */
2040 			return (ENXIO);
2041 
2042 		default:
2043 			/*
2044 			 * The contents of the mbox message contain corrupt
2045 			 * data. Flag this as an error to be returned.
2046 			 */
2047 			SGENV_PRINT_MBOX_MSG((&resp), "Env info problem");
2048 			return (EINVAL);
2049 		}
2050 	}
2051 
2052 	/*
2053 	 * Depending on the type of data returned, save the constant/volatile
2054 	 * data returned in the mailbox message into the <env_cache>.
2055 	 */
2056 	for (i = 0; i < resp.msg_data[0]; i++) {
2057 
2058 		if (flag == SG_GET_ENV_CONSTANTS) {
2059 			env_cache[key_posn][i].sd_id.tag_id =
2060 			    buf.con[i].id.tag_id;
2061 			env_cache[key_posn][i].sd_lo =
2062 			    buf.con[i].lo;
2063 			env_cache[key_posn][i].sd_hi =
2064 			    buf.con[i].hi;
2065 
2066 		} else if (flag == SG_GET_ENV_VOLATILES) {
2067 			env_cache[key_posn][i].sd_value =
2068 			    buf.vol[i].value;
2069 			env_cache[key_posn][i].sd_infostamp =
2070 			    buf.vol[i].info;
2071 
2072 			sgenv_set_sensor_status(&env_cache[key_posn][i]);
2073 
2074 		} else if (flag == SG_GET_ENV_THRESHOLDS) {
2075 			env_cache[key_posn][i].sd_lo_warn =
2076 			    buf.thr[i].lo_warn;
2077 			env_cache[key_posn][i].sd_hi_warn =
2078 			    buf.thr[i].hi_warn;
2079 		}
2080 	}
2081 
2082 	if (flag == SG_GET_ENV_VOLATILES)
2083 		vol_sensor_count[key_posn] = resp.msg_data[0];
2084 
2085 	return (rv);
2086 }
2087 
2088 
2089 /*
2090  * This function handles any errors received from the mailbox framework while
2091  * getting environmental data.
2092  *
2093  * INPUT PARAMETERS
2094  *	err	- return value from call to mailbox framework.
2095  *	status	- message status returned by mailbox framework.
2096  *	key	- key from previous (if any) reading of env data.
2097  *		  Needed to see if we have old data in the <env_cache>.
2098  *	str	- String indicating what type of env request failed.
2099  *
2100  * RETURN VALUES
2101  *	rv == DDI_FAILURE	- there is no point in continuing processing
2102  *				  the data, we should exit from the kstat
2103  *				  framework.
2104  *	rv != DDI_FAILURE	- error has been handled correctly, continue
2105  *				  processing the data returned from the SC.
2106  */
2107 static int
sgenv_handle_env_data_error(int err,int status,int key_posn,envresp_key_t key,char * str)2108 sgenv_handle_env_data_error(int err, int status, int key_posn,
2109 				envresp_key_t key, char *str)
2110 {
2111 	int	rv = DDI_SUCCESS;
2112 
2113 	ASSERT(str != (char *)NULL);
2114 
2115 	switch (err) {
2116 	case ENXIO:
2117 		/*
2118 		 * The SC has changed the env data associated with this key
2119 		 * since we started getting the data. We cannot tell if the
2120 		 * data has disappeared due to the removal of the board from
2121 		 * our Domain or just that the data has been updated. We
2122 		 * simply return the last known data (if possible) and the
2123 		 * next time we request the env data, the SC will have
2124 		 * finished processing this board so we will receive the
2125 		 * correct key values and we can get the correct data.
2126 		 */
2127 		DCMN_ERR_CACHE(CE_NOTE, "key @ posn %d has changed from %d"
2128 		    " while %s", key_posn, key, str);
2129 		rv = ENXIO;
2130 		break;
2131 
2132 	default:
2133 		sgenv_mbox_error_msg(str, err, status);
2134 		rv = DDI_FAILURE;
2135 		break;
2136 	}
2137 
2138 	/*
2139 	 * If there was no data in the <env_cache>, we need to clear the data
2140 	 * just added as the <env_cache> will only be partially filled.
2141 	 */
2142 	if (key == 0)
2143 		sgenv_clear_env_cache_entry(key_posn);
2144 
2145 	return (rv);
2146 }
2147 
2148 
2149 /*
2150  * If the sensor readings for a particular collection of HPUs become invalid,
2151  * then we clear the cache by freeing up the memory.
2152  */
2153 static void
sgenv_clear_env_cache_entry(int key_posn)2154 sgenv_clear_env_cache_entry(int key_posn)
2155 {
2156 	ASSERT(MUTEX_HELD(&env_cache_lock));
2157 
2158 	if (env_cache[key_posn] != NULL) {
2159 		kmem_free(env_cache[key_posn], sizeof (env_sensor_t) *
2160 		    SGENV_MAX_SENSORS_PER_KEY);
2161 		env_cache[key_posn] = NULL;
2162 		vol_sensor_count[key_posn] = 0;
2163 	}
2164 }
2165 
2166 
2167 static void
sgenv_mbox_error_msg(char * str,int err,int status)2168 sgenv_mbox_error_msg(char *str, int err, int status)
2169 {
2170 	/*
2171 	 * We update the count of errors we have encountered during calls to
2172 	 * the mailbox framework (unless we will cause a wraparound)
2173 	 */
2174 	if (sgenv_mbox_error_count < INT_MAX)
2175 		sgenv_mbox_error_count++;
2176 
2177 #ifdef DEBUG
2178 	if ((sgenv_debug & SGENV_DEBUG_MSG) == 0)
2179 		return;
2180 
2181 	ASSERT(str != NULL);
2182 
2183 	switch (err) {
2184 	case ENOTSUP:
2185 		DCMN_ERR(CE_WARN, "!This system configuration does not "
2186 		"support SGENV");
2187 		break;
2188 	case ETIMEDOUT:
2189 		DCMN_ERR(CE_WARN, "!Mailbox timed out while servicing "
2190 		"SGENV request for %s", str);
2191 		break;
2192 	default:
2193 		DCMN_ERR(CE_WARN, "!Error occurred reading %s, Errno=%d,"
2194 		" Status=%d", str, err, status);
2195 		break;
2196 	}
2197 #endif
2198 }
2199 
2200 
2201 /*
2202  * INPUT PARAMETERS
2203  *	key_posn -	The position in the env_cache for which we want to
2204  *			allocate space for a HPU's env data.
2205  *
2206  * ERROR VALUES
2207  *	DDI_FAILURE -	We failed to allocate memory for this cache entry.
2208  *			There is no point asking the SC for env data for this
2209  *			HPU as we will have nowhere to store it.
2210  */
2211 static int
sgenv_create_env_cache_entry(int key_posn)2212 sgenv_create_env_cache_entry(int key_posn)
2213 {
2214 	int	i;	/* used to loop thru each sensor to set the status */
2215 
2216 	ASSERT(key_posn < SGENV_MAX_HPU_KEYS);
2217 	ASSERT(key_posn >= 0);
2218 
2219 	env_cache[key_posn] = (env_sensor_t *)kmem_zalloc(
2220 	    sizeof (env_sensor_t) * SGENV_MAX_SENSORS_PER_KEY, KM_NOSLEEP);
2221 	if (env_cache[key_posn] == NULL) {
2222 		cmn_err(CE_WARN, "Failed to allocate memory for env_cache[%d]",
2223 		    key_posn);
2224 		return (DDI_FAILURE);
2225 	}
2226 
2227 	for (i = 0; i < SGENV_MAX_SENSORS_PER_KEY; i++)
2228 		env_cache[key_posn][i].sd_status = SG_SENSOR_STATUS_OK;
2229 
2230 	return (DDI_SUCCESS);
2231 }
2232 
2233 
2234 static void
sgenv_destroy_env_cache(void)2235 sgenv_destroy_env_cache(void)
2236 {
2237 	int i;
2238 
2239 	ASSERT(MUTEX_HELD(&env_cache_lock) == FALSE);
2240 	mutex_enter(&env_cache_lock);
2241 	for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) {
2242 		if (env_cache[i] != NULL) {
2243 			kmem_free(env_cache[i], sizeof (env_sensor_t) *
2244 			    SGENV_MAX_SENSORS_PER_KEY);
2245 			env_cache[i] = NULL;
2246 			vol_sensor_count[i] = 0;
2247 		}
2248 	}
2249 	env_cache_updated = FALSE;
2250 
2251 	mutex_exit(&env_cache_lock);
2252 }
2253 
2254 static void
sgenv_update_env_kstat_size(kstat_t * ksp)2255 sgenv_update_env_kstat_size(kstat_t *ksp)
2256 {
2257 	int	i;
2258 
2259 	ASSERT(MUTEX_HELD(&env_cache_lock));
2260 
2261 	/* reinitialize this and recount number of sensors */
2262 	ksp->ks_data_size = 0;
2263 
2264 	for (i = 0; i < SGENV_MAX_HPU_KEYS; i++) {
2265 		if (vol_sensor_count[i] <= 0)
2266 			continue;
2267 
2268 		ASSERT(vol_sensor_count[i] <= SGENV_MAX_SENSORS_PER_KEY);
2269 
2270 		/*
2271 		 * increment ksp->ks_data_size by the number of
2272 		 * sensors in the collection <i>.
2273 		 */
2274 		ksp->ks_data_size += vol_sensor_count[i] *
2275 		    sizeof (env_sensor_t);
2276 	}
2277 	ASSERT(ksp->ks_data_size >= 0);
2278 }
2279 
2280 
2281 /*
2282  * This function is triggered by the thread that updates the env_cache.
2283  * It checks for any sensors which have exceeded their limits/thresholds
2284  * and generates sysevents for the sensor values that have changed.
2285  */
2286 /*ARGSUSED*/
2287 static uint_t
sgenv_check_sensor_thresholds(void)2288 sgenv_check_sensor_thresholds(void)
2289 {
2290 	DCMN_ERR_S(f, "sgenv_poll_env()");
2291 
2292 	int	key;	/* loop through keys */
2293 	int	i;	/* loops through each sensor for each <key> */
2294 
2295 	env_sensor_t		sensor;
2296 	env_sensor_status_t	status;
2297 
2298 	DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
2299 
2300 	mutex_enter(&env_cache_lock);
2301 
2302 	for (key = 0; key < SGENV_MAX_HPU_KEYS; key++) {
2303 
2304 		if (vol_sensor_count[key] == 0)
2305 			continue;
2306 
2307 		for (i = 0; i < vol_sensor_count[key]; i++) {
2308 			sensor = env_cache[key][i];
2309 			status = sensor.sd_status;
2310 
2311 			if (SG_GET_SENSOR_STATUS(status) ==
2312 			    SG_GET_PREV_SENSOR_STATUS(status)) {
2313 				continue;
2314 			}
2315 
2316 			/*
2317 			 * This sensor has changed in status since the last
2318 			 * time we polled - we need to inform the sysevent
2319 			 * framework.
2320 			 */
2321 			switch (sensor.sd_id.id.sensor_type) {
2322 			/*
2323 			 * we don't care about the pseudo sensors and
2324 			 * the Fan Status is notified by a separate
2325 			 * unsolicited event so we simply get the next
2326 			 * reading
2327 			 */
2328 			case SG_SENSOR_TYPE_ENVDB:
2329 			case SG_SENSOR_TYPE_COOLING:
2330 				continue;
2331 
2332 			/*
2333 			 * We have handled all the special cases by now.
2334 			 */
2335 			default:
2336 				(void) sgenv_process_threshold_event(sensor);
2337 				break;
2338 			}
2339 
2340 			SGENV_PRINT_POLL_INFO(sensor);
2341 		}
2342 	}
2343 	mutex_exit(&env_cache_lock);
2344 
2345 	return (DDI_SUCCESS);
2346 }
2347 
2348 
2349 /*
2350  * This function is passed in an array of length SSM_MAX_INSTANCES and
2351  * it searches OBP to for ssm nodes, and for each one if finds, it sets the
2352  * corresponding position in the array to TRUE.
2353  */
2354 static void
sgenv_set_valid_node_positions(uint_t * node_present)2355 sgenv_set_valid_node_positions(uint_t *node_present)
2356 {
2357 	dev_info_t	*rdip;		/* root dev info ptr */
2358 	dev_info_t	*dip;
2359 
2360 	ASSERT(node_present != NULL);
2361 
2362 	rdip = ddi_root_node();
2363 
2364 	for (dip = ddi_get_child(rdip); dip != NULL;
2365 	    dip = ddi_get_next_sibling(dip)) {
2366 		if (strncmp("ssm", ddi_node_name(dip), 3) == 0) {
2367 			int	value;
2368 
2369 			value = ddi_getprop(DDI_DEV_T_ANY, dip,
2370 			    DDI_PROP_DONTPASS, "nodeid", 0);
2371 
2372 			/*
2373 			 * If we get a valid nodeID which has not already
2374 			 * been found in a previous call to this function,
2375 			 * then we set all 10 LSB bits to indicate there may
2376 			 * be a board present in each slot.
2377 			 *
2378 			 * It is the job of sgenv_get_board_info_data() to weed
2379 			 * out the invalid cases when we don't have a
2380 			 * DS chassis.
2381 			 *
2382 			 * NOTE: We make the assumption that a chassis cannot
2383 			 * be DR'ed out, which is true for a Serengeti.
2384 			 * By the time WildCat need this functionality Solaris
2385 			 * will be able to know what kind of a chassis is
2386 			 * present and there will be no need to try and work
2387 			 * this out from the msg_status from the mailbox.
2388 			 */
2389 			if ((value >= 0) &&
2390 			    (value < SSM_MAX_INSTANCES) &&
2391 			    (node_present[value] == SGENV_NO_NODE_EXISTS)) {
2392 				node_present[value] = SGENV_NODE_TYPE_DS;
2393 			}
2394 
2395 		}
2396 	}
2397 }
2398 
2399 
2400 static void
sgenv_set_sensor_status(env_sensor_t * sensor)2401 sgenv_set_sensor_status(env_sensor_t *sensor)
2402 {
2403 	env_sensor_status_t	*status;
2404 
2405 	ASSERT(sensor != NULL);
2406 	status = &sensor->sd_status;
2407 
2408 	/*
2409 	 * Save the previous status so we can compare them later
2410 	 */
2411 	SG_SET_PREV_SENSOR_STATUS(*status, *status);
2412 
2413 	switch (sensor->sd_id.id.sensor_type) {
2414 	case SG_SENSOR_TYPE_ENVDB:
2415 		/*
2416 		 * We want the status of this sensor to always be OK
2417 		 * The concept of limits/thresholds do not exist for it.
2418 		 */
2419 		SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK);
2420 		break;
2421 
2422 	case SG_SENSOR_TYPE_COOLING:
2423 		/*
2424 		 * Fans have no concept of limits/thresholds, they have a state
2425 		 * which we store in the <sd_status> field so that we can see
2426 		 * when this state is changed.
2427 		 */
2428 		if (sensor->sd_value == SGENV_FAN_SPEED_HIGH) {
2429 			SG_SET_SENSOR_STATUS(*status,
2430 			    SG_SENSOR_STATUS_FAN_HIGH);
2431 
2432 		} else if (sensor->sd_value == SGENV_FAN_SPEED_LOW) {
2433 			SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_LOW);
2434 
2435 		} else if (sensor->sd_value == SGENV_FAN_SPEED_OFF) {
2436 			SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_FAN_OFF);
2437 
2438 		} else {
2439 			SG_SET_SENSOR_STATUS(*status,
2440 			    SG_SENSOR_STATUS_FAN_FAIL);
2441 		}
2442 
2443 		/*
2444 		 * If this is the first time this fan status has been read,
2445 		 * then we need to initialize the previous reading to be the
2446 		 * same as the current reading so that an event is not
2447 		 * triggered.
2448 		 *
2449 		 * [ When the env_cache is being created, the status of the
2450 		 *   sensors is set to SG_SENSOR_STATUS_OK, which is not a
2451 		 *   valid Fan status ].
2452 		 */
2453 		if (SG_GET_PREV_SENSOR_STATUS(*status) == SG_SENSOR_STATUS_OK) {
2454 			SG_SET_PREV_SENSOR_STATUS(*status, *status);
2455 		}
2456 
2457 		break;
2458 
2459 	default:
2460 		if (sensor->sd_value > sensor->sd_hi) {
2461 			SG_SET_SENSOR_STATUS(*status,
2462 			    SG_SENSOR_STATUS_HI_DANGER);
2463 
2464 		} else if (sensor->sd_value > sensor->sd_hi_warn) {
2465 			SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_HI_WARN);
2466 
2467 		} else if (sensor->sd_value < sensor->sd_lo) {
2468 			SG_SET_SENSOR_STATUS(*status,
2469 			    SG_SENSOR_STATUS_LO_DANGER);
2470 
2471 		} else if (sensor->sd_value < sensor->sd_lo_warn) {
2472 			SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_LO_WARN);
2473 
2474 		} else {
2475 			SG_SET_SENSOR_STATUS(*status, SG_SENSOR_STATUS_OK);
2476 		}
2477 		break;
2478 	}
2479 }
2480 
2481 
2482 
2483 
2484 /*
2485  * This function, when given an integer arg describing a HPU type,
2486  * returns the descriptive string associated with this HPU type.
2487  */
2488 static const char *
sgenv_get_hpu_id_str(uint_t hpu_type)2489 sgenv_get_hpu_id_str(uint_t hpu_type)
2490 {
2491 	const hpu_value_t *hpu_list = hpus;
2492 
2493 	while (hpu_list->name != (char *)NULL) {
2494 		if (hpu_list->value == hpu_type)
2495 			return (hpu_list->IDstr);
2496 		else
2497 			hpu_list++;
2498 	}
2499 	return ((char *)NULL);
2500 }
2501 
2502 
2503 /*
2504  * This function, when given an integer arg describing a sensor part,
2505  * returns the descriptive string associated with this sensor part.
2506  */
2507 static const char *
sgenv_get_part_str(uint_t sensor_part)2508 sgenv_get_part_str(uint_t sensor_part)
2509 {
2510 	const part_value_t *part_list = parts;
2511 
2512 	while (part_list->name != (char *)NULL) {
2513 		if (part_list->value == sensor_part)
2514 			return (part_list->name);
2515 		else
2516 			part_list++;
2517 	}
2518 	return ((char *)NULL);
2519 }
2520 
2521 
2522 /*
2523  * This function, when given an integer arg describing a sensor type,
2524  * returns the descriptive string associated with this sensor type.
2525  */
2526 static const char *
sgenv_get_type_str(uint_t sensor_type)2527 sgenv_get_type_str(uint_t sensor_type)
2528 {
2529 	const type_value_t *type_list = types;
2530 
2531 	while (type_list->name != (char *)NULL) {
2532 		if (type_list->value == sensor_type)
2533 			return (type_list->name);
2534 		else
2535 			type_list++;
2536 	}
2537 	return ((char *)NULL);
2538 }
2539 
2540 
2541 /*
2542  * This function takes a sensor TagID and generates a string describing
2543  * where in the system the sensor is.
2544  */
2545 static void
sgenv_tagid_to_string(sensor_id_t id,char * str)2546 sgenv_tagid_to_string(sensor_id_t id, char *str)
2547 {
2548 	const char	*hpu_str;
2549 	const char	*part_str;
2550 	const char	*type_str;
2551 
2552 	ASSERT(str != NULL);
2553 
2554 	hpu_str = sgenv_get_hpu_id_str(id.id.hpu_type);
2555 	part_str = sgenv_get_part_str(id.id.sensor_part);
2556 	type_str = sgenv_get_type_str(id.id.sensor_type);
2557 
2558 	(void) sprintf(str,
2559 	    "Sensor: Node=%d, Board=%s%d, Device=%s%d, Type=%s%d: reading has ",
2560 	    id.id.node_id,
2561 	    ((hpu_str != NULL) ? hpu_str : ""),
2562 	    id.id.hpu_slot,
2563 	    ((part_str != NULL) ? part_str : ""),
2564 	    id.id.sensor_partnum,
2565 	    ((type_str != NULL) ? type_str : ""),
2566 	    id.id.sensor_typenum);
2567 
2568 }
2569 
2570 
2571 /*
2572  * This interrupt handler watches for unsolicited mailbox messages from the SC
2573  * telling it that the Keyswitch Position had changed. It then informs the
2574  * Sysevent Framework of this change.
2575  */
2576 static uint_t
sgenv_keyswitch_handler(char * arg)2577 sgenv_keyswitch_handler(char *arg)
2578 {
2579 	DCMN_ERR_S(f, "sgenv_keyswitch_handler()");
2580 
2581 	sysevent_t		*ev = NULL;
2582 	sysevent_id_t		eid;
2583 	sysevent_value_t	se_val;
2584 	sysevent_attr_list_t	*ev_attr_list = NULL;
2585 	sg_event_key_position_t	*payload = NULL;
2586 	sbbc_msg_t		*msg = NULL;
2587 	int			err;
2588 
2589 	DCMN_ERR_EVENT(CE_NOTE, "%s called", f);
2590 
2591 	if (arg == NULL) {
2592 		DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
2593 		return (DDI_INTR_CLAIMED);
2594 	}
2595 
2596 	msg = (sbbc_msg_t *)arg;
2597 	if (msg->msg_buf == NULL) {
2598 		DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
2599 		return (DDI_INTR_CLAIMED);
2600 	}
2601 
2602 	payload = (sg_event_key_position_t *)msg->msg_buf;
2603 	if (payload == NULL) {
2604 		DCMN_ERR_EVENT(CE_NOTE, "%s: payload == NULL", f);
2605 		return (DDI_INTR_CLAIMED);
2606 	}
2607 
2608 	DCMN_ERR_EVENT(CE_NOTE, "Key posn = %d", (int)*payload);
2609 
2610 
2611 	/*
2612 	 * Allocate memory for sysevent buffer.
2613 	 */
2614 	ev = sysevent_alloc(EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE,
2615 	    EP_SGENV, SE_NOSLEEP);
2616 	if (ev == NULL) {
2617 		cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event",
2618 		    f, EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE);
2619 		return (DDI_INTR_CLAIMED);
2620 	}
2621 
2622 
2623 	/*
2624 	 * Set the DOMAIN_WHAT_CHANGED attribute.
2625 	 */
2626 	se_val.value_type = SE_DATA_TYPE_STRING;
2627 	se_val.value.sv_string = DOMAIN_KEYSWITCH;
2628 	err = sysevent_add_attr(&ev_attr_list, DOMAIN_WHAT_CHANGED,
2629 	    &se_val, SE_NOSLEEP);
2630 	if (err != 0) {
2631 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2632 		    DOMAIN_WHAT_CHANGED, EC_DOMAIN,
2633 		    ESC_DOMAIN_STATE_CHANGE);
2634 		sysevent_free(ev);
2635 		return (DDI_INTR_CLAIMED);
2636 	}
2637 
2638 
2639 	/*
2640 	 * Log this event with sysevent framework.
2641 	 */
2642 	if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
2643 		cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event",
2644 		    EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE);
2645 		sysevent_free_attr(ev_attr_list);
2646 		sysevent_free(ev);
2647 		return (DDI_INTR_CLAIMED);
2648 	}
2649 	err = log_sysevent(ev, SE_NOSLEEP, &eid);
2650 	if (err != 0) {
2651 		cmn_err(CE_WARN, "Failed to log %s/%s event",
2652 		    EC_DOMAIN, ESC_DOMAIN_STATE_CHANGE);
2653 		sysevent_free(ev);
2654 		return (DDI_INTR_CLAIMED);
2655 	}
2656 
2657 	/* clean up */
2658 	sysevent_free(ev);
2659 
2660 	return (DDI_INTR_CLAIMED);
2661 }
2662 
2663 
2664 /*
2665  * This interrupt handler watches for unsolicited mailbox messages from the SC
2666  * telling it that an environmental sensor has exceeded a threshold/limit level
2667  * or has returned to normal having previously exceeded a threshold/limit level.
2668  * It then informs the Sysevent Framework of this change and updates the
2669  * env_cache.
2670  */
2671 static uint_t
sgenv_env_data_handler(char * arg)2672 sgenv_env_data_handler(char *arg)
2673 {
2674 	DCMN_ERR_S(f, "sgenv_env_data_handler()");
2675 
2676 	sg_event_env_changed_t	*payload = NULL;
2677 	sbbc_msg_t		*msg = NULL;
2678 
2679 	DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
2680 
2681 	if (arg == NULL) {
2682 		DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
2683 		return (DDI_INTR_CLAIMED);
2684 	}
2685 
2686 	msg = (sbbc_msg_t *)arg;
2687 
2688 	if (msg->msg_buf == NULL) {
2689 		DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
2690 		return (DDI_INTR_CLAIMED);
2691 	}
2692 
2693 	payload = (sg_event_env_changed_t *)msg->msg_buf;
2694 
2695 	/*
2696 	 * We check the first field of the msg_buf to see if the event_type
2697 	 * is SC_EVENT_ENV, if it is then we handle the event.
2698 	 */
2699 	if (payload->event_type != SC_EVENT_ENV) {
2700 		return (DDI_INTR_CLAIMED);
2701 	}
2702 
2703 	/*
2704 	 * We now need to signal to the env background thread to ask the SC
2705 	 * for env readings and discover which sensor caused the SC to send
2706 	 * the ENV event before sending a sysevent to userland.
2707 	 */
2708 	sgenv_indicate_cache_update_needed(ENV_CACHE);
2709 
2710 	return (DDI_INTR_CLAIMED);
2711 }
2712 
2713 
2714 /*
2715  * This interrupt handler watches for unsolicited mailbox messages from the SC
2716  * telling it that the status of a fan has changed. We register a sysevent
2717  * and trigger a softint to update the env cache.
2718  */
2719 static uint_t
sgenv_fan_status_handler(char * arg)2720 sgenv_fan_status_handler(char *arg)
2721 {
2722 	DCMN_ERR_S(f, "sgenv_fan_status_handler()");
2723 
2724 	sysevent_t		*ev = NULL;
2725 	sysevent_id_t		eid;
2726 	sysevent_value_t	se_val;
2727 	sysevent_attr_list_t	*ev_attr_list = NULL;
2728 	sg_event_fan_status_t	*payload = NULL;
2729 	sbbc_msg_t		*msg = NULL;
2730 	char			fan_str[MAXNAMELEN];
2731 	int			err;
2732 
2733 	DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
2734 
2735 	if (arg == NULL) {
2736 		DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
2737 		return (DDI_INTR_CLAIMED);
2738 	}
2739 
2740 	msg = (sbbc_msg_t *)arg;
2741 
2742 	/*
2743 	 * We check the first field of the msg_buf to see if the event_type
2744 	 * is SC_EVENT_FAN
2745 	 */
2746 	if (msg->msg_buf == NULL) {
2747 		DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
2748 		return (DDI_INTR_CLAIMED);
2749 	}
2750 
2751 	payload = (sg_event_fan_status_t *)msg->msg_buf;
2752 
2753 	/*
2754 	 * If another type of ENV Event triggered this handler then we simply
2755 	 * return now.
2756 	 */
2757 	if (payload->event_type != SC_EVENT_FAN) {
2758 		return (DDI_INTR_CLAIMED);
2759 	}
2760 
2761 	/*
2762 	 * Allocate memory for sysevent buffer.
2763 	 */
2764 	ev = sysevent_alloc(EC_ENV, ESC_ENV_FAN, EP_SGENV, SE_NOSLEEP);
2765 	if (ev == NULL) {
2766 		cmn_err(CE_WARN, "%s: Failed to alloc mem for %s/%s event",
2767 		    f, EC_ENV, ESC_ENV_FAN);
2768 		return (DDI_INTR_CLAIMED);
2769 	}
2770 
2771 
2772 	/*
2773 	 * Set the following attributes for this event:
2774 	 *
2775 	 *	ENV_FRU_ID
2776 	 *	ENV_FRU_RESOURCE_ID
2777 	 *	ENV_FRU_DEVICE
2778 	 *	ENV_FRU_STATE
2779 	 *	ENV_MSG
2780 	 *
2781 	 */
2782 	se_val.value_type = SE_DATA_TYPE_STRING;
2783 	se_val.value.sv_string = ENV_RESERVED_ATTR;
2784 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP);
2785 	if (err != 0) {
2786 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2787 		    ENV_FRU_ID, EC_ENV, ESC_ENV_FAN);
2788 		sysevent_free(ev);
2789 		return (DDI_INTR_CLAIMED);
2790 	}
2791 
2792 	se_val.value_type = SE_DATA_TYPE_STRING;
2793 	se_val.value.sv_string = ENV_RESERVED_ATTR;
2794 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID,
2795 	    &se_val, SE_NOSLEEP);
2796 	if (err != 0) {
2797 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2798 		    ENV_FRU_RESOURCE_ID, EC_ENV, ESC_ENV_FAN);
2799 		sysevent_free_attr(ev_attr_list);
2800 		sysevent_free(ev);
2801 		return (DDI_INTR_CLAIMED);
2802 	}
2803 
2804 	se_val.value_type = SE_DATA_TYPE_STRING;
2805 	se_val.value.sv_string = ENV_RESERVED_ATTR;
2806 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE,
2807 	    &se_val, SE_NOSLEEP);
2808 	if (err != 0) {
2809 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2810 		    ENV_FRU_DEVICE, EC_ENV, ESC_ENV_FAN);
2811 		sysevent_free_attr(ev_attr_list);
2812 		sysevent_free(ev);
2813 		return (DDI_INTR_CLAIMED);
2814 	}
2815 
2816 	/*
2817 	 * Checks the fan to see if it has failed.
2818 	 */
2819 	se_val.value_type = SE_DATA_TYPE_INT32;
2820 	switch (payload->fan_speed) {
2821 	case SGENV_FAN_SPEED_OFF:
2822 	case SGENV_FAN_SPEED_LOW:
2823 	case SGENV_FAN_SPEED_HIGH:
2824 		se_val.value.sv_int32 = ENV_OK;
2825 		break;
2826 
2827 	case SGENV_FAN_SPEED_UNKNOWN:
2828 	default:
2829 		se_val.value.sv_int32 = ENV_FAILED;
2830 		break;
2831 	}
2832 
2833 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE,
2834 	    &se_val, SE_NOSLEEP);
2835 	if (err != 0) {
2836 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2837 		    ENV_FRU_STATE, EC_ENV, ESC_ENV_FAN);
2838 		sysevent_free_attr(ev_attr_list);
2839 		sysevent_free(ev);
2840 		return (DDI_INTR_CLAIMED);
2841 	}
2842 
2843 
2844 	/*
2845 	 * Create the message to be sent to sysevent.
2846 	 */
2847 	(void) sprintf(fan_str,
2848 	    "The status of the fan in Node%d/Slot%d is now ",
2849 	    payload->node_id, payload->slot_number);
2850 	switch (payload->fan_speed) {
2851 	case SGENV_FAN_SPEED_OFF:
2852 		(void) strcat(fan_str, SGENV_FAN_SPEED_OFF_STR);
2853 		break;
2854 
2855 	case SGENV_FAN_SPEED_LOW:
2856 		(void) strcat(fan_str, SGENV_FAN_SPEED_LOW_STR);
2857 		break;
2858 
2859 	case SGENV_FAN_SPEED_HIGH:
2860 		(void) strcat(fan_str, SGENV_FAN_SPEED_HIGH_STR);
2861 		break;
2862 
2863 	case SGENV_FAN_SPEED_UNKNOWN:
2864 	default:
2865 		(void) strcat(fan_str, SGENV_FAN_SPEED_UNKNOWN_STR);
2866 		break;
2867 	}
2868 
2869 	DCMN_ERR_EVENT(CE_NOTE, "Fan: %s", fan_str);
2870 
2871 	se_val.value_type = SE_DATA_TYPE_STRING;
2872 	se_val.value.sv_string = fan_str;
2873 	err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP);
2874 	if (err != 0) {
2875 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2876 		    ENV_MSG, EC_ENV, ESC_ENV_FAN);
2877 		sysevent_free_attr(ev_attr_list);
2878 		sysevent_free(ev);
2879 		return (DDI_INTR_CLAIMED);
2880 	}
2881 
2882 
2883 	/*
2884 	 * Log this event with sysevent framework.
2885 	 */
2886 	if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
2887 		cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event",
2888 		    EC_ENV, ESC_ENV_FAN);
2889 		sysevent_free_attr(ev_attr_list);
2890 		sysevent_free(ev);
2891 		return (DDI_INTR_CLAIMED);
2892 	}
2893 	err = log_sysevent(ev, SE_NOSLEEP, &eid);
2894 	if (err != 0) {
2895 		cmn_err(CE_WARN, "Failed to log %s/%s event",
2896 		    EC_ENV, ESC_ENV_FAN);
2897 		sysevent_free(ev);
2898 		return (DDI_INTR_CLAIMED);
2899 	}
2900 	sysevent_free(ev);
2901 
2902 	/*
2903 	 * We now need to signal to the env background thread to ask the SC
2904 	 * for env readings and discover which sensor caused the SC to send
2905 	 * the ENV event before sending a sysevent to userland.
2906 	 */
2907 	sgenv_indicate_cache_update_needed(ENV_CACHE);
2908 
2909 	return (DDI_INTR_CLAIMED);
2910 }
2911 
2912 
2913 /*
2914  * This function informs the Sysevent Framework that a temperature, voltage
2915  * or current reading for a sensor has exceeded its threshold/limit value or
2916  * that the reading has returned to a safe value having exceeded its
2917  * threshold/limit value previously.
2918  */
2919 static int
sgenv_process_threshold_event(env_sensor_t sensor)2920 sgenv_process_threshold_event(env_sensor_t sensor)
2921 {
2922 	DCMN_ERR_S(f, "sgenv_process_threshold_event()");
2923 
2924 	sysevent_t		*ev = NULL;
2925 	sysevent_id_t		eid;
2926 	sysevent_value_t	se_val;
2927 	sysevent_attr_list_t	*ev_attr_list = NULL;
2928 	int			err;
2929 
2930 	char	sensor_str[MAX_TAG_ID_STR_LEN];	/* holds the sensor TagID */
2931 
2932 	/*
2933 	 * This function handles the case when a temperature reading passes
2934 	 * a threshold/limit level and also the case when there are power
2935 	 * fluctuations (voltage/current readings pass a threshold/limit level)
2936 	 * so we need to work out which case it is.
2937 	 *
2938 	 * if <temp_event_type> is TRUE, then need to handle an event
2939 	 * of type ESC_ENV_TEMP.
2940 	 */
2941 	int	temp_event_type;
2942 
2943 	switch (sensor.sd_id.id.sensor_type) {
2944 	case SG_SENSOR_TYPE_TEMPERATURE:
2945 		temp_event_type = TRUE;
2946 		ev = sysevent_alloc(EC_ENV, ESC_ENV_TEMP, EP_SGENV, SE_NOSLEEP);
2947 		if (ev == NULL) {
2948 			cmn_err(CE_WARN, "Failed to allocate sysevent buffer "
2949 			    "for %s/%s event", EC_ENV, ESC_ENV_TEMP);
2950 			return (DDI_FAILURE);
2951 		}
2952 		break;
2953 
2954 	default:
2955 		temp_event_type = FALSE;
2956 		ev = sysevent_alloc(EC_ENV, ESC_ENV_POWER,
2957 		    EP_SGENV, SE_NOSLEEP);
2958 		if (ev == NULL) {
2959 			cmn_err(CE_WARN, "Failed to allocate sysevent buffer "
2960 			    "for %s/%s event", EC_ENV, ESC_ENV_POWER);
2961 			return (DDI_FAILURE);
2962 		}
2963 		break;
2964 	}
2965 
2966 
2967 	/*
2968 	 * Set the following attributes for this event:
2969 	 *
2970 	 *	ENV_FRU_ID
2971 	 *	ENV_FRU_RESOURCE_ID
2972 	 *	ENV_FRU_DEVICE
2973 	 *	ENV_FRU_STATE
2974 	 *	ENV_MSG
2975 	 *
2976 	 */
2977 	se_val.value_type = SE_DATA_TYPE_STRING;
2978 	se_val.value.sv_string = ENV_RESERVED_ATTR;
2979 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_ID, &se_val, SE_NOSLEEP);
2980 	if (err != 0) {
2981 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2982 		    ENV_FRU_ID, EC_ENV,
2983 		    (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
2984 		sysevent_free(ev);
2985 		return (DDI_FAILURE);
2986 	}
2987 
2988 	se_val.value_type = SE_DATA_TYPE_STRING;
2989 	se_val.value.sv_string = ENV_RESERVED_ATTR;
2990 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_RESOURCE_ID,
2991 	    &se_val, SE_NOSLEEP);
2992 	if (err != 0) {
2993 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
2994 		    ENV_FRU_RESOURCE_ID, EC_ENV,
2995 		    (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
2996 		sysevent_free_attr(ev_attr_list);
2997 		sysevent_free(ev);
2998 		return (DDI_FAILURE);
2999 	}
3000 
3001 	se_val.value_type = SE_DATA_TYPE_STRING;
3002 	se_val.value.sv_string = ENV_RESERVED_ATTR;
3003 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_DEVICE,
3004 	    &se_val, SE_NOSLEEP);
3005 	if (err != 0) {
3006 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
3007 		    ENV_FRU_DEVICE, EC_ENV,
3008 		    (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3009 		sysevent_free_attr(ev_attr_list);
3010 		sysevent_free(ev);
3011 		return (DDI_FAILURE);
3012 	}
3013 
3014 
3015 	/*
3016 	 * We need to find out the status of the reading.
3017 	 */
3018 	se_val.value_type = SE_DATA_TYPE_INT32;
3019 	switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) {
3020 	case SG_SENSOR_STATUS_OK:
3021 		se_val.value.sv_int32 = ENV_OK;
3022 		break;
3023 
3024 	case SG_SENSOR_STATUS_LO_WARN:
3025 	case SG_SENSOR_STATUS_HI_WARN:
3026 		se_val.value.sv_int32 = ENV_WARNING;
3027 		break;
3028 
3029 	case SG_SENSOR_STATUS_LO_DANGER:
3030 	case SG_SENSOR_STATUS_HI_DANGER:
3031 	default:
3032 		se_val.value.sv_int32 = ENV_FAILED;
3033 		break;
3034 	}
3035 
3036 	/*
3037 	 * Add ENV_FRU_STATE attribute.
3038 	 */
3039 	err = sysevent_add_attr(&ev_attr_list, ENV_FRU_STATE,
3040 	    &se_val, SE_NOSLEEP);
3041 	if (err != 0) {
3042 		cmn_err(CE_WARN, "Failed to add attr[%s] for %s/%s event "
3043 		    "(Err=%d)", ENV_FRU_STATE, EC_ENV,
3044 		    (temp_event_type ? ESC_ENV_TEMP: ESC_ENV_POWER),
3045 		    err);
3046 		sysevent_free_attr(ev_attr_list);
3047 		sysevent_free(ev);
3048 		return (DDI_FAILURE);
3049 	}
3050 
3051 
3052 	/*
3053 	 * Save the sensor TagID as a string so that a meaningful message
3054 	 * can be passed to as part of the ENV_MSG attribute.
3055 	 */
3056 	sgenv_tagid_to_string(sensor.sd_id, sensor_str);
3057 
3058 	/*
3059 	 * We need to add a string stating what type of event occurred.
3060 	 */
3061 	switch (SG_GET_SENSOR_STATUS(sensor.sd_status)) {
3062 	case SG_SENSOR_STATUS_OK:
3063 		(void) strcat(sensor_str, SGENV_EVENT_MSG_OK);
3064 		break;
3065 
3066 	case SG_SENSOR_STATUS_LO_WARN:
3067 		(void) strcat(sensor_str, SGENV_EVENT_MSG_LO_WARN);
3068 		break;
3069 
3070 	case SG_SENSOR_STATUS_HI_WARN:
3071 		(void) strcat(sensor_str, SGENV_EVENT_MSG_HI_WARN);
3072 		break;
3073 
3074 	case SG_SENSOR_STATUS_LO_DANGER:
3075 		(void) strcat(sensor_str, SGENV_EVENT_MSG_LO_DANGER);
3076 		break;
3077 
3078 	case SG_SENSOR_STATUS_HI_DANGER:
3079 		(void) strcat(sensor_str, SGENV_EVENT_MSG_HI_DANGER);
3080 		break;
3081 
3082 	default:
3083 		DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown sensor status", f);
3084 		(void) strcat(sensor_str, SGENV_EVENT_MSG_UNKNOWN);
3085 		break;
3086 	}
3087 
3088 	DCMN_ERR_EVENT(CE_NOTE, "Temp/Power: %s", sensor_str);
3089 
3090 	/*
3091 	 * Add ENV_MSG attribute.
3092 	 */
3093 	se_val.value_type = SE_DATA_TYPE_STRING;
3094 	se_val.value.sv_string = sensor_str;
3095 	err = sysevent_add_attr(&ev_attr_list, ENV_MSG, &se_val, SE_NOSLEEP);
3096 	if (err != 0) {
3097 		cmn_err(CE_WARN, "Failed to add attr [%s] for %s/%s event",
3098 		    ENV_MSG, EC_ENV,
3099 		    (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3100 		sysevent_free_attr(ev_attr_list);
3101 		sysevent_free(ev);
3102 		return (DDI_FAILURE);
3103 	}
3104 
3105 
3106 	/*
3107 	 * Log this event with sysevent framework.
3108 	 */
3109 	if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
3110 		cmn_err(CE_WARN, "Failed to attach attr list for %s/%s event",
3111 		    EC_ENV,
3112 		    (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3113 		sysevent_free_attr(ev_attr_list);
3114 		sysevent_free(ev);
3115 		return (DDI_FAILURE);
3116 	}
3117 	err = log_sysevent(ev, SE_NOSLEEP, &eid);
3118 	if (err != 0) {
3119 		cmn_err(CE_WARN, "Failed to log %s/%s event", EC_ENV,
3120 		    (temp_event_type ? ESC_ENV_TEMP : ESC_ENV_POWER));
3121 		sysevent_free(ev);
3122 		return (DDI_FAILURE);
3123 	}
3124 	sysevent_free(ev);
3125 
3126 	return (DDI_SUCCESS);
3127 }
3128 
3129 
3130 /*
3131  * This function gets called when sgenv is notified of a DR event.
3132  * We need to update the board and env caches to ensure that they
3133  * now contain the latest system information..
3134  */
3135 static uint_t
sgenv_dr_event_handler(char * arg)3136 sgenv_dr_event_handler(char *arg)
3137 {
3138 	DCMN_ERR_S(f, "sgenv_dr_event_handler()");
3139 
3140 	sg_system_fru_descriptor_t	*payload = NULL;
3141 	sbbc_msg_t			*msg = NULL;
3142 
3143 	DCMN_ERR_EVENT(CE_NOTE, "%s: just been triggered.", f);
3144 	DCMN_ERR_EVENT(CE_NOTE, "%s: Start: %lld", f, gethrtime());
3145 
3146 
3147 	if (arg == NULL) {
3148 		DCMN_ERR_EVENT(CE_NOTE, "%s: arg == NULL", f);
3149 		return (DDI_INTR_CLAIMED);
3150 	}
3151 
3152 	msg = (sbbc_msg_t *)arg;
3153 
3154 	if (msg->msg_buf == NULL) {
3155 		DCMN_ERR_EVENT(CE_NOTE, "%s: msg_buf == NULL", f);
3156 		return (DDI_INTR_CLAIMED);
3157 	}
3158 
3159 	payload = (sg_system_fru_descriptor_t *)msg->msg_buf;
3160 
3161 	/*
3162 	 * We check the event_details field of the msg_buf to see if
3163 	 * we need to invalidate the caches
3164 	 */
3165 	switch (payload->event_details) {
3166 	case SG_EVT_BOARD_ABSENT:
3167 	case SG_EVT_BOARD_PRESENT:
3168 	case SG_EVT_UNASSIGN:
3169 	case SG_EVT_ASSIGN:
3170 	case SG_EVT_UNAVAILABLE:
3171 	case SG_EVT_AVAILABLE:
3172 	case SG_EVT_POWER_OFF:
3173 	case SG_EVT_POWER_ON:
3174 	case SG_EVT_PASSED_TEST:
3175 	case SG_EVT_FAILED_TEST:
3176 		/*
3177 		 * We now need to signal to the background threads to poll the
3178 		 * SC for env readings and board info which may have changed
3179 		 * as a result of the DR changes. This will cause the
3180 		 * env_cache and the board_cache to be updated.
3181 		 */
3182 		DCMN_ERR_EVENT(CE_NOTE, "%s: about to signal to background "
3183 		    "threads due to event %d.", f, payload->event_details);
3184 
3185 		sgenv_indicate_cache_update_needed(ENV_CACHE);
3186 		sgenv_indicate_cache_update_needed(BOARD_CACHE);
3187 
3188 		break;
3189 
3190 	default:
3191 		DCMN_ERR_EVENT(CE_NOTE, "%s: Unknown DR event type.", f);
3192 		break;
3193 	}
3194 
3195 	DCMN_ERR_EVENT(CE_NOTE, "%s: Finish: %lld", f, gethrtime());
3196 
3197 	return (DDI_INTR_CLAIMED);
3198 }
3199 
3200 
3201 /*
3202  * This function is called by the interrupt handlers watching for ENV/DR events
3203  * from the SC. It indicates to the thread responsible for the cache specified
3204  * that it needs to update its data.
3205  */
3206 static void
sgenv_indicate_cache_update_needed(int cache_type)3207 sgenv_indicate_cache_update_needed(int cache_type)
3208 {
3209 	DCMN_ERR_S(f, "sgenv_indicate_cache_update_needed()");
3210 
3211 	/*
3212 	 * If the cache is already being updated, we set a flag to
3213 	 * inform the thread that it needs to reread the data when
3214 	 * it is finished as we cannot be sure if the data was read
3215 	 * before or after the time this handler was triggered.
3216 	 *
3217 	 * Otherwise the thread is waiting for us and we signal
3218 	 * to it to start reading the data.
3219 	 */
3220 	switch (cache_type) {
3221 	case ENV_CACHE:
3222 		mutex_enter(&env_flag_lock);
3223 		if (env_cache_updating) {
3224 			DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already "
3225 			    "updating env cache", f);
3226 			env_cache_update_needed = B_TRUE;
3227 
3228 		} else {
3229 			DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal "
3230 			    "to env thread", f);
3231 			cv_signal(&env_flag_cond);
3232 		}
3233 		mutex_exit(&env_flag_lock);
3234 		break;
3235 
3236 	case BOARD_CACHE:
3237 		mutex_enter(&board_flag_lock);
3238 		if (board_cache_updating) {
3239 			DCMN_ERR_THREAD(CE_NOTE, "%s: Thread already "
3240 			    "updating board cache", f);
3241 			board_cache_update_needed = B_TRUE;
3242 
3243 		} else {
3244 			DCMN_ERR_THREAD(CE_NOTE, "%s: Sending signal "
3245 			    "to board thread", f);
3246 			cv_signal(&board_flag_cond);
3247 		}
3248 		mutex_exit(&board_flag_lock);
3249 		break;
3250 
3251 	default:
3252 		DCMN_ERR(CE_NOTE, "%s: Unknown cache type:0x%x", f, cache_type);
3253 		break;
3254 	}
3255 }
3256