xref: /titanic_44/usr/src/cmd/fm/modules/common/disk-monitor/hotplug_mgr.c (revision e5dcf7beb7c949f9234713d5818b581ec3825443)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/sysevent/dr.h>
29 #include <sys/sysevent/eventdefs.h>
30 #include <sys/sunddi.h>	/* for the EC's for DEVFS */
31 
32 #include <errno.h>
33 #include <string.h>
34 #include <strings.h>
35 #include <stdio.h>
36 #include <unistd.h>
37 #include <time.h>
38 #include <pthread.h>
39 
40 #include <libsysevent.h>
41 #include <sys/sysevent_impl.h>
42 
43 #include <libnvpair.h>
44 #include <config_admin.h>
45 
46 #include "disk_monitor.h"
47 #include "hotplug_mgr.h"
48 #include "schg_mgr.h"
49 #include "dm_platform.h"
50 
51 typedef struct sysevent_event {
52 	sysevent_t	*evp;
53 } sysevent_event_t;
54 
55 /* Lock guarantees the ordering of the incoming sysevents */
56 static pthread_t g_sysev_tid;
57 static pthread_mutex_t g_event_handler_lock = PTHREAD_MUTEX_INITIALIZER;
58 static pthread_cond_t g_event_handler_cond = PTHREAD_COND_INITIALIZER;
59 static qu_t *g_sysev_queue = NULL;
60 static thread_state_t g_sysev_thread_state = TS_NOT_RUNNING;
61 /*
62  * The sysevent handle is bound to the main sysevent handler
63  * (event_handler), for each of the hotplug sysevents.
64  */
65 static sysevent_handle_t *sysevent_handle = NULL;
66 
67 static void free_sysevent_event(void *p);
68 
/*
 * Sleep for the given number of whole seconds.
 *
 * Returns the result of nanosleep(): 0 when the full interval elapsed,
 * otherwise a negative value with errno set by nanosleep().
 */
static int
nsleep(int seconds)
{
	struct timespec delay;

	delay.tv_nsec = 0;
	delay.tv_sec = seconds;

	return (nanosleep(&delay, NULL));
}
79 
80 static int
config_list_ext_poll(int num,char * const * path,cfga_list_data_t ** list_array,int * nlist,int flag)81 config_list_ext_poll(int num, char * const *path,
82     cfga_list_data_t **list_array, int *nlist, int flag)
83 {
84 	boolean_t done = B_FALSE;
85 	boolean_t timedout = B_FALSE;
86 	boolean_t interrupted = B_FALSE;
87 	int timeout = 0;
88 	int e;
89 #define	TIMEOUT_MAX 60
90 
91 	do {
92 		switch ((e = config_list_ext(num, path, list_array,
93 		    nlist, NULL, NULL, NULL, flag))) {
94 
95 		case CFGA_OK:
96 
97 			return (CFGA_OK);
98 
99 		case CFGA_BUSY:
100 		case CFGA_SYSTEM_BUSY:
101 
102 			if (timeout++ >= TIMEOUT_MAX)
103 				timedout = B_TRUE;
104 			else {
105 				if (nsleep(1) < 0)
106 					interrupted = (errno == EINTR);
107 			}
108 			break;
109 
110 		default:
111 			done = B_TRUE;
112 			break;
113 
114 		}
115 	} while (!done && !timedout && !interrupted);
116 
117 	return (e);
118 }
119 
120 /*
121  * Given a physical attachment point with a dynamic component
122  * (as in the case of SCSI APs), ensure the 'controller'
123  * portion of the dynamic component matches the physical portion.
124  * Argument 'adjusted' must point to a buffer of at least
125  * MAXPATHLEN bytes.
126  */
127 void
adjust_dynamic_ap(const char * apid,char * adjusted)128 adjust_dynamic_ap(const char *apid, char *adjusted)
129 {
130 	cfga_list_data_t *list_array = NULL;
131 	int nlist;
132 	char *ap_path[1];
133 	char phys[MAXPATHLEN];
134 	char dev_phys[MAXPATHLEN];
135 	char *dyn;
136 	int c, t, d;
137 
138 	dm_assert((strlen(apid) + 8 /* strlen("/devices") */) < MAXPATHLEN);
139 
140 	/* In the case of any error, return the unadjusted APID */
141 	(void) strcpy(adjusted, apid);
142 
143 	/* if AP is not dynamic or not a disk node, no need to adjust it */
144 	dyn = strstr(apid, "::");
145 	if ((dyn == NULL) || (dyn == apid) ||
146 	    (sscanf(dyn, "::dsk/c%dt%dd%d", &c, &t, &d) != 3))
147 		return;
148 
149 	/*
150 	 * Copy the AP_ID and terminate it at the '::' that we know
151 	 * for a fact it contains.  Pre-pend '/devices' for the sake
152 	 * of cfgadm_scsi, and get the cfgadm data for the controller.
153 	 */
154 	(void) strcpy(phys, apid);
155 	*strstr(phys, "::") = '\0';
156 	(void) snprintf(dev_phys, MAXPATHLEN, "/devices%s", phys);
157 	ap_path[0] = dev_phys;
158 
159 	if (config_list_ext_poll(1, ap_path, &list_array, &nlist, 0)
160 	    != CFGA_OK)
161 		return;
162 
163 	dm_assert(nlist == 1);
164 
165 	if (sscanf(list_array[0].ap_log_id, "c%d", &c) == 1)
166 		(void) snprintf(adjusted, MAXPATHLEN, "%s::dsk/c%dt%dd%d",
167 		    phys, c, t, d);
168 
169 	free(list_array);
170 }
171 
/*
 * Returns non-zero when the attachment point path contains the
 * substring ":scsi:", and 0 otherwise.
 */
static int
disk_ap_is_scsi(const char *ap_path)
{
	const char *marker = strstr(ap_path, ":scsi:");

	return (marker != NULL);
}
177 
178 /*
179  * Looks up the attachment point's state and returns it in one of
180  * the hotplug states that the state change manager understands.
181  */
182 hotplug_state_t
disk_ap_state_to_hotplug_state(diskmon_t * diskp)183 disk_ap_state_to_hotplug_state(diskmon_t *diskp)
184 {
185 	hotplug_state_t state = HPS_UNKNOWN;
186 	cfga_list_data_t *list_array = NULL;
187 	int rv, nlist;
188 	char *app = (char *)dm_prop_lookup(diskp->app_props,
189 	    DISK_AP_PROP_APID);
190 	char adj_app[MAXPATHLEN];
191 	char *ap_path[1];
192 	char *devices_app;
193 	int len;
194 	boolean_t list_valid = B_FALSE;
195 
196 	dm_assert(app != NULL);
197 
198 	adjust_dynamic_ap(app, adj_app);
199 	ap_path[0] = adj_app;
200 	devices_app = NULL;
201 
202 	rv = config_list_ext_poll(1, ap_path, &list_array, &nlist,
203 	    CFGA_FLAG_LIST_ALL);
204 
205 	if (rv != CFGA_OK) {
206 		/*
207 		 * The SATA and SCSI libcfgadm plugins add a
208 		 * /devices to the phys id; to use it, we must
209 		 * prepend this string before the call.
210 		 */
211 		len = 8 /* strlen("/devices") */ + strlen(adj_app) + 1;
212 		devices_app = dmalloc(len);
213 		(void) snprintf(devices_app, len, "/devices%s",
214 		    adj_app);
215 		ap_path[0] = devices_app;
216 
217 		rv = config_list_ext_poll(1, ap_path, &list_array, &nlist,
218 		    CFGA_FLAG_LIST_ALL);
219 	}
220 
221 	/*
222 	 * cfgadm_scsi will return an error for an absent target,
223 	 * so treat an error as "absent"; otherwise, make sure
224 	 * cfgadm_xxx has returned a list of 1 item
225 	 */
226 	if (rv == CFGA_OK) {
227 		dm_assert(nlist == 1);
228 		list_valid = B_TRUE;
229 	} else if (disk_ap_is_scsi(ap_path[0]))
230 		state = HPS_ABSENT;
231 
232 	if (devices_app != NULL)
233 		dfree(devices_app, len);
234 
235 	if (list_valid) {
236 		/*
237 		 * The following truth table defines how each state is
238 		 * computed:
239 		 *
240 		 * +----------------------------------------------+
241 		 * |		  | o_state | r_state | condition |
242 		 * |		  +---------+---------+-----------|
243 		 * | Absent	  |Don'tCare|Disc/Empt|	Don'tCare |
244 		 * | Present	  |Unconfgrd|Connected|	 unknown  |
245 		 * | Configured	  |Configred|Connected|	Don'tCare |
246 		 * | Unconfigured |Unconfgrd|Connected|	   OK	  |
247 		 * +--------------+---------+---------+-----------+
248 		 */
249 
250 		if (list_array[0].ap_r_state == CFGA_STAT_EMPTY ||
251 		    list_array[0].ap_r_state == CFGA_STAT_DISCONNECTED)
252 			state = HPS_ABSENT;
253 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
254 		    list_array[0].ap_o_state == CFGA_STAT_UNCONFIGURED &&
255 		    list_array[0].ap_cond == CFGA_COND_UNKNOWN)
256 			state = HPS_PRESENT;
257 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
258 		    list_array[0].ap_o_state == CFGA_STAT_UNCONFIGURED &&
259 		    list_array[0].ap_cond != CFGA_COND_UNKNOWN)
260 			state = HPS_UNCONFIGURED;
261 		else if (list_array[0].ap_r_state == CFGA_STAT_CONNECTED &&
262 		    list_array[0].ap_o_state == CFGA_STAT_CONFIGURED)
263 			state = HPS_CONFIGURED;
264 
265 		free(list_array);
266 	}
267 
268 	return (state);
269 }
270 
271 /*
272  * Examine the sysevent passed in and returns the hotplug state that
273  * the sysevent states (or implies, in the case of attachment point
274  * events).
275  */
276 static hotplug_state_t
disk_sysev_to_state(diskmon_t * diskp,sysevent_t * evp)277 disk_sysev_to_state(diskmon_t *diskp, sysevent_t *evp)
278 {
279 	const char *class_name, *subclass;
280 	hotplug_state_t state = HPS_UNKNOWN;
281 	sysevent_value_t se_val;
282 
283 	/*
284 	 * The state mapping is as follows:
285 	 *
286 	 * Sysevent				State
287 	 * --------------------------------------------------------
288 	 * EC_DEVFS/ESC_DEVFS_DEVI_ADD		Configured
289 	 * EC_DEVFS/ESC_DEVFS_DEVI_REMOVE	Unconfigured
290 	 * EC_DR/ESC_DR_AP_STATE_CHANGE		*[Absent/Present]
291 	 *
292 	 * (The EC_DR event requires a probe of the attachment point
293 	 * to determine the AP's state if there is no usable HINT)
294 	 *
295 	 */
296 
297 	class_name = sysevent_get_class_name(evp);
298 	subclass = sysevent_get_subclass_name(evp);
299 
300 	if (strcmp(class_name, EC_DEVFS) == 0) {
301 		if (strcmp(subclass, ESC_DEVFS_DEVI_ADD) == 0) {
302 
303 			state = HPS_CONFIGURED;
304 
305 		} else if (strcmp(subclass, ESC_DEVFS_DEVI_REMOVE) == 0) {
306 
307 			state = HPS_UNCONFIGURED;
308 
309 		}
310 
311 	} else if (strcmp(class_name, EC_DR) == 0 &&
312 	    ((strcmp(subclass, ESC_DR_AP_STATE_CHANGE) == 0) ||
313 	    (strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) == 0))) {
314 
315 		if (sysevent_lookup_attr(evp, DR_HINT, SE_DATA_TYPE_STRING,
316 		    &se_val) == 0 && se_val.value.sv_string != NULL) {
317 
318 			if (strcmp(se_val.value.sv_string, DR_HINT_INSERT)
319 			    == 0) {
320 
321 				state = HPS_PRESENT;
322 
323 			} else if (strcmp(se_val.value.sv_string,
324 			    DR_HINT_REMOVE) == 0) {
325 
326 				state = HPS_ABSENT;
327 			}
328 
329 		}
330 
331 		/*
332 		 * If the state could not be determined by the hint
333 		 * (or there was no hint), ask the AP directly.
334 		 * SCSI HBAs may send an insertion sysevent
335 		 * *after* configuring the target node, so double-
336 		 * check HPS_PRESENT
337 		 */
338 		if ((state == HPS_UNKNOWN) || (state = HPS_PRESENT))
339 			state = disk_ap_state_to_hotplug_state(diskp);
340 	}
341 
342 	return (state);
343 }
344 
345 static void
disk_split_ap_path_sata(const char * ap_path,char * device,int * target)346 disk_split_ap_path_sata(const char *ap_path, char *device, int *target)
347 {
348 	char *p;
349 	int n;
350 
351 	/*
352 	 *  /devices/rootnode/.../device:target
353 	 */
354 	(void) strncpy(device, ap_path, MAXPATHLEN);
355 	p = strrchr(device, ':');
356 	dm_assert(p != NULL);
357 	n = sscanf(p, ":%d", target);
358 	dm_assert(n == 1);
359 	*p = '\0';
360 }
361 
362 static void
disk_split_ap_path_scsi(const char * ap_path,char * device,int * target)363 disk_split_ap_path_scsi(const char *ap_path, char *device, int *target)
364 {
365 	char *p;
366 	int n;
367 
368 	/*
369 	 *  /devices/rootnode/.../device:scsi::dsk/cXtXdX
370 	 */
371 
372 	(void) strncpy(device, ap_path, MAXPATHLEN);
373 	p = strrchr(device, ':');
374 	dm_assert(p != NULL);
375 
376 	n = sscanf(p, ":dsk/c%*dt%dd%*d", target);
377 	dm_assert(n == 1);
378 
379 	*strchr(device, ':') = '\0';
380 }
381 
/*
 * Split an attachment point path into a device path and a target number.
 *
 * The AP path comes in two forms; for SATA devices,
 * it is of the form:
 *   /devices/rootnode/.../device:portnum
 * and for SCSI devices, it is of the form:
 *  /devices/rootnode/.../device:scsi::dsk/cXtXdX
 *
 * The form is chosen with disk_ap_is_scsi() and the work is delegated to
 * the matching helper.
 */
static void
disk_split_ap_path(const char *ap_path, char *device, int *target)
{
	if (!disk_ap_is_scsi(ap_path)) {
		disk_split_ap_path_sata(ap_path, device, target);
		return;
	}

	disk_split_ap_path_scsi(ap_path, device, target);
}
398 
399 static void
disk_split_device_path(const char * dev_path,char * device,int * target)400 disk_split_device_path(const char *dev_path, char *device, int *target)
401 {
402 	char *t, *p, *e;
403 
404 	/*
405 	 * The disk device path is of the form:
406 	 * /rootnode/.../device/target@tgtid,tgtlun
407 	 */
408 
409 	(void) strncpy(device, dev_path, MAXPATHLEN);
410 	e = t = strrchr(device, '/');
411 	dm_assert(t != NULL);
412 
413 	t = strchr(t, '@');
414 	dm_assert(t != NULL);
415 	t += 1;
416 
417 	if ((p = strchr(t, ',')) != NULL)
418 		*p = '\0';
419 
420 	*target = strtol(t, 0, 16);
421 	*e = '\0';
422 }
423 
424 /*
425  * Returns the diskmon that corresponds to the physical disk path
426  * passed in.
427  */
428 static diskmon_t *
disk_match_by_device_path(diskmon_t * disklistp,const char * dev_path)429 disk_match_by_device_path(diskmon_t *disklistp, const char *dev_path)
430 {
431 	char dev_device[MAXPATHLEN];
432 	int dev_target;
433 	char ap_device[MAXPATHLEN];
434 	int ap_target;
435 
436 	dm_assert(disklistp != NULL);
437 	dm_assert(dev_path != NULL);
438 
439 	if (strncmp(dev_path, DEVICES_PREFIX, 8) == 0)
440 		dev_path += 8;
441 
442 	/* pare dev_path into device and target components */
443 	disk_split_device_path(dev_path, (char *)&dev_device, &dev_target);
444 
445 	/*
446 	 * The AP path specified in the configuration properties is
447 	 * the path to an attachment point minor node whose port number is
448 	 * equal to the target number on the disk "major" node sent by the
449 	 * sysevent.  To match them, we need to extract the target id and
450 	 * construct an AP string to compare to the AP path in the diskmon.
451 	 */
452 	while (disklistp != NULL) {
453 		char *app = (char *)dm_prop_lookup(disklistp->app_props,
454 		    DISK_AP_PROP_APID);
455 		dm_assert(app != NULL);
456 
457 		/* Not necessary to adjust the APID here */
458 		if (strncmp(app, DEVICES_PREFIX, 8) == 0)
459 			app += 8;
460 
461 		disk_split_ap_path(app, (char *)&ap_device, &ap_target);
462 
463 		if ((strcmp(dev_device, ap_device) == 0) &&
464 		    (dev_target == ap_target))
465 			return (disklistp);
466 
467 		disklistp = disklistp->next;
468 	}
469 	return (NULL);
470 }
471 
472 static diskmon_t *
disk_match_by_ap_id(diskmon_t * disklistp,const char * ap_id)473 disk_match_by_ap_id(diskmon_t *disklistp, const char *ap_id)
474 {
475 	const char *disk_ap_id;
476 	dm_assert(disklistp != NULL);
477 	dm_assert(ap_id != NULL);
478 
479 	/* Match only the device-tree portion of the name */
480 	if (strncmp(ap_id, DEVICES_PREFIX, 8 /* strlen("/devices") */) == 0)
481 		ap_id += 8;
482 
483 	while (disklistp != NULL) {
484 		disk_ap_id = dm_prop_lookup(disklistp->app_props,
485 		    DISK_AP_PROP_APID);
486 
487 		dm_assert(disk_ap_id != NULL);
488 
489 		if (strcmp(disk_ap_id, ap_id) == 0)
490 			return (disklistp);
491 
492 		disklistp = disklistp->next;
493 	}
494 	return (NULL);
495 }
496 
497 static diskmon_t *
disk_match_by_target_id(diskmon_t * disklistp,const char * target_path)498 disk_match_by_target_id(diskmon_t *disklistp, const char *target_path)
499 {
500 	const char *disk_ap_id;
501 
502 	char match_device[MAXPATHLEN];
503 	int match_target;
504 
505 	char ap_device[MAXPATHLEN];
506 	int ap_target;
507 
508 
509 	/* Match only the device-tree portion of the name */
510 	if (strncmp(target_path, DEVICES_PREFIX, 8) == 0)
511 		target_path += 8;
512 	disk_split_ap_path(target_path, (char *)&match_device, &match_target);
513 
514 	while (disklistp != NULL) {
515 
516 		disk_ap_id = dm_prop_lookup(disklistp->app_props,
517 		    DISK_AP_PROP_APID);
518 		dm_assert(disk_ap_id != NULL);
519 
520 		disk_split_ap_path(disk_ap_id, (char *)&ap_device, &ap_target);
521 		if ((match_target == ap_target) &&
522 		    (strcmp(match_device, ap_device) == 0))
523 			return (disklistp);
524 
525 		disklistp = disklistp->next;
526 	}
527 	return (NULL);
528 }
529 
530 static diskmon_t *
match_sysevent_to_disk(diskmon_t * disklistp,sysevent_t * evp)531 match_sysevent_to_disk(diskmon_t *disklistp, sysevent_t *evp)
532 {
533 	diskmon_t *dmp = NULL;
534 	sysevent_value_t se_val;
535 	char *class_name = sysevent_get_class_name(evp);
536 	char *subclass = sysevent_get_subclass_name(evp);
537 
538 	se_val.value.sv_string = NULL;
539 
540 	if (strcmp(class_name, EC_DEVFS) == 0) {
541 		/* EC_DEVFS-class events have a `DEVFS_PATHNAME' property */
542 		if (sysevent_lookup_attr(evp, DEVFS_PATHNAME,
543 		    SE_DATA_TYPE_STRING, &se_val) == 0 &&
544 		    se_val.value.sv_string != NULL) {
545 
546 			dmp = disk_match_by_device_path(disklistp,
547 			    se_val.value.sv_string);
548 
549 		}
550 
551 	} else if (strcmp(class_name, EC_DR) == 0 &&
552 	    strcmp(subclass, ESC_DR_AP_STATE_CHANGE) == 0) {
553 
554 		/* EC_DR-class events have a `DR_AP_ID' property */
555 		if (sysevent_lookup_attr(evp, DR_AP_ID, SE_DATA_TYPE_STRING,
556 		    &se_val) == 0 && se_val.value.sv_string != NULL) {
557 
558 			dmp = disk_match_by_ap_id(disklistp,
559 			    se_val.value.sv_string);
560 		}
561 	} else if (strcmp(class_name, EC_DR) == 0 &&
562 	    strcmp(subclass, ESC_DR_TARGET_STATE_CHANGE) == 0) {
563 		/* get DR_TARGET_ID */
564 		if (sysevent_lookup_attr(evp, DR_TARGET_ID,
565 		    SE_DATA_TYPE_STRING, &se_val) == 0 &&
566 		    se_val.value.sv_string != NULL) {
567 			dmp = disk_match_by_target_id(disklistp,
568 			    se_val.value.sv_string);
569 		}
570 	}
571 
572 	if (se_val.value.sv_string)
573 		log_msg(MM_HPMGR, "match_sysevent_to_disk: device/ap: %s\n",
574 		    se_val.value.sv_string);
575 
576 	return (dmp);
577 }
578 
579 
580 /*
581  * The disk hotplug monitor (DHPM) listens for disk hotplug events and calls the
582  * state-change functionality when a disk's state changes.  The DHPM listens for
583  * hotplug events via sysevent subscriptions to the following sysevent
584  * classes/subclasses: { EC_DEVFS/ESC_DEVFS_BRANCH_ADD,
585  * EC_DEVFS/ESC_DEVFS_BRANCH_REMOVE, EC_DEVFS/ESC_DEVFS_DEVI_ADD,
586  * EC_DEVFS/ESC_DEVFS_DEVI_REMOVE, EC_DR/ESC_DR_AP_STATE_CHANGE }.  Once the
587  * event is received, the device path sent as part of the event is matched
588  * to one of the disks described by the configuration data structures.
589  */
590 static void
dm_process_sysevent(sysevent_t * dupev)591 dm_process_sysevent(sysevent_t *dupev)
592 {
593 	char		*class_name;
594 	char		*pub;
595 	char		*subclass = sysevent_get_subclass_name(dupev);
596 	diskmon_t	*diskp;
597 
598 	class_name = sysevent_get_class_name(dupev);
599 	log_msg(MM_HPMGR, "****EVENT: %s %s (by %s)\n", class_name,
600 	    subclass,
601 	    ((pub = sysevent_get_pub_name(dupev)) != NULL) ? pub : "UNKNOWN");
602 
603 	if (pub)
604 		free(pub);
605 
606 	if (strcmp(class_name, EC_PLATFORM) == 0 &&
607 	    strcmp(subclass, ESC_PLATFORM_SP_RESET) == 0) {
608 		if (dm_platform_resync() != 0)
609 			log_warn("failed to resync SP platform\n");
610 		sysevent_free(dupev);
611 		return;
612 	}
613 
614 	/*
615 	 * We will handle this event if the event's target matches one of the
616 	 * disks we're monitoring
617 	 */
618 	if ((diskp = match_sysevent_to_disk(config_data->disk_list, dupev))
619 	    != NULL) {
620 
621 		dm_state_change(diskp, disk_sysev_to_state(diskp, dupev));
622 	}
623 
624 	sysevent_free(dupev);
625 }
626 
627 static void
dm_fmd_sysevent_thread(void * queuep)628 dm_fmd_sysevent_thread(void *queuep)
629 {
630 	qu_t			*qp = (qu_t *)queuep;
631 	sysevent_event_t	*sevevp;
632 
633 	/* Signal the thread spawner that we're running */
634 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
635 	if (g_sysev_thread_state != TS_EXIT_REQUESTED)
636 		g_sysev_thread_state = TS_RUNNING;
637 	(void) pthread_cond_broadcast(&g_event_handler_cond);
638 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
639 
640 	while (g_sysev_thread_state != TS_EXIT_REQUESTED) {
641 		if ((sevevp = (sysevent_event_t *)queue_remove(qp)) == NULL)
642 			continue;
643 
644 		dm_process_sysevent(sevevp->evp);
645 
646 		free_sysevent_event(sevevp);
647 	}
648 
649 	/* Signal the thread spawner that we've exited */
650 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
651 	g_sysev_thread_state = TS_EXITED;
652 	(void) pthread_cond_broadcast(&g_event_handler_cond);
653 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
654 
655 	log_msg(MM_HPMGR, "FMD sysevent handler thread exiting...");
656 }
657 
658 static sysevent_event_t *
new_sysevent_event(sysevent_t * ev)659 new_sysevent_event(sysevent_t *ev)
660 {
661 	/*
662 	 * Cannot use dmalloc for this because the thread isn't a FMD-created
663 	 * thread!
664 	 */
665 	sysevent_event_t *sevevp = malloc(sizeof (sysevent_event_t));
666 	sevevp->evp = ev;
667 	return (sevevp);
668 }
669 
/*
 * Release a queue item created by new_sysevent_event().  Only the item
 * itself is freed; the sysevent it points to is not touched.
 */
static void
free_sysevent_event(void *p)
{
	/* the sysevent_event was allocated with malloc(): */
	free(p);
}
676 
677 static void
event_handler(sysevent_t * ev)678 event_handler(sysevent_t *ev)
679 {
680 	/* The duplicated sysevent will be freed in the child thread */
681 	sysevent_t	*dupev = sysevent_dup(ev);
682 
683 	/*
684 	 * Add this sysevent to the work queue of our FMA thread so we can
685 	 * handle the sysevent and use the FMA API (e.g. for memory
686 	 * allocation, etc.) in the sysevent handler.
687 	 */
688 	queue_add(g_sysev_queue, new_sysevent_event(dupev));
689 }
690 
691 static void
fini_sysevents(void)692 fini_sysevents(void)
693 {
694 	sysevent_unsubscribe_event(sysevent_handle, EC_ALL);
695 }
696 
697 static int
init_sysevents(void)698 init_sysevents(void)
699 {
700 	int rv = 0;
701 	const char *devfs_subclasses[] = {
702 		ESC_DEVFS_DEVI_ADD,
703 		ESC_DEVFS_DEVI_REMOVE
704 	};
705 	const char *dr_subclasses[] = {
706 		ESC_DR_AP_STATE_CHANGE,
707 		ESC_DR_TARGET_STATE_CHANGE
708 	};
709 	const char *platform_subclasses[] = {
710 		ESC_PLATFORM_SP_RESET
711 	};
712 
713 	if ((sysevent_handle = sysevent_bind_handle(event_handler)) == NULL) {
714 		rv = errno;
715 		log_err("Could not initialize the hotplug manager ("
716 		    "sysevent_bind_handle failure");
717 	}
718 
719 	if (sysevent_subscribe_event(sysevent_handle, EC_DEVFS,
720 	    devfs_subclasses,
721 	    sizeof (devfs_subclasses)/sizeof (devfs_subclasses[0])) != 0) {
722 
723 		log_err("Could not initialize the hotplug manager "
724 		    "sysevent_subscribe_event(event class = EC_DEVFS) "
725 		    "failure");
726 
727 		rv = -1;
728 
729 	} else if (sysevent_subscribe_event(sysevent_handle, EC_DR,
730 	    dr_subclasses,
731 	    sizeof (dr_subclasses)/sizeof (dr_subclasses[0])) != 0) {
732 
733 		log_err("Could not initialize the hotplug manager "
734 		    "sysevent_subscribe_event(event class = EC_DR) "
735 		    "failure");
736 
737 		/* Unsubscribe from all sysevents in the event of a failure */
738 		fini_sysevents();
739 
740 		rv = -1;
741 	} else if (sysevent_subscribe_event(sysevent_handle, EC_PLATFORM,
742 	    platform_subclasses,
743 	    sizeof (platform_subclasses)/sizeof (platform_subclasses[0]))
744 	    != 0) {
745 
746 		log_err("Could not initialize the hotplug manager "
747 		    "sysevent_subscribe_event(event class = EC_PLATFORM) "
748 		    "failure");
749 
750 		/* Unsubscribe from all sysevents in the event of a failure */
751 		fini_sysevents();
752 
753 		rv = -1;
754 	}
755 
756 
757 	return (rv);
758 }
759 
/*
 * Adapter with a (pointer, size) signature that releases memory with
 * free(); the size argument is ignored.
 */
/*ARGSUSED*/
static void
stdfree(void *p, size_t sz)
{
	free(p);
}
766 
767 /*
768  * Assumptions: Each disk's current state was determined and stored in
769  * its diskmon_t.
770  */
771 hotplug_mgr_init_err_t
init_hotplug_manager()772 init_hotplug_manager()
773 {
774 	/* Create the queue to which we'll add sysevents */
775 	g_sysev_queue = new_queue(B_TRUE, malloc, stdfree, free_sysevent_event);
776 
777 	/*
778 	 * Grab the event handler lock before spawning the thread so we can
779 	 * wait for the thread to transition to the running state.
780 	 */
781 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
782 
783 	/* Create the sysevent handling thread */
784 	g_sysev_tid = fmd_thr_create(g_fm_hdl, dm_fmd_sysevent_thread,
785 	    g_sysev_queue);
786 
787 	/* Wait for the thread's acknowledgement */
788 	while (g_sysev_thread_state != TS_RUNNING)
789 		(void) pthread_cond_wait(&g_event_handler_cond,
790 		    &g_event_handler_lock);
791 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
792 
793 	if (init_sysevents() != 0) {
794 		log_warn_e("Error initializing sysevents");
795 		return (HPM_ERR_SYSEVENT_INIT);
796 	}
797 
798 	return (0);
799 }
800 
801 void
cleanup_hotplug_manager()802 cleanup_hotplug_manager()
803 {
804 	/* Unsubscribe from the sysevents */
805 	fini_sysevents();
806 
807 	/*
808 	 * Wait for the thread to exit before we can destroy
809 	 * the event queue.
810 	 */
811 	dm_assert(pthread_mutex_lock(&g_event_handler_lock) == 0);
812 	g_sysev_thread_state = TS_EXIT_REQUESTED;
813 	queue_add(g_sysev_queue, NULL);
814 	while (g_sysev_thread_state != TS_EXITED)
815 		(void) pthread_cond_wait(&g_event_handler_cond,
816 		    &g_event_handler_lock);
817 	dm_assert(pthread_mutex_unlock(&g_event_handler_lock) == 0);
818 	(void) pthread_join(g_sysev_tid, NULL);
819 	fmd_thr_destroy(g_fm_hdl, g_sysev_tid);
820 
821 	/* Finally, destroy the event queue and reset the thread state */
822 	queue_free(&g_sysev_queue);
823 	g_sysev_thread_state = TS_NOT_RUNNING;
824 }
825