xref: /illumos-gate/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c (revision 71815ce76261aa773c97600750fdce92334d1990)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012 by Delphix. All rights reserved.
24  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
26  */
27 
28 /*
29  * ZFS syseventd module.
30  *
31  * The purpose of this module is to identify when devices are added to the
32  * system, and appropriately online or replace the affected vdevs.
33  *
34  * When a device is added to the system:
35  *
36  *	1. Search for any vdevs whose devid matches that of the newly added
37  *	   device.
38  *
39  *	2. If no vdevs are found, then search for any vdevs whose devfs path
40  *	   matches that of the new device.
41  *
42  *	3. If no vdevs match by either method, then ignore the event.
43  *
44  *	4. Attempt to online the device with a flag to indicate that it should
45  *	   be unspared when resilvering completes.  If this succeeds, then the
46  *	   same device was inserted and we should continue normally.
47  *
48  *	5. If the pool does not have the 'autoreplace' property set, attempt to
49  *	   online the device again without the unspare flag, which will
50  *	   generate a FMA fault.
51  *
52  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
53  *	   is a whole disk, then label the new disk and attempt a 'zpool
54  *	   replace'.
55  *
56  * The module responds to EC_DEV_ADD events for both disks and lofi devices,
57  * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
58  * indicates that a device failed to open during pool load, but the autoreplace
59  * property was set.  In this case, we deferred the associated FMA fault until
60  * our module had a chance to process the autoreplace logic.  If the device
61  * could not be replaced, then the second online attempt will trigger the FMA
62  * fault that we skipped earlier.
63  */
64 
65 #include <alloca.h>
66 #include <devid.h>
67 #include <fcntl.h>
68 #include <libnvpair.h>
69 #include <libsysevent.h>
70 #include <libzfs.h>
71 #include <limits.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <syslog.h>
75 #include <sys/list.h>
76 #include <sys/sunddi.h>
77 #include <sys/sysevent/eventdefs.h>
78 #include <sys/sysevent/dev.h>
79 #include <thread_pool.h>
80 #include <unistd.h>
81 #include "syseventd.h"
82 
/*
 * Architecture-specific device name suffixes.
 *
 * PHYS_PATH is the minor-node suffix that devid_iter() appends to a /devices
 * path in order to have a minor node it can open.  RAW_SLICE is not referenced
 * by the code visible in this file; presumably it names the slice/partition
 * covering the whole disk on each platform -- TODO confirm.
 */
#if defined(__i386) || defined(__amd64)
#define	PHYS_PATH	":q"
#define	RAW_SLICE	"p0"
#elif defined(__sparc)
#define	PHYS_PATH	":c"
#define	RAW_SLICE	"s2"
#else
#error Unknown architecture
#endif
92 
/*
 * Callback invoked by zfs_iter_vdev() for every vdev that matches the search
 * criteria.  It receives the pool handle, the vdev's nvlist and the caller's
 * dd_isdisk flag.
 */
typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);

/* libzfs handle shared by the whole module; created in slm_init(). */
libzfs_handle_t *g_zfshdl;
/* Pools that zfs_unavail_pool() found below VDEV_STATE_DEGRADED. */
list_t g_pool_list;
/* Thread pool running zfs_enable_ds(); only created when the list is used. */
tpool_t *g_tpool;
/* Set to B_TRUE by zfs_enum_pools() once the pool walk has finished. */
boolean_t g_enumeration_done;
/* Thread running zfs_enum_pools(); joined in slm_fini(). */
thread_t g_zfs_tid;

/* Entry on g_pool_list: an open handle to a pool that was not yet usable. */
typedef struct unavailpool {
	zpool_handle_t	*uap_zhp;
	list_node_t	uap_node;
} unavailpool_t;
105 
106 int
107 zfs_toplevel_state(zpool_handle_t *zhp)
108 {
109 	nvlist_t *nvroot;
110 	vdev_stat_t *vs;
111 	unsigned int c;
112 
113 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
114 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
115 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
116 	    (uint64_t **)&vs, &c) == 0);
117 	return (vs->vs_state);
118 }
119 
120 static int
121 zfs_unavail_pool(zpool_handle_t *zhp, void *data)
122 {
123 	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
124 		unavailpool_t *uap;
125 		uap = malloc(sizeof (unavailpool_t));
126 		uap->uap_zhp = zhp;
127 		list_insert_tail((list_t *)data, uap);
128 	} else {
129 		zpool_close(zhp);
130 	}
131 	return (0);
132 }
133 
134 /*
135  * The device associated with the given vdev (either by devid or physical path)
136  * has been added to the system.  If 'isdisk' is set, then we only attempt a
137  * replacement if it's a whole disk.  This also implies that we should label the
138  * disk first.
139  *
140  * First, we attempt to online the device (making sure to undo any spare
141  * operation when finished).  If this succeeds, then we're done.  If it fails,
142  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
143  * but that the label was not what we expected.  If the 'autoreplace' property
144  * is not set, then we relabel the disk (if specified), and attempt a 'zpool
145  * replace'.  If the online is successful, but the new state is something else
146  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
147  * race, and we should avoid attempting to relabel the disk.
148  */
149 static void
150 zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
151 {
152 	char *path;
153 	vdev_state_t newstate;
154 	nvlist_t *nvroot, *newvd;
155 	uint64_t wholedisk = 0ULL;
156 	uint64_t offline = 0ULL;
157 	char *physpath = NULL;
158 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
159 	zpool_boot_label_t boot_type;
160 	uint64_t boot_size;
161 	size_t len;
162 
163 	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
164 		return;
165 
166 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
167 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
168 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
169 
170 	/*
171 	 * We should have a way to online a device by guid.  With the current
172 	 * interface, we are forced to chop off the 's0' for whole disks.
173 	 */
174 	(void) strlcpy(fullpath, path, sizeof (fullpath));
175 	if (wholedisk)
176 		fullpath[strlen(fullpath) - 2] = '\0';
177 
178 	/*
179 	 * Attempt to online the device.  It would be nice to online this by
180 	 * GUID, but the current interface only supports lookup by path.
181 	 */
182 	if (offline ||
183 	    (zpool_vdev_online(zhp, fullpath,
184 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
185 	    (newstate == VDEV_STATE_HEALTHY ||
186 	    newstate == VDEV_STATE_DEGRADED)))
187 		return;
188 
189 	/*
190 	 * If the pool doesn't have the autoreplace property set, then attempt a
191 	 * true online (without the unspare flag), which will trigger a FMA
192 	 * fault.
193 	 */
194 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
195 	    (isdisk && !wholedisk)) {
196 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
197 		    &newstate);
198 		return;
199 	}
200 
201 	if (isdisk) {
202 		/*
203 		 * If this is a request to label a whole disk, then attempt to
204 		 * write out the label.  Before we can label the disk, we need
205 		 * access to a raw node.  Ideally, we'd like to walk the devinfo
206 		 * tree and find a raw node from the corresponding parent node.
207 		 * This is overly complicated, and since we know how we labeled
208 		 * this device in the first place, we know it's save to switch
209 		 * from /dev/dsk to /dev/rdsk and append the backup slice.
210 		 *
211 		 * If any part of this process fails, then do a force online to
212 		 * trigger a ZFS fault for the device (and any hot spare
213 		 * replacement).
214 		 */
215 		if (strncmp(path, ZFS_DISK_ROOTD,
216 		    strlen(ZFS_DISK_ROOTD)) != 0) {
217 			(void) zpool_vdev_online(zhp, fullpath,
218 			    ZFS_ONLINE_FORCEFAULT, &newstate);
219 			return;
220 		}
221 
222 		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
223 		len = strlen(rawpath);
224 		rawpath[len - 2] = '\0';
225 
226 		if (zpool_is_bootable(zhp))
227 			boot_type = ZPOOL_COPY_BOOT_LABEL;
228 		else
229 			boot_type = ZPOOL_NO_BOOT_LABEL;
230 
231 		boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
232 		if (zpool_label_disk(g_zfshdl, zhp, rawpath,
233 		    boot_type, boot_size, NULL) != 0) {
234 			(void) zpool_vdev_online(zhp, fullpath,
235 			    ZFS_ONLINE_FORCEFAULT, &newstate);
236 			return;
237 		}
238 	}
239 
240 	/*
241 	 * Cosntruct the root vdev to pass to zpool_vdev_attach().  While adding
242 	 * the entire vdev structure is harmless, we construct a reduced set of
243 	 * path/physpath/wholedisk to keep it simple.
244 	 */
245 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
246 		return;
247 
248 	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
249 		nvlist_free(nvroot);
250 		return;
251 	}
252 
253 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
254 	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
255 	    (physpath != NULL && nvlist_add_string(newvd,
256 	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
257 	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
258 	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
259 	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
260 	    1) != 0) {
261 		nvlist_free(newvd);
262 		nvlist_free(nvroot);
263 		return;
264 	}
265 
266 	nvlist_free(newvd);
267 
268 	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
269 
270 	nvlist_free(nvroot);
271 
272 }
273 
/*
 * Utility functions to find a vdev matching given criteria.
 *
 * dev_data_t carries the search criteria and the per-match callback through
 * zpool_iter() / zfs_iter_pool() / zfs_iter_vdev().
 */
typedef struct dev_data {
	/* String compared against the vdev property named by dd_prop. */
	const char		*dd_compare;
	/* Name of the vdev nvlist string property to compare. */
	const char		*dd_prop;
	/* Callback run on every vdev that passes the filter. */
	zfs_process_func_t	dd_func;
	/* Result flag returned by devid_iter() and devpath_iter(). */
	boolean_t		dd_found;
	/* Passed through unchanged as the callback's third argument. */
	boolean_t		dd_isdisk;
	/* If non-zero, only the pool with this guid is examined. */
	uint64_t		dd_pool_guid;
	/* If non-zero, match vdevs by this guid instead of by string. */
	uint64_t		dd_vdev_guid;
} dev_data_t;
286 
287 static void
288 zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
289 {
290 	dev_data_t *dp = data;
291 	char *path;
292 	uint_t c, children;
293 	nvlist_t **child;
294 	size_t len;
295 	uint64_t guid;
296 
297 	/*
298 	 * First iterate over any children.
299 	 */
300 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
301 	    &child, &children) == 0) {
302 		for (c = 0; c < children; c++)
303 			zfs_iter_vdev(zhp, child[c], data);
304 		return;
305 	}
306 
307 	if (dp->dd_vdev_guid != 0) {
308 		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
309 		    &guid) != 0 || guid != dp->dd_vdev_guid)
310 			return;
311 	} else if (dp->dd_compare != NULL) {
312 		len = strlen(dp->dd_compare);
313 
314 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
315 		    strncmp(dp->dd_compare, path, len) != 0)
316 			return;
317 
318 		/*
319 		 * Normally, we want to have an exact match for the comparison
320 		 * string.  However, we allow substring matches in the following
321 		 * cases:
322 		 *
323 		 *	<path>:		This is a devpath, and the target is one
324 		 *			of its children.
325 		 *
326 		 *	<path/>		This is a devid for a whole disk, and
327 		 *			the target is one of its children.
328 		 */
329 		if (path[len] != '\0' && path[len] != ':' &&
330 		    path[len - 1] != '/')
331 			return;
332 	}
333 
334 	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
335 }
336 
337 void
338 zfs_enable_ds(void *arg)
339 {
340 	unavailpool_t *pool = (unavailpool_t *)arg;
341 
342 	(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
343 	zpool_close(pool->uap_zhp);
344 	free(pool);
345 }
346 
347 static int
348 zfs_iter_pool(zpool_handle_t *zhp, void *data)
349 {
350 	nvlist_t *config, *nvl;
351 	dev_data_t *dp = data;
352 	uint64_t pool_guid;
353 	unavailpool_t *pool;
354 
355 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
356 		if (dp->dd_pool_guid == 0 ||
357 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
358 		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
359 			(void) nvlist_lookup_nvlist(config,
360 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
361 			zfs_iter_vdev(zhp, nvl, data);
362 		}
363 	}
364 	if (g_enumeration_done)  {
365 		for (pool = list_head(&g_pool_list); pool != NULL;
366 		    pool = list_next(&g_pool_list, pool)) {
367 
368 			if (strcmp(zpool_get_name(zhp),
369 			    zpool_get_name(pool->uap_zhp)))
370 				continue;
371 			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
372 				list_remove(&g_pool_list, pool);
373 				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
374 				    pool);
375 				break;
376 			}
377 		}
378 	}
379 
380 	zpool_close(zhp);
381 	return (0);
382 }
383 
384 /*
385  * Given a physical device path, iterate over all (pool, vdev) pairs which
386  * correspond to the given path.
387  */
388 static boolean_t
389 devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
390 {
391 	dev_data_t data = { 0 };
392 
393 	data.dd_compare = devpath;
394 	data.dd_func = func;
395 	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
396 	data.dd_found = B_FALSE;
397 	data.dd_isdisk = wholedisk;
398 
399 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
400 
401 	return (data.dd_found);
402 }
403 
404 /*
405  * Given a /devices path, lookup the corresponding devid for each minor node,
406  * and find any vdevs with matching devids.  Doing this straight up would be
407  * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
408  * the fact that each devid ends with "/<minornode>".  Once we find any valid
409  * minor node, we chop off the portion after the last slash, and then search for
410  * matching vdevs, which is O(vdevs in system).
411  */
412 static boolean_t
413 devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
414 {
415 	size_t len = strlen(devpath) + sizeof ("/devices") +
416 	    sizeof (PHYS_PATH) - 1;
417 	char *fullpath;
418 	int fd;
419 	ddi_devid_t devid;
420 	char *devidstr, *fulldevid;
421 	dev_data_t data = { 0 };
422 
423 	/*
424 	 * Try to open a known minor node.
425 	 */
426 	fullpath = alloca(len);
427 	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
428 	if ((fd = open(fullpath, O_RDONLY)) < 0)
429 		return (B_FALSE);
430 
431 	/*
432 	 * Determine the devid as a string, with no trailing slash for the minor
433 	 * node.
434 	 */
435 	if (devid_get(fd, &devid) != 0) {
436 		(void) close(fd);
437 		return (B_FALSE);
438 	}
439 	(void) close(fd);
440 
441 	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
442 		devid_free(devid);
443 		return (B_FALSE);
444 	}
445 
446 	len = strlen(devidstr) + 2;
447 	fulldevid = alloca(len);
448 	(void) snprintf(fulldevid, len, "%s/", devidstr);
449 
450 	data.dd_compare = fulldevid;
451 	data.dd_func = func;
452 	data.dd_prop = ZPOOL_CONFIG_DEVID;
453 	data.dd_found = B_FALSE;
454 	data.dd_isdisk = wholedisk;
455 
456 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
457 
458 	devid_str_free(devidstr);
459 	devid_free(devid);
460 
461 	return (data.dd_found);
462 }
463 
464 /*
465  * This function is called when we receive a devfs add event.  This can be
466  * either a disk event or a lofi event, and the behavior is slightly different
467  * depending on which it is.
468  */
469 static int
470 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
471 {
472 	char *devpath, *devname;
473 	char path[PATH_MAX], realpath[PATH_MAX];
474 	char *colon, *raw;
475 	int ret;
476 
477 	/*
478 	 * The main unit of operation is the physical device path.  For disks,
479 	 * this is the device node, as all minor nodes are affected.  For lofi
480 	 * devices, this includes the minor path.  Unfortunately, this isn't
481 	 * represented in the DEV_PHYS_PATH for various reasons.
482 	 */
483 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
484 		return (-1);
485 
486 	/*
487 	 * If this is a lofi device, then also get the minor instance name.
488 	 * Unfortunately, the current payload doesn't include an easy way to get
489 	 * this information.  So we cheat by resolving the 'dev_name' (which
490 	 * refers to the raw device) and taking the portion between ':(*),raw'.
491 	 */
492 	(void) strlcpy(realpath, devpath, sizeof (realpath));
493 	if (is_lofi) {
494 		if (nvlist_lookup_string(nvl, DEV_NAME,
495 		    &devname) == 0 &&
496 		    (ret = resolvepath(devname, path,
497 		    sizeof (path))) > 0) {
498 			path[ret] = '\0';
499 			colon = strchr(path, ':');
500 			if (colon != NULL)
501 				raw = strstr(colon + 1, ",raw");
502 			if (colon != NULL && raw != NULL) {
503 				*raw = '\0';
504 				(void) snprintf(realpath,
505 				    sizeof (realpath), "%s%s",
506 				    devpath, colon);
507 				*raw = ',';
508 			}
509 		}
510 	}
511 
512 	/*
513 	 * Iterate over all vdevs with a matching devid, and then those with a
514 	 * matching /devices path.  For disks, we only want to pay attention to
515 	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
516 	 * matching an exact minor name).
517 	 */
518 	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
519 		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);
520 
521 	return (0);
522 }
523 
524 /*
525  * Called when we receive a VDEV_CHECK event, which indicates a device could not
526  * be opened during initial pool open, but the autoreplace property was set on
527  * the pool.  In this case, we treat it as if it were an add event.
528  */
529 static int
530 zfs_deliver_check(nvlist_t *nvl)
531 {
532 	dev_data_t data = { 0 };
533 
534 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
535 	    &data.dd_pool_guid) != 0 ||
536 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
537 	    &data.dd_vdev_guid) != 0 ||
538 	    data.dd_vdev_guid == 0)
539 		return (0);
540 
541 	data.dd_isdisk = B_TRUE;
542 	data.dd_func = zfs_process_add;
543 
544 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
545 
546 	return (0);
547 }
548 
549 #define	DEVICE_PREFIX	"/devices"
550 
551 static int
552 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
553 {
554 	char *devname = data;
555 	boolean_t avail_spare, l2cache;
556 	vdev_state_t newstate;
557 	nvlist_t *tgt;
558 
559 	syseventd_print(9, "%s: searching for %s in pool %s\n", __func__,
560 	    devname, zpool_get_name(zhp));
561 
562 	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
563 	    &avail_spare, &l2cache, NULL)) != NULL) {
564 		char *path, fullpath[MAXPATHLEN];
565 		uint64_t wholedisk = 0ULL;
566 
567 		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
568 		    &path) == 0);
569 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
570 		    &wholedisk) == 0);
571 
572 		syseventd_print(9, "%s: "
573 		    "found %s in pool %s (wholedisk: %s)\n", __func__,
574 		    path, zpool_get_name(zhp),
575 		    wholedisk != 0 ? "true" : "false");
576 
577 		(void) strlcpy(fullpath, path, sizeof (fullpath));
578 		if (wholedisk) {
579 			fullpath[strlen(fullpath) - 2] = '\0';
580 
581 			/*
582 			 * We need to reopen the pool associated with this
583 			 * device so that the kernel can update the size
584 			 * of the expanded device.
585 			 */
586 			(void) zpool_reopen(zhp);
587 		}
588 
589 		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
590 			syseventd_print(9, "%s: "
591 			    "setting device %s to ONLINE state in pool %s.\n",
592 			    __func__, fullpath, zpool_get_name(zhp));
593 			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL) {
594 				(void) zpool_vdev_online(zhp, fullpath, 0,
595 				    &newstate);
596 			}
597 		}
598 		zpool_close(zhp);
599 		return (1);
600 	}
601 	zpool_close(zhp);
602 	return (0);
603 }
604 
605 /*
606  * This function is called for each vdev of a pool for which any of the
607  * following events was recieved:
608  *  - ESC_ZFS_vdev_add
609  *  - ESC_ZFS_vdev_attach
610  *  - ESC_ZFS_vdev_clear
611  *  - ESC_ZFS_vdev_online
612  *  - ESC_ZFS_pool_create
613  *  - ESC_ZFS_pool_import
614  * It will update the vdevs FRU property if it is out of date.
615  */
616 /*ARGSUSED2*/
617 static void
618 zfs_update_vdev_fru(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
619 {
620 	char *devpath, *cptr, *oldfru = NULL;
621 	const char *newfru;
622 	uint64_t vdev_guid;
623 
624 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
625 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &devpath);
626 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &oldfru);
627 
628 	/* remove :<slice> from devpath */
629 	cptr = strrchr(devpath, ':');
630 	if (cptr != NULL)
631 		*cptr = '\0';
632 
633 	newfru = libzfs_fru_lookup(g_zfshdl, devpath);
634 	if (newfru == NULL) {
635 		syseventd_print(9, "zfs_update_vdev_fru: no FRU for %s\n",
636 		    devpath);
637 		return;
638 	}
639 
640 	/* do nothing if the FRU hasn't changed */
641 	if (oldfru != NULL && libzfs_fru_compare(g_zfshdl, oldfru, newfru)) {
642 		syseventd_print(9, "zfs_update_vdev_fru: FRU unchanged\n");
643 		return;
644 	}
645 
646 	syseventd_print(9, "zfs_update_vdev_fru: devpath = %s\n", devpath);
647 	syseventd_print(9, "zfs_update_vdev_fru: FRU = %s\n", newfru);
648 
649 	(void) zpool_fru_set(zhp, vdev_guid, newfru);
650 }
651 
652 /*
653  * This function handles the following events:
654  *  - ESC_ZFS_vdev_add
655  *  - ESC_ZFS_vdev_attach
656  *  - ESC_ZFS_vdev_clear
657  *  - ESC_ZFS_vdev_online
658  *  - ESC_ZFS_pool_create
659  *  - ESC_ZFS_pool_import
660  * It will iterate over the pool vdevs to update the FRU property.
661  */
662 int
663 zfs_deliver_update(nvlist_t *nvl)
664 {
665 	dev_data_t dd = { 0 };
666 	char *pname;
667 	zpool_handle_t *zhp;
668 	nvlist_t *config, *vdev;
669 
670 	if (nvlist_lookup_string(nvl, "pool_name", &pname) != 0) {
671 		syseventd_print(9, "zfs_deliver_update: no pool name\n");
672 		return (-1);
673 	}
674 
675 	/*
676 	 * If this event was triggered by a pool export or destroy we cannot
677 	 * open the pool. This is not an error, just return 0 as we don't care
678 	 * about these events.
679 	 */
680 	zhp = zpool_open_canfail(g_zfshdl, pname);
681 	if (zhp == NULL)
682 		return (0);
683 
684 	config = zpool_get_config(zhp, NULL);
685 	if (config == NULL) {
686 		syseventd_print(9, "zfs_deliver_update: "
687 		    "failed to get pool config for %s\n", pname);
688 		zpool_close(zhp);
689 		return (-1);
690 	}
691 
692 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev) != 0) {
693 		syseventd_print(0, "zfs_deliver_update: "
694 		    "failed to get vdev tree for %s\n", pname);
695 		zpool_close(zhp);
696 		return (-1);
697 	}
698 
699 	libzfs_fru_refresh(g_zfshdl);
700 
701 	dd.dd_func = zfs_update_vdev_fru;
702 	zfs_iter_vdev(zhp, vdev, &dd);
703 
704 	zpool_close(zhp);
705 	return (0);
706 }
707 
708 int
709 zfs_deliver_dle(nvlist_t *nvl)
710 {
711 	char *devname;
712 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
713 		syseventd_print(9, "zfs_deliver_event: no physpath\n");
714 		return (-1);
715 	}
716 	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
717 		syseventd_print(9, "zfs_deliver_event: invalid "
718 		    "device '%s'", devname);
719 		return (-1);
720 	}
721 
722 	/*
723 	 * We try to find the device using the physical
724 	 * path that has been supplied. We need to strip off
725 	 * the /devices prefix before starting our search.
726 	 */
727 	devname += strlen(DEVICE_PREFIX);
728 	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
729 		syseventd_print(9, "zfs_deliver_event: device '%s' not"
730 		    " found\n", devname);
731 		return (1);
732 	}
733 	return (0);
734 }
735 
736 
737 /*ARGSUSED*/
738 static int
739 zfs_deliver_event(sysevent_t *ev, int unused)
740 {
741 	const char *class = sysevent_get_class_name(ev);
742 	const char *subclass = sysevent_get_subclass_name(ev);
743 	nvlist_t *nvl;
744 	int ret;
745 	boolean_t is_lofi = B_FALSE, is_check = B_FALSE;
746 	boolean_t is_dle = B_FALSE, is_update = B_FALSE;
747 
748 	if (strcmp(class, EC_DEV_ADD) == 0) {
749 		/*
750 		 * We're mainly interested in disk additions, but we also listen
751 		 * for new lofi devices, to allow for simplified testing.
752 		 */
753 		if (strcmp(subclass, ESC_DISK) == 0)
754 			is_lofi = B_FALSE;
755 		else if (strcmp(subclass, ESC_LOFI) == 0)
756 			is_lofi = B_TRUE;
757 		else
758 			return (0);
759 
760 		is_check = B_FALSE;
761 	} else if (strcmp(class, EC_ZFS) == 0) {
762 		if (strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
763 			/*
764 			 * This event signifies that a device failed to open
765 			 * during pool load, but the 'autoreplace' property was
766 			 * set, so we should pretend it's just been added.
767 			 */
768 			is_check = B_TRUE;
769 		} else if ((strcmp(subclass, ESC_ZFS_VDEV_ADD) == 0) ||
770 		    (strcmp(subclass, ESC_ZFS_VDEV_ATTACH) == 0) ||
771 		    (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) == 0) ||
772 		    (strcmp(subclass, ESC_ZFS_VDEV_ONLINE) == 0) ||
773 		    (strcmp(subclass, ESC_ZFS_POOL_CREATE) == 0) ||
774 		    (strcmp(subclass, ESC_ZFS_POOL_IMPORT) == 0)) {
775 			/*
776 			 * When we receive these events we check the pool
777 			 * configuration and update the vdev FRUs if necessary.
778 			 */
779 			is_update = B_TRUE;
780 		}
781 	} else if (strcmp(class, EC_DEV_STATUS) == 0 &&
782 	    strcmp(subclass, ESC_DEV_DLE) == 0) {
783 		is_dle = B_TRUE;
784 	} else {
785 		return (0);
786 	}
787 
788 	if (sysevent_get_attr_list(ev, &nvl) != 0)
789 		return (-1);
790 
791 	if (is_dle)
792 		ret = zfs_deliver_dle(nvl);
793 	else if (is_update)
794 		ret = zfs_deliver_update(nvl);
795 	else if (is_check)
796 		ret = zfs_deliver_check(nvl);
797 	else
798 		ret = zfs_deliver_add(nvl, is_lofi);
799 
800 	nvlist_free(nvl);
801 	return (ret);
802 }
803 
804 /*ARGSUSED*/
805 void *
806 zfs_enum_pools(void *arg)
807 {
808 	(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
809 	if (!list_is_empty(&g_pool_list))
810 		g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
811 		    0, NULL);
812 	g_enumeration_done = B_TRUE;
813 	return (NULL);
814 }
815 
816 static struct slm_mod_ops zfs_mod_ops = {
817 	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
818 };
819 
820 struct slm_mod_ops *
821 slm_init()
822 {
823 	if ((g_zfshdl = libzfs_init()) == NULL)
824 		return (NULL);
825 	/*
826 	 * collect a list of unavailable pools (asynchronously,
827 	 * since this can take a while)
828 	 */
829 	list_create(&g_pool_list, sizeof (struct unavailpool),
830 	    offsetof(struct unavailpool, uap_node));
831 	if (thr_create(NULL, 0, zfs_enum_pools, NULL, 0, &g_zfs_tid) != 0)
832 		return (NULL);
833 	return (&zfs_mod_ops);
834 }
835 
836 void
837 slm_fini()
838 {
839 	unavailpool_t *pool;
840 
841 	(void) thr_join(g_zfs_tid, NULL, NULL);
842 	if (g_tpool != NULL) {
843 		tpool_wait(g_tpool);
844 		tpool_destroy(g_tpool);
845 	}
846 	while ((pool = (list_head(&g_pool_list))) != NULL) {
847 		list_remove(&g_pool_list, pool);
848 		zpool_close(pool->uap_zhp);
849 		free(pool);
850 	}
851 	list_destroy(&g_pool_list);
852 	libzfs_fini(g_zfshdl);
853 }
854