xref: /illumos-gate/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c (revision 56350fe51602d105c19353b8bf580ddd591726d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012 by Delphix. All rights reserved.
24  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25  */
26 
27 /*
28  * ZFS syseventd module.
29  *
30  * The purpose of this module is to identify when devices are added to the
31  * system, and appropriately online or replace the affected vdevs.
32  *
33  * When a device is added to the system:
34  *
35  * 	1. Search for any vdevs whose devid matches that of the newly added
36  *	   device.
37  *
38  * 	2. If no vdevs are found, then search for any vdevs whose devfs path
39  *	   matches that of the new device.
40  *
41  *	3. If no vdevs match by either method, then ignore the event.
42  *
43  * 	4. Attempt to online the device with a flag to indicate that it should
44  *	   be unspared when resilvering completes.  If this succeeds, then the
45  *	   same device was inserted and we should continue normally.
46  *
47  *	5. If the pool does not have the 'autoreplace' property set, attempt to
48  *	   online the device again without the unspare flag, which will
49  *	   generate a FMA fault.
50  *
51  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
52  *	   is a whole disk, then label the new disk and attempt a 'zpool
53  *	   replace'.
54  *
55  * The module responds to EC_DEV_ADD events for both disks and lofi devices,
56  * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
57  * indicates that a device failed to open during pool load, but the autoreplace
58  * property was set.  In this case, we deferred the associated FMA fault until
59  * our module had a chance to process the autoreplace logic.  If the device
60  * could not be replaced, then the second online attempt will trigger the FMA
61  * fault that we skipped earlier.
62  */
63 
64 #include <alloca.h>
65 #include <devid.h>
66 #include <fcntl.h>
67 #include <libnvpair.h>
68 #include <libsysevent.h>
69 #include <libzfs.h>
70 #include <limits.h>
71 #include <stdlib.h>
72 #include <string.h>
73 #include <syslog.h>
74 #include <sys/list.h>
75 #include <sys/sunddi.h>
76 #include <sys/sysevent/eventdefs.h>
77 #include <sys/sysevent/dev.h>
78 #include <thread_pool.h>
79 #include <unistd.h>
80 #include "syseventd.h"
81 
/*
 * Architecture-specific device name fragments.
 *
 * PHYS_PATH is the minor-node suffix that devid_iter() appends to a
 * "/devices<devpath>" string in order to open a known minor node of the
 * device.
 *
 * RAW_SLICE is not referenced in the visible part of this file; presumably it
 * names the raw whole-disk slice/partition for the platform -- TODO confirm.
 *
 * Any architecture other than x86 or SPARC is rejected at compile time.
 */
#if defined(__i386) || defined(__amd64)
#define	PHYS_PATH	":q"
#define	RAW_SLICE	"p0"
#elif defined(__sparc)
#define	PHYS_PATH	":c"
#define	RAW_SLICE	"s2"
#else
#error Unknown architecture
#endif
91 
/*
 * Callback invoked by zfs_iter_vdev() for every leaf vdev that matches the
 * search criteria.  The boolean is the dd_isdisk value of the search.
 */
typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);

/* libzfs handle; created in slm_init() and released in slm_fini(). */
libzfs_handle_t *g_zfshdl;
/* List of unavailpool_t entries collected by zfs_enum_pools(). */
list_t g_pool_list;
/* Thread pool used to run zfs_enable_ds(); NULL until zfs_enum_pools(). */
tpool_t *g_tpool;
/* Set to B_TRUE by zfs_enum_pools() once the initial pool walk is over. */
boolean_t g_enumeration_done;
/* Thread running zfs_enum_pools(); joined in slm_fini(). */
thread_t g_zfs_tid;
99 
/*
 * One entry of g_pool_list: a pool whose top-level vdev state was below
 * VDEV_STATE_DEGRADED when zfs_unavail_pool() examined it.  The entry owns
 * the open pool handle until it is closed by zfs_enable_ds() or slm_fini().
 */
typedef struct unavailpool {
	zpool_handle_t	*uap_zhp;	/* open handle for the pool */
	list_node_t	uap_node;	/* linkage on g_pool_list */
} unavailpool_t;
104 
105 int
106 zfs_toplevel_state(zpool_handle_t *zhp)
107 {
108 	nvlist_t *nvroot;
109 	vdev_stat_t *vs;
110 	unsigned int c;
111 
112 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
113 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
114 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
115 	    (uint64_t **)&vs, &c) == 0);
116 	return (vs->vs_state);
117 }
118 
119 static int
120 zfs_unavail_pool(zpool_handle_t *zhp, void *data)
121 {
122 	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
123 		unavailpool_t *uap;
124 		uap = malloc(sizeof (unavailpool_t));
125 		uap->uap_zhp = zhp;
126 		list_insert_tail((list_t *)data, uap);
127 	} else {
128 		zpool_close(zhp);
129 	}
130 	return (0);
131 }
132 
133 /*
134  * The device associated with the given vdev (either by devid or physical path)
135  * has been added to the system.  If 'isdisk' is set, then we only attempt a
136  * replacement if it's a whole disk.  This also implies that we should label the
137  * disk first.
138  *
139  * First, we attempt to online the device (making sure to undo any spare
140  * operation when finished).  If this succeeds, then we're done.  If it fails,
141  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
142  * but that the label was not what we expected.  If the 'autoreplace' property
143  * is not set, then we relabel the disk (if specified), and attempt a 'zpool
144  * replace'.  If the online is successful, but the new state is something else
145  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
146  * race, and we should avoid attempting to relabel the disk.
147  */
148 static void
149 zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
150 {
151 	char *path;
152 	vdev_state_t newstate;
153 	nvlist_t *nvroot, *newvd;
154 	uint64_t wholedisk = 0ULL;
155 	uint64_t offline = 0ULL;
156 	char *physpath = NULL;
157 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
158 	size_t len;
159 
160 	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
161 		return;
162 
163 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
164 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
165 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
166 
167 	/*
168 	 * We should have a way to online a device by guid.  With the current
169 	 * interface, we are forced to chop off the 's0' for whole disks.
170 	 */
171 	(void) strlcpy(fullpath, path, sizeof (fullpath));
172 	if (wholedisk)
173 		fullpath[strlen(fullpath) - 2] = '\0';
174 
175 	/*
176 	 * Attempt to online the device.  It would be nice to online this by
177 	 * GUID, but the current interface only supports lookup by path.
178 	 */
179 	if (offline ||
180 	    (zpool_vdev_online(zhp, fullpath,
181 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
182 	    (newstate == VDEV_STATE_HEALTHY ||
183 	    newstate == VDEV_STATE_DEGRADED)))
184 		return;
185 
186 	/*
187 	 * If the pool doesn't have the autoreplace property set, then attempt a
188 	 * true online (without the unspare flag), which will trigger a FMA
189 	 * fault.
190 	 */
191 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
192 	    (isdisk && !wholedisk)) {
193 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
194 		    &newstate);
195 		return;
196 	}
197 
198 	if (isdisk) {
199 		/*
200 		 * If this is a request to label a whole disk, then attempt to
201 		 * write out the label.  Before we can label the disk, we need
202 		 * access to a raw node.  Ideally, we'd like to walk the devinfo
203 		 * tree and find a raw node from the corresponding parent node.
204 		 * This is overly complicated, and since we know how we labeled
205 		 * this device in the first place, we know it's save to switch
206 		 * from /dev/dsk to /dev/rdsk and append the backup slice.
207 		 *
208 		 * If any part of this process fails, then do a force online to
209 		 * trigger a ZFS fault for the device (and any hot spare
210 		 * replacement).
211 		 */
212 		if (strncmp(path, ZFS_DISK_ROOTD,
213 		    strlen(ZFS_DISK_ROOTD)) != 0) {
214 			(void) zpool_vdev_online(zhp, fullpath,
215 			    ZFS_ONLINE_FORCEFAULT, &newstate);
216 			return;
217 		}
218 
219 		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
220 		len = strlen(rawpath);
221 		rawpath[len - 2] = '\0';
222 
223 		if (zpool_label_disk(g_zfshdl, zhp, rawpath) != 0) {
224 			(void) zpool_vdev_online(zhp, fullpath,
225 			    ZFS_ONLINE_FORCEFAULT, &newstate);
226 			return;
227 		}
228 	}
229 
230 	/*
231 	 * Cosntruct the root vdev to pass to zpool_vdev_attach().  While adding
232 	 * the entire vdev structure is harmless, we construct a reduced set of
233 	 * path/physpath/wholedisk to keep it simple.
234 	 */
235 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
236 		return;
237 
238 	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
239 		nvlist_free(nvroot);
240 		return;
241 	}
242 
243 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
244 	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
245 	    (physpath != NULL && nvlist_add_string(newvd,
246 	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
247 	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
248 	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
249 	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
250 	    1) != 0) {
251 		nvlist_free(newvd);
252 		nvlist_free(nvroot);
253 		return;
254 	}
255 
256 	nvlist_free(newvd);
257 
258 	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
259 
260 	nvlist_free(nvroot);
261 
262 }
263 
/*
 * Utility functions to find a vdev matching given criteria.
 *
 * A dev_data_t describes one search and is passed as the opaque 'data'
 * argument to zfs_iter_pool() and zfs_iter_vdev().  A vdev is selected by
 * guid when dd_vdev_guid is non-zero, otherwise by string comparison when
 * dd_compare is non-NULL, otherwise every leaf vdev is selected.
 */
typedef struct dev_data {
	/* string matched against the vdev property named by dd_prop */
	const char		*dd_compare;
	/* name of the vdev config property to compare (e.g. devid) */
	const char		*dd_prop;
	/* callback invoked for every selected leaf vdev */
	zfs_process_func_t	dd_func;
	/* result flag returned by devid_iter() and devpath_iter() */
	boolean_t		dd_found;
	/* passed through unchanged as the third argument of dd_func */
	boolean_t		dd_isdisk;
	/* if non-zero, zfs_iter_pool() only walks the pool with this guid */
	uint64_t		dd_pool_guid;
	/* if non-zero, zfs_iter_vdev() only selects the vdev with this guid */
	uint64_t		dd_vdev_guid;
} dev_data_t;
276 
277 static void
278 zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
279 {
280 	dev_data_t *dp = data;
281 	char *path;
282 	uint_t c, children;
283 	nvlist_t **child;
284 	size_t len;
285 	uint64_t guid;
286 
287 	/*
288 	 * First iterate over any children.
289 	 */
290 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
291 	    &child, &children) == 0) {
292 		for (c = 0; c < children; c++)
293 			zfs_iter_vdev(zhp, child[c], data);
294 		return;
295 	}
296 
297 	if (dp->dd_vdev_guid != 0) {
298 		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
299 		    &guid) != 0 || guid != dp->dd_vdev_guid)
300 			return;
301 	} else if (dp->dd_compare != NULL) {
302 		len = strlen(dp->dd_compare);
303 
304 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
305 		    strncmp(dp->dd_compare, path, len) != 0)
306 			return;
307 
308 		/*
309 		 * Normally, we want to have an exact match for the comparison
310 		 * string.  However, we allow substring matches in the following
311 		 * cases:
312 		 *
313 		 * 	<path>:		This is a devpath, and the target is one
314 		 * 			of its children.
315 		 *
316 		 * 	<path/>		This is a devid for a whole disk, and
317 		 * 			the target is one of its children.
318 		 */
319 		if (path[len] != '\0' && path[len] != ':' &&
320 		    path[len - 1] != '/')
321 			return;
322 	}
323 
324 	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
325 }
326 
327 void
328 zfs_enable_ds(void *arg)
329 {
330 	unavailpool_t *pool = (unavailpool_t *)arg;
331 
332 	(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
333 	zpool_close(pool->uap_zhp);
334 	free(pool);
335 }
336 
337 static int
338 zfs_iter_pool(zpool_handle_t *zhp, void *data)
339 {
340 	nvlist_t *config, *nvl;
341 	dev_data_t *dp = data;
342 	uint64_t pool_guid;
343 	unavailpool_t *pool;
344 
345 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
346 		if (dp->dd_pool_guid == 0 ||
347 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
348 		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
349 			(void) nvlist_lookup_nvlist(config,
350 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
351 			zfs_iter_vdev(zhp, nvl, data);
352 		}
353 	}
354 	if (g_enumeration_done)  {
355 		for (pool = list_head(&g_pool_list); pool != NULL;
356 		    pool = list_next(&g_pool_list, pool)) {
357 
358 			if (strcmp(zpool_get_name(zhp),
359 			    zpool_get_name(pool->uap_zhp)))
360 				continue;
361 			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
362 				list_remove(&g_pool_list, pool);
363 				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
364 				    pool);
365 				break;
366 			}
367 		}
368 	}
369 
370 	zpool_close(zhp);
371 	return (0);
372 }
373 
374 /*
375  * Given a physical device path, iterate over all (pool, vdev) pairs which
376  * correspond to the given path.
377  */
378 static boolean_t
379 devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
380 {
381 	dev_data_t data = { 0 };
382 
383 	data.dd_compare = devpath;
384 	data.dd_func = func;
385 	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
386 	data.dd_found = B_FALSE;
387 	data.dd_isdisk = wholedisk;
388 
389 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
390 
391 	return (data.dd_found);
392 }
393 
394 /*
395  * Given a /devices path, lookup the corresponding devid for each minor node,
396  * and find any vdevs with matching devids.  Doing this straight up would be
397  * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
398  * the fact that each devid ends with "/<minornode>".  Once we find any valid
399  * minor node, we chop off the portion after the last slash, and then search for
400  * matching vdevs, which is O(vdevs in system).
401  */
402 static boolean_t
403 devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
404 {
405 	size_t len = strlen(devpath) + sizeof ("/devices") +
406 	    sizeof (PHYS_PATH) - 1;
407 	char *fullpath;
408 	int fd;
409 	ddi_devid_t devid;
410 	char *devidstr, *fulldevid;
411 	dev_data_t data = { 0 };
412 
413 	/*
414 	 * Try to open a known minor node.
415 	 */
416 	fullpath = alloca(len);
417 	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
418 	if ((fd = open(fullpath, O_RDONLY)) < 0)
419 		return (B_FALSE);
420 
421 	/*
422 	 * Determine the devid as a string, with no trailing slash for the minor
423 	 * node.
424 	 */
425 	if (devid_get(fd, &devid) != 0) {
426 		(void) close(fd);
427 		return (B_FALSE);
428 	}
429 	(void) close(fd);
430 
431 	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
432 		devid_free(devid);
433 		return (B_FALSE);
434 	}
435 
436 	len = strlen(devidstr) + 2;
437 	fulldevid = alloca(len);
438 	(void) snprintf(fulldevid, len, "%s/", devidstr);
439 
440 	data.dd_compare = fulldevid;
441 	data.dd_func = func;
442 	data.dd_prop = ZPOOL_CONFIG_DEVID;
443 	data.dd_found = B_FALSE;
444 	data.dd_isdisk = wholedisk;
445 
446 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
447 
448 	devid_str_free(devidstr);
449 	devid_free(devid);
450 
451 	return (data.dd_found);
452 }
453 
454 /*
455  * This function is called when we receive a devfs add event.  This can be
456  * either a disk event or a lofi event, and the behavior is slightly different
457  * depending on which it is.
458  */
459 static int
460 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
461 {
462 	char *devpath, *devname;
463 	char path[PATH_MAX], realpath[PATH_MAX];
464 	char *colon, *raw;
465 	int ret;
466 
467 	/*
468 	 * The main unit of operation is the physical device path.  For disks,
469 	 * this is the device node, as all minor nodes are affected.  For lofi
470 	 * devices, this includes the minor path.  Unfortunately, this isn't
471 	 * represented in the DEV_PHYS_PATH for various reasons.
472 	 */
473 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
474 		return (-1);
475 
476 	/*
477 	 * If this is a lofi device, then also get the minor instance name.
478 	 * Unfortunately, the current payload doesn't include an easy way to get
479 	 * this information.  So we cheat by resolving the 'dev_name' (which
480 	 * refers to the raw device) and taking the portion between ':(*),raw'.
481 	 */
482 	(void) strlcpy(realpath, devpath, sizeof (realpath));
483 	if (is_lofi) {
484 		if (nvlist_lookup_string(nvl, DEV_NAME,
485 		    &devname) == 0 &&
486 		    (ret = resolvepath(devname, path,
487 		    sizeof (path))) > 0) {
488 			path[ret] = '\0';
489 			colon = strchr(path, ':');
490 			if (colon != NULL)
491 				raw = strstr(colon + 1, ",raw");
492 			if (colon != NULL && raw != NULL) {
493 				*raw = '\0';
494 				(void) snprintf(realpath,
495 				    sizeof (realpath), "%s%s",
496 				    devpath, colon);
497 				*raw = ',';
498 			}
499 		}
500 	}
501 
502 	/*
503 	 * Iterate over all vdevs with a matching devid, and then those with a
504 	 * matching /devices path.  For disks, we only want to pay attention to
505 	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
506 	 * matching an exact minor name).
507 	 */
508 	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
509 		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);
510 
511 	return (0);
512 }
513 
514 /*
515  * Called when we receive a VDEV_CHECK event, which indicates a device could not
516  * be opened during initial pool open, but the autoreplace property was set on
517  * the pool.  In this case, we treat it as if it were an add event.
518  */
519 static int
520 zfs_deliver_check(nvlist_t *nvl)
521 {
522 	dev_data_t data = { 0 };
523 
524 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
525 	    &data.dd_pool_guid) != 0 ||
526 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
527 	    &data.dd_vdev_guid) != 0 ||
528 	    data.dd_vdev_guid == 0)
529 		return (0);
530 
531 	data.dd_isdisk = B_TRUE;
532 	data.dd_func = zfs_process_add;
533 
534 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
535 
536 	return (0);
537 }
538 
/*
 * Prefix that zfs_deliver_dle() requires on the DEV_PHYS_PATH of an event
 * and strips before searching the pools by physical path.
 */
#define	DEVICE_PREFIX	"/devices"
540 
541 static int
542 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
543 {
544 	char *devname = data;
545 	boolean_t avail_spare, l2cache;
546 	vdev_state_t newstate;
547 	nvlist_t *tgt;
548 
549 	syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
550 	    devname, zpool_get_name(zhp));
551 
552 	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
553 	    &avail_spare, &l2cache, NULL)) != NULL) {
554 		char *path, fullpath[MAXPATHLEN];
555 		uint64_t wholedisk = 0ULL;
556 
557 		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
558 		    &path) == 0);
559 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
560 		    &wholedisk) == 0);
561 
562 		(void) strlcpy(fullpath, path, sizeof (fullpath));
563 		if (wholedisk) {
564 			fullpath[strlen(fullpath) - 2] = '\0';
565 
566 			/*
567 			 * We need to reopen the pool associated with this
568 			 * device so that the kernel can update the size
569 			 * of the expanded device.
570 			 */
571 			(void) zpool_reopen(zhp);
572 		}
573 
574 		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
575 			syseventd_print(9, "zfsdle_vdev_online: setting device"
576 			    " device %s to ONLINE state in pool %s.\n",
577 			    fullpath, zpool_get_name(zhp));
578 			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
579 				(void) zpool_vdev_online(zhp, fullpath, 0,
580 				    &newstate);
581 		}
582 		zpool_close(zhp);
583 		return (1);
584 	}
585 	zpool_close(zhp);
586 	return (0);
587 }
588 
589 /*
590  * This function is called for each vdev of a pool for which any of the
591  * following events was recieved:
592  *  - ESC_ZFS_vdev_add
593  *  - ESC_ZFS_vdev_attach
594  *  - ESC_ZFS_vdev_clear
595  *  - ESC_ZFS_vdev_online
596  *  - ESC_ZFS_pool_create
597  *  - ESC_ZFS_pool_import
598  * It will update the vdevs FRU property if it is out of date.
599  */
600 /*ARGSUSED2*/
601 static void
602 zfs_update_vdev_fru(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
603 {
604 	char *devpath, *cptr, *oldfru = NULL;
605 	const char *newfru;
606 	uint64_t vdev_guid;
607 
608 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
609 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &devpath);
610 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &oldfru);
611 
612 	/* remove :<slice> from devpath */
613 	cptr = strrchr(devpath, ':');
614 	if (cptr != NULL)
615 		*cptr = '\0';
616 
617 	newfru = libzfs_fru_lookup(g_zfshdl, devpath);
618 	if (newfru == NULL) {
619 		syseventd_print(9, "zfs_update_vdev_fru: no FRU for %s\n",
620 		    devpath);
621 		return;
622 	}
623 
624 	/* do nothing if the FRU hasn't changed */
625 	if (oldfru != NULL && libzfs_fru_compare(g_zfshdl, oldfru, newfru)) {
626 		syseventd_print(9, "zfs_update_vdev_fru: FRU unchanged\n");
627 		return;
628 	}
629 
630 	syseventd_print(9, "zfs_update_vdev_fru: devpath = %s\n", devpath);
631 	syseventd_print(9, "zfs_update_vdev_fru: FRU = %s\n", newfru);
632 
633 	(void) zpool_fru_set(zhp, vdev_guid, newfru);
634 }
635 
636 /*
637  * This function handles the following events:
638  *  - ESC_ZFS_vdev_add
639  *  - ESC_ZFS_vdev_attach
640  *  - ESC_ZFS_vdev_clear
641  *  - ESC_ZFS_vdev_online
642  *  - ESC_ZFS_pool_create
643  *  - ESC_ZFS_pool_import
644  * It will iterate over the pool vdevs to update the FRU property.
645  */
646 int
647 zfs_deliver_update(nvlist_t *nvl)
648 {
649 	dev_data_t dd = { 0 };
650 	char *pname;
651 	zpool_handle_t *zhp;
652 	nvlist_t *config, *vdev;
653 
654 	if (nvlist_lookup_string(nvl, "pool_name", &pname) != 0) {
655 		syseventd_print(9, "zfs_deliver_update: no pool name\n");
656 		return (-1);
657 	}
658 
659 	/*
660 	 * If this event was triggered by a pool export or destroy we cannot
661 	 * open the pool. This is not an error, just return 0 as we don't care
662 	 * about these events.
663 	 */
664 	zhp = zpool_open_canfail(g_zfshdl, pname);
665 	if (zhp == NULL)
666 		return (0);
667 
668 	config = zpool_get_config(zhp, NULL);
669 	if (config == NULL) {
670 		syseventd_print(9, "zfs_deliver_update: "
671 		    "failed to get pool config for %s\n", pname);
672 		zpool_close(zhp);
673 		return (-1);
674 	}
675 
676 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev) != 0) {
677 		syseventd_print(0, "zfs_deliver_update: "
678 		    "failed to get vdev tree for %s\n", pname);
679 		zpool_close(zhp);
680 		return (-1);
681 	}
682 
683 	libzfs_fru_refresh(g_zfshdl);
684 
685 	dd.dd_func = zfs_update_vdev_fru;
686 	zfs_iter_vdev(zhp, vdev, &dd);
687 
688 	zpool_close(zhp);
689 	return (0);
690 }
691 
692 int
693 zfs_deliver_dle(nvlist_t *nvl)
694 {
695 	char *devname;
696 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
697 		syseventd_print(9, "zfs_deliver_event: no physpath\n");
698 		return (-1);
699 	}
700 	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
701 		syseventd_print(9, "zfs_deliver_event: invalid "
702 		    "device '%s'", devname);
703 		return (-1);
704 	}
705 
706 	/*
707 	 * We try to find the device using the physical
708 	 * path that has been supplied. We need to strip off
709 	 * the /devices prefix before starting our search.
710 	 */
711 	devname += strlen(DEVICE_PREFIX);
712 	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
713 		syseventd_print(9, "zfs_deliver_event: device '%s' not"
714 		    " found\n", devname);
715 		return (1);
716 	}
717 	return (0);
718 }
719 
720 
721 /*ARGSUSED*/
722 static int
723 zfs_deliver_event(sysevent_t *ev, int unused)
724 {
725 	const char *class = sysevent_get_class_name(ev);
726 	const char *subclass = sysevent_get_subclass_name(ev);
727 	nvlist_t *nvl;
728 	int ret;
729 	boolean_t is_lofi = B_FALSE, is_check = B_FALSE;
730 	boolean_t is_dle = B_FALSE, is_update = B_FALSE;
731 
732 	if (strcmp(class, EC_DEV_ADD) == 0) {
733 		/*
734 		 * We're mainly interested in disk additions, but we also listen
735 		 * for new lofi devices, to allow for simplified testing.
736 		 */
737 		if (strcmp(subclass, ESC_DISK) == 0)
738 			is_lofi = B_FALSE;
739 		else if (strcmp(subclass, ESC_LOFI) == 0)
740 			is_lofi = B_TRUE;
741 		else
742 			return (0);
743 
744 		is_check = B_FALSE;
745 	} else if (strcmp(class, EC_ZFS) == 0) {
746 		if (strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
747 			/*
748 			 * This event signifies that a device failed to open
749 			 * during pool load, but the 'autoreplace' property was
750 			 * set, so we should pretend it's just been added.
751 			 */
752 			is_check = B_TRUE;
753 		} else if ((strcmp(subclass, ESC_ZFS_VDEV_ADD) == 0) ||
754 		    (strcmp(subclass, ESC_ZFS_VDEV_ATTACH) == 0) ||
755 		    (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) == 0) ||
756 		    (strcmp(subclass, ESC_ZFS_VDEV_ONLINE) == 0) ||
757 		    (strcmp(subclass, ESC_ZFS_POOL_CREATE) == 0) ||
758 		    (strcmp(subclass, ESC_ZFS_POOL_IMPORT) == 0)) {
759 			/*
760 			 * When we receive these events we check the pool
761 			 * configuration and update the vdev FRUs if necessary.
762 			 */
763 			is_update = B_TRUE;
764 		}
765 	} else if (strcmp(class, EC_DEV_STATUS) == 0 &&
766 	    strcmp(subclass, ESC_DEV_DLE) == 0) {
767 		is_dle = B_TRUE;
768 	} else {
769 		return (0);
770 	}
771 
772 	if (sysevent_get_attr_list(ev, &nvl) != 0)
773 		return (-1);
774 
775 	if (is_dle)
776 		ret = zfs_deliver_dle(nvl);
777 	else if (is_update)
778 		ret = zfs_deliver_update(nvl);
779 	else if (is_check)
780 		ret = zfs_deliver_check(nvl);
781 	else
782 		ret = zfs_deliver_add(nvl, is_lofi);
783 
784 	nvlist_free(nvl);
785 	return (ret);
786 }
787 
788 /*ARGSUSED*/
789 void *
790 zfs_enum_pools(void *arg)
791 {
792 	(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
793 	if (!list_is_empty(&g_pool_list))
794 		g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
795 		    0, NULL);
796 	g_enumeration_done = B_TRUE;
797 	return (NULL);
798 }
799 
/*
 * Module operations vector returned to the caller of slm_init().  It names
 * zfs_deliver_event() as the event delivery routine.  The third field (10)
 * is presumably the delivery retry limit -- TODO confirm against the
 * definition of struct slm_mod_ops in syseventd.h.
 */
static struct slm_mod_ops zfs_mod_ops = {
	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
};
803 
804 struct slm_mod_ops *
805 slm_init()
806 {
807 	if ((g_zfshdl = libzfs_init()) == NULL)
808 		return (NULL);
809 	/*
810 	 * collect a list of unavailable pools (asynchronously,
811 	 * since this can take a while)
812 	 */
813 	list_create(&g_pool_list, sizeof (struct unavailpool),
814 	    offsetof(struct unavailpool, uap_node));
815 	if (thr_create(NULL, 0, zfs_enum_pools, NULL, 0, &g_zfs_tid) != 0)
816 		return (NULL);
817 	return (&zfs_mod_ops);
818 }
819 
820 void
821 slm_fini()
822 {
823 	unavailpool_t *pool;
824 
825 	(void) thr_join(g_zfs_tid, NULL, NULL);
826 	if (g_tpool != NULL) {
827 		tpool_wait(g_tpool);
828 		tpool_destroy(g_tpool);
829 	}
830 	while ((pool = (list_head(&g_pool_list))) != NULL) {
831 		list_remove(&g_pool_list, pool);
832 		zpool_close(pool->uap_zhp);
833 		free(pool);
834 	}
835 	list_destroy(&g_pool_list);
836 	libzfs_fini(g_zfshdl);
837 }
838