xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_disk.c (revision a92282e44f968185a6bba094d1e5fece2da819cf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
25  * Copyright 2020 Joyent, Inc.
26  * Copyright 2020 Joshua M. Clulow <josh@sysmgr.org>
27  */
28 
29 #include <sys/zfs_context.h>
30 #include <sys/spa_impl.h>
31 #include <sys/refcount.h>
32 #include <sys/vdev_impl.h>
33 #include <sys/vdev_trim.h>
34 #include <sys/abd.h>
35 #include <sys/fs/zfs.h>
36 #include <sys/zio.h>
37 #include <sys/sunldi.h>
38 #include <sys/efi_partition.h>
39 #include <sys/fm/fs/zfs.h>
40 #include <sys/ddi.h>
41 
42 /*
43  * Tunable to disable TRIM in case we're using a problematic SSD.
44  */
45 uint_t zfs_no_trim = 0;
46 
47 /*
48  * Tunable parameter for debugging or performance analysis. Setting this
49  * will cause pool corruption on power loss if a volatile out-of-order
50  * write cache is enabled.
51  */
52 boolean_t zfs_nocacheflush = B_FALSE;
53 
54 /*
55  * Virtual device vector for disks.
56  */
57 
58 extern ldi_ident_t zfs_li;
59 
60 static void vdev_disk_close(vdev_t *);
61 
62 typedef struct vdev_disk {
63 	ddi_devid_t	vd_devid;
64 	char		*vd_minor;
65 	ldi_handle_t	vd_lh;
66 	list_t		vd_ldi_cbs;
67 	boolean_t	vd_ldi_offline;
68 } vdev_disk_t;
69 
70 typedef struct vdev_disk_buf {
71 	buf_t	vdb_buf;
72 	zio_t	*vdb_io;
73 } vdev_disk_buf_t;
74 
75 typedef struct vdev_disk_ldi_cb {
76 	list_node_t		lcb_next;
77 	ldi_callback_id_t	lcb_id;
78 } vdev_disk_ldi_cb_t;
79 
80 /*
81  * Bypass the devid when opening a disk vdev.
82  * There have been issues where the devids of several devices were shuffled,
83  * causing pool open failures. Note, that this flag is intended to be used
84  * for pool recovery only.
85  *
86  * Note that if a pool is imported with the devids bypassed, all its vdevs will
87  * cease storing devid information permanently. In practice, the devid is rarely
88  * useful as vdev paths do not tend to change unless the hardware is
89  * reconfigured. That said, if the paths do change and a pool fails to open
90  * automatically at boot, a simple zpool import should re-scan the paths and fix
91  * the issue.
92  */
93 boolean_t vdev_disk_bypass_devid = B_FALSE;
94 
95 static void
96 vdev_disk_alloc(vdev_t *vd)
97 {
98 	vdev_disk_t *dvd;
99 
100 	dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
101 	/*
102 	 * Create the LDI event callback list.
103 	 */
104 	list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
105 	    offsetof(vdev_disk_ldi_cb_t, lcb_next));
106 }
107 
108 static void
109 vdev_disk_free(vdev_t *vd)
110 {
111 	vdev_disk_t *dvd = vd->vdev_tsd;
112 	vdev_disk_ldi_cb_t *lcb;
113 
114 	if (dvd == NULL)
115 		return;
116 
117 	/*
118 	 * We have already closed the LDI handle. Clean up the LDI event
119 	 * callbacks and free vd->vdev_tsd.
120 	 */
121 	while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
122 		list_remove(&dvd->vd_ldi_cbs, lcb);
123 		(void) ldi_ev_remove_callbacks(lcb->lcb_id);
124 		kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
125 	}
126 	list_destroy(&dvd->vd_ldi_cbs);
127 	kmem_free(dvd, sizeof (vdev_disk_t));
128 	vd->vdev_tsd = NULL;
129 }
130 
131 static int
132 vdev_disk_off_notify(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie,
133     void *arg, void *ev_data __unused)
134 {
135 	vdev_t *vd = (vdev_t *)arg;
136 	vdev_disk_t *dvd = vd->vdev_tsd;
137 
138 	/*
139 	 * Ignore events other than offline.
140 	 */
141 	if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
142 		return (LDI_EV_SUCCESS);
143 
144 	/*
145 	 * Tell any new threads that stumble upon this vdev that they should not
146 	 * try to do I/O.
147 	 */
148 	dvd->vd_ldi_offline = B_TRUE;
149 
150 	/*
151 	 * Request that the spa_async_thread mark the device as REMOVED and
152 	 * notify FMA of the removal.  This should also trigger a vdev_close()
153 	 * in the async thread.
154 	 */
155 	zfs_post_remove(vd->vdev_spa, vd);
156 	vd->vdev_remove_wanted = B_TRUE;
157 	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
158 
159 	return (LDI_EV_SUCCESS);
160 }
161 
162 static void
163 vdev_disk_off_finalize(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie,
164     int ldi_result, void *arg, void *ev_data __unused)
165 {
166 	vdev_t *vd = (vdev_t *)arg;
167 
168 	/*
169 	 * Ignore events other than offline.
170 	 */
171 	if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
172 		return;
173 
174 	/*
175 	 * Request that the vdev be reopened if the offline state change was
176 	 * unsuccessful.
177 	 */
178 	if (ldi_result != LDI_EV_SUCCESS) {
179 		vd->vdev_probe_wanted = B_TRUE;
180 		spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE);
181 	}
182 }
183 
184 static ldi_ev_callback_t vdev_disk_off_callb = {
185 	.cb_vers = LDI_EV_CB_VERS,
186 	.cb_notify = vdev_disk_off_notify,
187 	.cb_finalize = vdev_disk_off_finalize
188 };
189 
190 static void
191 vdev_disk_dgrd_finalize(ldi_handle_t lh __unused, ldi_ev_cookie_t ecookie,
192     int ldi_result, void *arg, void *ev_data __unused)
193 {
194 	vdev_t *vd = (vdev_t *)arg;
195 
196 	/*
197 	 * Ignore events other than degrade.
198 	 */
199 	if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0)
200 		return;
201 
202 	/*
203 	 * Degrade events always succeed. Mark the vdev as degraded.
204 	 * This status is purely informative for the user.
205 	 */
206 	(void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0);
207 }
208 
209 static ldi_ev_callback_t vdev_disk_dgrd_callb = {
210 	.cb_vers = LDI_EV_CB_VERS,
211 	.cb_notify = NULL,
212 	.cb_finalize = vdev_disk_dgrd_finalize
213 };
214 
215 static void
216 vdev_disk_hold(vdev_t *vd)
217 {
218 	ddi_devid_t devid;
219 	char *minor;
220 
221 	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
222 
223 	/*
224 	 * We must have a pathname, and it must be absolute.
225 	 */
226 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
227 		return;
228 
229 	/*
230 	 * Only prefetch path and devid info if the device has
231 	 * never been opened.
232 	 */
233 	if (vd->vdev_tsd != NULL)
234 		return;
235 
236 	if (vd->vdev_wholedisk == -1ULL) {
237 		size_t len = strlen(vd->vdev_path) + 3;
238 		char *buf = kmem_alloc(len, KM_SLEEP);
239 
240 		(void) snprintf(buf, len, "%ss0", vd->vdev_path);
241 
242 		(void) ldi_vp_from_name(buf, &vd->vdev_name_vp);
243 		kmem_free(buf, len);
244 	}
245 
246 	if (vd->vdev_name_vp == NULL)
247 		(void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp);
248 
249 	if (vd->vdev_devid != NULL &&
250 	    ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) {
251 		(void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp);
252 		ddi_devid_str_free(minor);
253 		ddi_devid_free(devid);
254 	}
255 }
256 
257 static void
258 vdev_disk_rele(vdev_t *vd)
259 {
260 	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));
261 
262 	if (vd->vdev_name_vp) {
263 		VN_RELE_ASYNC(vd->vdev_name_vp,
264 		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
265 		vd->vdev_name_vp = NULL;
266 	}
267 	if (vd->vdev_devid_vp) {
268 		VN_RELE_ASYNC(vd->vdev_devid_vp,
269 		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
270 		vd->vdev_devid_vp = NULL;
271 	}
272 }
273 
/*
 * VDEV_DEBUG() emits a CE_NOTE message on DEBUG kernels and expands to
 * nothing otherwise.  It is used to be loud when DKIOCGMEDIAINFOEXT fails,
 * or when even the fallback to DKIOCGMEDIAINFO fails.
 */
#if defined(DEBUG)
#define	VDEV_DEBUG(...)	cmn_err(CE_NOTE, __VA_ARGS__)
#else
#define	VDEV_DEBUG(...)	/* Nothing... */
#endif
283 
284 static int
285 vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
286     uint64_t *ashift)
287 {
288 	spa_t *spa = vd->vdev_spa;
289 	vdev_disk_t *dvd = vd->vdev_tsd;
290 	ldi_ev_cookie_t ecookie;
291 	vdev_disk_ldi_cb_t *lcb;
292 	union {
293 		struct dk_minfo_ext ude;
294 		struct dk_minfo ud;
295 	} dks;
296 	struct dk_minfo_ext *dkmext = &dks.ude;
297 	struct dk_minfo *dkm = &dks.ud;
298 	int error, can_free;
299 	dev_t dev;
300 	int otyp;
301 	boolean_t validate_devid = B_FALSE;
302 	uint64_t capacity = 0, blksz = 0, pbsize;
303 
304 	/*
305 	 * We must have a pathname, and it must be absolute.
306 	 */
307 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
308 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
309 		return (SET_ERROR(EINVAL));
310 	}
311 
312 	/*
313 	 * Reopen the device if it's not currently open. Otherwise,
314 	 * just update the physical size of the device.
315 	 */
316 	if (dvd != NULL) {
317 		ASSERT(vd->vdev_reopening);
318 		goto skip_open;
319 	}
320 
321 	/*
322 	 * Create vd->vdev_tsd.
323 	 */
324 	vdev_disk_alloc(vd);
325 	dvd = vd->vdev_tsd;
326 
327 	/*
328 	 * Allow bypassing the devid.
329 	 */
330 	if (vd->vdev_devid != NULL && vdev_disk_bypass_devid) {
331 		vdev_dbgmsg(vd, "vdev_disk_open, devid %s bypassed",
332 		    vd->vdev_devid);
333 		spa_strfree(vd->vdev_devid);
334 		vd->vdev_devid = NULL;
335 	}
336 
337 	/*
338 	 * When opening a disk device, we want to preserve the user's original
339 	 * intent.  We always want to open the device by the path the user gave
340 	 * us, even if it is one of multiple paths to the same device.  But we
341 	 * also want to be able to survive disks being removed/recabled.
342 	 * Therefore the sequence of opening devices is:
343 	 *
344 	 * 1. Try opening the device by path.  For legacy pools without the
345 	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
346 	 *
347 	 * 2. If the devid of the device matches the stored value, return
348 	 *    success.
349 	 *
350 	 * 3. Otherwise, the device may have moved.  Try opening the device
351 	 *    by the devid instead.
352 	 */
353 	if (vd->vdev_devid != NULL) {
354 		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
355 		    &dvd->vd_minor) != 0) {
356 			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
357 			vdev_dbgmsg(vd, "vdev_disk_open: invalid "
358 			    "vdev_devid '%s'", vd->vdev_devid);
359 			return (SET_ERROR(EINVAL));
360 		}
361 	}
362 
363 	error = EINVAL;		/* presume failure */
364 
365 	if (vd->vdev_path != NULL) {
366 		if (vd->vdev_wholedisk == -1ULL) {
367 			size_t len = strlen(vd->vdev_path) + 3;
368 			char *buf = kmem_alloc(len, KM_SLEEP);
369 
370 			(void) snprintf(buf, len, "%ss0", vd->vdev_path);
371 
372 			error = ldi_open_by_name(buf, spa_mode(spa), kcred,
373 			    &dvd->vd_lh, zfs_li);
374 			if (error == 0) {
375 				spa_strfree(vd->vdev_path);
376 				vd->vdev_path = buf;
377 				vd->vdev_wholedisk = 1ULL;
378 			} else {
379 				kmem_free(buf, len);
380 			}
381 		}
382 
383 		/*
384 		 * If we have not yet opened the device, try to open it by the
385 		 * specified path.
386 		 */
387 		if (error != 0) {
388 			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
389 			    kcred, &dvd->vd_lh, zfs_li);
390 		}
391 
392 		/*
393 		 * Compare the devid to the stored value.
394 		 */
395 		if (error == 0 && vd->vdev_devid != NULL) {
396 			ddi_devid_t devid = NULL;
397 
398 			if (ldi_get_devid(dvd->vd_lh, &devid) != 0) {
399 				/*
400 				 * We expected a devid on this device but it no
401 				 * longer appears to have one.  The validation
402 				 * step may need to remove it from the
403 				 * configuration.
404 				 */
405 				validate_devid = B_TRUE;
406 
407 			} else if (ddi_devid_compare(devid, dvd->vd_devid) !=
408 			    0) {
409 				/*
410 				 * A mismatch here is unexpected, log it.
411 				 */
412 				char *devid_str = ddi_devid_str_encode(devid,
413 				    dvd->vd_minor);
414 				vdev_dbgmsg(vd, "vdev_disk_open: devid "
415 				    "mismatch: %s != %s", vd->vdev_devid,
416 				    devid_str);
417 				cmn_err(CE_NOTE, "vdev_disk_open %s: devid "
418 				    "mismatch: %s != %s", vd->vdev_path,
419 				    vd->vdev_devid, devid_str);
420 				ddi_devid_str_free(devid_str);
421 
422 				error = SET_ERROR(EINVAL);
423 				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
424 				    kcred);
425 				dvd->vd_lh = NULL;
426 			}
427 
428 			if (devid != NULL) {
429 				ddi_devid_free(devid);
430 			}
431 		}
432 
433 		/*
434 		 * If we succeeded in opening the device, but 'vdev_wholedisk'
435 		 * is not yet set, then this must be a slice.
436 		 */
437 		if (error == 0 && vd->vdev_wholedisk == -1ULL)
438 			vd->vdev_wholedisk = 0;
439 	}
440 
441 	/*
442 	 * If we were unable to open by path, or the devid check fails, open by
443 	 * devid instead.
444 	 */
445 	if (error != 0 && vd->vdev_devid != NULL) {
446 		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
447 		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);
448 		if (error != 0) {
449 			vdev_dbgmsg(vd, "Failed to open by devid (%s)",
450 			    vd->vdev_devid);
451 		}
452 	}
453 
454 	/*
455 	 * If all else fails, then try opening by physical path (if available)
456 	 * or the logical path (if we failed due to the devid check).  While not
457 	 * as reliable as the devid, this will give us something, and the higher
458 	 * level vdev validation will prevent us from opening the wrong device.
459 	 */
460 	if (error != 0) {
461 		validate_devid = B_TRUE;
462 
463 		if (vd->vdev_physpath != NULL &&
464 		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV) {
465 			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
466 			    kcred, &dvd->vd_lh, zfs_li);
467 		}
468 
469 		/*
470 		 * Note that we don't support the legacy auto-wholedisk support
471 		 * as above.  This hasn't been used in a very long time and we
472 		 * don't need to propagate its oddities to this edge condition.
473 		 */
474 		if (error != 0 && vd->vdev_path != NULL) {
475 			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
476 			    kcred, &dvd->vd_lh, zfs_li);
477 		}
478 	}
479 
480 	/*
481 	 * If this is early in boot, a sweep of available block devices may
482 	 * locate an alternative path that we can try.
483 	 */
484 	if (error != 0) {
485 		const char *altdevpath = vdev_disk_preroot_lookup(
486 		    spa_guid(spa), vd->vdev_guid);
487 
488 		if (altdevpath != NULL) {
489 			vdev_dbgmsg(vd, "Trying alternate preroot path (%s)",
490 			    altdevpath);
491 
492 			validate_devid = B_TRUE;
493 
494 			if ((error = ldi_open_by_name((char *)altdevpath,
495 			    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li)) != 0) {
496 				vdev_dbgmsg(vd, "Failed to open by preroot "
497 				    "path (%s)", altdevpath);
498 			}
499 		}
500 	}
501 
502 	if (error != 0) {
503 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
504 		vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]",
505 		    error);
506 		return (error);
507 	}
508 
509 	/*
510 	 * Now that the device has been successfully opened, update the devid
511 	 * if necessary.
512 	 */
513 	if (validate_devid) {
514 		ddi_devid_t devid = NULL;
515 		char *minorname = NULL;
516 		char *vd_devid = NULL;
517 		boolean_t remove = B_FALSE, update = B_FALSE;
518 
519 		/*
520 		 * Get the current devid and minor name for the device we
521 		 * opened.
522 		 */
523 		if (ldi_get_devid(dvd->vd_lh, &devid) != 0 ||
524 		    ldi_get_minor_name(dvd->vd_lh, &minorname) != 0) {
525 			/*
526 			 * If we are unable to get the devid or the minor name
527 			 * for the device, we need to remove them from the
528 			 * configuration to prevent potential inconsistencies.
529 			 */
530 			if (dvd->vd_minor != NULL || dvd->vd_devid != NULL ||
531 			    vd->vdev_devid != NULL) {
532 				/*
533 				 * We only need to remove the devid if one
534 				 * exists.
535 				 */
536 				remove = B_TRUE;
537 			}
538 
539 		} else if (dvd->vd_devid == NULL || dvd->vd_minor == NULL) {
540 			/*
541 			 * There was previously no devid at all so we need to
542 			 * add one.
543 			 */
544 			update = B_TRUE;
545 
546 		} else if (ddi_devid_compare(devid, dvd->vd_devid) != 0 ||
547 		    strcmp(minorname, dvd->vd_minor) != 0) {
548 			/*
549 			 * The devid or minor name on file does not match the
550 			 * one from the opened device.
551 			 */
552 			update = B_TRUE;
553 		}
554 
555 		if (update) {
556 			/*
557 			 * Render the new devid and minor name as a string for
558 			 * logging and to store in the vdev configuration.
559 			 */
560 			vd_devid = ddi_devid_str_encode(devid, minorname);
561 		}
562 
563 		if (update || remove) {
564 			vdev_dbgmsg(vd, "vdev_disk_open: update devid from "
565 			    "'%s' to '%s'",
566 			    vd->vdev_devid != NULL ? vd->vdev_devid : "<none>",
567 			    vd_devid != NULL ? vd_devid : "<none>");
568 			cmn_err(CE_NOTE, "vdev_disk_open %s: update devid "
569 			    "from '%s' to '%s'",
570 			    vd->vdev_path != NULL ? vd->vdev_path : "?",
571 			    vd->vdev_devid != NULL ? vd->vdev_devid : "<none>",
572 			    vd_devid != NULL ? vd_devid : "<none>");
573 
574 			/*
575 			 * Remove and free any existing values.
576 			 */
577 			if (dvd->vd_minor != NULL) {
578 				ddi_devid_str_free(dvd->vd_minor);
579 				dvd->vd_minor = NULL;
580 			}
581 			if (dvd->vd_devid != NULL) {
582 				ddi_devid_free(dvd->vd_devid);
583 				dvd->vd_devid = NULL;
584 			}
585 			if (vd->vdev_devid != NULL) {
586 				spa_strfree(vd->vdev_devid);
587 				vd->vdev_devid = NULL;
588 			}
589 		}
590 
591 		if (update) {
592 			/*
593 			 * Install the new values.
594 			 */
595 			vd->vdev_devid = vd_devid;
596 			dvd->vd_minor = minorname;
597 			dvd->vd_devid = devid;
598 
599 		} else {
600 			if (devid != NULL) {
601 				ddi_devid_free(devid);
602 			}
603 			if (minorname != NULL) {
604 				kmem_free(minorname, strlen(minorname) + 1);
605 			}
606 		}
607 	}
608 
609 	/*
610 	 * Once a device is opened, verify that the physical device path (if
611 	 * available) is up to date.
612 	 */
613 	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
614 	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
615 		char *physpath, *minorname;
616 
617 		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
618 		minorname = NULL;
619 		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
620 		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
621 		    (vd->vdev_physpath == NULL ||
622 		    strcmp(vd->vdev_physpath, physpath) != 0)) {
623 			if (vd->vdev_physpath)
624 				spa_strfree(vd->vdev_physpath);
625 			(void) strlcat(physpath, ":", MAXPATHLEN);
626 			(void) strlcat(physpath, minorname, MAXPATHLEN);
627 			vd->vdev_physpath = spa_strdup(physpath);
628 		}
629 		if (minorname)
630 			kmem_free(minorname, strlen(minorname) + 1);
631 		kmem_free(physpath, MAXPATHLEN);
632 	}
633 
634 	/*
635 	 * Register callbacks for the LDI offline event.
636 	 */
637 	if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
638 	    LDI_EV_SUCCESS) {
639 		lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
640 		list_insert_tail(&dvd->vd_ldi_cbs, lcb);
641 		(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
642 		    &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
643 	}
644 
645 	/*
646 	 * Register callbacks for the LDI degrade event.
647 	 */
648 	if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
649 	    LDI_EV_SUCCESS) {
650 		lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
651 		list_insert_tail(&dvd->vd_ldi_cbs, lcb);
652 		(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
653 		    &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
654 	}
655 
656 skip_open:
657 	/*
658 	 * Determine the actual size of the device.
659 	 */
660 	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
661 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
662 		vdev_dbgmsg(vd, "vdev_disk_open: failed to get size");
663 		return (SET_ERROR(EINVAL));
664 	}
665 
666 	*max_psize = *psize;
667 
668 	/*
669 	 * Determine the device's minimum transfer size.
670 	 * If the ioctl isn't supported, assume DEV_BSIZE.
671 	 */
672 	if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT,
673 	    (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) {
674 		capacity = dkmext->dki_capacity - 1;
675 		blksz = dkmext->dki_lbsize;
676 		pbsize = dkmext->dki_pbsize;
677 	} else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO,
678 	    (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) {
679 		VDEV_DEBUG(
680 		    "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n",
681 		    vd->vdev_path);
682 		capacity = dkm->dki_capacity - 1;
683 		blksz = dkm->dki_lbsize;
684 		pbsize = blksz;
685 	} else {
686 		VDEV_DEBUG("vdev_disk_open(\"%s\"): "
687 		    "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
688 		    vd->vdev_path, error);
689 		pbsize = DEV_BSIZE;
690 	}
691 
692 	*ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;
693 
694 	if (vd->vdev_wholedisk == 1) {
695 		int wce = 1;
696 
697 		if (error == 0) {
698 			/*
699 			 * If we have the capability to expand, we'd have
700 			 * found out via success from DKIOCGMEDIAINFO{,EXT}.
701 			 * Adjust max_psize upward accordingly since we know
702 			 * we own the whole disk now.
703 			 */
704 			*max_psize = capacity * blksz;
705 		}
706 
707 		/*
708 		 * Since we own the whole disk, try to enable disk write
709 		 * caching.  We ignore errors because it's OK if we can't do it.
710 		 */
711 		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
712 		    FKIOCTL, kcred, NULL);
713 	}
714 
715 	/*
716 	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
717 	 * try again.
718 	 */
719 	vd->vdev_nowritecache = B_FALSE;
720 
721 	if (ldi_ioctl(dvd->vd_lh, DKIOC_CANFREE, (intptr_t)&can_free, FKIOCTL,
722 	    kcred, NULL) == 0 && can_free == 1) {
723 		vd->vdev_has_trim = B_TRUE;
724 	} else {
725 		vd->vdev_has_trim = B_FALSE;
726 	}
727 
728 	if (zfs_no_trim == 1)
729 		vd->vdev_has_trim = B_FALSE;
730 
731 	/* Currently only supported for ZoL. */
732 	vd->vdev_has_securetrim = B_FALSE;
733 
734 	/* Inform the ZIO pipeline that we are non-rotational */
735 	vd->vdev_nonrot = B_FALSE;
736 	if (ldi_prop_exists(dvd->vd_lh, DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
737 	    "device-solid-state")) {
738 		if (ldi_prop_get_int(dvd->vd_lh,
739 		    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
740 		    "device-solid-state", B_FALSE) != 0)
741 			vd->vdev_nonrot = B_TRUE;
742 	}
743 
744 	return (0);
745 }
746 
747 static void
748 vdev_disk_close(vdev_t *vd)
749 {
750 	vdev_disk_t *dvd = vd->vdev_tsd;
751 
752 	if (vd->vdev_reopening || dvd == NULL)
753 		return;
754 
755 	if (dvd->vd_minor != NULL) {
756 		ddi_devid_str_free(dvd->vd_minor);
757 		dvd->vd_minor = NULL;
758 	}
759 
760 	if (dvd->vd_devid != NULL) {
761 		ddi_devid_free(dvd->vd_devid);
762 		dvd->vd_devid = NULL;
763 	}
764 
765 	if (dvd->vd_lh != NULL) {
766 		(void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
767 		dvd->vd_lh = NULL;
768 	}
769 
770 	vd->vdev_delayed_close = B_FALSE;
771 	vdev_disk_free(vd);
772 }
773 
774 static int
775 vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
776     size_t size, uint64_t offset, int flags)
777 {
778 	buf_t *bp;
779 	int error = 0;
780 
781 	if (vd_lh == NULL)
782 		return (SET_ERROR(EINVAL));
783 
784 	ASSERT(flags & B_READ || flags & B_WRITE);
785 
786 	bp = getrbuf(KM_SLEEP);
787 	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
788 	bp->b_bcount = size;
789 	bp->b_un.b_addr = (void *)data;
790 	bp->b_lblkno = lbtodb(offset);
791 	bp->b_bufsize = size;
792 
793 	error = ldi_strategy(vd_lh, bp);
794 	ASSERT(error == 0);
795 	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
796 		error = SET_ERROR(EIO);
797 	freerbuf(bp);
798 
799 	return (error);
800 }
801 
802 static int
803 vdev_disk_dumpio(vdev_t *vd, caddr_t data, size_t size,
804     uint64_t offset, uint64_t origoffset __unused, boolean_t doread,
805     boolean_t isdump)
806 {
807 	vdev_disk_t *dvd = vd->vdev_tsd;
808 	int flags = doread ? B_READ : B_WRITE;
809 
810 	/*
811 	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
812 	 * Nothing to be done here but return failure.
813 	 */
814 	if (dvd == NULL || dvd->vd_ldi_offline) {
815 		return (SET_ERROR(ENXIO));
816 	}
817 
818 	ASSERT(vd->vdev_ops == &vdev_disk_ops);
819 
820 	offset += VDEV_LABEL_START_SIZE;
821 
822 	/*
823 	 * If in the context of an active crash dump, use the ldi_dump(9F)
824 	 * call instead of ldi_strategy(9F) as usual.
825 	 */
826 	if (isdump) {
827 		ASSERT3P(dvd, !=, NULL);
828 		return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
829 		    lbtodb(size)));
830 	}
831 
832 	return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
833 }
834 
835 static int
836 vdev_disk_io_intr(buf_t *bp)
837 {
838 	vdev_buf_t *vb = (vdev_buf_t *)bp;
839 	zio_t *zio = vb->vb_io;
840 
841 	/*
842 	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
843 	 * Rather than teach the rest of the stack about other error
844 	 * possibilities (EFAULT, etc), we normalize the error value here.
845 	 */
846 	zio->io_error = (geterror(bp) != 0 ? EIO : 0);
847 
848 	if (zio->io_error == 0 && bp->b_resid != 0)
849 		zio->io_error = SET_ERROR(EIO);
850 
851 	if (zio->io_type == ZIO_TYPE_READ) {
852 		abd_return_buf_copy(zio->io_abd, bp->b_un.b_addr, zio->io_size);
853 	} else {
854 		abd_return_buf(zio->io_abd, bp->b_un.b_addr, zio->io_size);
855 	}
856 
857 	kmem_free(vb, sizeof (vdev_buf_t));
858 
859 	zio_delay_interrupt(zio);
860 	return (0);
861 }
862 
863 static void
864 vdev_disk_ioctl_free(zio_t *zio)
865 {
866 	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
867 }
868 
869 static const zio_vsd_ops_t vdev_disk_vsd_ops = {
870 	vdev_disk_ioctl_free,
871 	zio_vsd_default_cksum_report
872 };
873 
874 static void
875 vdev_disk_ioctl_done(void *zio_arg, int error)
876 {
877 	zio_t *zio = zio_arg;
878 
879 	zio->io_error = error;
880 
881 	zio_interrupt(zio);
882 }
883 
884 static void
885 vdev_disk_io_start(zio_t *zio)
886 {
887 	vdev_t *vd = zio->io_vd;
888 	vdev_disk_t *dvd = vd->vdev_tsd;
889 	unsigned long trim_flags = 0;
890 	vdev_buf_t *vb;
891 	struct dk_callback *dkc;
892 	buf_t *bp;
893 	int error;
894 
895 	/*
896 	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
897 	 * Nothing to be done here but return failure.
898 	 */
899 	if (dvd == NULL || dvd->vd_ldi_offline) {
900 		zio->io_error = ENXIO;
901 		zio_interrupt(zio);
902 		return;
903 	}
904 
905 	switch (zio->io_type) {
906 	case ZIO_TYPE_IOCTL:
907 		/* XXPOLICY */
908 		if (!vdev_readable(vd)) {
909 			zio->io_error = SET_ERROR(ENXIO);
910 			zio_interrupt(zio);
911 			return;
912 		}
913 
914 		switch (zio->io_cmd) {
915 
916 		case DKIOCFLUSHWRITECACHE:
917 
918 			if (zfs_nocacheflush)
919 				break;
920 
921 			if (vd->vdev_nowritecache) {
922 				zio->io_error = SET_ERROR(ENOTSUP);
923 				break;
924 			}
925 
926 			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
927 			zio->io_vsd_ops = &vdev_disk_vsd_ops;
928 
929 			dkc->dkc_callback = vdev_disk_ioctl_done;
930 			dkc->dkc_flag = FLUSH_VOLATILE;
931 			dkc->dkc_cookie = zio;
932 
933 			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
934 			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);
935 
936 			if (error == 0) {
937 				/*
938 				 * The ioctl will be done asychronously,
939 				 * and will call vdev_disk_ioctl_done()
940 				 * upon completion.
941 				 */
942 				return;
943 			}
944 
945 			zio->io_error = error;
946 
947 			break;
948 
949 		default:
950 			zio->io_error = SET_ERROR(ENOTSUP);
951 		}
952 
953 		zio_execute(zio);
954 		return;
955 
956 	case ZIO_TYPE_TRIM:
957 		if (zfs_no_trim == 1 || !vd->vdev_has_trim) {
958 			zio->io_error = SET_ERROR(ENOTSUP);
959 			zio_execute(zio);
960 			return;
961 		}
962 		/* Currently only supported on ZoL. */
963 		ASSERT0(zio->io_trim_flags & ZIO_TRIM_SECURE);
964 
965 		/* dkioc_free_list_t is already declared to hold one entry */
966 		dkioc_free_list_t dfl;
967 		dfl.dfl_flags = 0;
968 		dfl.dfl_num_exts = 1;
969 		dfl.dfl_offset = 0;
970 		dfl.dfl_exts[0].dfle_start = zio->io_offset;
971 		dfl.dfl_exts[0].dfle_length = zio->io_size;
972 
973 		zio->io_error = ldi_ioctl(dvd->vd_lh, DKIOCFREE,
974 		    (uintptr_t)&dfl, FKIOCTL, kcred, NULL);
975 
976 		if (zio->io_error == ENOTSUP || zio->io_error == ENOTTY) {
977 			/*
978 			 * The device must have changed and now TRIM is
979 			 * no longer supported.
980 			 */
981 			vd->vdev_has_trim = B_FALSE;
982 		}
983 
984 		zio_interrupt(zio);
985 		return;
986 	}
987 
988 	ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
989 	zio->io_target_timestamp = zio_handle_io_delay(zio);
990 
991 	vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
992 
993 	vb->vb_io = zio;
994 	bp = &vb->vb_buf;
995 
996 	bioinit(bp);
997 	bp->b_flags = B_BUSY | B_NOCACHE |
998 	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
999 	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
1000 		bp->b_flags |= B_FAILFAST;
1001 	bp->b_bcount = zio->io_size;
1002 
1003 	if (zio->io_type == ZIO_TYPE_READ) {
1004 		bp->b_un.b_addr =
1005 		    abd_borrow_buf(zio->io_abd, zio->io_size);
1006 	} else {
1007 		bp->b_un.b_addr =
1008 		    abd_borrow_buf_copy(zio->io_abd, zio->io_size);
1009 	}
1010 
1011 	bp->b_lblkno = lbtodb(zio->io_offset);
1012 	bp->b_bufsize = zio->io_size;
1013 	bp->b_iodone = vdev_disk_io_intr;
1014 
1015 	/*
1016 	 * In general we would expect ldi_strategy() to return non-zero only
1017 	 * because of programming errors, but we've also seen this fail shortly
1018 	 * after a disk dies.
1019 	 */
1020 	if (ldi_strategy(dvd->vd_lh, bp) != 0) {
1021 		zio->io_error = ENXIO;
1022 		zio_interrupt(zio);
1023 	}
1024 }
1025 
1026 static void
1027 vdev_disk_io_done(zio_t *zio)
1028 {
1029 	vdev_t *vd = zio->io_vd;
1030 
1031 	/*
1032 	 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
1033 	 * the device has been removed.  If this is the case, then we trigger an
1034 	 * asynchronous removal of the device. Otherwise, probe the device and
1035 	 * make sure it's still accessible.
1036 	 */
1037 	if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
1038 		vdev_disk_t *dvd = vd->vdev_tsd;
1039 		int state = DKIO_NONE;
1040 
1041 		if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
1042 		    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
1043 			/*
1044 			 * We post the resource as soon as possible, instead of
1045 			 * when the async removal actually happens, because the
1046 			 * DE is using this information to discard previous I/O
1047 			 * errors.
1048 			 */
1049 			zfs_post_remove(zio->io_spa, vd);
1050 			vd->vdev_remove_wanted = B_TRUE;
1051 			spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
1052 		} else if (!vd->vdev_delayed_close) {
1053 			vd->vdev_delayed_close = B_TRUE;
1054 		}
1055 	}
1056 }
1057 
1058 vdev_ops_t vdev_disk_ops = {
1059 	.vdev_op_open = vdev_disk_open,
1060 	.vdev_op_close = vdev_disk_close,
1061 	.vdev_op_asize = vdev_default_asize,
1062 	.vdev_op_io_start = vdev_disk_io_start,
1063 	.vdev_op_io_done = vdev_disk_io_done,
1064 	.vdev_op_state_change = NULL,
1065 	.vdev_op_need_resilver = NULL,
1066 	.vdev_op_hold = vdev_disk_hold,
1067 	.vdev_op_rele = vdev_disk_rele,
1068 	.vdev_op_remap = NULL,
1069 	.vdev_op_xlate = vdev_default_xlate,
1070 	.vdev_op_dumpio = vdev_disk_dumpio,
1071 	.vdev_op_type = VDEV_TYPE_DISK,		/* name of this vdev type */
1072 	.vdev_op_leaf = B_TRUE			/* leaf vdev */
1073 };
1074 
1075 /*
1076  * Given the root disk device devid or pathname, read the label from
1077  * the device, and construct a configuration nvlist.
1078  */
1079 int
1080 vdev_disk_read_rootlabel(const char *devpath, const char *devid,
1081     nvlist_t **config)
1082 {
1083 	ldi_handle_t vd_lh;
1084 	vdev_label_t *label;
1085 	uint64_t s, size;
1086 	int l;
1087 	ddi_devid_t tmpdevid;
1088 	int error = -1;
1089 	char *minor_name;
1090 
1091 	/*
1092 	 * Read the device label and build the nvlist.
1093 	 */
1094 	if (devid != NULL && ddi_devid_str_decode((char *)devid, &tmpdevid,
1095 	    &minor_name) == 0) {
1096 		error = ldi_open_by_devid(tmpdevid, minor_name,
1097 		    FREAD, kcred, &vd_lh, zfs_li);
1098 		ddi_devid_free(tmpdevid);
1099 		ddi_devid_str_free(minor_name);
1100 	}
1101 
1102 	if (error != 0 && (error = ldi_open_by_name((char *)devpath, FREAD,
1103 	    kcred, &vd_lh, zfs_li)) != 0) {
1104 		return (error);
1105 	}
1106 
1107 	if (ldi_get_size(vd_lh, &s)) {
1108 		(void) ldi_close(vd_lh, FREAD, kcred);
1109 		return (SET_ERROR(EIO));
1110 	}
1111 
1112 	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
1113 	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);
1114 
1115 	*config = NULL;
1116 	for (l = 0; l < VDEV_LABELS; l++) {
1117 		uint64_t offset, state, txg = 0;
1118 
1119 		/* read vdev label */
1120 		offset = vdev_label_offset(size, l, 0);
1121 		if (vdev_disk_ldi_physio(vd_lh, (caddr_t)label,
1122 		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
1123 			continue;
1124 
1125 		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
1126 		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
1127 			*config = NULL;
1128 			continue;
1129 		}
1130 
1131 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
1132 		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
1133 			nvlist_free(*config);
1134 			*config = NULL;
1135 			continue;
1136 		}
1137 
1138 		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
1139 		    &txg) != 0 || txg == 0) {
1140 			nvlist_free(*config);
1141 			*config = NULL;
1142 			continue;
1143 		}
1144 
1145 		break;
1146 	}
1147 
1148 	kmem_free(label, sizeof (vdev_label_t));
1149 	(void) ldi_close(vd_lh, FREAD, kcred);
1150 	if (*config == NULL)
1151 		error = SET_ERROR(EIDRM);
1152 
1153 	return (error);
1154 }
1155 
1156 struct veb {
1157 	list_t veb_ents;
1158 	boolean_t veb_scanned;
1159 };
1160 
1161 struct veb_ent {
1162 	uint64_t vebe_pool_guid;
1163 	uint64_t vebe_vdev_guid;
1164 
1165 	char *vebe_devpath;
1166 
1167 	list_node_t vebe_link;
1168 };
1169 
1170 static kmutex_t veb_lock;
1171 static struct veb *veb;
1172 
1173 static int
1174 vdev_disk_preroot_scan_walk(const char *devpath, void *arg)
1175 {
1176 	int r;
1177 	nvlist_t *cfg = NULL;
1178 	uint64_t pguid = 0, vguid = 0;
1179 
1180 	/*
1181 	 * Attempt to read the label from this block device.
1182 	 */
1183 	if ((r = vdev_disk_read_rootlabel(devpath, NULL, &cfg)) != 0) {
1184 		/*
1185 		 * Many of the available block devices will represent slices or
1186 		 * partitions of disks, or may represent disks that are not at
1187 		 * all initialised with ZFS.  As this is a best effort
1188 		 * mechanism to locate an alternate path to a particular vdev,
1189 		 * we will ignore any failures and keep scanning.
1190 		 */
1191 		return (PREROOT_WALK_BLOCK_DEVICES_NEXT);
1192 	}
1193 
1194 	/*
1195 	 * Determine the pool and vdev GUID read from the label for this
1196 	 * device.  Both values must be present and have a non-zero value.
1197 	 */
1198 	if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &pguid) != 0 ||
1199 	    nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_GUID, &vguid) != 0 ||
1200 	    pguid == 0 || vguid == 0) {
1201 		/*
1202 		 * This label was not complete.
1203 		 */
1204 		goto out;
1205 	}
1206 
1207 	/*
1208 	 * Keep track of all of the GUID-to-devpath mappings we find so that
1209 	 * vdev_disk_preroot_lookup() can search them.
1210 	 */
1211 	struct veb_ent *vebe = kmem_zalloc(sizeof (*vebe), KM_SLEEP);
1212 	vebe->vebe_pool_guid = pguid;
1213 	vebe->vebe_vdev_guid = vguid;
1214 	vebe->vebe_devpath = spa_strdup(devpath);
1215 
1216 	list_insert_tail(&veb->veb_ents, vebe);
1217 
1218 out:
1219 	nvlist_free(cfg);
1220 	return (PREROOT_WALK_BLOCK_DEVICES_NEXT);
1221 }
1222 
1223 const char *
1224 vdev_disk_preroot_lookup(uint64_t pool_guid, uint64_t vdev_guid)
1225 {
1226 	if (pool_guid == 0 || vdev_guid == 0) {
1227 		/*
1228 		 * If we aren't provided both a pool and a vdev GUID, we cannot
1229 		 * perform a lookup.
1230 		 */
1231 		return (NULL);
1232 	}
1233 
1234 	mutex_enter(&veb_lock);
1235 	if (veb == NULL) {
1236 		/*
1237 		 * If vdev_disk_preroot_fini() has been called already, there
1238 		 * is nothing we can do.
1239 		 */
1240 		mutex_exit(&veb_lock);
1241 		return (NULL);
1242 	}
1243 
1244 	/*
1245 	 * We want to perform at most one scan of all block devices per boot.
1246 	 */
1247 	if (!veb->veb_scanned) {
1248 		cmn_err(CE_NOTE, "Performing full ZFS device scan!");
1249 
1250 		preroot_walk_block_devices(vdev_disk_preroot_scan_walk, NULL);
1251 
1252 		veb->veb_scanned = B_TRUE;
1253 	}
1254 
1255 	const char *path = NULL;
1256 	for (struct veb_ent *vebe = list_head(&veb->veb_ents); vebe != NULL;
1257 	    vebe = list_next(&veb->veb_ents, vebe)) {
1258 		if (vebe->vebe_pool_guid == pool_guid &&
1259 		    vebe->vebe_vdev_guid == vdev_guid) {
1260 			path = vebe->vebe_devpath;
1261 			break;
1262 		}
1263 	}
1264 
1265 	mutex_exit(&veb_lock);
1266 
1267 	return (path);
1268 }
1269 
1270 void
1271 vdev_disk_preroot_init(void)
1272 {
1273 	mutex_init(&veb_lock, NULL, MUTEX_DEFAULT, NULL);
1274 
1275 	VERIFY3P(veb, ==, NULL);
1276 	veb = kmem_zalloc(sizeof (*veb), KM_SLEEP);
1277 	list_create(&veb->veb_ents, sizeof (struct veb_ent),
1278 	    offsetof(struct veb_ent, vebe_link));
1279 	veb->veb_scanned = B_FALSE;
1280 }
1281 
1282 void
1283 vdev_disk_preroot_fini(void)
1284 {
1285 	mutex_enter(&veb_lock);
1286 
1287 	if (veb != NULL) {
1288 		while (!list_is_empty(&veb->veb_ents)) {
1289 			struct veb_ent *vebe = list_remove_head(&veb->veb_ents);
1290 
1291 			spa_strfree(vebe->vebe_devpath);
1292 
1293 			kmem_free(vebe, sizeof (*vebe));
1294 		}
1295 
1296 		kmem_free(veb, sizeof (*veb));
1297 		veb = NULL;
1298 	}
1299 
1300 	mutex_exit(&veb_lock);
1301 }
1302