xref: /titanic_44/usr/src/lib/libzfs/common/libzfs_pool.c (revision c64d15a587b6038b85a928885fc997da7315fbfe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <alloca.h>
30 #include <assert.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <dirent.h>
35 #include <fcntl.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <zone.h>
42 #include <sys/efi_partition.h>
43 #include <sys/vtoc.h>
44 #include <sys/zfs_ioctl.h>
45 #include <sys/zio.h>
46 #include <strings.h>
47 
48 #include "zfs_namecheck.h"
49 #include "zfs_prop.h"
50 #include "libzfs_impl.h"
51 
52 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
53 
54 /*
55  * ====================================================================
56  *   zpool property functions
57  * ====================================================================
58  */
59 
60 static int
61 zpool_get_all_props(zpool_handle_t *zhp)
62 {
63 	zfs_cmd_t zc = { 0 };
64 	libzfs_handle_t *hdl = zhp->zpool_hdl;
65 
66 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
67 
68 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
69 		return (-1);
70 
71 	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
72 		if (errno == ENOMEM) {
73 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
74 				zcmd_free_nvlists(&zc);
75 				return (-1);
76 			}
77 		} else {
78 			zcmd_free_nvlists(&zc);
79 			return (-1);
80 		}
81 	}
82 
83 	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
84 		zcmd_free_nvlists(&zc);
85 		return (-1);
86 	}
87 
88 	zcmd_free_nvlists(&zc);
89 
90 	return (0);
91 }
92 
93 static int
94 zpool_props_refresh(zpool_handle_t *zhp)
95 {
96 	nvlist_t *old_props;
97 
98 	old_props = zhp->zpool_props;
99 
100 	if (zpool_get_all_props(zhp) != 0)
101 		return (-1);
102 
103 	nvlist_free(old_props);
104 	return (0);
105 }
106 
107 static char *
108 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
109     zprop_source_t *src)
110 {
111 	nvlist_t *nv, *nvl;
112 	uint64_t ival;
113 	char *value;
114 	zprop_source_t source;
115 
116 	nvl = zhp->zpool_props;
117 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
118 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
119 		source = ival;
120 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
121 	} else {
122 		source = ZPROP_SRC_DEFAULT;
123 		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
124 			value = "-";
125 	}
126 
127 	if (src)
128 		*src = source;
129 
130 	return (value);
131 }
132 
133 uint64_t
134 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
135 {
136 	nvlist_t *nv, *nvl;
137 	uint64_t value;
138 	zprop_source_t source;
139 
140 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
141 		return (zpool_prop_default_numeric(prop));
142 
143 	nvl = zhp->zpool_props;
144 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
145 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
146 		source = value;
147 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
148 	} else {
149 		source = ZPROP_SRC_DEFAULT;
150 		value = zpool_prop_default_numeric(prop);
151 	}
152 
153 	if (src)
154 		*src = source;
155 
156 	return (value);
157 }
158 
159 /*
160  * Map VDEV STATE to printed strings.
161  */
162 char *
163 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
164 {
165 	switch (state) {
166 	case VDEV_STATE_CLOSED:
167 	case VDEV_STATE_OFFLINE:
168 		return (gettext("OFFLINE"));
169 	case VDEV_STATE_REMOVED:
170 		return (gettext("REMOVED"));
171 	case VDEV_STATE_CANT_OPEN:
172 		if (aux == VDEV_AUX_CORRUPT_DATA)
173 			return (gettext("FAULTED"));
174 		else
175 			return (gettext("UNAVAIL"));
176 	case VDEV_STATE_FAULTED:
177 		return (gettext("FAULTED"));
178 	case VDEV_STATE_DEGRADED:
179 		return (gettext("DEGRADED"));
180 	case VDEV_STATE_HEALTHY:
181 		return (gettext("ONLINE"));
182 	}
183 
184 	return (gettext("UNKNOWN"));
185 }
186 
187 /*
188  * Get a zpool property value for 'prop' and return the value in
189  * a pre-allocated buffer.
190  */
191 int
192 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
193     zprop_source_t *srctype)
194 {
195 	uint64_t intval;
196 	const char *strval;
197 	zprop_source_t src = ZPROP_SRC_NONE;
198 	nvlist_t *nvroot;
199 	vdev_stat_t *vs;
200 	uint_t vsc;
201 
202 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
203 		if (prop == ZPOOL_PROP_NAME)
204 			(void) strlcpy(buf, zpool_get_name(zhp), len);
205 		else if (prop == ZPOOL_PROP_HEALTH)
206 			(void) strlcpy(buf, "FAULTED", len);
207 		else
208 			(void) strlcpy(buf, "-", len);
209 		return (0);
210 	}
211 
212 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
213 	    prop != ZPOOL_PROP_NAME)
214 		return (-1);
215 
216 	switch (zpool_prop_get_type(prop)) {
217 	case PROP_TYPE_STRING:
218 		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
219 		    len);
220 		break;
221 
222 	case PROP_TYPE_NUMBER:
223 		intval = zpool_get_prop_int(zhp, prop, &src);
224 
225 		switch (prop) {
226 		case ZPOOL_PROP_SIZE:
227 		case ZPOOL_PROP_USED:
228 		case ZPOOL_PROP_AVAILABLE:
229 			(void) zfs_nicenum(intval, buf, len);
230 			break;
231 
232 		case ZPOOL_PROP_CAPACITY:
233 			(void) snprintf(buf, len, "%llu%%",
234 			    (u_longlong_t)intval);
235 			break;
236 
237 		case ZPOOL_PROP_HEALTH:
238 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
239 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
240 			verify(nvlist_lookup_uint64_array(nvroot,
241 			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
242 
243 			(void) strlcpy(buf, zpool_state_to_name(intval,
244 			    vs->vs_aux), len);
245 			break;
246 		default:
247 			(void) snprintf(buf, len, "%llu", intval);
248 		}
249 		break;
250 
251 	case PROP_TYPE_INDEX:
252 		intval = zpool_get_prop_int(zhp, prop, &src);
253 		if (zpool_prop_index_to_string(prop, intval, &strval)
254 		    != 0)
255 			return (-1);
256 		(void) strlcpy(buf, strval, len);
257 		break;
258 
259 	default:
260 		abort();
261 	}
262 
263 	if (srctype)
264 		*srctype = src;
265 
266 	return (0);
267 }
268 
269 /*
270  * Check if the bootfs name has the same pool name as it is set to.
271  * Assuming bootfs is a valid dataset name.
272  */
273 static boolean_t
274 bootfs_name_valid(const char *pool, char *bootfs)
275 {
276 	int len = strlen(pool);
277 
278 	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM))
279 		return (B_FALSE);
280 
281 	if (strncmp(pool, bootfs, len) == 0 &&
282 	    (bootfs[len] == '/' || bootfs[len] == '\0'))
283 		return (B_TRUE);
284 
285 	return (B_FALSE);
286 }
287 
288 /*
289  * Inspect the configuration to determine if any of the devices contain
290  * an EFI label.
291  */
292 static boolean_t
293 pool_uses_efi(nvlist_t *config)
294 {
295 	nvlist_t **child;
296 	uint_t c, children;
297 
298 	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
299 	    &child, &children) != 0)
300 		return (read_efi_label(config, NULL) >= 0);
301 
302 	for (c = 0; c < children; c++) {
303 		if (pool_uses_efi(child[c]))
304 			return (B_TRUE);
305 	}
306 	return (B_FALSE);
307 }
308 
309 /*
310  * Given an nvlist of zpool properties to be set, validate that they are
311  * correct, and parse any numeric properties (index, boolean, etc) if they are
312  * specified as strings.
313  */
314 static nvlist_t *
315 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
316     nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
317 {
318 	nvpair_t *elem;
319 	nvlist_t *retprops;
320 	zpool_prop_t prop;
321 	char *strval;
322 	uint64_t intval;
323 	char *slash;
324 	struct stat64 statbuf;
325 	zpool_handle_t *zhp;
326 	nvlist_t *nvroot;
327 
328 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
329 		(void) no_memory(hdl);
330 		return (NULL);
331 	}
332 
333 	elem = NULL;
334 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
335 		const char *propname = nvpair_name(elem);
336 
337 		/*
338 		 * Make sure this property is valid and applies to this type.
339 		 */
340 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
341 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
342 			    "invalid property '%s'"), propname);
343 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
344 			goto error;
345 		}
346 
347 		if (zpool_prop_readonly(prop)) {
348 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
349 			    "is readonly"), propname);
350 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
351 			goto error;
352 		}
353 
354 		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
355 		    &strval, &intval, errbuf) != 0)
356 			goto error;
357 
358 		/*
359 		 * Perform additional checking for specific properties.
360 		 */
361 		switch (prop) {
362 		case ZPOOL_PROP_VERSION:
363 			if (intval < version || intval > SPA_VERSION) {
364 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
365 				    "property '%s' number %d is invalid."),
366 				    propname, intval);
367 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
368 				goto error;
369 			}
370 			break;
371 
372 		case ZPOOL_PROP_BOOTFS:
373 			if (create_or_import) {
374 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
375 				    "property '%s' cannot be set at creation "
376 				    "or import time"), propname);
377 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
378 				goto error;
379 			}
380 
381 			if (version < SPA_VERSION_BOOTFS) {
382 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
383 				    "pool must be upgraded to support "
384 				    "'%s' property"), propname);
385 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
386 				goto error;
387 			}
388 
389 			/*
390 			 * bootfs property value has to be a dataset name and
391 			 * the dataset has to be in the same pool as it sets to.
392 			 */
393 			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
394 			    strval)) {
395 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
396 				    "is an invalid name"), strval);
397 				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
398 				goto error;
399 			}
400 
401 			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
402 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
403 				    "could not open pool '%s'"), poolname);
404 				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
405 				goto error;
406 			}
407 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
408 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
409 
410 			/*
411 			 * bootfs property cannot be set on a disk which has
412 			 * been EFI labeled.
413 			 */
414 			if (pool_uses_efi(nvroot)) {
415 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
416 				    "property '%s' not supported on "
417 				    "EFI labeled devices"), propname);
418 				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
419 				zpool_close(zhp);
420 				goto error;
421 			}
422 			zpool_close(zhp);
423 			break;
424 
425 		case ZPOOL_PROP_ALTROOT:
426 			if (!create_or_import) {
427 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
428 				    "property '%s' can only be set during pool "
429 				    "creation or import"), propname);
430 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
431 				goto error;
432 			}
433 
434 			if (strval[0] != '/') {
435 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
436 				    "bad alternate root '%s'"), strval);
437 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
438 				goto error;
439 			}
440 			break;
441 
442 		case ZPOOL_PROP_CACHEFILE:
443 			if (strval[0] == '\0')
444 				break;
445 
446 			if (strcmp(strval, "none") == 0)
447 				break;
448 
449 			if (strval[0] != '/') {
450 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
451 				    "property '%s' must be empty, an "
452 				    "absolute path, or 'none'"), propname);
453 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
454 				goto error;
455 			}
456 
457 			slash = strrchr(strval, '/');
458 
459 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
460 			    strcmp(slash, "/..") == 0) {
461 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
462 				    "'%s' is not a valid file"), strval);
463 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
464 				goto error;
465 			}
466 
467 			*slash = '\0';
468 
469 			if (strval[0] != '\0' &&
470 			    (stat64(strval, &statbuf) != 0 ||
471 			    !S_ISDIR(statbuf.st_mode))) {
472 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
473 				    "'%s' is not a valid directory"),
474 				    strval);
475 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
476 				goto error;
477 			}
478 
479 			*slash = '/';
480 			break;
481 		}
482 	}
483 
484 	return (retprops);
485 error:
486 	nvlist_free(retprops);
487 	return (NULL);
488 }
489 
490 /*
491  * Set zpool property : propname=propval.
492  */
493 int
494 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
495 {
496 	zfs_cmd_t zc = { 0 };
497 	int ret = -1;
498 	char errbuf[1024];
499 	nvlist_t *nvl = NULL;
500 	nvlist_t *realprops;
501 	uint64_t version;
502 
503 	(void) snprintf(errbuf, sizeof (errbuf),
504 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
505 	    zhp->zpool_name);
506 
507 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
508 		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
509 
510 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
511 		return (no_memory(zhp->zpool_hdl));
512 
513 	if (nvlist_add_string(nvl, propname, propval) != 0) {
514 		nvlist_free(nvl);
515 		return (no_memory(zhp->zpool_hdl));
516 	}
517 
518 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
519 	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
520 	    zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
521 		nvlist_free(nvl);
522 		return (-1);
523 	}
524 
525 	nvlist_free(nvl);
526 	nvl = realprops;
527 
528 	/*
529 	 * Execute the corresponding ioctl() to set this property.
530 	 */
531 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
532 
533 	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
534 		nvlist_free(nvl);
535 		return (-1);
536 	}
537 
538 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
539 
540 	zcmd_free_nvlists(&zc);
541 	nvlist_free(nvl);
542 
543 	if (ret)
544 		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
545 	else
546 		(void) zpool_props_refresh(zhp);
547 
548 	return (ret);
549 }
550 
551 int
552 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
553 {
554 	libzfs_handle_t *hdl = zhp->zpool_hdl;
555 	zprop_list_t *entry;
556 	char buf[ZFS_MAXPROPLEN];
557 
558 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
559 		return (-1);
560 
561 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
562 
563 		if (entry->pl_fixed)
564 			continue;
565 
566 		if (entry->pl_prop != ZPROP_INVAL &&
567 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
568 		    NULL) == 0) {
569 			if (strlen(buf) > entry->pl_width)
570 				entry->pl_width = strlen(buf);
571 		}
572 	}
573 
574 	return (0);
575 }
576 
577 
578 /*
579  * Validate the given pool name, optionally putting an extended error message in
580  * 'buf'.
581  */
582 boolean_t
583 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
584 {
585 	namecheck_err_t why;
586 	char what;
587 	int ret;
588 
589 	ret = pool_namecheck(pool, &why, &what);
590 
591 	/*
592 	 * The rules for reserved pool names were extended at a later point.
593 	 * But we need to support users with existing pools that may now be
594 	 * invalid.  So we only check for this expanded set of names during a
595 	 * create (or import), and only in userland.
596 	 */
597 	if (ret == 0 && !isopen &&
598 	    (strncmp(pool, "mirror", 6) == 0 ||
599 	    strncmp(pool, "raidz", 5) == 0 ||
600 	    strncmp(pool, "spare", 5) == 0 ||
601 	    strcmp(pool, "log") == 0)) {
602 		if (hdl != NULL)
603 			zfs_error_aux(hdl,
604 			    dgettext(TEXT_DOMAIN, "name is reserved"));
605 		return (B_FALSE);
606 	}
607 
608 
609 	if (ret != 0) {
610 		if (hdl != NULL) {
611 			switch (why) {
612 			case NAME_ERR_TOOLONG:
613 				zfs_error_aux(hdl,
614 				    dgettext(TEXT_DOMAIN, "name is too long"));
615 				break;
616 
617 			case NAME_ERR_INVALCHAR:
618 				zfs_error_aux(hdl,
619 				    dgettext(TEXT_DOMAIN, "invalid character "
620 				    "'%c' in pool name"), what);
621 				break;
622 
623 			case NAME_ERR_NOLETTER:
624 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
625 				    "name must begin with a letter"));
626 				break;
627 
628 			case NAME_ERR_RESERVED:
629 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
630 				    "name is reserved"));
631 				break;
632 
633 			case NAME_ERR_DISKLIKE:
634 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
635 				    "pool name is reserved"));
636 				break;
637 
638 			case NAME_ERR_LEADING_SLASH:
639 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
640 				    "leading slash in name"));
641 				break;
642 
643 			case NAME_ERR_EMPTY_COMPONENT:
644 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
645 				    "empty component in name"));
646 				break;
647 
648 			case NAME_ERR_TRAILING_SLASH:
649 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
650 				    "trailing slash in name"));
651 				break;
652 
653 			case NAME_ERR_MULTIPLE_AT:
654 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
655 				    "multiple '@' delimiters in name"));
656 				break;
657 
658 			}
659 		}
660 		return (B_FALSE);
661 	}
662 
663 	return (B_TRUE);
664 }
665 
666 /*
667  * Open a handle to the given pool, even if the pool is currently in the FAULTED
668  * state.
669  */
670 zpool_handle_t *
671 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
672 {
673 	zpool_handle_t *zhp;
674 	boolean_t missing;
675 
676 	/*
677 	 * Make sure the pool name is valid.
678 	 */
679 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
680 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
681 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
682 		    pool);
683 		return (NULL);
684 	}
685 
686 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
687 		return (NULL);
688 
689 	zhp->zpool_hdl = hdl;
690 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
691 
692 	if (zpool_refresh_stats(zhp, &missing) != 0) {
693 		zpool_close(zhp);
694 		return (NULL);
695 	}
696 
697 	if (missing) {
698 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
699 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
700 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
701 		zpool_close(zhp);
702 		return (NULL);
703 	}
704 
705 	return (zhp);
706 }
707 
708 /*
709  * Like the above, but silent on error.  Used when iterating over pools (because
710  * the configuration cache may be out of date).
711  */
712 int
713 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
714 {
715 	zpool_handle_t *zhp;
716 	boolean_t missing;
717 
718 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
719 		return (-1);
720 
721 	zhp->zpool_hdl = hdl;
722 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
723 
724 	if (zpool_refresh_stats(zhp, &missing) != 0) {
725 		zpool_close(zhp);
726 		return (-1);
727 	}
728 
729 	if (missing) {
730 		zpool_close(zhp);
731 		*ret = NULL;
732 		return (0);
733 	}
734 
735 	*ret = zhp;
736 	return (0);
737 }
738 
739 /*
740  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
741  * state.
742  */
743 zpool_handle_t *
744 zpool_open(libzfs_handle_t *hdl, const char *pool)
745 {
746 	zpool_handle_t *zhp;
747 
748 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
749 		return (NULL);
750 
751 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
752 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
753 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
754 		zpool_close(zhp);
755 		return (NULL);
756 	}
757 
758 	return (zhp);
759 }
760 
761 /*
762  * Close the handle.  Simply frees the memory associated with the handle.
763  */
764 void
765 zpool_close(zpool_handle_t *zhp)
766 {
767 	if (zhp->zpool_config)
768 		nvlist_free(zhp->zpool_config);
769 	if (zhp->zpool_old_config)
770 		nvlist_free(zhp->zpool_old_config);
771 	if (zhp->zpool_props)
772 		nvlist_free(zhp->zpool_props);
773 	free(zhp);
774 }
775 
776 /*
777  * Return the name of the pool.
778  */
779 const char *
780 zpool_get_name(zpool_handle_t *zhp)
781 {
782 	return (zhp->zpool_name);
783 }
784 
785 
786 /*
787  * Return the state of the pool (ACTIVE or UNAVAILABLE)
788  */
789 int
790 zpool_get_state(zpool_handle_t *zhp)
791 {
792 	return (zhp->zpool_state);
793 }
794 
795 /*
796  * Create the named pool, using the provided vdev list.  It is assumed
797  * that the consumer has already validated the contents of the nvlist, so we
798  * don't have to worry about error semantics.
799  */
800 int
801 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
802     nvlist_t *props, nvlist_t *fsprops)
803 {
804 	zfs_cmd_t zc = { 0 };
805 	nvlist_t *zc_fsprops = NULL;
806 	nvlist_t *zc_props = NULL;
807 	char msg[1024];
808 	char *altroot;
809 	int ret = -1;
810 
811 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
812 	    "cannot create '%s'"), pool);
813 
814 	if (!zpool_name_valid(hdl, B_FALSE, pool))
815 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
816 
817 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
818 		return (-1);
819 
820 	if (props) {
821 		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
822 		    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
823 			goto create_failed;
824 		}
825 	}
826 
827 	if (fsprops) {
828 		uint64_t zoned;
829 		char *zonestr;
830 
831 		zoned = ((nvlist_lookup_string(fsprops,
832 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
833 		    strcmp(zonestr, "on") == 0);
834 
835 		if ((zc_fsprops = zfs_valid_proplist(hdl,
836 		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
837 			goto create_failed;
838 		}
839 		if (!zc_props &&
840 		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
841 			goto create_failed;
842 		}
843 		if (nvlist_add_nvlist(zc_props,
844 		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
845 			goto create_failed;
846 		}
847 	}
848 
849 	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
850 		goto create_failed;
851 
852 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
853 
854 	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
855 
856 		zcmd_free_nvlists(&zc);
857 		nvlist_free(zc_props);
858 		nvlist_free(zc_fsprops);
859 
860 		switch (errno) {
861 		case EBUSY:
862 			/*
863 			 * This can happen if the user has specified the same
864 			 * device multiple times.  We can't reliably detect this
865 			 * until we try to add it and see we already have a
866 			 * label.
867 			 */
868 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
869 			    "one or more vdevs refer to the same device"));
870 			return (zfs_error(hdl, EZFS_BADDEV, msg));
871 
872 		case EOVERFLOW:
873 			/*
874 			 * This occurs when one of the devices is below
875 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
876 			 * device was the problem device since there's no
877 			 * reliable way to determine device size from userland.
878 			 */
879 			{
880 				char buf[64];
881 
882 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
883 
884 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
885 				    "one or more devices is less than the "
886 				    "minimum size (%s)"), buf);
887 			}
888 			return (zfs_error(hdl, EZFS_BADDEV, msg));
889 
890 		case ENOSPC:
891 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
892 			    "one or more devices is out of space"));
893 			return (zfs_error(hdl, EZFS_BADDEV, msg));
894 
895 		case ENOTBLK:
896 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
897 			    "cache device must be a disk or disk slice"));
898 			return (zfs_error(hdl, EZFS_BADDEV, msg));
899 
900 		default:
901 			return (zpool_standard_error(hdl, errno, msg));
902 		}
903 	}
904 
905 	/*
906 	 * If this is an alternate root pool, then we automatically set the
907 	 * mountpoint of the root dataset to be '/'.
908 	 */
909 	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
910 	    &altroot) == 0) {
911 		zfs_handle_t *zhp;
912 
913 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
914 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
915 		    "/") == 0);
916 
917 		zfs_close(zhp);
918 	}
919 
920 create_failed:
921 	zcmd_free_nvlists(&zc);
922 	nvlist_free(zc_props);
923 	nvlist_free(zc_fsprops);
924 	return (ret);
925 }
926 
927 /*
928  * Destroy the given pool.  It is up to the caller to ensure that there are no
929  * datasets left in the pool.
930  */
931 int
932 zpool_destroy(zpool_handle_t *zhp)
933 {
934 	zfs_cmd_t zc = { 0 };
935 	zfs_handle_t *zfp = NULL;
936 	libzfs_handle_t *hdl = zhp->zpool_hdl;
937 	char msg[1024];
938 
939 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
940 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
941 	    ZFS_TYPE_FILESYSTEM)) == NULL)
942 		return (-1);
943 
944 	if (zpool_remove_zvol_links(zhp) != 0)
945 		return (-1);
946 
947 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
948 
949 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
950 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
951 		    "cannot destroy '%s'"), zhp->zpool_name);
952 
953 		if (errno == EROFS) {
954 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
955 			    "one or more devices is read only"));
956 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
957 		} else {
958 			(void) zpool_standard_error(hdl, errno, msg);
959 		}
960 
961 		if (zfp)
962 			zfs_close(zfp);
963 		return (-1);
964 	}
965 
966 	if (zfp) {
967 		remove_mountpoint(zfp);
968 		zfs_close(zfp);
969 	}
970 
971 	return (0);
972 }
973 
974 /*
975  * Add the given vdevs to the pool.  The caller must have already performed the
976  * necessary verification to ensure that the vdev specification is well-formed.
977  */
978 int
979 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
980 {
981 	zfs_cmd_t zc = { 0 };
982 	int ret;
983 	libzfs_handle_t *hdl = zhp->zpool_hdl;
984 	char msg[1024];
985 	nvlist_t **spares, **l2cache;
986 	uint_t nspares, nl2cache;
987 
988 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
989 	    "cannot add to '%s'"), zhp->zpool_name);
990 
991 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
992 	    SPA_VERSION_SPARES &&
993 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
994 	    &spares, &nspares) == 0) {
995 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
996 		    "upgraded to add hot spares"));
997 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
998 	}
999 
1000 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1001 	    SPA_VERSION_L2CACHE &&
1002 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1003 	    &l2cache, &nl2cache) == 0) {
1004 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1005 		    "upgraded to add cache devices"));
1006 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1007 	}
1008 
1009 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1010 		return (-1);
1011 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1012 
1013 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1014 		switch (errno) {
1015 		case EBUSY:
1016 			/*
1017 			 * This can happen if the user has specified the same
1018 			 * device multiple times.  We can't reliably detect this
1019 			 * until we try to add it and see we already have a
1020 			 * label.
1021 			 */
1022 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1023 			    "one or more vdevs refer to the same device"));
1024 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1025 			break;
1026 
1027 		case EOVERFLOW:
1028 			/*
1029 			 * This occurrs when one of the devices is below
1030 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1031 			 * device was the problem device since there's no
1032 			 * reliable way to determine device size from userland.
1033 			 */
1034 			{
1035 				char buf[64];
1036 
1037 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1038 
1039 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1040 				    "device is less than the minimum "
1041 				    "size (%s)"), buf);
1042 			}
1043 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1044 			break;
1045 
1046 		case ENOTSUP:
1047 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1048 			    "pool must be upgraded to add these vdevs"));
1049 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
1050 			break;
1051 
1052 		case EDOM:
1053 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1054 			    "root pool can not have multiple vdevs"
1055 			    " or separate logs"));
1056 			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1057 			break;
1058 
1059 		case ENOTBLK:
1060 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1061 			    "cache device must be a disk or disk slice"));
1062 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1063 			break;
1064 
1065 		default:
1066 			(void) zpool_standard_error(hdl, errno, msg);
1067 		}
1068 
1069 		ret = -1;
1070 	} else {
1071 		ret = 0;
1072 	}
1073 
1074 	zcmd_free_nvlists(&zc);
1075 
1076 	return (ret);
1077 }
1078 
1079 /*
1080  * Exports the pool from the system.  The caller must ensure that there are no
1081  * mounted datasets in the pool.
1082  */
1083 int
1084 zpool_export(zpool_handle_t *zhp, boolean_t force)
1085 {
1086 	zfs_cmd_t zc = { 0 };
1087 	char msg[1024];
1088 
1089 	if (zpool_remove_zvol_links(zhp) != 0)
1090 		return (-1);
1091 
1092 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1093 	    "cannot export '%s'"), zhp->zpool_name);
1094 
1095 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1096 	zc.zc_cookie = force;
1097 
1098 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1099 		switch (errno) {
1100 		case EXDEV:
1101 			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1102 			    "use '-f' to override the following errors:\n"
1103 			    "'%s' has an active shared spare which could be"
1104 			    " used by other pools once '%s' is exported."),
1105 			    zhp->zpool_name, zhp->zpool_name);
1106 			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1107 			    msg));
1108 		default:
1109 			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1110 			    msg));
1111 		}
1112 	}
1113 
1114 	return (0);
1115 }
1116 
1117 /*
1118  * zpool_import() is a contracted interface. Should be kept the same
1119  * if possible.
1120  *
1121  * Applications should use zpool_import_props() to import a pool with
1122  * new properties value to be set.
1123  */
1124 int
1125 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1126     char *altroot)
1127 {
1128 	nvlist_t *props = NULL;
1129 	int ret;
1130 
1131 	if (altroot != NULL) {
1132 		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1133 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1134 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1135 			    newname));
1136 		}
1137 
1138 		if (nvlist_add_string(props,
1139 		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
1140 			nvlist_free(props);
1141 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1142 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1143 			    newname));
1144 		}
1145 	}
1146 
1147 	ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
1148 	if (props)
1149 		nvlist_free(props);
1150 	return (ret);
1151 }
1152 
1153 /*
1154  * Import the given pool using the known configuration and a list of
1155  * properties to be set. The configuration should have come from
1156  * zpool_find_import(). The 'newname' parameters control whether the pool
1157  * is imported with a different name.
1158  */
1159 int
1160 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1161     nvlist_t *props, boolean_t importfaulted)
1162 {
1163 	zfs_cmd_t zc = { 0 };
1164 	char *thename;
1165 	char *origname;
1166 	int ret;
1167 	char errbuf[1024];
1168 
1169 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1170 	    &origname) == 0);
1171 
1172 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1173 	    "cannot import pool '%s'"), origname);
1174 
1175 	if (newname != NULL) {
1176 		if (!zpool_name_valid(hdl, B_FALSE, newname))
1177 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1178 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1179 			    newname));
1180 		thename = (char *)newname;
1181 	} else {
1182 		thename = origname;
1183 	}
1184 
1185 	if (props) {
1186 		uint64_t version;
1187 
1188 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1189 		    &version) == 0);
1190 
1191 		if ((props = zpool_valid_proplist(hdl, origname,
1192 		    props, version, B_TRUE, errbuf)) == NULL) {
1193 			return (-1);
1194 		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1195 			nvlist_free(props);
1196 			return (-1);
1197 		}
1198 	}
1199 
1200 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1201 
1202 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1203 	    &zc.zc_guid) == 0);
1204 
1205 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1206 		nvlist_free(props);
1207 		return (-1);
1208 	}
1209 
1210 	zc.zc_cookie = (uint64_t)importfaulted;
1211 	ret = 0;
1212 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
1213 		char desc[1024];
1214 		if (newname == NULL)
1215 			(void) snprintf(desc, sizeof (desc),
1216 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1217 			    thename);
1218 		else
1219 			(void) snprintf(desc, sizeof (desc),
1220 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1221 			    origname, thename);
1222 
1223 		switch (errno) {
1224 		case ENOTSUP:
1225 			/*
1226 			 * Unsupported version.
1227 			 */
1228 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1229 			break;
1230 
1231 		case EINVAL:
1232 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1233 			break;
1234 
1235 		default:
1236 			(void) zpool_standard_error(hdl, errno, desc);
1237 		}
1238 
1239 		ret = -1;
1240 	} else {
1241 		zpool_handle_t *zhp;
1242 
1243 		/*
1244 		 * This should never fail, but play it safe anyway.
1245 		 */
1246 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
1247 			ret = -1;
1248 		} else if (zhp != NULL) {
1249 			ret = zpool_create_zvol_links(zhp);
1250 			zpool_close(zhp);
1251 		}
1252 
1253 	}
1254 
1255 	zcmd_free_nvlists(&zc);
1256 	nvlist_free(props);
1257 
1258 	return (ret);
1259 }
1260 
1261 /*
1262  * Scrub the pool.
1263  */
1264 int
1265 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
1266 {
1267 	zfs_cmd_t zc = { 0 };
1268 	char msg[1024];
1269 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1270 
1271 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1272 	zc.zc_cookie = type;
1273 
1274 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
1275 		return (0);
1276 
1277 	(void) snprintf(msg, sizeof (msg),
1278 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1279 
1280 	if (errno == EBUSY)
1281 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
1282 	else
1283 		return (zpool_standard_error(hdl, errno, msg));
1284 }
1285 
1286 /*
1287  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1288  * spare; but FALSE if its an INUSE spare.
1289  */
1290 static nvlist_t *
1291 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
1292     boolean_t *avail_spare, boolean_t *l2cache)
1293 {
1294 	uint_t c, children;
1295 	nvlist_t **child;
1296 	uint64_t theguid, present;
1297 	char *path;
1298 	uint64_t wholedisk = 0;
1299 	nvlist_t *ret;
1300 
1301 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
1302 
1303 	if (search == NULL &&
1304 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
1305 		/*
1306 		 * If the device has never been present since import, the only
1307 		 * reliable way to match the vdev is by GUID.
1308 		 */
1309 		if (theguid == guid)
1310 			return (nv);
1311 	} else if (search != NULL &&
1312 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1313 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1314 		    &wholedisk);
1315 		if (wholedisk) {
1316 			/*
1317 			 * For whole disks, the internal path has 's0', but the
1318 			 * path passed in by the user doesn't.
1319 			 */
1320 			if (strlen(search) == strlen(path) - 2 &&
1321 			    strncmp(search, path, strlen(search)) == 0)
1322 				return (nv);
1323 		} else if (strcmp(search, path) == 0) {
1324 			return (nv);
1325 		}
1326 	}
1327 
1328 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1329 	    &child, &children) != 0)
1330 		return (NULL);
1331 
1332 	for (c = 0; c < children; c++)
1333 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1334 		    avail_spare, l2cache)) != NULL)
1335 			return (ret);
1336 
1337 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1338 	    &child, &children) == 0) {
1339 		for (c = 0; c < children; c++) {
1340 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1341 			    avail_spare, l2cache)) != NULL) {
1342 				*avail_spare = B_TRUE;
1343 				return (ret);
1344 			}
1345 		}
1346 	}
1347 
1348 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1349 	    &child, &children) == 0) {
1350 		for (c = 0; c < children; c++) {
1351 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1352 			    avail_spare, l2cache)) != NULL) {
1353 				*l2cache = B_TRUE;
1354 				return (ret);
1355 			}
1356 		}
1357 	}
1358 
1359 	return (NULL);
1360 }
1361 
1362 nvlist_t *
1363 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1364     boolean_t *l2cache)
1365 {
1366 	char buf[MAXPATHLEN];
1367 	const char *search;
1368 	char *end;
1369 	nvlist_t *nvroot;
1370 	uint64_t guid;
1371 
1372 	guid = strtoull(path, &end, 10);
1373 	if (guid != 0 && *end == '\0') {
1374 		search = NULL;
1375 	} else if (path[0] != '/') {
1376 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
1377 		search = buf;
1378 	} else {
1379 		search = path;
1380 	}
1381 
1382 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1383 	    &nvroot) == 0);
1384 
1385 	*avail_spare = B_FALSE;
1386 	*l2cache = B_FALSE;
1387 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
1388 	    l2cache));
1389 }
1390 
1391 /*
1392  * Returns TRUE if the given guid corresponds to the given type.
1393  * This is used to check for hot spares (INUSE or not), and level 2 cache
1394  * devices.
1395  */
1396 static boolean_t
1397 is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1398 {
1399 	uint64_t target_guid;
1400 	nvlist_t *nvroot;
1401 	nvlist_t **list;
1402 	uint_t count;
1403 	int i;
1404 
1405 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1406 	    &nvroot) == 0);
1407 	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1408 		for (i = 0; i < count; i++) {
1409 			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1410 			    &target_guid) == 0);
1411 			if (guid == target_guid)
1412 				return (B_TRUE);
1413 		}
1414 	}
1415 
1416 	return (B_FALSE);
1417 }
1418 
1419 /*
1420  * Bring the specified vdev online.   The 'flags' parameter is a set of the
1421  * ZFS_ONLINE_* flags.
1422  */
1423 int
1424 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1425     vdev_state_t *newstate)
1426 {
1427 	zfs_cmd_t zc = { 0 };
1428 	char msg[1024];
1429 	nvlist_t *tgt;
1430 	boolean_t avail_spare, l2cache;
1431 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1432 
1433 	(void) snprintf(msg, sizeof (msg),
1434 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1435 
1436 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1437 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
1438 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1439 
1440 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1441 
1442 	if (avail_spare ||
1443 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1444 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1445 
1446 	zc.zc_cookie = VDEV_STATE_ONLINE;
1447 	zc.zc_obj = flags;
1448 
1449 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1450 		return (zpool_standard_error(hdl, errno, msg));
1451 
1452 	*newstate = zc.zc_cookie;
1453 	return (0);
1454 }
1455 
1456 /*
1457  * Take the specified vdev offline
1458  */
1459 int
1460 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1461 {
1462 	zfs_cmd_t zc = { 0 };
1463 	char msg[1024];
1464 	nvlist_t *tgt;
1465 	boolean_t avail_spare, l2cache;
1466 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1467 
1468 	(void) snprintf(msg, sizeof (msg),
1469 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1470 
1471 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1472 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
1473 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1474 
1475 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1476 
1477 	if (avail_spare ||
1478 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1479 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1480 
1481 	zc.zc_cookie = VDEV_STATE_OFFLINE;
1482 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1483 
1484 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1485 		return (0);
1486 
1487 	switch (errno) {
1488 	case EBUSY:
1489 
1490 		/*
1491 		 * There are no other replicas of this device.
1492 		 */
1493 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1494 
1495 	default:
1496 		return (zpool_standard_error(hdl, errno, msg));
1497 	}
1498 }
1499 
1500 /*
1501  * Mark the given vdev faulted.
1502  */
1503 int
1504 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1505 {
1506 	zfs_cmd_t zc = { 0 };
1507 	char msg[1024];
1508 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1509 
1510 	(void) snprintf(msg, sizeof (msg),
1511 	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
1512 
1513 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1514 	zc.zc_guid = guid;
1515 	zc.zc_cookie = VDEV_STATE_FAULTED;
1516 
1517 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1518 		return (0);
1519 
1520 	switch (errno) {
1521 	case EBUSY:
1522 
1523 		/*
1524 		 * There are no other replicas of this device.
1525 		 */
1526 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1527 
1528 	default:
1529 		return (zpool_standard_error(hdl, errno, msg));
1530 	}
1531 
1532 }
1533 
1534 /*
1535  * Mark the given vdev degraded.
1536  */
1537 int
1538 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1539 {
1540 	zfs_cmd_t zc = { 0 };
1541 	char msg[1024];
1542 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1543 
1544 	(void) snprintf(msg, sizeof (msg),
1545 	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
1546 
1547 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1548 	zc.zc_guid = guid;
1549 	zc.zc_cookie = VDEV_STATE_DEGRADED;
1550 
1551 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1552 		return (0);
1553 
1554 	return (zpool_standard_error(hdl, errno, msg));
1555 }
1556 
1557 /*
1558  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
1559  * a hot spare.
1560  */
1561 static boolean_t
1562 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1563 {
1564 	nvlist_t **child;
1565 	uint_t c, children;
1566 	char *type;
1567 
1568 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1569 	    &children) == 0) {
1570 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1571 		    &type) == 0);
1572 
1573 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1574 		    children == 2 && child[which] == tgt)
1575 			return (B_TRUE);
1576 
1577 		for (c = 0; c < children; c++)
1578 			if (is_replacing_spare(child[c], tgt, which))
1579 				return (B_TRUE);
1580 	}
1581 
1582 	return (B_FALSE);
1583 }
1584 
1585 /*
1586  * Attach new_disk (fully described by nvroot) to old_disk.
1587  * If 'replacing' is specified, the new disk will replace the old one.
1588  */
1589 int
1590 zpool_vdev_attach(zpool_handle_t *zhp,
1591     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1592 {
1593 	zfs_cmd_t zc = { 0 };
1594 	char msg[1024];
1595 	int ret;
1596 	nvlist_t *tgt;
1597 	boolean_t avail_spare, l2cache;
1598 	uint64_t val, is_log;
1599 	char *path, *newname;
1600 	nvlist_t **child;
1601 	uint_t children;
1602 	nvlist_t *config_root;
1603 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1604 
1605 	if (replacing)
1606 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1607 		    "cannot replace %s with %s"), old_disk, new_disk);
1608 	else
1609 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1610 		    "cannot attach %s to %s"), new_disk, old_disk);
1611 
1612 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1613 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache)) == 0)
1614 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1615 
1616 	if (avail_spare)
1617 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1618 
1619 	if (l2cache)
1620 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1621 
1622 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1623 	zc.zc_cookie = replacing;
1624 
1625 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1626 	    &child, &children) != 0 || children != 1) {
1627 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1628 		    "new device must be a single disk"));
1629 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1630 	}
1631 
1632 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1633 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1634 
1635 	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
1636 		return (-1);
1637 
1638 	/*
1639 	 * If the target is a hot spare that has been swapped in, we can only
1640 	 * replace it with another hot spare.
1641 	 */
1642 	if (replacing &&
1643 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1644 	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache) == NULL ||
1645 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
1646 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1647 		    "can only be replaced by another hot spare"));
1648 		free(newname);
1649 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1650 	}
1651 
1652 	/*
1653 	 * If we are attempting to replace a spare, it canot be applied to an
1654 	 * already spared device.
1655 	 */
1656 	if (replacing &&
1657 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1658 	    zpool_find_vdev(zhp, newname, &avail_spare, &l2cache) != NULL &&
1659 	    avail_spare && is_replacing_spare(config_root, tgt, 0)) {
1660 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1661 		    "device has already been replaced with a spare"));
1662 		free(newname);
1663 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1664 	}
1665 
1666 	free(newname);
1667 
1668 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1669 		return (-1);
1670 
1671 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1672 
1673 	zcmd_free_nvlists(&zc);
1674 
1675 	if (ret == 0)
1676 		return (0);
1677 
1678 	switch (errno) {
1679 	case ENOTSUP:
1680 		/*
1681 		 * Can't attach to or replace this type of vdev.
1682 		 */
1683 		if (replacing) {
1684 			is_log = B_FALSE;
1685 			(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_LOG,
1686 			    &is_log);
1687 			if (is_log)
1688 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1689 				    "cannot replace a log with a spare"));
1690 			else
1691 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1692 				    "cannot replace a replacing device"));
1693 		} else {
1694 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1695 			    "can only attach to mirrors and top-level "
1696 			    "disks"));
1697 		}
1698 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1699 		break;
1700 
1701 	case EINVAL:
1702 		/*
1703 		 * The new device must be a single disk.
1704 		 */
1705 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1706 		    "new device must be a single disk"));
1707 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1708 		break;
1709 
1710 	case EBUSY:
1711 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1712 		    new_disk);
1713 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1714 		break;
1715 
1716 	case EOVERFLOW:
1717 		/*
1718 		 * The new device is too small.
1719 		 */
1720 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1721 		    "device is too small"));
1722 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1723 		break;
1724 
1725 	case EDOM:
1726 		/*
1727 		 * The new device has a different alignment requirement.
1728 		 */
1729 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1730 		    "devices have different sector alignment"));
1731 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1732 		break;
1733 
1734 	case ENAMETOOLONG:
1735 		/*
1736 		 * The resulting top-level vdev spec won't fit in the label.
1737 		 */
1738 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1739 		break;
1740 
1741 	default:
1742 		(void) zpool_standard_error(hdl, errno, msg);
1743 	}
1744 
1745 	return (-1);
1746 }
1747 
1748 /*
1749  * Detach the specified device.
1750  */
1751 int
1752 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1753 {
1754 	zfs_cmd_t zc = { 0 };
1755 	char msg[1024];
1756 	nvlist_t *tgt;
1757 	boolean_t avail_spare, l2cache;
1758 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1759 
1760 	(void) snprintf(msg, sizeof (msg),
1761 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1762 
1763 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1764 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
1765 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1766 
1767 	if (avail_spare)
1768 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1769 
1770 	if (l2cache)
1771 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1772 
1773 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1774 
1775 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1776 		return (0);
1777 
1778 	switch (errno) {
1779 
1780 	case ENOTSUP:
1781 		/*
1782 		 * Can't detach from this type of vdev.
1783 		 */
1784 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1785 		    "applicable to mirror and replacing vdevs"));
1786 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1787 		break;
1788 
1789 	case EBUSY:
1790 		/*
1791 		 * There are no other replicas of this device.
1792 		 */
1793 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1794 		break;
1795 
1796 	default:
1797 		(void) zpool_standard_error(hdl, errno, msg);
1798 	}
1799 
1800 	return (-1);
1801 }
1802 
1803 /*
1804  * Remove the given device.  Currently, this is supported only for hot spares
1805  * and level 2 cache devices.
1806  */
1807 int
1808 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1809 {
1810 	zfs_cmd_t zc = { 0 };
1811 	char msg[1024];
1812 	nvlist_t *tgt;
1813 	boolean_t avail_spare, l2cache;
1814 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1815 
1816 	(void) snprintf(msg, sizeof (msg),
1817 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1818 
1819 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1820 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
1821 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1822 
1823 	if (!avail_spare && !l2cache) {
1824 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1825 		    "only inactive hot spares or cache devices "
1826 		    "can be removed"));
1827 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1828 	}
1829 
1830 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1831 
1832 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1833 		return (0);
1834 
1835 	return (zpool_standard_error(hdl, errno, msg));
1836 }
1837 
1838 /*
1839  * Clear the errors for the pool, or the particular device if specified.
1840  */
1841 int
1842 zpool_clear(zpool_handle_t *zhp, const char *path)
1843 {
1844 	zfs_cmd_t zc = { 0 };
1845 	char msg[1024];
1846 	nvlist_t *tgt;
1847 	boolean_t avail_spare, l2cache;
1848 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1849 
1850 	if (path)
1851 		(void) snprintf(msg, sizeof (msg),
1852 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1853 		    path);
1854 	else
1855 		(void) snprintf(msg, sizeof (msg),
1856 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1857 		    zhp->zpool_name);
1858 
1859 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1860 	if (path) {
1861 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
1862 		    &l2cache)) == 0)
1863 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1864 
1865 		/*
1866 		 * Don't allow error clearing for hot spares.  Do allow
1867 		 * error clearing for l2cache devices.
1868 		 */
1869 		if (avail_spare)
1870 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1871 
1872 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1873 		    &zc.zc_guid) == 0);
1874 	}
1875 
1876 	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
1877 		return (0);
1878 
1879 	return (zpool_standard_error(hdl, errno, msg));
1880 }
1881 
1882 /*
1883  * Similar to zpool_clear(), but takes a GUID (used by fmd).
1884  */
1885 int
1886 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
1887 {
1888 	zfs_cmd_t zc = { 0 };
1889 	char msg[1024];
1890 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1891 
1892 	(void) snprintf(msg, sizeof (msg),
1893 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
1894 	    guid);
1895 
1896 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1897 	zc.zc_guid = guid;
1898 
1899 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1900 		return (0);
1901 
1902 	return (zpool_standard_error(hdl, errno, msg));
1903 }
1904 
1905 /*
1906  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1907  * hierarchy.
1908  */
1909 int
1910 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1911     void *data)
1912 {
1913 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1914 	char (*paths)[MAXPATHLEN];
1915 	size_t size = 4;
1916 	int curr, fd, base, ret = 0;
1917 	DIR *dirp;
1918 	struct dirent *dp;
1919 	struct stat st;
1920 
1921 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1922 		return (errno == ENOENT ? 0 : -1);
1923 
1924 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1925 		int err = errno;
1926 		(void) close(base);
1927 		return (err == ENOENT ? 0 : -1);
1928 	}
1929 
1930 	/*
1931 	 * Oddly this wasn't a directory -- ignore that failure since we
1932 	 * know there are no links lower in the (non-existant) hierarchy.
1933 	 */
1934 	if (!S_ISDIR(st.st_mode)) {
1935 		(void) close(base);
1936 		return (0);
1937 	}
1938 
1939 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1940 		(void) close(base);
1941 		return (-1);
1942 	}
1943 
1944 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1945 	curr = 0;
1946 
1947 	while (curr >= 0) {
1948 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1949 			goto err;
1950 
1951 		if (S_ISDIR(st.st_mode)) {
1952 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1953 				goto err;
1954 
1955 			if ((dirp = fdopendir(fd)) == NULL) {
1956 				(void) close(fd);
1957 				goto err;
1958 			}
1959 
1960 			while ((dp = readdir(dirp)) != NULL) {
1961 				if (dp->d_name[0] == '.')
1962 					continue;
1963 
1964 				if (curr + 1 == size) {
1965 					paths = zfs_realloc(hdl, paths,
1966 					    size * sizeof (paths[0]),
1967 					    size * 2 * sizeof (paths[0]));
1968 					if (paths == NULL) {
1969 						(void) closedir(dirp);
1970 						(void) close(fd);
1971 						goto err;
1972 					}
1973 
1974 					size *= 2;
1975 				}
1976 
1977 				(void) strlcpy(paths[curr + 1], paths[curr],
1978 				    sizeof (paths[curr + 1]));
1979 				(void) strlcat(paths[curr], "/",
1980 				    sizeof (paths[curr]));
1981 				(void) strlcat(paths[curr], dp->d_name,
1982 				    sizeof (paths[curr]));
1983 				curr++;
1984 			}
1985 
1986 			(void) closedir(dirp);
1987 
1988 		} else {
1989 			if ((ret = cb(paths[curr], data)) != 0)
1990 				break;
1991 		}
1992 
1993 		curr--;
1994 	}
1995 
1996 	free(paths);
1997 	(void) close(base);
1998 
1999 	return (ret);
2000 
2001 err:
2002 	free(paths);
2003 	(void) close(base);
2004 	return (-1);
2005 }
2006 
2007 typedef struct zvol_cb {
2008 	zpool_handle_t *zcb_pool;
2009 	boolean_t zcb_create;
2010 } zvol_cb_t;
2011 
2012 /*ARGSUSED*/
2013 static int
2014 do_zvol_create(zfs_handle_t *zhp, void *data)
2015 {
2016 	int ret = 0;
2017 
2018 	if (ZFS_IS_VOLUME(zhp)) {
2019 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2020 		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
2021 	}
2022 
2023 	if (ret == 0)
2024 		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
2025 
2026 	zfs_close(zhp);
2027 
2028 	return (ret);
2029 }
2030 
2031 /*
2032  * Iterate over all zvols in the pool and make any necessary minor nodes.
2033  */
2034 int
2035 zpool_create_zvol_links(zpool_handle_t *zhp)
2036 {
2037 	zfs_handle_t *zfp;
2038 	int ret;
2039 
2040 	/*
2041 	 * If the pool is unavailable, just return success.
2042 	 */
2043 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
2044 	    zhp->zpool_name)) == NULL)
2045 		return (0);
2046 
2047 	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
2048 
2049 	zfs_close(zfp);
2050 	return (ret);
2051 }
2052 
2053 static int
2054 do_zvol_remove(const char *dataset, void *data)
2055 {
2056 	zpool_handle_t *zhp = data;
2057 
2058 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
2059 }
2060 
2061 /*
2062  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
2063  * by examining the /dev links so that a corrupted pool doesn't impede this
2064  * operation.
2065  */
2066 int
2067 zpool_remove_zvol_links(zpool_handle_t *zhp)
2068 {
2069 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
2070 }
2071 
2072 /*
2073  * Convert from a devid string to a path.
2074  */
2075 static char *
2076 devid_to_path(char *devid_str)
2077 {
2078 	ddi_devid_t devid;
2079 	char *minor;
2080 	char *path;
2081 	devid_nmlist_t *list = NULL;
2082 	int ret;
2083 
2084 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
2085 		return (NULL);
2086 
2087 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2088 
2089 	devid_str_free(minor);
2090 	devid_free(devid);
2091 
2092 	if (ret != 0)
2093 		return (NULL);
2094 
2095 	if ((path = strdup(list[0].devname)) == NULL)
2096 		return (NULL);
2097 
2098 	devid_free_nmlist(list);
2099 
2100 	return (path);
2101 }
2102 
2103 /*
2104  * Convert from a path to a devid string.
2105  */
2106 static char *
2107 path_to_devid(const char *path)
2108 {
2109 	int fd;
2110 	ddi_devid_t devid;
2111 	char *minor, *ret;
2112 
2113 	if ((fd = open(path, O_RDONLY)) < 0)
2114 		return (NULL);
2115 
2116 	minor = NULL;
2117 	ret = NULL;
2118 	if (devid_get(fd, &devid) == 0) {
2119 		if (devid_get_minor_name(fd, &minor) == 0)
2120 			ret = devid_str_encode(devid, minor);
2121 		if (minor != NULL)
2122 			devid_str_free(minor);
2123 		devid_free(devid);
2124 	}
2125 	(void) close(fd);
2126 
2127 	return (ret);
2128 }
2129 
2130 /*
2131  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2132  * ignore any failure here, since a common case is for an unprivileged user to
2133  * type 'zpool status', and we'll display the correct information anyway.
2134  */
2135 static void
2136 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2137 {
2138 	zfs_cmd_t zc = { 0 };
2139 
2140 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2141 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
2142 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2143 	    &zc.zc_guid) == 0);
2144 
2145 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2146 }
2147 
2148 /*
2149  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
2150  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
2151  * We also check if this is a whole disk, in which case we strip off the
2152  * trailing 's0' slice name.
2153  *
2154  * This routine is also responsible for identifying when disks have been
2155  * reconfigured in a new location.  The kernel will have opened the device by
2156  * devid, but the path will still refer to the old location.  To catch this, we
2157  * first do a path -> devid translation (which is fast for the common case).  If
2158  * the devid matches, we're done.  If not, we do a reverse devid -> path
2159  * translation and issue the appropriate ioctl() to update the path of the vdev.
2160  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2161  * of these checks.
2162  */
2163 char *
2164 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2165 {
2166 	char *path, *devid;
2167 	uint64_t value;
2168 	char buf[64];
2169 	vdev_stat_t *vs;
2170 	uint_t vsc;
2171 
2172 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2173 	    &value) == 0) {
2174 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2175 		    &value) == 0);
2176 		(void) snprintf(buf, sizeof (buf), "%llu",
2177 		    (u_longlong_t)value);
2178 		path = buf;
2179 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2180 
2181 		/*
2182 		 * If the device is dead (faulted, offline, etc) then don't
2183 		 * bother opening it.  Otherwise we may be forcing the user to
2184 		 * open a misbehaving device, which can have undesirable
2185 		 * effects.
2186 		 */
2187 		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2188 		    (uint64_t **)&vs, &vsc) != 0 ||
2189 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
2190 		    zhp != NULL &&
2191 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2192 			/*
2193 			 * Determine if the current path is correct.
2194 			 */
2195 			char *newdevid = path_to_devid(path);
2196 
2197 			if (newdevid == NULL ||
2198 			    strcmp(devid, newdevid) != 0) {
2199 				char *newpath;
2200 
2201 				if ((newpath = devid_to_path(devid)) != NULL) {
2202 					/*
2203 					 * Update the path appropriately.
2204 					 */
2205 					set_path(zhp, nv, newpath);
2206 					if (nvlist_add_string(nv,
2207 					    ZPOOL_CONFIG_PATH, newpath) == 0)
2208 						verify(nvlist_lookup_string(nv,
2209 						    ZPOOL_CONFIG_PATH,
2210 						    &path) == 0);
2211 					free(newpath);
2212 				}
2213 			}
2214 
2215 			if (newdevid)
2216 				devid_str_free(newdevid);
2217 		}
2218 
2219 		if (strncmp(path, "/dev/dsk/", 9) == 0)
2220 			path += 9;
2221 
2222 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2223 		    &value) == 0 && value) {
2224 			char *tmp = zfs_strdup(hdl, path);
2225 			if (tmp == NULL)
2226 				return (NULL);
2227 			tmp[strlen(path) - 2] = '\0';
2228 			return (tmp);
2229 		}
2230 	} else {
2231 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2232 
2233 		/*
2234 		 * If it's a raidz device, we need to stick in the parity level.
2235 		 */
2236 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2237 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2238 			    &value) == 0);
2239 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
2240 			    (u_longlong_t)value);
2241 			path = buf;
2242 		}
2243 	}
2244 
2245 	return (zfs_strdup(hdl, path));
2246 }
2247 
2248 static int
2249 zbookmark_compare(const void *a, const void *b)
2250 {
2251 	return (memcmp(a, b, sizeof (zbookmark_t)));
2252 }
2253 
2254 /*
2255  * Retrieve the persistent error log, uniquify the members, and return to the
2256  * caller.
2257  */
2258 int
2259 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2260 {
2261 	zfs_cmd_t zc = { 0 };
2262 	uint64_t count;
2263 	zbookmark_t *zb = NULL;
2264 	int i;
2265 
2266 	/*
2267 	 * Retrieve the raw error list from the kernel.  If the number of errors
2268 	 * has increased, allocate more space and continue until we get the
2269 	 * entire list.
2270 	 */
2271 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2272 	    &count) == 0);
2273 	if (count == 0)
2274 		return (0);
2275 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2276 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2277 		return (-1);
2278 	zc.zc_nvlist_dst_size = count;
2279 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2280 	for (;;) {
2281 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2282 		    &zc) != 0) {
2283 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
2284 			if (errno == ENOMEM) {
2285 				count = zc.zc_nvlist_dst_size;
2286 				if ((zc.zc_nvlist_dst = (uintptr_t)
2287 				    zfs_alloc(zhp->zpool_hdl, count *
2288 				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
2289 					return (-1);
2290 			} else {
2291 				return (-1);
2292 			}
2293 		} else {
2294 			break;
2295 		}
2296 	}
2297 
2298 	/*
2299 	 * Sort the resulting bookmarks.  This is a little confusing due to the
2300 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
2301 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
2302 	 * _not_ copied as part of the process.  So we point the start of our
2303 	 * array appropriate and decrement the total number of elements.
2304 	 */
2305 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2306 	    zc.zc_nvlist_dst_size;
2307 	count -= zc.zc_nvlist_dst_size;
2308 
2309 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2310 
2311 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2312 
2313 	/*
2314 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
2315 	 */
2316 	for (i = 0; i < count; i++) {
2317 		nvlist_t *nv;
2318 
2319 		/* ignoring zb_blkid and zb_level for now */
2320 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2321 		    zb[i-1].zb_object == zb[i].zb_object)
2322 			continue;
2323 
2324 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2325 			goto nomem;
2326 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2327 		    zb[i].zb_objset) != 0) {
2328 			nvlist_free(nv);
2329 			goto nomem;
2330 		}
2331 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2332 		    zb[i].zb_object) != 0) {
2333 			nvlist_free(nv);
2334 			goto nomem;
2335 		}
2336 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2337 			nvlist_free(nv);
2338 			goto nomem;
2339 		}
2340 		nvlist_free(nv);
2341 	}
2342 
2343 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2344 	return (0);
2345 
2346 nomem:
2347 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2348 	return (no_memory(zhp->zpool_hdl));
2349 }
2350 
2351 /*
2352  * Upgrade a ZFS pool to the latest on-disk version.
2353  */
2354 int
2355 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2356 {
2357 	zfs_cmd_t zc = { 0 };
2358 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2359 
2360 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2361 	zc.zc_cookie = new_version;
2362 
2363 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2364 		return (zpool_standard_error_fmt(hdl, errno,
2365 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2366 		    zhp->zpool_name));
2367 	return (0);
2368 }
2369 
2370 void
2371 zpool_set_history_str(const char *subcommand, int argc, char **argv,
2372     char *history_str)
2373 {
2374 	int i;
2375 
2376 	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2377 	for (i = 1; i < argc; i++) {
2378 		if (strlen(history_str) + 1 + strlen(argv[i]) >
2379 		    HIS_MAX_RECORD_LEN)
2380 			break;
2381 		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2382 		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2383 	}
2384 }
2385 
2386 /*
2387  * Stage command history for logging.
2388  */
2389 int
2390 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2391 {
2392 	if (history_str == NULL)
2393 		return (EINVAL);
2394 
2395 	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2396 		return (EINVAL);
2397 
2398 	if (hdl->libzfs_log_str != NULL)
2399 		free(hdl->libzfs_log_str);
2400 
2401 	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2402 		return (no_memory(hdl));
2403 
2404 	return (0);
2405 }
2406 
2407 /*
2408  * Perform ioctl to get some command history of a pool.
2409  *
2410  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2411  * logical offset of the history buffer to start reading from.
2412  *
2413  * Upon return, 'off' is the next logical offset to read from and
2414  * 'len' is the actual amount of bytes read into 'buf'.
2415  */
2416 static int
2417 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2418 {
2419 	zfs_cmd_t zc = { 0 };
2420 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2421 
2422 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2423 
2424 	zc.zc_history = (uint64_t)(uintptr_t)buf;
2425 	zc.zc_history_len = *len;
2426 	zc.zc_history_offset = *off;
2427 
2428 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2429 		switch (errno) {
2430 		case EPERM:
2431 			return (zfs_error_fmt(hdl, EZFS_PERM,
2432 			    dgettext(TEXT_DOMAIN,
2433 			    "cannot show history for pool '%s'"),
2434 			    zhp->zpool_name));
2435 		case ENOENT:
2436 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2437 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2438 			    "'%s'"), zhp->zpool_name));
2439 		case ENOTSUP:
2440 			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2441 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2442 			    "'%s', pool must be upgraded"), zhp->zpool_name));
2443 		default:
2444 			return (zpool_standard_error_fmt(hdl, errno,
2445 			    dgettext(TEXT_DOMAIN,
2446 			    "cannot get history for '%s'"), zhp->zpool_name));
2447 		}
2448 	}
2449 
2450 	*len = zc.zc_history_len;
2451 	*off = zc.zc_history_offset;
2452 
2453 	return (0);
2454 }
2455 
2456 /*
2457  * Process the buffer of nvlists, unpacking and storing each nvlist record
2458  * into 'records'.  'leftover' is set to the number of bytes that weren't
2459  * processed as there wasn't a complete record.
2460  */
2461 static int
2462 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2463     nvlist_t ***records, uint_t *numrecords)
2464 {
2465 	uint64_t reclen;
2466 	nvlist_t *nv;
2467 	int i;
2468 
2469 	while (bytes_read > sizeof (reclen)) {
2470 
2471 		/* get length of packed record (stored as little endian) */
2472 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2473 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2474 
2475 		if (bytes_read < sizeof (reclen) + reclen)
2476 			break;
2477 
2478 		/* unpack record */
2479 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2480 			return (ENOMEM);
2481 		bytes_read -= sizeof (reclen) + reclen;
2482 		buf += sizeof (reclen) + reclen;
2483 
2484 		/* add record to nvlist array */
2485 		(*numrecords)++;
2486 		if (ISP2(*numrecords + 1)) {
2487 			*records = realloc(*records,
2488 			    *numrecords * 2 * sizeof (nvlist_t *));
2489 		}
2490 		(*records)[*numrecords - 1] = nv;
2491 	}
2492 
2493 	*leftover = bytes_read;
2494 	return (0);
2495 }
2496 
2497 #define	HIS_BUF_LEN	(128*1024)
2498 
2499 /*
2500  * Retrieve the command history of a pool.
2501  */
2502 int
2503 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2504 {
2505 	char buf[HIS_BUF_LEN];
2506 	uint64_t off = 0;
2507 	nvlist_t **records = NULL;
2508 	uint_t numrecords = 0;
2509 	int err, i;
2510 
2511 	do {
2512 		uint64_t bytes_read = sizeof (buf);
2513 		uint64_t leftover;
2514 
2515 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2516 			break;
2517 
2518 		/* if nothing else was read in, we're at EOF, just return */
2519 		if (!bytes_read)
2520 			break;
2521 
2522 		if ((err = zpool_history_unpack(buf, bytes_read,
2523 		    &leftover, &records, &numrecords)) != 0)
2524 			break;
2525 		off -= leftover;
2526 
2527 		/* CONSTCOND */
2528 	} while (1);
2529 
2530 	if (!err) {
2531 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2532 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2533 		    records, numrecords) == 0);
2534 	}
2535 	for (i = 0; i < numrecords; i++)
2536 		nvlist_free(records[i]);
2537 	free(records);
2538 
2539 	return (err);
2540 }
2541 
2542 void
2543 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2544     char *pathname, size_t len)
2545 {
2546 	zfs_cmd_t zc = { 0 };
2547 	boolean_t mounted = B_FALSE;
2548 	char *mntpnt = NULL;
2549 	char dsname[MAXNAMELEN];
2550 
2551 	if (dsobj == 0) {
2552 		/* special case for the MOS */
2553 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
2554 		return;
2555 	}
2556 
2557 	/* get the dataset's name */
2558 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2559 	zc.zc_obj = dsobj;
2560 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
2561 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2562 		/* just write out a path of two object numbers */
2563 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
2564 		    dsobj, obj);
2565 		return;
2566 	}
2567 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2568 
2569 	/* find out if the dataset is mounted */
2570 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2571 
2572 	/* get the corrupted object's path */
2573 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2574 	zc.zc_obj = obj;
2575 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2576 	    &zc) == 0) {
2577 		if (mounted) {
2578 			(void) snprintf(pathname, len, "%s%s", mntpnt,
2579 			    zc.zc_value);
2580 		} else {
2581 			(void) snprintf(pathname, len, "%s:%s",
2582 			    dsname, zc.zc_value);
2583 		}
2584 	} else {
2585 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
2586 	}
2587 	free(mntpnt);
2588 }
2589 
2590 #define	RDISK_ROOT	"/dev/rdsk"
2591 #define	BACKUP_SLICE	"s2"
2592 /*
2593  * Don't start the slice at the default block of 34; many storage
2594  * devices will use a stripe width of 128k, so start there instead.
2595  */
2596 #define	NEW_START_BLOCK	256
2597 
2598 /*
2599  * Read the EFI label from the config, if a label does not exist then
2600  * pass back the error to the caller. If the caller has passed a non-NULL
2601  * diskaddr argument then we set it to the starting address of the EFI
2602  * partition.
2603  */
2604 static int
2605 read_efi_label(nvlist_t *config, diskaddr_t *sb)
2606 {
2607 	char *path;
2608 	int fd;
2609 	char diskname[MAXPATHLEN];
2610 	int err = -1;
2611 
2612 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
2613 		return (err);
2614 
2615 	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
2616 	    strrchr(path, '/'));
2617 	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2618 		struct dk_gpt *vtoc;
2619 
2620 		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
2621 			if (sb != NULL)
2622 				*sb = vtoc->efi_parts[0].p_start;
2623 			efi_free(vtoc);
2624 		}
2625 		(void) close(fd);
2626 	}
2627 	return (err);
2628 }
2629 
2630 /*
2631  * determine where a partition starts on a disk in the current
2632  * configuration
2633  */
2634 static diskaddr_t
2635 find_start_block(nvlist_t *config)
2636 {
2637 	nvlist_t **child;
2638 	uint_t c, children;
2639 	diskaddr_t sb = MAXOFFSET_T;
2640 	uint64_t wholedisk;
2641 
2642 	if (nvlist_lookup_nvlist_array(config,
2643 	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2644 		if (nvlist_lookup_uint64(config,
2645 		    ZPOOL_CONFIG_WHOLE_DISK,
2646 		    &wholedisk) != 0 || !wholedisk) {
2647 			return (MAXOFFSET_T);
2648 		}
2649 		if (read_efi_label(config, &sb) < 0)
2650 			sb = MAXOFFSET_T;
2651 		return (sb);
2652 	}
2653 
2654 	for (c = 0; c < children; c++) {
2655 		sb = find_start_block(child[c]);
2656 		if (sb != MAXOFFSET_T) {
2657 			return (sb);
2658 		}
2659 	}
2660 	return (MAXOFFSET_T);
2661 }
2662 
2663 /*
2664  * Label an individual disk.  The name provided is the short name,
2665  * stripped of any leading /dev path.
2666  */
2667 int
2668 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2669 {
2670 	char path[MAXPATHLEN];
2671 	struct dk_gpt *vtoc;
2672 	int fd;
2673 	size_t resv = EFI_MIN_RESV_SIZE;
2674 	uint64_t slice_size;
2675 	diskaddr_t start_block;
2676 	char errbuf[1024];
2677 
2678 	/* prepare an error message just in case */
2679 	(void) snprintf(errbuf, sizeof (errbuf),
2680 	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
2681 
2682 	if (zhp) {
2683 		nvlist_t *nvroot;
2684 
2685 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2686 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2687 
2688 		if (zhp->zpool_start_block == 0)
2689 			start_block = find_start_block(nvroot);
2690 		else
2691 			start_block = zhp->zpool_start_block;
2692 		zhp->zpool_start_block = start_block;
2693 	} else {
2694 		/* new pool */
2695 		start_block = NEW_START_BLOCK;
2696 	}
2697 
2698 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2699 	    BACKUP_SLICE);
2700 
2701 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2702 		/*
2703 		 * This shouldn't happen.  We've long since verified that this
2704 		 * is a valid device.
2705 		 */
2706 		zfs_error_aux(hdl,
2707 		    dgettext(TEXT_DOMAIN, "unable to open device"));
2708 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2709 	}
2710 
2711 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2712 		/*
2713 		 * The only way this can fail is if we run out of memory, or we
2714 		 * were unable to read the disk's capacity
2715 		 */
2716 		if (errno == ENOMEM)
2717 			(void) no_memory(hdl);
2718 
2719 		(void) close(fd);
2720 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2721 		    "unable to read disk capacity"), name);
2722 
2723 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2724 	}
2725 
2726 	slice_size = vtoc->efi_last_u_lba + 1;
2727 	slice_size -= EFI_MIN_RESV_SIZE;
2728 	if (start_block == MAXOFFSET_T)
2729 		start_block = NEW_START_BLOCK;
2730 	slice_size -= start_block;
2731 
2732 	vtoc->efi_parts[0].p_start = start_block;
2733 	vtoc->efi_parts[0].p_size = slice_size;
2734 
2735 	/*
2736 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
2737 	 * disposable by some EFI utilities (since EFI doesn't have a backup
2738 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
2739 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2740 	 * etc. were all pretty specific.  V_USR is as close to reality as we
2741 	 * can get, in the absence of V_OTHER.
2742 	 */
2743 	vtoc->efi_parts[0].p_tag = V_USR;
2744 	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2745 
2746 	vtoc->efi_parts[8].p_start = slice_size + start_block;
2747 	vtoc->efi_parts[8].p_size = resv;
2748 	vtoc->efi_parts[8].p_tag = V_RESERVED;
2749 
2750 	if (efi_write(fd, vtoc) != 0) {
2751 		/*
2752 		 * Some block drivers (like pcata) may not support EFI
2753 		 * GPT labels.  Print out a helpful error message dir-
2754 		 * ecting the user to manually label the disk and give
2755 		 * a specific slice.
2756 		 */
2757 		(void) close(fd);
2758 		efi_free(vtoc);
2759 
2760 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2761 		    "try using fdisk(1M) and then provide a specific slice"));
2762 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
2763 	}
2764 
2765 	(void) close(fd);
2766 	efi_free(vtoc);
2767 	return (0);
2768 }
2769 
2770 static boolean_t
2771 supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
2772 {
2773 	char *type;
2774 	nvlist_t **child;
2775 	uint_t children, c;
2776 
2777 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
2778 	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2779 	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
2780 	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
2781 	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
2782 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2783 		    "vdev type '%s' is not supported"), type);
2784 		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
2785 		return (B_FALSE);
2786 	}
2787 	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
2788 	    &child, &children) == 0) {
2789 		for (c = 0; c < children; c++) {
2790 			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
2791 				return (B_FALSE);
2792 		}
2793 	}
2794 	return (B_TRUE);
2795 }
2796 
2797 /*
2798  * check if this zvol is allowable for use as a dump device; zero if
2799  * it is, > 0 if it isn't, < 0 if it isn't a zvol
2800  */
2801 int
2802 zvol_check_dump_config(char *arg)
2803 {
2804 	zpool_handle_t *zhp = NULL;
2805 	nvlist_t *config, *nvroot;
2806 	char *p, *volname;
2807 	nvlist_t **top;
2808 	uint_t toplevels;
2809 	libzfs_handle_t *hdl;
2810 	char errbuf[1024];
2811 	char poolname[ZPOOL_MAXNAMELEN];
2812 	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
2813 	int ret = 1;
2814 
2815 	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
2816 		return (-1);
2817 	}
2818 
2819 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2820 	    "dump is not supported on device '%s'"), arg);
2821 
2822 	if ((hdl = libzfs_init()) == NULL)
2823 		return (1);
2824 	libzfs_print_on_error(hdl, B_TRUE);
2825 
2826 	volname = arg + pathlen;
2827 
2828 	/* check the configuration of the pool */
2829 	if ((p = strchr(volname, '/')) == NULL) {
2830 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2831 		    "malformed dataset name"));
2832 		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
2833 		return (1);
2834 	} else if (p - volname >= ZFS_MAXNAMELEN) {
2835 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2836 		    "dataset name is too long"));
2837 		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
2838 		return (1);
2839 	} else {
2840 		(void) strncpy(poolname, volname, p - volname);
2841 		poolname[p - volname] = '\0';
2842 	}
2843 
2844 	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
2845 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2846 		    "could not open pool '%s'"), poolname);
2847 		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
2848 		goto out;
2849 	}
2850 	config = zpool_get_config(zhp, NULL);
2851 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2852 	    &nvroot) != 0) {
2853 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2854 		    "could not obtain vdev configuration for  '%s'"), poolname);
2855 		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
2856 		goto out;
2857 	}
2858 
2859 	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2860 	    &top, &toplevels) == 0);
2861 	if (toplevels != 1) {
2862 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2863 		    "'%s' has multiple top level vdevs"), poolname);
2864 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
2865 		goto out;
2866 	}
2867 
2868 	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
2869 		goto out;
2870 	}
2871 	ret = 0;
2872 
2873 out:
2874 	if (zhp)
2875 		zpool_close(zhp);
2876 	libzfs_fini(hdl);
2877 	return (ret);
2878 }
2879