xref: /titanic_52/usr/src/lib/libzfs/common/libzfs_pool.c (revision 968633ad8faee931821fd6b656eb0d96d4b186c0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <alloca.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <devid.h>
32 #include <dirent.h>
33 #include <fcntl.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <zone.h>
40 #include <sys/efi_partition.h>
41 #include <sys/vtoc.h>
42 #include <sys/zfs_ioctl.h>
43 #include <sys/zio.h>
44 #include <strings.h>
45 
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "libzfs_impl.h"
49 
50 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
51 
52 /*
53  * ====================================================================
54  *   zpool property functions
55  * ====================================================================
56  */
57 
58 static int
59 zpool_get_all_props(zpool_handle_t *zhp)
60 {
61 	zfs_cmd_t zc = { 0 };
62 	libzfs_handle_t *hdl = zhp->zpool_hdl;
63 
64 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
65 
66 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
67 		return (-1);
68 
69 	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
70 		if (errno == ENOMEM) {
71 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
72 				zcmd_free_nvlists(&zc);
73 				return (-1);
74 			}
75 		} else {
76 			zcmd_free_nvlists(&zc);
77 			return (-1);
78 		}
79 	}
80 
81 	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
82 		zcmd_free_nvlists(&zc);
83 		return (-1);
84 	}
85 
86 	zcmd_free_nvlists(&zc);
87 
88 	return (0);
89 }
90 
91 static int
92 zpool_props_refresh(zpool_handle_t *zhp)
93 {
94 	nvlist_t *old_props;
95 
96 	old_props = zhp->zpool_props;
97 
98 	if (zpool_get_all_props(zhp) != 0)
99 		return (-1);
100 
101 	nvlist_free(old_props);
102 	return (0);
103 }
104 
105 static char *
106 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
107     zprop_source_t *src)
108 {
109 	nvlist_t *nv, *nvl;
110 	uint64_t ival;
111 	char *value;
112 	zprop_source_t source;
113 
114 	nvl = zhp->zpool_props;
115 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
116 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
117 		source = ival;
118 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
119 	} else {
120 		source = ZPROP_SRC_DEFAULT;
121 		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
122 			value = "-";
123 	}
124 
125 	if (src)
126 		*src = source;
127 
128 	return (value);
129 }
130 
131 uint64_t
132 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
133 {
134 	nvlist_t *nv, *nvl;
135 	uint64_t value;
136 	zprop_source_t source;
137 
138 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
139 		/*
140 		 * zpool_get_all_props() has most likely failed because
141 		 * the pool is faulted, but if all we need is the top level
142 		 * vdev's guid then get it from the zhp config nvlist.
143 		 */
144 		if ((prop == ZPOOL_PROP_GUID) &&
145 		    (nvlist_lookup_nvlist(zhp->zpool_config,
146 		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
147 		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
148 		    == 0)) {
149 			return (value);
150 		}
151 		return (zpool_prop_default_numeric(prop));
152 	}
153 
154 	nvl = zhp->zpool_props;
155 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
156 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
157 		source = value;
158 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
159 	} else {
160 		source = ZPROP_SRC_DEFAULT;
161 		value = zpool_prop_default_numeric(prop);
162 	}
163 
164 	if (src)
165 		*src = source;
166 
167 	return (value);
168 }
169 
170 /*
171  * Map VDEV STATE to printed strings.
172  */
173 char *
174 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
175 {
176 	switch (state) {
177 	case VDEV_STATE_CLOSED:
178 	case VDEV_STATE_OFFLINE:
179 		return (gettext("OFFLINE"));
180 	case VDEV_STATE_REMOVED:
181 		return (gettext("REMOVED"));
182 	case VDEV_STATE_CANT_OPEN:
183 		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
184 			return (gettext("FAULTED"));
185 		else
186 			return (gettext("UNAVAIL"));
187 	case VDEV_STATE_FAULTED:
188 		return (gettext("FAULTED"));
189 	case VDEV_STATE_DEGRADED:
190 		return (gettext("DEGRADED"));
191 	case VDEV_STATE_HEALTHY:
192 		return (gettext("ONLINE"));
193 	}
194 
195 	return (gettext("UNKNOWN"));
196 }
197 
198 /*
199  * Get a zpool property value for 'prop' and return the value in
200  * a pre-allocated buffer.
201  */
202 int
203 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
204     zprop_source_t *srctype)
205 {
206 	uint64_t intval;
207 	const char *strval;
208 	zprop_source_t src = ZPROP_SRC_NONE;
209 	nvlist_t *nvroot;
210 	vdev_stat_t *vs;
211 	uint_t vsc;
212 
213 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
214 		if (prop == ZPOOL_PROP_NAME)
215 			(void) strlcpy(buf, zpool_get_name(zhp), len);
216 		else if (prop == ZPOOL_PROP_HEALTH)
217 			(void) strlcpy(buf, "FAULTED", len);
218 		else
219 			(void) strlcpy(buf, "-", len);
220 		return (0);
221 	}
222 
223 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
224 	    prop != ZPOOL_PROP_NAME)
225 		return (-1);
226 
227 	switch (zpool_prop_get_type(prop)) {
228 	case PROP_TYPE_STRING:
229 		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
230 		    len);
231 		break;
232 
233 	case PROP_TYPE_NUMBER:
234 		intval = zpool_get_prop_int(zhp, prop, &src);
235 
236 		switch (prop) {
237 		case ZPOOL_PROP_SIZE:
238 		case ZPOOL_PROP_USED:
239 		case ZPOOL_PROP_AVAILABLE:
240 			(void) zfs_nicenum(intval, buf, len);
241 			break;
242 
243 		case ZPOOL_PROP_CAPACITY:
244 			(void) snprintf(buf, len, "%llu%%",
245 			    (u_longlong_t)intval);
246 			break;
247 
248 		case ZPOOL_PROP_HEALTH:
249 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
250 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
251 			verify(nvlist_lookup_uint64_array(nvroot,
252 			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
253 
254 			(void) strlcpy(buf, zpool_state_to_name(intval,
255 			    vs->vs_aux), len);
256 			break;
257 		default:
258 			(void) snprintf(buf, len, "%llu", intval);
259 		}
260 		break;
261 
262 	case PROP_TYPE_INDEX:
263 		intval = zpool_get_prop_int(zhp, prop, &src);
264 		if (zpool_prop_index_to_string(prop, intval, &strval)
265 		    != 0)
266 			return (-1);
267 		(void) strlcpy(buf, strval, len);
268 		break;
269 
270 	default:
271 		abort();
272 	}
273 
274 	if (srctype)
275 		*srctype = src;
276 
277 	return (0);
278 }
279 
280 /*
281  * Check if the bootfs name has the same pool name as it is set to.
282  * Assuming bootfs is a valid dataset name.
283  */
284 static boolean_t
285 bootfs_name_valid(const char *pool, char *bootfs)
286 {
287 	int len = strlen(pool);
288 
289 	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
290 		return (B_FALSE);
291 
292 	if (strncmp(pool, bootfs, len) == 0 &&
293 	    (bootfs[len] == '/' || bootfs[len] == '\0'))
294 		return (B_TRUE);
295 
296 	return (B_FALSE);
297 }
298 
299 /*
300  * Inspect the configuration to determine if any of the devices contain
301  * an EFI label.
302  */
303 static boolean_t
304 pool_uses_efi(nvlist_t *config)
305 {
306 	nvlist_t **child;
307 	uint_t c, children;
308 
309 	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
310 	    &child, &children) != 0)
311 		return (read_efi_label(config, NULL) >= 0);
312 
313 	for (c = 0; c < children; c++) {
314 		if (pool_uses_efi(child[c]))
315 			return (B_TRUE);
316 	}
317 	return (B_FALSE);
318 }
319 
320 /*
321  * Given an nvlist of zpool properties to be set, validate that they are
322  * correct, and parse any numeric properties (index, boolean, etc) if they are
323  * specified as strings.
324  */
325 static nvlist_t *
326 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
327     nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
328 {
329 	nvpair_t *elem;
330 	nvlist_t *retprops;
331 	zpool_prop_t prop;
332 	char *strval;
333 	uint64_t intval;
334 	char *slash;
335 	struct stat64 statbuf;
336 	zpool_handle_t *zhp;
337 	nvlist_t *nvroot;
338 
339 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
340 		(void) no_memory(hdl);
341 		return (NULL);
342 	}
343 
344 	elem = NULL;
345 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
346 		const char *propname = nvpair_name(elem);
347 
348 		/*
349 		 * Make sure this property is valid and applies to this type.
350 		 */
351 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
352 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
353 			    "invalid property '%s'"), propname);
354 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
355 			goto error;
356 		}
357 
358 		if (zpool_prop_readonly(prop)) {
359 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
360 			    "is readonly"), propname);
361 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
362 			goto error;
363 		}
364 
365 		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
366 		    &strval, &intval, errbuf) != 0)
367 			goto error;
368 
369 		/*
370 		 * Perform additional checking for specific properties.
371 		 */
372 		switch (prop) {
373 		case ZPOOL_PROP_VERSION:
374 			if (intval < version || intval > SPA_VERSION) {
375 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
376 				    "property '%s' number %d is invalid."),
377 				    propname, intval);
378 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
379 				goto error;
380 			}
381 			break;
382 
383 		case ZPOOL_PROP_BOOTFS:
384 			if (create_or_import) {
385 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
386 				    "property '%s' cannot be set at creation "
387 				    "or import time"), propname);
388 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
389 				goto error;
390 			}
391 
392 			if (version < SPA_VERSION_BOOTFS) {
393 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
394 				    "pool must be upgraded to support "
395 				    "'%s' property"), propname);
396 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
397 				goto error;
398 			}
399 
400 			/*
401 			 * bootfs property value has to be a dataset name and
402 			 * the dataset has to be in the same pool as it sets to.
403 			 */
404 			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
405 			    strval)) {
406 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
407 				    "is an invalid name"), strval);
408 				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
409 				goto error;
410 			}
411 
412 			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
413 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
414 				    "could not open pool '%s'"), poolname);
415 				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
416 				goto error;
417 			}
418 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
419 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
420 
421 			/*
422 			 * bootfs property cannot be set on a disk which has
423 			 * been EFI labeled.
424 			 */
425 			if (pool_uses_efi(nvroot)) {
426 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
427 				    "property '%s' not supported on "
428 				    "EFI labeled devices"), propname);
429 				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
430 				zpool_close(zhp);
431 				goto error;
432 			}
433 			zpool_close(zhp);
434 			break;
435 
436 		case ZPOOL_PROP_ALTROOT:
437 			if (!create_or_import) {
438 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
439 				    "property '%s' can only be set during pool "
440 				    "creation or import"), propname);
441 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
442 				goto error;
443 			}
444 
445 			if (strval[0] != '/') {
446 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
447 				    "bad alternate root '%s'"), strval);
448 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
449 				goto error;
450 			}
451 			break;
452 
453 		case ZPOOL_PROP_CACHEFILE:
454 			if (strval[0] == '\0')
455 				break;
456 
457 			if (strcmp(strval, "none") == 0)
458 				break;
459 
460 			if (strval[0] != '/') {
461 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
462 				    "property '%s' must be empty, an "
463 				    "absolute path, or 'none'"), propname);
464 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
465 				goto error;
466 			}
467 
468 			slash = strrchr(strval, '/');
469 
470 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
471 			    strcmp(slash, "/..") == 0) {
472 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
473 				    "'%s' is not a valid file"), strval);
474 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
475 				goto error;
476 			}
477 
478 			*slash = '\0';
479 
480 			if (strval[0] != '\0' &&
481 			    (stat64(strval, &statbuf) != 0 ||
482 			    !S_ISDIR(statbuf.st_mode))) {
483 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
484 				    "'%s' is not a valid directory"),
485 				    strval);
486 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
487 				goto error;
488 			}
489 
490 			*slash = '/';
491 			break;
492 		}
493 	}
494 
495 	return (retprops);
496 error:
497 	nvlist_free(retprops);
498 	return (NULL);
499 }
500 
501 /*
502  * Set zpool property : propname=propval.
503  */
504 int
505 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
506 {
507 	zfs_cmd_t zc = { 0 };
508 	int ret = -1;
509 	char errbuf[1024];
510 	nvlist_t *nvl = NULL;
511 	nvlist_t *realprops;
512 	uint64_t version;
513 
514 	(void) snprintf(errbuf, sizeof (errbuf),
515 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
516 	    zhp->zpool_name);
517 
518 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
519 		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
520 
521 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
522 		return (no_memory(zhp->zpool_hdl));
523 
524 	if (nvlist_add_string(nvl, propname, propval) != 0) {
525 		nvlist_free(nvl);
526 		return (no_memory(zhp->zpool_hdl));
527 	}
528 
529 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
530 	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
531 	    zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
532 		nvlist_free(nvl);
533 		return (-1);
534 	}
535 
536 	nvlist_free(nvl);
537 	nvl = realprops;
538 
539 	/*
540 	 * Execute the corresponding ioctl() to set this property.
541 	 */
542 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
543 
544 	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
545 		nvlist_free(nvl);
546 		return (-1);
547 	}
548 
549 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
550 
551 	zcmd_free_nvlists(&zc);
552 	nvlist_free(nvl);
553 
554 	if (ret)
555 		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
556 	else
557 		(void) zpool_props_refresh(zhp);
558 
559 	return (ret);
560 }
561 
562 int
563 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
564 {
565 	libzfs_handle_t *hdl = zhp->zpool_hdl;
566 	zprop_list_t *entry;
567 	char buf[ZFS_MAXPROPLEN];
568 
569 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
570 		return (-1);
571 
572 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
573 
574 		if (entry->pl_fixed)
575 			continue;
576 
577 		if (entry->pl_prop != ZPROP_INVAL &&
578 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
579 		    NULL) == 0) {
580 			if (strlen(buf) > entry->pl_width)
581 				entry->pl_width = strlen(buf);
582 		}
583 	}
584 
585 	return (0);
586 }
587 
588 
589 /*
590  * Validate the given pool name, optionally putting an extended error message in
591  * 'buf'.
592  */
593 boolean_t
594 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
595 {
596 	namecheck_err_t why;
597 	char what;
598 	int ret;
599 
600 	ret = pool_namecheck(pool, &why, &what);
601 
602 	/*
603 	 * The rules for reserved pool names were extended at a later point.
604 	 * But we need to support users with existing pools that may now be
605 	 * invalid.  So we only check for this expanded set of names during a
606 	 * create (or import), and only in userland.
607 	 */
608 	if (ret == 0 && !isopen &&
609 	    (strncmp(pool, "mirror", 6) == 0 ||
610 	    strncmp(pool, "raidz", 5) == 0 ||
611 	    strncmp(pool, "spare", 5) == 0 ||
612 	    strcmp(pool, "log") == 0)) {
613 		if (hdl != NULL)
614 			zfs_error_aux(hdl,
615 			    dgettext(TEXT_DOMAIN, "name is reserved"));
616 		return (B_FALSE);
617 	}
618 
619 
620 	if (ret != 0) {
621 		if (hdl != NULL) {
622 			switch (why) {
623 			case NAME_ERR_TOOLONG:
624 				zfs_error_aux(hdl,
625 				    dgettext(TEXT_DOMAIN, "name is too long"));
626 				break;
627 
628 			case NAME_ERR_INVALCHAR:
629 				zfs_error_aux(hdl,
630 				    dgettext(TEXT_DOMAIN, "invalid character "
631 				    "'%c' in pool name"), what);
632 				break;
633 
634 			case NAME_ERR_NOLETTER:
635 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
636 				    "name must begin with a letter"));
637 				break;
638 
639 			case NAME_ERR_RESERVED:
640 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
641 				    "name is reserved"));
642 				break;
643 
644 			case NAME_ERR_DISKLIKE:
645 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
646 				    "pool name is reserved"));
647 				break;
648 
649 			case NAME_ERR_LEADING_SLASH:
650 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
651 				    "leading slash in name"));
652 				break;
653 
654 			case NAME_ERR_EMPTY_COMPONENT:
655 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
656 				    "empty component in name"));
657 				break;
658 
659 			case NAME_ERR_TRAILING_SLASH:
660 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
661 				    "trailing slash in name"));
662 				break;
663 
664 			case NAME_ERR_MULTIPLE_AT:
665 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
666 				    "multiple '@' delimiters in name"));
667 				break;
668 
669 			}
670 		}
671 		return (B_FALSE);
672 	}
673 
674 	return (B_TRUE);
675 }
676 
677 /*
678  * Open a handle to the given pool, even if the pool is currently in the FAULTED
679  * state.
680  */
681 zpool_handle_t *
682 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
683 {
684 	zpool_handle_t *zhp;
685 	boolean_t missing;
686 
687 	/*
688 	 * Make sure the pool name is valid.
689 	 */
690 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
691 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
692 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
693 		    pool);
694 		return (NULL);
695 	}
696 
697 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
698 		return (NULL);
699 
700 	zhp->zpool_hdl = hdl;
701 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
702 
703 	if (zpool_refresh_stats(zhp, &missing) != 0) {
704 		zpool_close(zhp);
705 		return (NULL);
706 	}
707 
708 	if (missing) {
709 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
710 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
711 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
712 		zpool_close(zhp);
713 		return (NULL);
714 	}
715 
716 	return (zhp);
717 }
718 
719 /*
720  * Like the above, but silent on error.  Used when iterating over pools (because
721  * the configuration cache may be out of date).
722  */
723 int
724 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
725 {
726 	zpool_handle_t *zhp;
727 	boolean_t missing;
728 
729 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
730 		return (-1);
731 
732 	zhp->zpool_hdl = hdl;
733 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
734 
735 	if (zpool_refresh_stats(zhp, &missing) != 0) {
736 		zpool_close(zhp);
737 		return (-1);
738 	}
739 
740 	if (missing) {
741 		zpool_close(zhp);
742 		*ret = NULL;
743 		return (0);
744 	}
745 
746 	*ret = zhp;
747 	return (0);
748 }
749 
750 /*
751  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
752  * state.
753  */
754 zpool_handle_t *
755 zpool_open(libzfs_handle_t *hdl, const char *pool)
756 {
757 	zpool_handle_t *zhp;
758 
759 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
760 		return (NULL);
761 
762 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
763 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
764 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
765 		zpool_close(zhp);
766 		return (NULL);
767 	}
768 
769 	return (zhp);
770 }
771 
772 /*
773  * Close the handle.  Simply frees the memory associated with the handle.
774  */
775 void
776 zpool_close(zpool_handle_t *zhp)
777 {
778 	if (zhp->zpool_config)
779 		nvlist_free(zhp->zpool_config);
780 	if (zhp->zpool_old_config)
781 		nvlist_free(zhp->zpool_old_config);
782 	if (zhp->zpool_props)
783 		nvlist_free(zhp->zpool_props);
784 	free(zhp);
785 }
786 
787 /*
788  * Return the name of the pool.
789  */
790 const char *
791 zpool_get_name(zpool_handle_t *zhp)
792 {
793 	return (zhp->zpool_name);
794 }
795 
796 
797 /*
798  * Return the state of the pool (ACTIVE or UNAVAILABLE)
799  */
800 int
801 zpool_get_state(zpool_handle_t *zhp)
802 {
803 	return (zhp->zpool_state);
804 }
805 
806 /*
807  * Create the named pool, using the provided vdev list.  It is assumed
808  * that the consumer has already validated the contents of the nvlist, so we
809  * don't have to worry about error semantics.
810  */
811 int
812 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
813     nvlist_t *props, nvlist_t *fsprops)
814 {
815 	zfs_cmd_t zc = { 0 };
816 	nvlist_t *zc_fsprops = NULL;
817 	nvlist_t *zc_props = NULL;
818 	char msg[1024];
819 	char *altroot;
820 	int ret = -1;
821 
822 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
823 	    "cannot create '%s'"), pool);
824 
825 	if (!zpool_name_valid(hdl, B_FALSE, pool))
826 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
827 
828 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
829 		return (-1);
830 
831 	if (props) {
832 		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
833 		    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
834 			goto create_failed;
835 		}
836 	}
837 
838 	if (fsprops) {
839 		uint64_t zoned;
840 		char *zonestr;
841 
842 		zoned = ((nvlist_lookup_string(fsprops,
843 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
844 		    strcmp(zonestr, "on") == 0);
845 
846 		if ((zc_fsprops = zfs_valid_proplist(hdl,
847 		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
848 			goto create_failed;
849 		}
850 		if (!zc_props &&
851 		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
852 			goto create_failed;
853 		}
854 		if (nvlist_add_nvlist(zc_props,
855 		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
856 			goto create_failed;
857 		}
858 	}
859 
860 	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
861 		goto create_failed;
862 
863 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
864 
865 	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
866 
867 		zcmd_free_nvlists(&zc);
868 		nvlist_free(zc_props);
869 		nvlist_free(zc_fsprops);
870 
871 		switch (errno) {
872 		case EBUSY:
873 			/*
874 			 * This can happen if the user has specified the same
875 			 * device multiple times.  We can't reliably detect this
876 			 * until we try to add it and see we already have a
877 			 * label.
878 			 */
879 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
880 			    "one or more vdevs refer to the same device"));
881 			return (zfs_error(hdl, EZFS_BADDEV, msg));
882 
883 		case EOVERFLOW:
884 			/*
885 			 * This occurs when one of the devices is below
886 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
887 			 * device was the problem device since there's no
888 			 * reliable way to determine device size from userland.
889 			 */
890 			{
891 				char buf[64];
892 
893 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
894 
895 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
896 				    "one or more devices is less than the "
897 				    "minimum size (%s)"), buf);
898 			}
899 			return (zfs_error(hdl, EZFS_BADDEV, msg));
900 
901 		case ENOSPC:
902 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
903 			    "one or more devices is out of space"));
904 			return (zfs_error(hdl, EZFS_BADDEV, msg));
905 
906 		case ENOTBLK:
907 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
908 			    "cache device must be a disk or disk slice"));
909 			return (zfs_error(hdl, EZFS_BADDEV, msg));
910 
911 		default:
912 			return (zpool_standard_error(hdl, errno, msg));
913 		}
914 	}
915 
916 	/*
917 	 * If this is an alternate root pool, then we automatically set the
918 	 * mountpoint of the root dataset to be '/'.
919 	 */
920 	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
921 	    &altroot) == 0) {
922 		zfs_handle_t *zhp;
923 
924 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
925 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
926 		    "/") == 0);
927 
928 		zfs_close(zhp);
929 	}
930 
931 create_failed:
932 	zcmd_free_nvlists(&zc);
933 	nvlist_free(zc_props);
934 	nvlist_free(zc_fsprops);
935 	return (ret);
936 }
937 
938 /*
939  * Destroy the given pool.  It is up to the caller to ensure that there are no
940  * datasets left in the pool.
941  */
942 int
943 zpool_destroy(zpool_handle_t *zhp)
944 {
945 	zfs_cmd_t zc = { 0 };
946 	zfs_handle_t *zfp = NULL;
947 	libzfs_handle_t *hdl = zhp->zpool_hdl;
948 	char msg[1024];
949 
950 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
951 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
952 	    ZFS_TYPE_FILESYSTEM)) == NULL)
953 		return (-1);
954 
955 	if (zpool_remove_zvol_links(zhp) != 0)
956 		return (-1);
957 
958 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
959 
960 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
961 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
962 		    "cannot destroy '%s'"), zhp->zpool_name);
963 
964 		if (errno == EROFS) {
965 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
966 			    "one or more devices is read only"));
967 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
968 		} else {
969 			(void) zpool_standard_error(hdl, errno, msg);
970 		}
971 
972 		if (zfp)
973 			zfs_close(zfp);
974 		return (-1);
975 	}
976 
977 	if (zfp) {
978 		remove_mountpoint(zfp);
979 		zfs_close(zfp);
980 	}
981 
982 	return (0);
983 }
984 
985 /*
986  * Add the given vdevs to the pool.  The caller must have already performed the
987  * necessary verification to ensure that the vdev specification is well-formed.
988  */
989 int
990 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
991 {
992 	zfs_cmd_t zc = { 0 };
993 	int ret;
994 	libzfs_handle_t *hdl = zhp->zpool_hdl;
995 	char msg[1024];
996 	nvlist_t **spares, **l2cache;
997 	uint_t nspares, nl2cache;
998 
999 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1000 	    "cannot add to '%s'"), zhp->zpool_name);
1001 
1002 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1003 	    SPA_VERSION_SPARES &&
1004 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1005 	    &spares, &nspares) == 0) {
1006 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1007 		    "upgraded to add hot spares"));
1008 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1009 	}
1010 
1011 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1012 	    SPA_VERSION_L2CACHE &&
1013 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1014 	    &l2cache, &nl2cache) == 0) {
1015 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1016 		    "upgraded to add cache devices"));
1017 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1018 	}
1019 
1020 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1021 		return (-1);
1022 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1023 
1024 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1025 		switch (errno) {
1026 		case EBUSY:
1027 			/*
1028 			 * This can happen if the user has specified the same
1029 			 * device multiple times.  We can't reliably detect this
1030 			 * until we try to add it and see we already have a
1031 			 * label.
1032 			 */
1033 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1034 			    "one or more vdevs refer to the same device"));
1035 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1036 			break;
1037 
1038 		case EOVERFLOW:
1039 			/*
1040 			 * This occurrs when one of the devices is below
1041 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1042 			 * device was the problem device since there's no
1043 			 * reliable way to determine device size from userland.
1044 			 */
1045 			{
1046 				char buf[64];
1047 
1048 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1049 
1050 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1051 				    "device is less than the minimum "
1052 				    "size (%s)"), buf);
1053 			}
1054 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1055 			break;
1056 
1057 		case ENOTSUP:
1058 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1059 			    "pool must be upgraded to add these vdevs"));
1060 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
1061 			break;
1062 
1063 		case EDOM:
1064 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1065 			    "root pool can not have multiple vdevs"
1066 			    " or separate logs"));
1067 			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1068 			break;
1069 
1070 		case ENOTBLK:
1071 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1072 			    "cache device must be a disk or disk slice"));
1073 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1074 			break;
1075 
1076 		default:
1077 			(void) zpool_standard_error(hdl, errno, msg);
1078 		}
1079 
1080 		ret = -1;
1081 	} else {
1082 		ret = 0;
1083 	}
1084 
1085 	zcmd_free_nvlists(&zc);
1086 
1087 	return (ret);
1088 }
1089 
1090 /*
1091  * Exports the pool from the system.  The caller must ensure that there are no
1092  * mounted datasets in the pool.
1093  */
1094 int
1095 zpool_export(zpool_handle_t *zhp, boolean_t force)
1096 {
1097 	zfs_cmd_t zc = { 0 };
1098 	char msg[1024];
1099 
1100 	if (zpool_remove_zvol_links(zhp) != 0)
1101 		return (-1);
1102 
1103 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1104 	    "cannot export '%s'"), zhp->zpool_name);
1105 
1106 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1107 	zc.zc_cookie = force;
1108 
1109 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1110 		switch (errno) {
1111 		case EXDEV:
1112 			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1113 			    "use '-f' to override the following errors:\n"
1114 			    "'%s' has an active shared spare which could be"
1115 			    " used by other pools once '%s' is exported."),
1116 			    zhp->zpool_name, zhp->zpool_name);
1117 			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1118 			    msg));
1119 		default:
1120 			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1121 			    msg));
1122 		}
1123 	}
1124 
1125 	return (0);
1126 }
1127 
1128 /*
1129  * zpool_import() is a contracted interface. Should be kept the same
1130  * if possible.
1131  *
1132  * Applications should use zpool_import_props() to import a pool with
1133  * new properties value to be set.
1134  */
1135 int
1136 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1137     char *altroot)
1138 {
1139 	nvlist_t *props = NULL;
1140 	int ret;
1141 
1142 	if (altroot != NULL) {
1143 		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1144 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1145 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1146 			    newname));
1147 		}
1148 
1149 		if (nvlist_add_string(props,
1150 		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
1151 			nvlist_free(props);
1152 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1153 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1154 			    newname));
1155 		}
1156 	}
1157 
1158 	ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
1159 	if (props)
1160 		nvlist_free(props);
1161 	return (ret);
1162 }
1163 
1164 /*
1165  * Import the given pool using the known configuration and a list of
1166  * properties to be set. The configuration should have come from
1167  * zpool_find_import(). The 'newname' parameters control whether the pool
1168  * is imported with a different name.
1169  */
1170 int
1171 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1172     nvlist_t *props, boolean_t importfaulted)
1173 {
1174 	zfs_cmd_t zc = { 0 };
1175 	char *thename;
1176 	char *origname;
1177 	int ret;
1178 	char errbuf[1024];
1179 
1180 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1181 	    &origname) == 0);
1182 
1183 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1184 	    "cannot import pool '%s'"), origname);
1185 
1186 	if (newname != NULL) {
1187 		if (!zpool_name_valid(hdl, B_FALSE, newname))
1188 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1189 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1190 			    newname));
1191 		thename = (char *)newname;
1192 	} else {
1193 		thename = origname;
1194 	}
1195 
1196 	if (props) {
1197 		uint64_t version;
1198 
1199 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1200 		    &version) == 0);
1201 
1202 		if ((props = zpool_valid_proplist(hdl, origname,
1203 		    props, version, B_TRUE, errbuf)) == NULL) {
1204 			return (-1);
1205 		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1206 			nvlist_free(props);
1207 			return (-1);
1208 		}
1209 	}
1210 
1211 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1212 
1213 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1214 	    &zc.zc_guid) == 0);
1215 
1216 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1217 		nvlist_free(props);
1218 		return (-1);
1219 	}
1220 
1221 	zc.zc_cookie = (uint64_t)importfaulted;
1222 	ret = 0;
1223 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
1224 		char desc[1024];
1225 		if (newname == NULL)
1226 			(void) snprintf(desc, sizeof (desc),
1227 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1228 			    thename);
1229 		else
1230 			(void) snprintf(desc, sizeof (desc),
1231 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1232 			    origname, thename);
1233 
1234 		switch (errno) {
1235 		case ENOTSUP:
1236 			/*
1237 			 * Unsupported version.
1238 			 */
1239 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1240 			break;
1241 
1242 		case EINVAL:
1243 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1244 			break;
1245 
1246 		default:
1247 			(void) zpool_standard_error(hdl, errno, desc);
1248 		}
1249 
1250 		ret = -1;
1251 	} else {
1252 		zpool_handle_t *zhp;
1253 
1254 		/*
1255 		 * This should never fail, but play it safe anyway.
1256 		 */
1257 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
1258 			ret = -1;
1259 		} else if (zhp != NULL) {
1260 			ret = zpool_create_zvol_links(zhp);
1261 			zpool_close(zhp);
1262 		}
1263 
1264 	}
1265 
1266 	zcmd_free_nvlists(&zc);
1267 	nvlist_free(props);
1268 
1269 	return (ret);
1270 }
1271 
1272 /*
1273  * Scrub the pool.
1274  */
1275 int
1276 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
1277 {
1278 	zfs_cmd_t zc = { 0 };
1279 	char msg[1024];
1280 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1281 
1282 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1283 	zc.zc_cookie = type;
1284 
1285 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
1286 		return (0);
1287 
1288 	(void) snprintf(msg, sizeof (msg),
1289 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1290 
1291 	if (errno == EBUSY)
1292 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
1293 	else
1294 		return (zpool_standard_error(hdl, errno, msg));
1295 }
1296 
1297 /*
1298  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1299  * spare; but FALSE if its an INUSE spare.
1300  */
1301 static nvlist_t *
1302 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
1303     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1304 {
1305 	uint_t c, children;
1306 	nvlist_t **child;
1307 	uint64_t theguid, present;
1308 	char *path;
1309 	uint64_t wholedisk = 0;
1310 	nvlist_t *ret;
1311 	uint64_t is_log;
1312 
1313 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
1314 
1315 	if (search == NULL &&
1316 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
1317 		/*
1318 		 * If the device has never been present since import, the only
1319 		 * reliable way to match the vdev is by GUID.
1320 		 */
1321 		if (theguid == guid)
1322 			return (nv);
1323 	} else if (search != NULL &&
1324 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1325 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1326 		    &wholedisk);
1327 		if (wholedisk) {
1328 			/*
1329 			 * For whole disks, the internal path has 's0', but the
1330 			 * path passed in by the user doesn't.
1331 			 */
1332 			if (strlen(search) == strlen(path) - 2 &&
1333 			    strncmp(search, path, strlen(search)) == 0)
1334 				return (nv);
1335 		} else if (strcmp(search, path) == 0) {
1336 			return (nv);
1337 		}
1338 	}
1339 
1340 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1341 	    &child, &children) != 0)
1342 		return (NULL);
1343 
1344 	for (c = 0; c < children; c++) {
1345 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1346 		    avail_spare, l2cache, NULL)) != NULL) {
1347 			/*
1348 			 * The 'is_log' value is only set for the toplevel
1349 			 * vdev, not the leaf vdevs.  So we always lookup the
1350 			 * log device from the root of the vdev tree (where
1351 			 * 'log' is non-NULL).
1352 			 */
1353 			if (log != NULL &&
1354 			    nvlist_lookup_uint64(child[c],
1355 			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1356 			    is_log) {
1357 				*log = B_TRUE;
1358 			}
1359 			return (ret);
1360 		}
1361 	}
1362 
1363 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1364 	    &child, &children) == 0) {
1365 		for (c = 0; c < children; c++) {
1366 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1367 			    avail_spare, l2cache, NULL)) != NULL) {
1368 				*avail_spare = B_TRUE;
1369 				return (ret);
1370 			}
1371 		}
1372 	}
1373 
1374 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1375 	    &child, &children) == 0) {
1376 		for (c = 0; c < children; c++) {
1377 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1378 			    avail_spare, l2cache, NULL)) != NULL) {
1379 				*l2cache = B_TRUE;
1380 				return (ret);
1381 			}
1382 		}
1383 	}
1384 
1385 	return (NULL);
1386 }
1387 
1388 nvlist_t *
1389 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1390     boolean_t *l2cache, boolean_t *log)
1391 {
1392 	char buf[MAXPATHLEN];
1393 	const char *search;
1394 	char *end;
1395 	nvlist_t *nvroot;
1396 	uint64_t guid;
1397 
1398 	guid = strtoull(path, &end, 10);
1399 	if (guid != 0 && *end == '\0') {
1400 		search = NULL;
1401 	} else if (path[0] != '/') {
1402 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
1403 		search = buf;
1404 	} else {
1405 		search = path;
1406 	}
1407 
1408 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1409 	    &nvroot) == 0);
1410 
1411 	*avail_spare = B_FALSE;
1412 	*l2cache = B_FALSE;
1413 	if (log != NULL)
1414 		*log = B_FALSE;
1415 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
1416 	    l2cache, log));
1417 }
1418 
1419 static int
1420 vdev_online(nvlist_t *nv)
1421 {
1422 	uint64_t ival;
1423 
1424 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &ival) == 0 ||
1425 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED, &ival) == 0 ||
1426 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED, &ival) == 0)
1427 		return (0);
1428 
1429 	return (1);
1430 }
1431 
1432 /*
1433  * Get phys_path for a root pool
1434  * Return 0 on success; non-zeron on failure.
1435  */
1436 int
1437 zpool_get_physpath(zpool_handle_t *zhp, char *physpath)
1438 {
1439 	char bootfs[ZPOOL_MAXNAMELEN];
1440 	nvlist_t *vdev_root;
1441 	nvlist_t **child;
1442 	uint_t count;
1443 	int i;
1444 
1445 	/*
1446 	 * Make sure this is a root pool, as phys_path doesn't mean
1447 	 * anything to a non-root pool.
1448 	 */
1449 	if (zpool_get_prop(zhp, ZPOOL_PROP_BOOTFS, bootfs,
1450 	    sizeof (bootfs), NULL) != 0)
1451 		return (-1);
1452 
1453 	verify(nvlist_lookup_nvlist(zhp->zpool_config,
1454 	    ZPOOL_CONFIG_VDEV_TREE, &vdev_root) == 0);
1455 
1456 	if (nvlist_lookup_nvlist_array(vdev_root, ZPOOL_CONFIG_CHILDREN,
1457 	    &child, &count) != 0)
1458 		return (-2);
1459 
1460 	for (i = 0; i < count; i++) {
1461 		nvlist_t **child2;
1462 		uint_t count2;
1463 		char *type;
1464 		char *tmppath;
1465 		int j;
1466 
1467 		if (nvlist_lookup_string(child[i], ZPOOL_CONFIG_TYPE, &type)
1468 		    != 0)
1469 			return (-3);
1470 
1471 		if (strcmp(type, VDEV_TYPE_DISK) == 0) {
1472 			if (!vdev_online(child[i]))
1473 				return (-8);
1474 			verify(nvlist_lookup_string(child[i],
1475 			    ZPOOL_CONFIG_PHYS_PATH, &tmppath) == 0);
1476 			(void) strncpy(physpath, tmppath, strlen(tmppath));
1477 		} else if (strcmp(type, VDEV_TYPE_MIRROR) == 0) {
1478 			if (nvlist_lookup_nvlist_array(child[i],
1479 			    ZPOOL_CONFIG_CHILDREN, &child2, &count2) != 0)
1480 				return (-4);
1481 
1482 			for (j = 0; j < count2; j++) {
1483 				if (!vdev_online(child2[j]))
1484 					return (-8);
1485 				if (nvlist_lookup_string(child2[j],
1486 				    ZPOOL_CONFIG_PHYS_PATH, &tmppath) != 0)
1487 					return (-5);
1488 
1489 				if ((strlen(physpath) + strlen(tmppath)) >
1490 				    MAXNAMELEN)
1491 					return (-6);
1492 
1493 				if (strlen(physpath) == 0) {
1494 					(void) strncpy(physpath, tmppath,
1495 					    strlen(tmppath));
1496 				} else {
1497 					(void) strcat(physpath, " ");
1498 					(void) strcat(physpath, tmppath);
1499 				}
1500 			}
1501 		} else {
1502 			return (-7);
1503 		}
1504 	}
1505 
1506 	return (0);
1507 }
1508 
1509 /*
1510  * Returns TRUE if the given guid corresponds to the given type.
1511  * This is used to check for hot spares (INUSE or not), and level 2 cache
1512  * devices.
1513  */
1514 static boolean_t
1515 is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1516 {
1517 	uint64_t target_guid;
1518 	nvlist_t *nvroot;
1519 	nvlist_t **list;
1520 	uint_t count;
1521 	int i;
1522 
1523 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1524 	    &nvroot) == 0);
1525 	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1526 		for (i = 0; i < count; i++) {
1527 			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1528 			    &target_guid) == 0);
1529 			if (guid == target_guid)
1530 				return (B_TRUE);
1531 		}
1532 	}
1533 
1534 	return (B_FALSE);
1535 }
1536 
1537 /*
1538  * Bring the specified vdev online.   The 'flags' parameter is a set of the
1539  * ZFS_ONLINE_* flags.
1540  */
1541 int
1542 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1543     vdev_state_t *newstate)
1544 {
1545 	zfs_cmd_t zc = { 0 };
1546 	char msg[1024];
1547 	nvlist_t *tgt;
1548 	boolean_t avail_spare, l2cache;
1549 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1550 
1551 	(void) snprintf(msg, sizeof (msg),
1552 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1553 
1554 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1555 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1556 	    NULL)) == NULL)
1557 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1558 
1559 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1560 
1561 	if (avail_spare ||
1562 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1563 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1564 
1565 	zc.zc_cookie = VDEV_STATE_ONLINE;
1566 	zc.zc_obj = flags;
1567 
1568 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1569 		return (zpool_standard_error(hdl, errno, msg));
1570 
1571 	*newstate = zc.zc_cookie;
1572 	return (0);
1573 }
1574 
1575 /*
1576  * Take the specified vdev offline
1577  */
1578 int
1579 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1580 {
1581 	zfs_cmd_t zc = { 0 };
1582 	char msg[1024];
1583 	nvlist_t *tgt;
1584 	boolean_t avail_spare, l2cache;
1585 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1586 
1587 	(void) snprintf(msg, sizeof (msg),
1588 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1589 
1590 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1591 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1592 	    NULL)) == NULL)
1593 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1594 
1595 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1596 
1597 	if (avail_spare ||
1598 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1599 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1600 
1601 	zc.zc_cookie = VDEV_STATE_OFFLINE;
1602 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1603 
1604 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1605 		return (0);
1606 
1607 	switch (errno) {
1608 	case EBUSY:
1609 
1610 		/*
1611 		 * There are no other replicas of this device.
1612 		 */
1613 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1614 
1615 	default:
1616 		return (zpool_standard_error(hdl, errno, msg));
1617 	}
1618 }
1619 
1620 /*
1621  * Mark the given vdev faulted.
1622  */
1623 int
1624 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1625 {
1626 	zfs_cmd_t zc = { 0 };
1627 	char msg[1024];
1628 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1629 
1630 	(void) snprintf(msg, sizeof (msg),
1631 	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
1632 
1633 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1634 	zc.zc_guid = guid;
1635 	zc.zc_cookie = VDEV_STATE_FAULTED;
1636 
1637 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1638 		return (0);
1639 
1640 	switch (errno) {
1641 	case EBUSY:
1642 
1643 		/*
1644 		 * There are no other replicas of this device.
1645 		 */
1646 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1647 
1648 	default:
1649 		return (zpool_standard_error(hdl, errno, msg));
1650 	}
1651 
1652 }
1653 
1654 /*
1655  * Mark the given vdev degraded.
1656  */
1657 int
1658 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1659 {
1660 	zfs_cmd_t zc = { 0 };
1661 	char msg[1024];
1662 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1663 
1664 	(void) snprintf(msg, sizeof (msg),
1665 	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
1666 
1667 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1668 	zc.zc_guid = guid;
1669 	zc.zc_cookie = VDEV_STATE_DEGRADED;
1670 
1671 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1672 		return (0);
1673 
1674 	return (zpool_standard_error(hdl, errno, msg));
1675 }
1676 
1677 /*
1678  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
1679  * a hot spare.
1680  */
1681 static boolean_t
1682 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1683 {
1684 	nvlist_t **child;
1685 	uint_t c, children;
1686 	char *type;
1687 
1688 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1689 	    &children) == 0) {
1690 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1691 		    &type) == 0);
1692 
1693 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1694 		    children == 2 && child[which] == tgt)
1695 			return (B_TRUE);
1696 
1697 		for (c = 0; c < children; c++)
1698 			if (is_replacing_spare(child[c], tgt, which))
1699 				return (B_TRUE);
1700 	}
1701 
1702 	return (B_FALSE);
1703 }
1704 
1705 /*
1706  * Attach new_disk (fully described by nvroot) to old_disk.
1707  * If 'replacing' is specified, the new disk will replace the old one.
1708  */
1709 int
1710 zpool_vdev_attach(zpool_handle_t *zhp,
1711     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1712 {
1713 	zfs_cmd_t zc = { 0 };
1714 	char msg[1024];
1715 	int ret;
1716 	nvlist_t *tgt;
1717 	boolean_t avail_spare, l2cache, islog;
1718 	uint64_t val;
1719 	char *path, *newname;
1720 	nvlist_t **child;
1721 	uint_t children;
1722 	nvlist_t *config_root;
1723 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1724 
1725 	if (replacing)
1726 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1727 		    "cannot replace %s with %s"), old_disk, new_disk);
1728 	else
1729 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1730 		    "cannot attach %s to %s"), new_disk, old_disk);
1731 
1732 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1733 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
1734 	    &islog)) == 0)
1735 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1736 
1737 	if (avail_spare)
1738 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1739 
1740 	if (l2cache)
1741 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1742 
1743 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1744 	zc.zc_cookie = replacing;
1745 
1746 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1747 	    &child, &children) != 0 || children != 1) {
1748 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1749 		    "new device must be a single disk"));
1750 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1751 	}
1752 
1753 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1754 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1755 
1756 	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
1757 		return (-1);
1758 
1759 	/*
1760 	 * If the target is a hot spare that has been swapped in, we can only
1761 	 * replace it with another hot spare.
1762 	 */
1763 	if (replacing &&
1764 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1765 	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
1766 	    NULL) == NULL || !avail_spare) &&
1767 	    is_replacing_spare(config_root, tgt, 1)) {
1768 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1769 		    "can only be replaced by another hot spare"));
1770 		free(newname);
1771 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1772 	}
1773 
1774 	/*
1775 	 * If we are attempting to replace a spare, it canot be applied to an
1776 	 * already spared device.
1777 	 */
1778 	if (replacing &&
1779 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1780 	    zpool_find_vdev(zhp, newname, &avail_spare,
1781 	    &l2cache, NULL) != NULL && avail_spare &&
1782 	    is_replacing_spare(config_root, tgt, 0)) {
1783 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1784 		    "device has already been replaced with a spare"));
1785 		free(newname);
1786 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1787 	}
1788 
1789 	free(newname);
1790 
1791 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1792 		return (-1);
1793 
1794 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1795 
1796 	zcmd_free_nvlists(&zc);
1797 
1798 	if (ret == 0)
1799 		return (0);
1800 
1801 	switch (errno) {
1802 	case ENOTSUP:
1803 		/*
1804 		 * Can't attach to or replace this type of vdev.
1805 		 */
1806 		if (replacing) {
1807 			if (islog)
1808 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1809 				    "cannot replace a log with a spare"));
1810 			else
1811 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1812 				    "cannot replace a replacing device"));
1813 		} else {
1814 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1815 			    "can only attach to mirrors and top-level "
1816 			    "disks"));
1817 		}
1818 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1819 		break;
1820 
1821 	case EINVAL:
1822 		/*
1823 		 * The new device must be a single disk.
1824 		 */
1825 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1826 		    "new device must be a single disk"));
1827 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1828 		break;
1829 
1830 	case EBUSY:
1831 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1832 		    new_disk);
1833 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1834 		break;
1835 
1836 	case EOVERFLOW:
1837 		/*
1838 		 * The new device is too small.
1839 		 */
1840 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1841 		    "device is too small"));
1842 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1843 		break;
1844 
1845 	case EDOM:
1846 		/*
1847 		 * The new device has a different alignment requirement.
1848 		 */
1849 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1850 		    "devices have different sector alignment"));
1851 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1852 		break;
1853 
1854 	case ENAMETOOLONG:
1855 		/*
1856 		 * The resulting top-level vdev spec won't fit in the label.
1857 		 */
1858 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1859 		break;
1860 
1861 	default:
1862 		(void) zpool_standard_error(hdl, errno, msg);
1863 	}
1864 
1865 	return (-1);
1866 }
1867 
1868 /*
1869  * Detach the specified device.
1870  */
1871 int
1872 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1873 {
1874 	zfs_cmd_t zc = { 0 };
1875 	char msg[1024];
1876 	nvlist_t *tgt;
1877 	boolean_t avail_spare, l2cache;
1878 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1879 
1880 	(void) snprintf(msg, sizeof (msg),
1881 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1882 
1883 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1884 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1885 	    NULL)) == 0)
1886 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1887 
1888 	if (avail_spare)
1889 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1890 
1891 	if (l2cache)
1892 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1893 
1894 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1895 
1896 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1897 		return (0);
1898 
1899 	switch (errno) {
1900 
1901 	case ENOTSUP:
1902 		/*
1903 		 * Can't detach from this type of vdev.
1904 		 */
1905 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1906 		    "applicable to mirror and replacing vdevs"));
1907 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1908 		break;
1909 
1910 	case EBUSY:
1911 		/*
1912 		 * There are no other replicas of this device.
1913 		 */
1914 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1915 		break;
1916 
1917 	default:
1918 		(void) zpool_standard_error(hdl, errno, msg);
1919 	}
1920 
1921 	return (-1);
1922 }
1923 
1924 /*
1925  * Remove the given device.  Currently, this is supported only for hot spares
1926  * and level 2 cache devices.
1927  */
1928 int
1929 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1930 {
1931 	zfs_cmd_t zc = { 0 };
1932 	char msg[1024];
1933 	nvlist_t *tgt;
1934 	boolean_t avail_spare, l2cache;
1935 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1936 
1937 	(void) snprintf(msg, sizeof (msg),
1938 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1939 
1940 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1941 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1942 	    NULL)) == 0)
1943 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1944 
1945 	if (!avail_spare && !l2cache) {
1946 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1947 		    "only inactive hot spares or cache devices "
1948 		    "can be removed"));
1949 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1950 	}
1951 
1952 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1953 
1954 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1955 		return (0);
1956 
1957 	return (zpool_standard_error(hdl, errno, msg));
1958 }
1959 
1960 /*
1961  * Clear the errors for the pool, or the particular device if specified.
1962  */
1963 int
1964 zpool_clear(zpool_handle_t *zhp, const char *path)
1965 {
1966 	zfs_cmd_t zc = { 0 };
1967 	char msg[1024];
1968 	nvlist_t *tgt;
1969 	boolean_t avail_spare, l2cache;
1970 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1971 
1972 	if (path)
1973 		(void) snprintf(msg, sizeof (msg),
1974 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1975 		    path);
1976 	else
1977 		(void) snprintf(msg, sizeof (msg),
1978 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1979 		    zhp->zpool_name);
1980 
1981 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1982 	if (path) {
1983 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
1984 		    &l2cache, NULL)) == 0)
1985 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1986 
1987 		/*
1988 		 * Don't allow error clearing for hot spares.  Do allow
1989 		 * error clearing for l2cache devices.
1990 		 */
1991 		if (avail_spare)
1992 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1993 
1994 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1995 		    &zc.zc_guid) == 0);
1996 	}
1997 
1998 	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
1999 		return (0);
2000 
2001 	return (zpool_standard_error(hdl, errno, msg));
2002 }
2003 
2004 /*
2005  * Similar to zpool_clear(), but takes a GUID (used by fmd).
2006  */
2007 int
2008 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
2009 {
2010 	zfs_cmd_t zc = { 0 };
2011 	char msg[1024];
2012 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2013 
2014 	(void) snprintf(msg, sizeof (msg),
2015 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
2016 	    guid);
2017 
2018 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2019 	zc.zc_guid = guid;
2020 
2021 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
2022 		return (0);
2023 
2024 	return (zpool_standard_error(hdl, errno, msg));
2025 }
2026 
2027 /*
2028  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
2029  * hierarchy.
2030  */
2031 int
2032 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
2033     void *data)
2034 {
2035 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2036 	char (*paths)[MAXPATHLEN];
2037 	size_t size = 4;
2038 	int curr, fd, base, ret = 0;
2039 	DIR *dirp;
2040 	struct dirent *dp;
2041 	struct stat st;
2042 
2043 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
2044 		return (errno == ENOENT ? 0 : -1);
2045 
2046 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
2047 		int err = errno;
2048 		(void) close(base);
2049 		return (err == ENOENT ? 0 : -1);
2050 	}
2051 
2052 	/*
2053 	 * Oddly this wasn't a directory -- ignore that failure since we
2054 	 * know there are no links lower in the (non-existant) hierarchy.
2055 	 */
2056 	if (!S_ISDIR(st.st_mode)) {
2057 		(void) close(base);
2058 		return (0);
2059 	}
2060 
2061 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
2062 		(void) close(base);
2063 		return (-1);
2064 	}
2065 
2066 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
2067 	curr = 0;
2068 
2069 	while (curr >= 0) {
2070 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
2071 			goto err;
2072 
2073 		if (S_ISDIR(st.st_mode)) {
2074 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
2075 				goto err;
2076 
2077 			if ((dirp = fdopendir(fd)) == NULL) {
2078 				(void) close(fd);
2079 				goto err;
2080 			}
2081 
2082 			while ((dp = readdir(dirp)) != NULL) {
2083 				if (dp->d_name[0] == '.')
2084 					continue;
2085 
2086 				if (curr + 1 == size) {
2087 					paths = zfs_realloc(hdl, paths,
2088 					    size * sizeof (paths[0]),
2089 					    size * 2 * sizeof (paths[0]));
2090 					if (paths == NULL) {
2091 						(void) closedir(dirp);
2092 						(void) close(fd);
2093 						goto err;
2094 					}
2095 
2096 					size *= 2;
2097 				}
2098 
2099 				(void) strlcpy(paths[curr + 1], paths[curr],
2100 				    sizeof (paths[curr + 1]));
2101 				(void) strlcat(paths[curr], "/",
2102 				    sizeof (paths[curr]));
2103 				(void) strlcat(paths[curr], dp->d_name,
2104 				    sizeof (paths[curr]));
2105 				curr++;
2106 			}
2107 
2108 			(void) closedir(dirp);
2109 
2110 		} else {
2111 			if ((ret = cb(paths[curr], data)) != 0)
2112 				break;
2113 		}
2114 
2115 		curr--;
2116 	}
2117 
2118 	free(paths);
2119 	(void) close(base);
2120 
2121 	return (ret);
2122 
2123 err:
2124 	free(paths);
2125 	(void) close(base);
2126 	return (-1);
2127 }
2128 
/*
 * State bundle for zvol link callbacks.
 * NOTE(review): none of the nearby zvol link routines reference this type;
 * confirm it is still used before relying on the field notes below.
 */
typedef struct zvol_cb {
	zpool_handle_t *zcb_pool;	/* pool handle (presumably the pool being walked) */
	boolean_t zcb_create;		/* presumably: create rather than remove links */
} zvol_cb_t;
2133 
2134 /*ARGSUSED*/
2135 static int
2136 do_zvol_create(zfs_handle_t *zhp, void *data)
2137 {
2138 	int ret = 0;
2139 
2140 	if (ZFS_IS_VOLUME(zhp)) {
2141 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2142 		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
2143 	}
2144 
2145 	if (ret == 0)
2146 		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
2147 
2148 	zfs_close(zhp);
2149 
2150 	return (ret);
2151 }
2152 
2153 /*
2154  * Iterate over all zvols in the pool and make any necessary minor nodes.
2155  */
2156 int
2157 zpool_create_zvol_links(zpool_handle_t *zhp)
2158 {
2159 	zfs_handle_t *zfp;
2160 	int ret;
2161 
2162 	/*
2163 	 * If the pool is unavailable, just return success.
2164 	 */
2165 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
2166 	    zhp->zpool_name)) == NULL)
2167 		return (0);
2168 
2169 	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
2170 
2171 	zfs_close(zfp);
2172 	return (ret);
2173 }
2174 
2175 static int
2176 do_zvol_remove(const char *dataset, void *data)
2177 {
2178 	zpool_handle_t *zhp = data;
2179 
2180 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
2181 }
2182 
2183 /*
2184  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
2185  * by examining the /dev links so that a corrupted pool doesn't impede this
2186  * operation.
2187  */
2188 int
2189 zpool_remove_zvol_links(zpool_handle_t *zhp)
2190 {
2191 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
2192 }
2193 
2194 /*
2195  * Convert from a devid string to a path.
2196  */
2197 static char *
2198 devid_to_path(char *devid_str)
2199 {
2200 	ddi_devid_t devid;
2201 	char *minor;
2202 	char *path;
2203 	devid_nmlist_t *list = NULL;
2204 	int ret;
2205 
2206 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
2207 		return (NULL);
2208 
2209 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2210 
2211 	devid_str_free(minor);
2212 	devid_free(devid);
2213 
2214 	if (ret != 0)
2215 		return (NULL);
2216 
2217 	if ((path = strdup(list[0].devname)) == NULL)
2218 		return (NULL);
2219 
2220 	devid_free_nmlist(list);
2221 
2222 	return (path);
2223 }
2224 
2225 /*
2226  * Convert from a path to a devid string.
2227  */
2228 static char *
2229 path_to_devid(const char *path)
2230 {
2231 	int fd;
2232 	ddi_devid_t devid;
2233 	char *minor, *ret;
2234 
2235 	if ((fd = open(path, O_RDONLY)) < 0)
2236 		return (NULL);
2237 
2238 	minor = NULL;
2239 	ret = NULL;
2240 	if (devid_get(fd, &devid) == 0) {
2241 		if (devid_get_minor_name(fd, &minor) == 0)
2242 			ret = devid_str_encode(devid, minor);
2243 		if (minor != NULL)
2244 			devid_str_free(minor);
2245 		devid_free(devid);
2246 	}
2247 	(void) close(fd);
2248 
2249 	return (ret);
2250 }
2251 
2252 /*
2253  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2254  * ignore any failure here, since a common case is for an unprivileged user to
2255  * type 'zpool status', and we'll display the correct information anyway.
2256  */
2257 static void
2258 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2259 {
2260 	zfs_cmd_t zc = { 0 };
2261 
2262 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2263 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
2264 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2265 	    &zc.zc_guid) == 0);
2266 
2267 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2268 }
2269 
2270 /*
2271  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
2272  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
2273  * We also check if this is a whole disk, in which case we strip off the
2274  * trailing 's0' slice name.
2275  *
2276  * This routine is also responsible for identifying when disks have been
2277  * reconfigured in a new location.  The kernel will have opened the device by
2278  * devid, but the path will still refer to the old location.  To catch this, we
2279  * first do a path -> devid translation (which is fast for the common case).  If
2280  * the devid matches, we're done.  If not, we do a reverse devid -> path
2281  * translation and issue the appropriate ioctl() to update the path of the vdev.
2282  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2283  * of these checks.
2284  */
2285 char *
2286 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2287 {
2288 	char *path, *devid;
2289 	uint64_t value;
2290 	char buf[64];
2291 	vdev_stat_t *vs;
2292 	uint_t vsc;
2293 
2294 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2295 	    &value) == 0) {
2296 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2297 		    &value) == 0);
2298 		(void) snprintf(buf, sizeof (buf), "%llu",
2299 		    (u_longlong_t)value);
2300 		path = buf;
2301 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2302 
2303 		/*
2304 		 * If the device is dead (faulted, offline, etc) then don't
2305 		 * bother opening it.  Otherwise we may be forcing the user to
2306 		 * open a misbehaving device, which can have undesirable
2307 		 * effects.
2308 		 */
2309 		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2310 		    (uint64_t **)&vs, &vsc) != 0 ||
2311 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
2312 		    zhp != NULL &&
2313 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2314 			/*
2315 			 * Determine if the current path is correct.
2316 			 */
2317 			char *newdevid = path_to_devid(path);
2318 
2319 			if (newdevid == NULL ||
2320 			    strcmp(devid, newdevid) != 0) {
2321 				char *newpath;
2322 
2323 				if ((newpath = devid_to_path(devid)) != NULL) {
2324 					/*
2325 					 * Update the path appropriately.
2326 					 */
2327 					set_path(zhp, nv, newpath);
2328 					if (nvlist_add_string(nv,
2329 					    ZPOOL_CONFIG_PATH, newpath) == 0)
2330 						verify(nvlist_lookup_string(nv,
2331 						    ZPOOL_CONFIG_PATH,
2332 						    &path) == 0);
2333 					free(newpath);
2334 				}
2335 			}
2336 
2337 			if (newdevid)
2338 				devid_str_free(newdevid);
2339 		}
2340 
2341 		if (strncmp(path, "/dev/dsk/", 9) == 0)
2342 			path += 9;
2343 
2344 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2345 		    &value) == 0 && value) {
2346 			char *tmp = zfs_strdup(hdl, path);
2347 			if (tmp == NULL)
2348 				return (NULL);
2349 			tmp[strlen(path) - 2] = '\0';
2350 			return (tmp);
2351 		}
2352 	} else {
2353 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2354 
2355 		/*
2356 		 * If it's a raidz device, we need to stick in the parity level.
2357 		 */
2358 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2359 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2360 			    &value) == 0);
2361 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
2362 			    (u_longlong_t)value);
2363 			path = buf;
2364 		}
2365 	}
2366 
2367 	return (zfs_strdup(hdl, path));
2368 }
2369 
2370 static int
2371 zbookmark_compare(const void *a, const void *b)
2372 {
2373 	return (memcmp(a, b, sizeof (zbookmark_t)));
2374 }
2375 
2376 /*
2377  * Retrieve the persistent error log, uniquify the members, and return to the
2378  * caller.
2379  */
2380 int
2381 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2382 {
2383 	zfs_cmd_t zc = { 0 };
2384 	uint64_t count;
2385 	zbookmark_t *zb = NULL;
2386 	int i;
2387 
2388 	/*
2389 	 * Retrieve the raw error list from the kernel.  If the number of errors
2390 	 * has increased, allocate more space and continue until we get the
2391 	 * entire list.
2392 	 */
2393 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2394 	    &count) == 0);
2395 	if (count == 0)
2396 		return (0);
2397 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2398 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2399 		return (-1);
2400 	zc.zc_nvlist_dst_size = count;
2401 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2402 	for (;;) {
2403 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2404 		    &zc) != 0) {
2405 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
2406 			if (errno == ENOMEM) {
2407 				count = zc.zc_nvlist_dst_size;
2408 				if ((zc.zc_nvlist_dst = (uintptr_t)
2409 				    zfs_alloc(zhp->zpool_hdl, count *
2410 				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
2411 					return (-1);
2412 			} else {
2413 				return (-1);
2414 			}
2415 		} else {
2416 			break;
2417 		}
2418 	}
2419 
2420 	/*
2421 	 * Sort the resulting bookmarks.  This is a little confusing due to the
2422 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
2423 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
2424 	 * _not_ copied as part of the process.  So we point the start of our
2425 	 * array appropriate and decrement the total number of elements.
2426 	 */
2427 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2428 	    zc.zc_nvlist_dst_size;
2429 	count -= zc.zc_nvlist_dst_size;
2430 
2431 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2432 
2433 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2434 
2435 	/*
2436 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
2437 	 */
2438 	for (i = 0; i < count; i++) {
2439 		nvlist_t *nv;
2440 
2441 		/* ignoring zb_blkid and zb_level for now */
2442 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2443 		    zb[i-1].zb_object == zb[i].zb_object)
2444 			continue;
2445 
2446 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2447 			goto nomem;
2448 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2449 		    zb[i].zb_objset) != 0) {
2450 			nvlist_free(nv);
2451 			goto nomem;
2452 		}
2453 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2454 		    zb[i].zb_object) != 0) {
2455 			nvlist_free(nv);
2456 			goto nomem;
2457 		}
2458 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2459 			nvlist_free(nv);
2460 			goto nomem;
2461 		}
2462 		nvlist_free(nv);
2463 	}
2464 
2465 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2466 	return (0);
2467 
2468 nomem:
2469 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2470 	return (no_memory(zhp->zpool_hdl));
2471 }
2472 
2473 /*
2474  * Upgrade a ZFS pool to the latest on-disk version.
2475  */
2476 int
2477 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2478 {
2479 	zfs_cmd_t zc = { 0 };
2480 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2481 
2482 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2483 	zc.zc_cookie = new_version;
2484 
2485 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2486 		return (zpool_standard_error_fmt(hdl, errno,
2487 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2488 		    zhp->zpool_name));
2489 	return (0);
2490 }
2491 
2492 void
2493 zpool_set_history_str(const char *subcommand, int argc, char **argv,
2494     char *history_str)
2495 {
2496 	int i;
2497 
2498 	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2499 	for (i = 1; i < argc; i++) {
2500 		if (strlen(history_str) + 1 + strlen(argv[i]) >
2501 		    HIS_MAX_RECORD_LEN)
2502 			break;
2503 		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2504 		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2505 	}
2506 }
2507 
2508 /*
2509  * Stage command history for logging.
2510  */
2511 int
2512 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2513 {
2514 	if (history_str == NULL)
2515 		return (EINVAL);
2516 
2517 	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2518 		return (EINVAL);
2519 
2520 	if (hdl->libzfs_log_str != NULL)
2521 		free(hdl->libzfs_log_str);
2522 
2523 	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2524 		return (no_memory(hdl));
2525 
2526 	return (0);
2527 }
2528 
2529 /*
2530  * Perform ioctl to get some command history of a pool.
2531  *
2532  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2533  * logical offset of the history buffer to start reading from.
2534  *
2535  * Upon return, 'off' is the next logical offset to read from and
2536  * 'len' is the actual amount of bytes read into 'buf'.
2537  */
2538 static int
2539 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2540 {
2541 	zfs_cmd_t zc = { 0 };
2542 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2543 
2544 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2545 
2546 	zc.zc_history = (uint64_t)(uintptr_t)buf;
2547 	zc.zc_history_len = *len;
2548 	zc.zc_history_offset = *off;
2549 
2550 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2551 		switch (errno) {
2552 		case EPERM:
2553 			return (zfs_error_fmt(hdl, EZFS_PERM,
2554 			    dgettext(TEXT_DOMAIN,
2555 			    "cannot show history for pool '%s'"),
2556 			    zhp->zpool_name));
2557 		case ENOENT:
2558 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2559 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2560 			    "'%s'"), zhp->zpool_name));
2561 		case ENOTSUP:
2562 			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2563 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2564 			    "'%s', pool must be upgraded"), zhp->zpool_name));
2565 		default:
2566 			return (zpool_standard_error_fmt(hdl, errno,
2567 			    dgettext(TEXT_DOMAIN,
2568 			    "cannot get history for '%s'"), zhp->zpool_name));
2569 		}
2570 	}
2571 
2572 	*len = zc.zc_history_len;
2573 	*off = zc.zc_history_offset;
2574 
2575 	return (0);
2576 }
2577 
2578 /*
2579  * Process the buffer of nvlists, unpacking and storing each nvlist record
2580  * into 'records'.  'leftover' is set to the number of bytes that weren't
2581  * processed as there wasn't a complete record.
2582  */
2583 static int
2584 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2585     nvlist_t ***records, uint_t *numrecords)
2586 {
2587 	uint64_t reclen;
2588 	nvlist_t *nv;
2589 	int i;
2590 
2591 	while (bytes_read > sizeof (reclen)) {
2592 
2593 		/* get length of packed record (stored as little endian) */
2594 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2595 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2596 
2597 		if (bytes_read < sizeof (reclen) + reclen)
2598 			break;
2599 
2600 		/* unpack record */
2601 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2602 			return (ENOMEM);
2603 		bytes_read -= sizeof (reclen) + reclen;
2604 		buf += sizeof (reclen) + reclen;
2605 
2606 		/* add record to nvlist array */
2607 		(*numrecords)++;
2608 		if (ISP2(*numrecords + 1)) {
2609 			*records = realloc(*records,
2610 			    *numrecords * 2 * sizeof (nvlist_t *));
2611 		}
2612 		(*records)[*numrecords - 1] = nv;
2613 	}
2614 
2615 	*leftover = bytes_read;
2616 	return (0);
2617 }
2618 
2619 #define	HIS_BUF_LEN	(128*1024)
2620 
2621 /*
2622  * Retrieve the command history of a pool.
2623  */
2624 int
2625 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2626 {
2627 	char buf[HIS_BUF_LEN];
2628 	uint64_t off = 0;
2629 	nvlist_t **records = NULL;
2630 	uint_t numrecords = 0;
2631 	int err, i;
2632 
2633 	do {
2634 		uint64_t bytes_read = sizeof (buf);
2635 		uint64_t leftover;
2636 
2637 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2638 			break;
2639 
2640 		/* if nothing else was read in, we're at EOF, just return */
2641 		if (!bytes_read)
2642 			break;
2643 
2644 		if ((err = zpool_history_unpack(buf, bytes_read,
2645 		    &leftover, &records, &numrecords)) != 0)
2646 			break;
2647 		off -= leftover;
2648 
2649 		/* CONSTCOND */
2650 	} while (1);
2651 
2652 	if (!err) {
2653 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2654 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2655 		    records, numrecords) == 0);
2656 	}
2657 	for (i = 0; i < numrecords; i++)
2658 		nvlist_free(records[i]);
2659 	free(records);
2660 
2661 	return (err);
2662 }
2663 
2664 void
2665 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2666     char *pathname, size_t len)
2667 {
2668 	zfs_cmd_t zc = { 0 };
2669 	boolean_t mounted = B_FALSE;
2670 	char *mntpnt = NULL;
2671 	char dsname[MAXNAMELEN];
2672 
2673 	if (dsobj == 0) {
2674 		/* special case for the MOS */
2675 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
2676 		return;
2677 	}
2678 
2679 	/* get the dataset's name */
2680 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2681 	zc.zc_obj = dsobj;
2682 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
2683 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2684 		/* just write out a path of two object numbers */
2685 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
2686 		    dsobj, obj);
2687 		return;
2688 	}
2689 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2690 
2691 	/* find out if the dataset is mounted */
2692 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2693 
2694 	/* get the corrupted object's path */
2695 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2696 	zc.zc_obj = obj;
2697 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2698 	    &zc) == 0) {
2699 		if (mounted) {
2700 			(void) snprintf(pathname, len, "%s%s", mntpnt,
2701 			    zc.zc_value);
2702 		} else {
2703 			(void) snprintf(pathname, len, "%s:%s",
2704 			    dsname, zc.zc_value);
2705 		}
2706 	} else {
2707 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
2708 	}
2709 	free(mntpnt);
2710 }
2711 
2712 #define	RDISK_ROOT	"/dev/rdsk"
2713 #define	BACKUP_SLICE	"s2"
2714 /*
2715  * Don't start the slice at the default block of 34; many storage
2716  * devices will use a stripe width of 128k, so start there instead.
2717  */
2718 #define	NEW_START_BLOCK	256
2719 
2720 /*
2721  * Read the EFI label from the config, if a label does not exist then
2722  * pass back the error to the caller. If the caller has passed a non-NULL
2723  * diskaddr argument then we set it to the starting address of the EFI
2724  * partition.
2725  */
2726 static int
2727 read_efi_label(nvlist_t *config, diskaddr_t *sb)
2728 {
2729 	char *path;
2730 	int fd;
2731 	char diskname[MAXPATHLEN];
2732 	int err = -1;
2733 
2734 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
2735 		return (err);
2736 
2737 	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
2738 	    strrchr(path, '/'));
2739 	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2740 		struct dk_gpt *vtoc;
2741 
2742 		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
2743 			if (sb != NULL)
2744 				*sb = vtoc->efi_parts[0].p_start;
2745 			efi_free(vtoc);
2746 		}
2747 		(void) close(fd);
2748 	}
2749 	return (err);
2750 }
2751 
2752 /*
2753  * determine where a partition starts on a disk in the current
2754  * configuration
2755  */
2756 static diskaddr_t
2757 find_start_block(nvlist_t *config)
2758 {
2759 	nvlist_t **child;
2760 	uint_t c, children;
2761 	diskaddr_t sb = MAXOFFSET_T;
2762 	uint64_t wholedisk;
2763 
2764 	if (nvlist_lookup_nvlist_array(config,
2765 	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2766 		if (nvlist_lookup_uint64(config,
2767 		    ZPOOL_CONFIG_WHOLE_DISK,
2768 		    &wholedisk) != 0 || !wholedisk) {
2769 			return (MAXOFFSET_T);
2770 		}
2771 		if (read_efi_label(config, &sb) < 0)
2772 			sb = MAXOFFSET_T;
2773 		return (sb);
2774 	}
2775 
2776 	for (c = 0; c < children; c++) {
2777 		sb = find_start_block(child[c]);
2778 		if (sb != MAXOFFSET_T) {
2779 			return (sb);
2780 		}
2781 	}
2782 	return (MAXOFFSET_T);
2783 }
2784 
2785 /*
2786  * Label an individual disk.  The name provided is the short name,
2787  * stripped of any leading /dev path.
2788  */
2789 int
2790 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2791 {
2792 	char path[MAXPATHLEN];
2793 	struct dk_gpt *vtoc;
2794 	int fd;
2795 	size_t resv = EFI_MIN_RESV_SIZE;
2796 	uint64_t slice_size;
2797 	diskaddr_t start_block;
2798 	char errbuf[1024];
2799 
2800 	/* prepare an error message just in case */
2801 	(void) snprintf(errbuf, sizeof (errbuf),
2802 	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
2803 
2804 	if (zhp) {
2805 		nvlist_t *nvroot;
2806 
2807 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2808 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2809 
2810 		if (zhp->zpool_start_block == 0)
2811 			start_block = find_start_block(nvroot);
2812 		else
2813 			start_block = zhp->zpool_start_block;
2814 		zhp->zpool_start_block = start_block;
2815 	} else {
2816 		/* new pool */
2817 		start_block = NEW_START_BLOCK;
2818 	}
2819 
2820 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2821 	    BACKUP_SLICE);
2822 
2823 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2824 		/*
2825 		 * This shouldn't happen.  We've long since verified that this
2826 		 * is a valid device.
2827 		 */
2828 		zfs_error_aux(hdl,
2829 		    dgettext(TEXT_DOMAIN, "unable to open device"));
2830 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2831 	}
2832 
2833 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2834 		/*
2835 		 * The only way this can fail is if we run out of memory, or we
2836 		 * were unable to read the disk's capacity
2837 		 */
2838 		if (errno == ENOMEM)
2839 			(void) no_memory(hdl);
2840 
2841 		(void) close(fd);
2842 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2843 		    "unable to read disk capacity"), name);
2844 
2845 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2846 	}
2847 
2848 	slice_size = vtoc->efi_last_u_lba + 1;
2849 	slice_size -= EFI_MIN_RESV_SIZE;
2850 	if (start_block == MAXOFFSET_T)
2851 		start_block = NEW_START_BLOCK;
2852 	slice_size -= start_block;
2853 
2854 	vtoc->efi_parts[0].p_start = start_block;
2855 	vtoc->efi_parts[0].p_size = slice_size;
2856 
2857 	/*
2858 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
2859 	 * disposable by some EFI utilities (since EFI doesn't have a backup
2860 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
2861 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2862 	 * etc. were all pretty specific.  V_USR is as close to reality as we
2863 	 * can get, in the absence of V_OTHER.
2864 	 */
2865 	vtoc->efi_parts[0].p_tag = V_USR;
2866 	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2867 
2868 	vtoc->efi_parts[8].p_start = slice_size + start_block;
2869 	vtoc->efi_parts[8].p_size = resv;
2870 	vtoc->efi_parts[8].p_tag = V_RESERVED;
2871 
2872 	if (efi_write(fd, vtoc) != 0) {
2873 		/*
2874 		 * Some block drivers (like pcata) may not support EFI
2875 		 * GPT labels.  Print out a helpful error message dir-
2876 		 * ecting the user to manually label the disk and give
2877 		 * a specific slice.
2878 		 */
2879 		(void) close(fd);
2880 		efi_free(vtoc);
2881 
2882 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2883 		    "try using fdisk(1M) and then provide a specific slice"));
2884 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
2885 	}
2886 
2887 	(void) close(fd);
2888 	efi_free(vtoc);
2889 	return (0);
2890 }
2891 
2892 static boolean_t
2893 supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
2894 {
2895 	char *type;
2896 	nvlist_t **child;
2897 	uint_t children, c;
2898 
2899 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
2900 	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2901 	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
2902 	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
2903 	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
2904 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2905 		    "vdev type '%s' is not supported"), type);
2906 		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
2907 		return (B_FALSE);
2908 	}
2909 	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
2910 	    &child, &children) == 0) {
2911 		for (c = 0; c < children; c++) {
2912 			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
2913 				return (B_FALSE);
2914 		}
2915 	}
2916 	return (B_TRUE);
2917 }
2918 
2919 /*
2920  * check if this zvol is allowable for use as a dump device; zero if
2921  * it is, > 0 if it isn't, < 0 if it isn't a zvol
2922  */
2923 int
2924 zvol_check_dump_config(char *arg)
2925 {
2926 	zpool_handle_t *zhp = NULL;
2927 	nvlist_t *config, *nvroot;
2928 	char *p, *volname;
2929 	nvlist_t **top;
2930 	uint_t toplevels;
2931 	libzfs_handle_t *hdl;
2932 	char errbuf[1024];
2933 	char poolname[ZPOOL_MAXNAMELEN];
2934 	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
2935 	int ret = 1;
2936 
2937 	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
2938 		return (-1);
2939 	}
2940 
2941 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2942 	    "dump is not supported on device '%s'"), arg);
2943 
2944 	if ((hdl = libzfs_init()) == NULL)
2945 		return (1);
2946 	libzfs_print_on_error(hdl, B_TRUE);
2947 
2948 	volname = arg + pathlen;
2949 
2950 	/* check the configuration of the pool */
2951 	if ((p = strchr(volname, '/')) == NULL) {
2952 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2953 		    "malformed dataset name"));
2954 		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
2955 		return (1);
2956 	} else if (p - volname >= ZFS_MAXNAMELEN) {
2957 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2958 		    "dataset name is too long"));
2959 		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
2960 		return (1);
2961 	} else {
2962 		(void) strncpy(poolname, volname, p - volname);
2963 		poolname[p - volname] = '\0';
2964 	}
2965 
2966 	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
2967 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2968 		    "could not open pool '%s'"), poolname);
2969 		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
2970 		goto out;
2971 	}
2972 	config = zpool_get_config(zhp, NULL);
2973 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2974 	    &nvroot) != 0) {
2975 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2976 		    "could not obtain vdev configuration for  '%s'"), poolname);
2977 		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
2978 		goto out;
2979 	}
2980 
2981 	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2982 	    &top, &toplevels) == 0);
2983 	if (toplevels != 1) {
2984 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2985 		    "'%s' has multiple top level vdevs"), poolname);
2986 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
2987 		goto out;
2988 	}
2989 
2990 	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
2991 		goto out;
2992 	}
2993 	ret = 0;
2994 
2995 out:
2996 	if (zhp)
2997 		zpool_close(zhp);
2998 	libzfs_fini(hdl);
2999 	return (ret);
3000 }
3001