xref: /titanic_52/usr/src/lib/libzfs/common/libzfs_pool.c (revision fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <alloca.h>
30 #include <assert.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <dirent.h>
35 #include <fcntl.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <sys/efi_partition.h>
42 #include <sys/vtoc.h>
43 #include <sys/zfs_ioctl.h>
44 #include <sys/zio.h>
45 #include <strings.h>
46 
47 #include "zfs_namecheck.h"
48 #include "zfs_prop.h"
49 #include "libzfs_impl.h"
50 
51 
52 /*
53  * ====================================================================
54  *   zpool property functions
55  * ====================================================================
56  */
57 
58 static int
59 zpool_get_all_props(zpool_handle_t *zhp)
60 {
61 	zfs_cmd_t zc = { 0 };
62 	libzfs_handle_t *hdl = zhp->zpool_hdl;
63 
64 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
65 
66 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
67 		return (-1);
68 
69 	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
70 		if (errno == ENOMEM) {
71 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
72 				zcmd_free_nvlists(&zc);
73 				return (-1);
74 			}
75 		} else {
76 			zcmd_free_nvlists(&zc);
77 			return (-1);
78 		}
79 	}
80 
81 	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
82 		zcmd_free_nvlists(&zc);
83 		return (-1);
84 	}
85 
86 	zcmd_free_nvlists(&zc);
87 
88 	return (0);
89 }
90 
91 static int
92 zpool_props_refresh(zpool_handle_t *zhp)
93 {
94 	nvlist_t *old_props;
95 
96 	old_props = zhp->zpool_props;
97 
98 	if (zpool_get_all_props(zhp) != 0)
99 		return (-1);
100 
101 	nvlist_free(old_props);
102 	return (0);
103 }
104 
105 static char *
106 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
107     zprop_source_t *src)
108 {
109 	nvlist_t *nv, *nvl;
110 	uint64_t ival;
111 	char *value;
112 	zprop_source_t source;
113 
114 	nvl = zhp->zpool_props;
115 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
116 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
117 		source = ival;
118 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
119 	} else {
120 		source = ZPROP_SRC_DEFAULT;
121 		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
122 			value = "-";
123 	}
124 
125 	if (src)
126 		*src = source;
127 
128 	return (value);
129 }
130 
131 uint64_t
132 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
133 {
134 	nvlist_t *nv, *nvl;
135 	uint64_t value;
136 	zprop_source_t source;
137 
138 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
139 		return (zpool_prop_default_numeric(prop));
140 
141 	nvl = zhp->zpool_props;
142 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
143 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
144 		source = value;
145 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
146 	} else {
147 		source = ZPROP_SRC_DEFAULT;
148 		value = zpool_prop_default_numeric(prop);
149 	}
150 
151 	if (src)
152 		*src = source;
153 
154 	return (value);
155 }
156 
157 /*
158  * Map VDEV STATE to printed strings.
159  */
160 char *
161 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
162 {
163 	switch (state) {
164 	case VDEV_STATE_CLOSED:
165 	case VDEV_STATE_OFFLINE:
166 		return (gettext("OFFLINE"));
167 	case VDEV_STATE_REMOVED:
168 		return (gettext("REMOVED"));
169 	case VDEV_STATE_CANT_OPEN:
170 		if (aux == VDEV_AUX_CORRUPT_DATA)
171 			return (gettext("FAULTED"));
172 		else
173 			return (gettext("UNAVAIL"));
174 	case VDEV_STATE_FAULTED:
175 		return (gettext("FAULTED"));
176 	case VDEV_STATE_DEGRADED:
177 		return (gettext("DEGRADED"));
178 	case VDEV_STATE_HEALTHY:
179 		return (gettext("ONLINE"));
180 	}
181 
182 	return (gettext("UNKNOWN"));
183 }
184 
185 /*
186  * Get a zpool property value for 'prop' and return the value in
187  * a pre-allocated buffer.
188  */
189 int
190 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
191     zprop_source_t *srctype)
192 {
193 	uint64_t intval;
194 	const char *strval;
195 	zprop_source_t src = ZPROP_SRC_NONE;
196 	nvlist_t *nvroot;
197 	vdev_stat_t *vs;
198 	uint_t vsc;
199 
200 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
201 		if (prop == ZPOOL_PROP_NAME)
202 			(void) strlcpy(buf, zpool_get_name(zhp), len);
203 		else if (prop == ZPOOL_PROP_HEALTH)
204 			(void) strlcpy(buf, "FAULTED", len);
205 		else
206 			(void) strlcpy(buf, "-", len);
207 		return (0);
208 	}
209 
210 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
211 	    prop != ZPOOL_PROP_NAME)
212 		return (-1);
213 
214 	switch (zpool_prop_get_type(prop)) {
215 	case PROP_TYPE_STRING:
216 		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
217 		    len);
218 		break;
219 
220 	case PROP_TYPE_NUMBER:
221 		intval = zpool_get_prop_int(zhp, prop, &src);
222 
223 		switch (prop) {
224 		case ZPOOL_PROP_SIZE:
225 		case ZPOOL_PROP_USED:
226 		case ZPOOL_PROP_AVAILABLE:
227 			(void) zfs_nicenum(intval, buf, len);
228 			break;
229 
230 		case ZPOOL_PROP_CAPACITY:
231 			(void) snprintf(buf, len, "%llu%%",
232 			    (u_longlong_t)intval);
233 			break;
234 
235 		case ZPOOL_PROP_HEALTH:
236 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
237 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
238 			verify(nvlist_lookup_uint64_array(nvroot,
239 			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
240 
241 			(void) strlcpy(buf, zpool_state_to_name(intval,
242 			    vs->vs_aux), len);
243 			break;
244 		default:
245 			(void) snprintf(buf, len, "%llu", intval);
246 		}
247 		break;
248 
249 	case PROP_TYPE_INDEX:
250 		intval = zpool_get_prop_int(zhp, prop, &src);
251 		if (zpool_prop_index_to_string(prop, intval, &strval)
252 		    != 0)
253 			return (-1);
254 		(void) strlcpy(buf, strval, len);
255 		break;
256 
257 	default:
258 		abort();
259 	}
260 
261 	if (srctype)
262 		*srctype = src;
263 
264 	return (0);
265 }
266 
267 /*
268  * Check if the bootfs name has the same pool name as it is set to.
269  * Assuming bootfs is a valid dataset name.
270  */
271 static boolean_t
272 bootfs_name_valid(const char *pool, char *bootfs)
273 {
274 	int len = strlen(pool);
275 
276 	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM))
277 		return (B_FALSE);
278 
279 	if (strncmp(pool, bootfs, len) == 0 &&
280 	    (bootfs[len] == '/' || bootfs[len] == '\0'))
281 		return (B_TRUE);
282 
283 	return (B_FALSE);
284 }
285 
286 /*
287  * Given an nvlist of zpool properties to be set, validate that they are
288  * correct, and parse any numeric properties (index, boolean, etc) if they are
289  * specified as strings.
290  */
291 static nvlist_t *
292 zpool_validate_properties(libzfs_handle_t *hdl, const char *poolname,
293     nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
294 {
295 	nvpair_t *elem;
296 	nvlist_t *retprops;
297 	zpool_prop_t prop;
298 	char *strval;
299 	uint64_t intval;
300 	char *slash;
301 	struct stat64 statbuf;
302 
303 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
304 		(void) no_memory(hdl);
305 		return (NULL);
306 	}
307 
308 	elem = NULL;
309 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
310 		const char *propname = nvpair_name(elem);
311 
312 		/*
313 		 * Make sure this property is valid and applies to this type.
314 		 */
315 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
316 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
317 			    "invalid property '%s'"), propname);
318 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
319 			goto error;
320 		}
321 
322 		if (zpool_prop_readonly(prop)) {
323 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
324 			    "is readonly"), propname);
325 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
326 			goto error;
327 		}
328 
329 		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
330 		    &strval, &intval, errbuf) != 0)
331 			goto error;
332 
333 		/*
334 		 * Perform additional checking for specific properties.
335 		 */
336 		switch (prop) {
337 		case ZPOOL_PROP_VERSION:
338 			if (intval < version || intval > SPA_VERSION) {
339 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
340 				    "property '%s' number %d is invalid."),
341 				    propname, intval);
342 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
343 				goto error;
344 			}
345 			break;
346 
347 		case ZPOOL_PROP_BOOTFS:
348 			if (create_or_import) {
349 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
350 				    "property '%s' cannot be set at creation "
351 				    "or import time"), propname);
352 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
353 				goto error;
354 			}
355 
356 			if (version < SPA_VERSION_BOOTFS) {
357 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
358 				    "pool must be upgraded to support "
359 				    "'%s' property"), propname);
360 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
361 				goto error;
362 			}
363 
364 			/*
365 			 * bootfs property value has to be a dataset name and
366 			 * the dataset has to be in the same pool as it sets to.
367 			 */
368 			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
369 			    strval)) {
370 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
371 				    "is an invalid name"), strval);
372 				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
373 				goto error;
374 			}
375 			break;
376 
377 		case ZPOOL_PROP_ALTROOT:
378 			if (!create_or_import) {
379 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
380 				    "property '%s' can only be set during pool "
381 				    "creation or import"), propname);
382 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
383 				goto error;
384 			}
385 
386 			if (strval[0] != '/') {
387 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
388 				    "bad alternate root '%s'"), strval);
389 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
390 				goto error;
391 			}
392 			break;
393 
394 		case ZPOOL_PROP_CACHEFILE:
395 			if (strval[0] == '\0')
396 				break;
397 
398 			if (strcmp(strval, "none") == 0)
399 				break;
400 
401 			if (strval[0] != '/') {
402 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
403 				    "property '%s' must be empty, an "
404 				    "absolute path, or 'none'"), propname);
405 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
406 				goto error;
407 			}
408 
409 			slash = strrchr(strval, '/');
410 
411 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
412 			    strcmp(slash, "/..") == 0) {
413 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
414 				    "'%s' is not a valid file"), strval);
415 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
416 				goto error;
417 			}
418 
419 			*slash = '\0';
420 
421 			if (stat64(strval, &statbuf) != 0 ||
422 			    !S_ISDIR(statbuf.st_mode)) {
423 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
424 				    "'%s' is not a valid directory"),
425 				    strval);
426 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
427 				goto error;
428 			}
429 
430 			*slash = '/';
431 			break;
432 		}
433 	}
434 
435 	return (retprops);
436 error:
437 	nvlist_free(retprops);
438 	return (NULL);
439 }
440 
441 /*
442  * Set zpool property : propname=propval.
443  */
444 int
445 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
446 {
447 	zfs_cmd_t zc = { 0 };
448 	int ret = -1;
449 	char errbuf[1024];
450 	nvlist_t *nvl = NULL;
451 	nvlist_t *realprops;
452 	uint64_t version;
453 
454 	(void) snprintf(errbuf, sizeof (errbuf),
455 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
456 	    zhp->zpool_name);
457 
458 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
459 		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
460 
461 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
462 		return (no_memory(zhp->zpool_hdl));
463 
464 	if (nvlist_add_string(nvl, propname, propval) != 0) {
465 		nvlist_free(nvl);
466 		return (no_memory(zhp->zpool_hdl));
467 	}
468 
469 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
470 	if ((realprops = zpool_validate_properties(zhp->zpool_hdl,
471 	    zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
472 		nvlist_free(nvl);
473 		return (-1);
474 	}
475 
476 	nvlist_free(nvl);
477 	nvl = realprops;
478 
479 	/*
480 	 * Execute the corresponding ioctl() to set this property.
481 	 */
482 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
483 
484 	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
485 		nvlist_free(nvl);
486 		return (-1);
487 	}
488 
489 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
490 
491 	zcmd_free_nvlists(&zc);
492 	nvlist_free(nvl);
493 
494 	if (ret)
495 		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
496 	else
497 		(void) zpool_props_refresh(zhp);
498 
499 	return (ret);
500 }
501 
502 int
503 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
504 {
505 	libzfs_handle_t *hdl = zhp->zpool_hdl;
506 	zprop_list_t *entry;
507 	char buf[ZFS_MAXPROPLEN];
508 
509 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
510 		return (-1);
511 
512 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
513 
514 		if (entry->pl_fixed)
515 			continue;
516 
517 		if (entry->pl_prop != ZPROP_INVAL &&
518 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
519 		    NULL) == 0) {
520 			if (strlen(buf) > entry->pl_width)
521 				entry->pl_width = strlen(buf);
522 		}
523 	}
524 
525 	return (0);
526 }
527 
528 
529 /*
530  * Validate the given pool name, optionally putting an extended error message in
531  * 'buf'.
532  */
533 static boolean_t
534 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
535 {
536 	namecheck_err_t why;
537 	char what;
538 	int ret;
539 
540 	ret = pool_namecheck(pool, &why, &what);
541 
542 	/*
543 	 * The rules for reserved pool names were extended at a later point.
544 	 * But we need to support users with existing pools that may now be
545 	 * invalid.  So we only check for this expanded set of names during a
546 	 * create (or import), and only in userland.
547 	 */
548 	if (ret == 0 && !isopen &&
549 	    (strncmp(pool, "mirror", 6) == 0 ||
550 	    strncmp(pool, "raidz", 5) == 0 ||
551 	    strncmp(pool, "spare", 5) == 0 ||
552 	    strcmp(pool, "log") == 0)) {
553 		zfs_error_aux(hdl,
554 		    dgettext(TEXT_DOMAIN, "name is reserved"));
555 		return (B_FALSE);
556 	}
557 
558 
559 	if (ret != 0) {
560 		if (hdl != NULL) {
561 			switch (why) {
562 			case NAME_ERR_TOOLONG:
563 				zfs_error_aux(hdl,
564 				    dgettext(TEXT_DOMAIN, "name is too long"));
565 				break;
566 
567 			case NAME_ERR_INVALCHAR:
568 				zfs_error_aux(hdl,
569 				    dgettext(TEXT_DOMAIN, "invalid character "
570 				    "'%c' in pool name"), what);
571 				break;
572 
573 			case NAME_ERR_NOLETTER:
574 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
575 				    "name must begin with a letter"));
576 				break;
577 
578 			case NAME_ERR_RESERVED:
579 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
580 				    "name is reserved"));
581 				break;
582 
583 			case NAME_ERR_DISKLIKE:
584 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
585 				    "pool name is reserved"));
586 				break;
587 
588 			case NAME_ERR_LEADING_SLASH:
589 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
590 				    "leading slash in name"));
591 				break;
592 
593 			case NAME_ERR_EMPTY_COMPONENT:
594 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
595 				    "empty component in name"));
596 				break;
597 
598 			case NAME_ERR_TRAILING_SLASH:
599 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
600 				    "trailing slash in name"));
601 				break;
602 
603 			case NAME_ERR_MULTIPLE_AT:
604 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
605 				    "multiple '@' delimiters in name"));
606 				break;
607 
608 			}
609 		}
610 		return (B_FALSE);
611 	}
612 
613 	return (B_TRUE);
614 }
615 
616 /*
617  * Open a handle to the given pool, even if the pool is currently in the FAULTED
618  * state.
619  */
620 zpool_handle_t *
621 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
622 {
623 	zpool_handle_t *zhp;
624 	boolean_t missing;
625 
626 	/*
627 	 * Make sure the pool name is valid.
628 	 */
629 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
630 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
631 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
632 		    pool);
633 		return (NULL);
634 	}
635 
636 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
637 		return (NULL);
638 
639 	zhp->zpool_hdl = hdl;
640 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
641 
642 	if (zpool_refresh_stats(zhp, &missing) != 0) {
643 		zpool_close(zhp);
644 		return (NULL);
645 	}
646 
647 	if (missing) {
648 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
649 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
650 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
651 		zpool_close(zhp);
652 		return (NULL);
653 	}
654 
655 	return (zhp);
656 }
657 
658 /*
659  * Like the above, but silent on error.  Used when iterating over pools (because
660  * the configuration cache may be out of date).
661  */
662 int
663 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
664 {
665 	zpool_handle_t *zhp;
666 	boolean_t missing;
667 
668 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
669 		return (-1);
670 
671 	zhp->zpool_hdl = hdl;
672 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
673 
674 	if (zpool_refresh_stats(zhp, &missing) != 0) {
675 		zpool_close(zhp);
676 		return (-1);
677 	}
678 
679 	if (missing) {
680 		zpool_close(zhp);
681 		*ret = NULL;
682 		return (0);
683 	}
684 
685 	*ret = zhp;
686 	return (0);
687 }
688 
689 /*
690  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
691  * state.
692  */
693 zpool_handle_t *
694 zpool_open(libzfs_handle_t *hdl, const char *pool)
695 {
696 	zpool_handle_t *zhp;
697 
698 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
699 		return (NULL);
700 
701 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
702 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
703 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
704 		zpool_close(zhp);
705 		return (NULL);
706 	}
707 
708 	return (zhp);
709 }
710 
711 /*
712  * Close the handle.  Simply frees the memory associated with the handle.
713  */
714 void
715 zpool_close(zpool_handle_t *zhp)
716 {
717 	if (zhp->zpool_config)
718 		nvlist_free(zhp->zpool_config);
719 	if (zhp->zpool_old_config)
720 		nvlist_free(zhp->zpool_old_config);
721 	if (zhp->zpool_props)
722 		nvlist_free(zhp->zpool_props);
723 	free(zhp);
724 }
725 
726 /*
727  * Return the name of the pool.
728  */
729 const char *
730 zpool_get_name(zpool_handle_t *zhp)
731 {
732 	return (zhp->zpool_name);
733 }
734 
735 
736 /*
737  * Return the state of the pool (ACTIVE or UNAVAILABLE)
738  */
739 int
740 zpool_get_state(zpool_handle_t *zhp)
741 {
742 	return (zhp->zpool_state);
743 }
744 
745 /*
746  * Create the named pool, using the provided vdev list.  It is assumed
747  * that the consumer has already validated the contents of the nvlist, so we
748  * don't have to worry about error semantics.
749  */
750 int
751 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
752     nvlist_t *props)
753 {
754 	zfs_cmd_t zc = { 0 };
755 	char msg[1024];
756 	char *altroot;
757 
758 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
759 	    "cannot create '%s'"), pool);
760 
761 	if (!zpool_name_valid(hdl, B_FALSE, pool))
762 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
763 
764 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
765 		return (-1);
766 
767 	if (props && (props = zpool_validate_properties(hdl, pool, props,
768 	    SPA_VERSION_1, B_TRUE, msg)) == NULL)
769 		return (-1);
770 
771 	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
772 		nvlist_free(props);
773 		return (-1);
774 	}
775 
776 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
777 
778 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc) != 0) {
779 
780 		zcmd_free_nvlists(&zc);
781 		nvlist_free(props);
782 
783 		switch (errno) {
784 		case EBUSY:
785 			/*
786 			 * This can happen if the user has specified the same
787 			 * device multiple times.  We can't reliably detect this
788 			 * until we try to add it and see we already have a
789 			 * label.
790 			 */
791 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
792 			    "one or more vdevs refer to the same device"));
793 			return (zfs_error(hdl, EZFS_BADDEV, msg));
794 
795 		case EOVERFLOW:
796 			/*
797 			 * This occurs when one of the devices is below
798 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
799 			 * device was the problem device since there's no
800 			 * reliable way to determine device size from userland.
801 			 */
802 			{
803 				char buf[64];
804 
805 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
806 
807 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
808 				    "one or more devices is less than the "
809 				    "minimum size (%s)"), buf);
810 			}
811 			return (zfs_error(hdl, EZFS_BADDEV, msg));
812 
813 		case ENOSPC:
814 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
815 			    "one or more devices is out of space"));
816 			return (zfs_error(hdl, EZFS_BADDEV, msg));
817 
818 		case ENOTBLK:
819 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
820 			    "cache device must be a disk or disk slice"));
821 			return (zfs_error(hdl, EZFS_BADDEV, msg));
822 
823 		default:
824 			return (zpool_standard_error(hdl, errno, msg));
825 		}
826 	}
827 
828 	/*
829 	 * If this is an alternate root pool, then we automatically set the
830 	 * mountpoint of the root dataset to be '/'.
831 	 */
832 	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
833 	    &altroot) == 0) {
834 		zfs_handle_t *zhp;
835 
836 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
837 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
838 		    "/") == 0);
839 
840 		zfs_close(zhp);
841 	}
842 
843 	zcmd_free_nvlists(&zc);
844 	nvlist_free(props);
845 	return (0);
846 }
847 
848 /*
849  * Destroy the given pool.  It is up to the caller to ensure that there are no
850  * datasets left in the pool.
851  */
852 int
853 zpool_destroy(zpool_handle_t *zhp)
854 {
855 	zfs_cmd_t zc = { 0 };
856 	zfs_handle_t *zfp = NULL;
857 	libzfs_handle_t *hdl = zhp->zpool_hdl;
858 	char msg[1024];
859 
860 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
861 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
862 	    ZFS_TYPE_FILESYSTEM)) == NULL)
863 		return (-1);
864 
865 	if (zpool_remove_zvol_links(zhp) != 0)
866 		return (-1);
867 
868 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
869 
870 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
871 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
872 		    "cannot destroy '%s'"), zhp->zpool_name);
873 
874 		if (errno == EROFS) {
875 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
876 			    "one or more devices is read only"));
877 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
878 		} else {
879 			(void) zpool_standard_error(hdl, errno, msg);
880 		}
881 
882 		if (zfp)
883 			zfs_close(zfp);
884 		return (-1);
885 	}
886 
887 	if (zfp) {
888 		remove_mountpoint(zfp);
889 		zfs_close(zfp);
890 	}
891 
892 	return (0);
893 }
894 
895 /*
896  * Add the given vdevs to the pool.  The caller must have already performed the
897  * necessary verification to ensure that the vdev specification is well-formed.
898  */
899 int
900 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
901 {
902 	zfs_cmd_t zc = { 0 };
903 	int ret;
904 	libzfs_handle_t *hdl = zhp->zpool_hdl;
905 	char msg[1024];
906 	nvlist_t **spares, **l2cache;
907 	uint_t nspares, nl2cache;
908 
909 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
910 	    "cannot add to '%s'"), zhp->zpool_name);
911 
912 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
913 	    SPA_VERSION_SPARES &&
914 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
915 	    &spares, &nspares) == 0) {
916 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
917 		    "upgraded to add hot spares"));
918 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
919 	}
920 
921 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
922 	    SPA_VERSION_L2CACHE &&
923 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
924 	    &l2cache, &nl2cache) == 0) {
925 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
926 		    "upgraded to add cache devices"));
927 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
928 	}
929 
930 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
931 		return (-1);
932 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
933 
934 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
935 		switch (errno) {
936 		case EBUSY:
937 			/*
938 			 * This can happen if the user has specified the same
939 			 * device multiple times.  We can't reliably detect this
940 			 * until we try to add it and see we already have a
941 			 * label.
942 			 */
943 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
944 			    "one or more vdevs refer to the same device"));
945 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
946 			break;
947 
948 		case EOVERFLOW:
949 			/*
950 			 * This occurrs when one of the devices is below
951 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
952 			 * device was the problem device since there's no
953 			 * reliable way to determine device size from userland.
954 			 */
955 			{
956 				char buf[64];
957 
958 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
959 
960 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
961 				    "device is less than the minimum "
962 				    "size (%s)"), buf);
963 			}
964 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
965 			break;
966 
967 		case ENOTSUP:
968 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
969 			    "pool must be upgraded to add these vdevs"));
970 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
971 			break;
972 
973 		case EDOM:
974 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
975 			    "root pool can not have multiple vdevs"
976 			    " or separate logs"));
977 			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
978 			break;
979 
980 		case ENOTBLK:
981 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
982 			    "cache device must be a disk or disk slice"));
983 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
984 			break;
985 
986 		default:
987 			(void) zpool_standard_error(hdl, errno, msg);
988 		}
989 
990 		ret = -1;
991 	} else {
992 		ret = 0;
993 	}
994 
995 	zcmd_free_nvlists(&zc);
996 
997 	return (ret);
998 }
999 
1000 /*
1001  * Exports the pool from the system.  The caller must ensure that there are no
1002  * mounted datasets in the pool.
1003  */
1004 int
1005 zpool_export(zpool_handle_t *zhp)
1006 {
1007 	zfs_cmd_t zc = { 0 };
1008 
1009 	if (zpool_remove_zvol_links(zhp) != 0)
1010 		return (-1);
1011 
1012 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1013 
1014 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0)
1015 		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1016 		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
1017 		    zhp->zpool_name));
1018 	return (0);
1019 }
1020 
1021 /*
1022  * zpool_import() is a contracted interface. Should be kept the same
1023  * if possible.
1024  *
1025  * Applications should use zpool_import_props() to import a pool with
1026  * new properties value to be set.
1027  */
1028 int
1029 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1030     char *altroot)
1031 {
1032 	nvlist_t *props = NULL;
1033 	int ret;
1034 
1035 	if (altroot != NULL) {
1036 		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1037 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1038 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1039 			    newname));
1040 		}
1041 
1042 		if (nvlist_add_string(props,
1043 		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
1044 			nvlist_free(props);
1045 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1046 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1047 			    newname));
1048 		}
1049 	}
1050 
1051 	ret = zpool_import_props(hdl, config, newname, props);
1052 	if (props)
1053 		nvlist_free(props);
1054 	return (ret);
1055 }
1056 
1057 /*
1058  * Import the given pool using the known configuration and a list of
1059  * properties to be set. The configuration should have come from
1060  * zpool_find_import(). The 'newname' parameters control whether the pool
1061  * is imported with a different name.
1062  */
1063 int
1064 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1065     nvlist_t *props)
1066 {
1067 	zfs_cmd_t zc = { 0 };
1068 	char *thename;
1069 	char *origname;
1070 	int ret;
1071 	char errbuf[1024];
1072 
1073 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1074 	    &origname) == 0);
1075 
1076 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1077 	    "cannot import pool '%s'"), origname);
1078 
1079 	if (newname != NULL) {
1080 		if (!zpool_name_valid(hdl, B_FALSE, newname))
1081 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1082 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1083 			    newname));
1084 		thename = (char *)newname;
1085 	} else {
1086 		thename = origname;
1087 	}
1088 
1089 	if (props) {
1090 		uint64_t version;
1091 
1092 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1093 		    &version) == 0);
1094 
1095 		if ((props = zpool_validate_properties(hdl, origname,
1096 		    props, version, B_TRUE, errbuf)) == NULL) {
1097 			return (-1);
1098 		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1099 			nvlist_free(props);
1100 			return (-1);
1101 		}
1102 	}
1103 
1104 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1105 
1106 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1107 	    &zc.zc_guid) == 0);
1108 
1109 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1110 		nvlist_free(props);
1111 		return (-1);
1112 	}
1113 
1114 	ret = 0;
1115 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
1116 		char desc[1024];
1117 		if (newname == NULL)
1118 			(void) snprintf(desc, sizeof (desc),
1119 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1120 			    thename);
1121 		else
1122 			(void) snprintf(desc, sizeof (desc),
1123 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1124 			    origname, thename);
1125 
1126 		switch (errno) {
1127 		case ENOTSUP:
1128 			/*
1129 			 * Unsupported version.
1130 			 */
1131 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1132 			break;
1133 
1134 		case EINVAL:
1135 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1136 			break;
1137 
1138 		default:
1139 			(void) zpool_standard_error(hdl, errno, desc);
1140 		}
1141 
1142 		ret = -1;
1143 	} else {
1144 		zpool_handle_t *zhp;
1145 
1146 		/*
1147 		 * This should never fail, but play it safe anyway.
1148 		 */
1149 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
1150 			ret = -1;
1151 		} else if (zhp != NULL) {
1152 			ret = zpool_create_zvol_links(zhp);
1153 			zpool_close(zhp);
1154 		}
1155 
1156 	}
1157 
1158 	zcmd_free_nvlists(&zc);
1159 	nvlist_free(props);
1160 
1161 	return (ret);
1162 }
1163 
1164 /*
1165  * Scrub the pool.
1166  */
1167 int
1168 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
1169 {
1170 	zfs_cmd_t zc = { 0 };
1171 	char msg[1024];
1172 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1173 
1174 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1175 	zc.zc_cookie = type;
1176 
1177 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
1178 		return (0);
1179 
1180 	(void) snprintf(msg, sizeof (msg),
1181 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1182 
1183 	if (errno == EBUSY)
1184 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
1185 	else
1186 		return (zpool_standard_error(hdl, errno, msg));
1187 }
1188 
1189 /*
1190  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1191  * spare; but FALSE if its an INUSE spare.
1192  */
1193 static nvlist_t *
1194 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
1195     boolean_t *avail_spare, boolean_t *l2cache)
1196 {
1197 	uint_t c, children;
1198 	nvlist_t **child;
1199 	uint64_t theguid, present;
1200 	char *path;
1201 	uint64_t wholedisk = 0;
1202 	nvlist_t *ret;
1203 
1204 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
1205 
1206 	if (search == NULL &&
1207 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
1208 		/*
1209 		 * If the device has never been present since import, the only
1210 		 * reliable way to match the vdev is by GUID.
1211 		 */
1212 		if (theguid == guid)
1213 			return (nv);
1214 	} else if (search != NULL &&
1215 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1216 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1217 		    &wholedisk);
1218 		if (wholedisk) {
1219 			/*
1220 			 * For whole disks, the internal path has 's0', but the
1221 			 * path passed in by the user doesn't.
1222 			 */
1223 			if (strlen(search) == strlen(path) - 2 &&
1224 			    strncmp(search, path, strlen(search)) == 0)
1225 				return (nv);
1226 		} else if (strcmp(search, path) == 0) {
1227 			return (nv);
1228 		}
1229 	}
1230 
1231 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1232 	    &child, &children) != 0)
1233 		return (NULL);
1234 
1235 	for (c = 0; c < children; c++)
1236 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1237 		    avail_spare, l2cache)) != NULL)
1238 			return (ret);
1239 
1240 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1241 	    &child, &children) == 0) {
1242 		for (c = 0; c < children; c++) {
1243 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1244 			    avail_spare, l2cache)) != NULL) {
1245 				*avail_spare = B_TRUE;
1246 				return (ret);
1247 			}
1248 		}
1249 	}
1250 
1251 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1252 	    &child, &children) == 0) {
1253 		for (c = 0; c < children; c++) {
1254 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1255 			    avail_spare, l2cache)) != NULL) {
1256 				*l2cache = B_TRUE;
1257 				return (ret);
1258 			}
1259 		}
1260 	}
1261 
1262 	return (NULL);
1263 }
1264 
1265 nvlist_t *
1266 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1267     boolean_t *l2cache)
1268 {
1269 	char buf[MAXPATHLEN];
1270 	const char *search;
1271 	char *end;
1272 	nvlist_t *nvroot;
1273 	uint64_t guid;
1274 
1275 	guid = strtoull(path, &end, 10);
1276 	if (guid != 0 && *end == '\0') {
1277 		search = NULL;
1278 	} else if (path[0] != '/') {
1279 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
1280 		search = buf;
1281 	} else {
1282 		search = path;
1283 	}
1284 
1285 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1286 	    &nvroot) == 0);
1287 
1288 	*avail_spare = B_FALSE;
1289 	*l2cache = B_FALSE;
1290 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
1291 	    l2cache));
1292 }
1293 
1294 /*
1295  * Returns TRUE if the given guid corresponds to the given type.
1296  * This is used to check for hot spares (INUSE or not), and level 2 cache
1297  * devices.
1298  */
1299 static boolean_t
1300 is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1301 {
1302 	uint64_t target_guid;
1303 	nvlist_t *nvroot;
1304 	nvlist_t **list;
1305 	uint_t count;
1306 	int i;
1307 
1308 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1309 	    &nvroot) == 0);
1310 	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1311 		for (i = 0; i < count; i++) {
1312 			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1313 			    &target_guid) == 0);
1314 			if (guid == target_guid)
1315 				return (B_TRUE);
1316 		}
1317 	}
1318 
1319 	return (B_FALSE);
1320 }
1321 
1322 /*
1323  * Bring the specified vdev online.   The 'flags' parameter is a set of the
1324  * ZFS_ONLINE_* flags.
1325  */
1326 int
1327 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1328     vdev_state_t *newstate)
1329 {
1330 	zfs_cmd_t zc = { 0 };
1331 	char msg[1024];
1332 	nvlist_t *tgt;
1333 	boolean_t avail_spare, l2cache;
1334 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1335 
1336 	(void) snprintf(msg, sizeof (msg),
1337 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1338 
1339 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1340 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
1341 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1342 
1343 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1344 
1345 	if (avail_spare ||
1346 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1347 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1348 
1349 	if (l2cache ||
1350 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_L2CACHE) == B_TRUE)
1351 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1352 
1353 	zc.zc_cookie = VDEV_STATE_ONLINE;
1354 	zc.zc_obj = flags;
1355 
1356 
1357 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1358 		return (zpool_standard_error(hdl, errno, msg));
1359 
1360 	*newstate = zc.zc_cookie;
1361 	return (0);
1362 }
1363 
1364 /*
1365  * Take the specified vdev offline
1366  */
1367 int
1368 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1369 {
1370 	zfs_cmd_t zc = { 0 };
1371 	char msg[1024];
1372 	nvlist_t *tgt;
1373 	boolean_t avail_spare, l2cache;
1374 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1375 
1376 	(void) snprintf(msg, sizeof (msg),
1377 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1378 
1379 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1380 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
1381 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1382 
1383 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1384 
1385 	if (avail_spare ||
1386 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1387 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1388 
1389 	if (l2cache ||
1390 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_L2CACHE) == B_TRUE)
1391 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1392 
1393 	zc.zc_cookie = VDEV_STATE_OFFLINE;
1394 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1395 
1396 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1397 		return (0);
1398 
1399 	switch (errno) {
1400 	case EBUSY:
1401 
1402 		/*
1403 		 * There are no other replicas of this device.
1404 		 */
1405 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1406 
1407 	default:
1408 		return (zpool_standard_error(hdl, errno, msg));
1409 	}
1410 }
1411 
1412 /*
1413  * Mark the given vdev faulted.
1414  */
1415 int
1416 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1417 {
1418 	zfs_cmd_t zc = { 0 };
1419 	char msg[1024];
1420 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1421 
1422 	(void) snprintf(msg, sizeof (msg),
1423 	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
1424 
1425 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1426 	zc.zc_guid = guid;
1427 	zc.zc_cookie = VDEV_STATE_FAULTED;
1428 
1429 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1430 		return (0);
1431 
1432 	switch (errno) {
1433 	case EBUSY:
1434 
1435 		/*
1436 		 * There are no other replicas of this device.
1437 		 */
1438 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1439 
1440 	default:
1441 		return (zpool_standard_error(hdl, errno, msg));
1442 	}
1443 
1444 }
1445 
1446 /*
1447  * Mark the given vdev degraded.
1448  */
1449 int
1450 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1451 {
1452 	zfs_cmd_t zc = { 0 };
1453 	char msg[1024];
1454 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1455 
1456 	(void) snprintf(msg, sizeof (msg),
1457 	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
1458 
1459 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1460 	zc.zc_guid = guid;
1461 	zc.zc_cookie = VDEV_STATE_DEGRADED;
1462 
1463 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1464 		return (0);
1465 
1466 	return (zpool_standard_error(hdl, errno, msg));
1467 }
1468 
1469 /*
1470  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
1471  * a hot spare.
1472  */
1473 static boolean_t
1474 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1475 {
1476 	nvlist_t **child;
1477 	uint_t c, children;
1478 	char *type;
1479 
1480 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1481 	    &children) == 0) {
1482 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1483 		    &type) == 0);
1484 
1485 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1486 		    children == 2 && child[which] == tgt)
1487 			return (B_TRUE);
1488 
1489 		for (c = 0; c < children; c++)
1490 			if (is_replacing_spare(child[c], tgt, which))
1491 				return (B_TRUE);
1492 	}
1493 
1494 	return (B_FALSE);
1495 }
1496 
1497 /*
1498  * Attach new_disk (fully described by nvroot) to old_disk.
1499  * If 'replacing' is specified, the new disk will replace the old one.
1500  */
1501 int
1502 zpool_vdev_attach(zpool_handle_t *zhp,
1503     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1504 {
1505 	zfs_cmd_t zc = { 0 };
1506 	char msg[1024];
1507 	int ret;
1508 	nvlist_t *tgt;
1509 	boolean_t avail_spare, l2cache;
1510 	uint64_t val, is_log;
1511 	char *path;
1512 	nvlist_t **child;
1513 	uint_t children;
1514 	nvlist_t *config_root;
1515 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1516 
1517 	if (replacing)
1518 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1519 		    "cannot replace %s with %s"), old_disk, new_disk);
1520 	else
1521 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1522 		    "cannot attach %s to %s"), new_disk, old_disk);
1523 
1524 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1525 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache)) == 0)
1526 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1527 
1528 	if (avail_spare)
1529 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1530 
1531 	if (l2cache)
1532 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1533 
1534 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1535 	zc.zc_cookie = replacing;
1536 
1537 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1538 	    &child, &children) != 0 || children != 1) {
1539 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1540 		    "new device must be a single disk"));
1541 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1542 	}
1543 
1544 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1545 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1546 
1547 	/*
1548 	 * If the target is a hot spare that has been swapped in, we can only
1549 	 * replace it with another hot spare.
1550 	 */
1551 	if (replacing &&
1552 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1553 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1554 	    (zpool_find_vdev(zhp, path, &avail_spare, &l2cache) == NULL ||
1555 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
1556 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1557 		    "can only be replaced by another hot spare"));
1558 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1559 	}
1560 
1561 	/*
1562 	 * If we are attempting to replace a spare, it canot be applied to an
1563 	 * already spared device.
1564 	 */
1565 	if (replacing &&
1566 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1567 	    zpool_find_vdev(zhp, path, &avail_spare, &l2cache) != NULL &&
1568 	    avail_spare && is_replacing_spare(config_root, tgt, 0)) {
1569 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1570 		    "device has already been replaced with a spare"));
1571 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1572 	}
1573 
1574 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1575 		return (-1);
1576 
1577 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1578 
1579 	zcmd_free_nvlists(&zc);
1580 
1581 	if (ret == 0)
1582 		return (0);
1583 
1584 	switch (errno) {
1585 	case ENOTSUP:
1586 		/*
1587 		 * Can't attach to or replace this type of vdev.
1588 		 */
1589 		if (replacing) {
1590 			is_log = B_FALSE;
1591 			(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_LOG,
1592 			    &is_log);
1593 			if (is_log)
1594 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1595 				    "cannot replace a log with a spare"));
1596 			else
1597 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1598 				    "cannot replace a replacing device"));
1599 		} else {
1600 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1601 			    "can only attach to mirrors and top-level "
1602 			    "disks"));
1603 		}
1604 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1605 		break;
1606 
1607 	case EINVAL:
1608 		/*
1609 		 * The new device must be a single disk.
1610 		 */
1611 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1612 		    "new device must be a single disk"));
1613 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1614 		break;
1615 
1616 	case EBUSY:
1617 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1618 		    new_disk);
1619 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1620 		break;
1621 
1622 	case EOVERFLOW:
1623 		/*
1624 		 * The new device is too small.
1625 		 */
1626 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1627 		    "device is too small"));
1628 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1629 		break;
1630 
1631 	case EDOM:
1632 		/*
1633 		 * The new device has a different alignment requirement.
1634 		 */
1635 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1636 		    "devices have different sector alignment"));
1637 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1638 		break;
1639 
1640 	case ENAMETOOLONG:
1641 		/*
1642 		 * The resulting top-level vdev spec won't fit in the label.
1643 		 */
1644 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1645 		break;
1646 
1647 	default:
1648 		(void) zpool_standard_error(hdl, errno, msg);
1649 	}
1650 
1651 	return (-1);
1652 }
1653 
1654 /*
1655  * Detach the specified device.
1656  */
1657 int
1658 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1659 {
1660 	zfs_cmd_t zc = { 0 };
1661 	char msg[1024];
1662 	nvlist_t *tgt;
1663 	boolean_t avail_spare, l2cache;
1664 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1665 
1666 	(void) snprintf(msg, sizeof (msg),
1667 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1668 
1669 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1670 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
1671 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1672 
1673 	if (avail_spare)
1674 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1675 
1676 	if (l2cache)
1677 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1678 
1679 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1680 
1681 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1682 		return (0);
1683 
1684 	switch (errno) {
1685 
1686 	case ENOTSUP:
1687 		/*
1688 		 * Can't detach from this type of vdev.
1689 		 */
1690 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1691 		    "applicable to mirror and replacing vdevs"));
1692 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1693 		break;
1694 
1695 	case EBUSY:
1696 		/*
1697 		 * There are no other replicas of this device.
1698 		 */
1699 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1700 		break;
1701 
1702 	default:
1703 		(void) zpool_standard_error(hdl, errno, msg);
1704 	}
1705 
1706 	return (-1);
1707 }
1708 
1709 /*
1710  * Remove the given device.  Currently, this is supported only for hot spares
1711  * and level 2 cache devices.
1712  */
1713 int
1714 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1715 {
1716 	zfs_cmd_t zc = { 0 };
1717 	char msg[1024];
1718 	nvlist_t *tgt;
1719 	boolean_t avail_spare, l2cache;
1720 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1721 
1722 	(void) snprintf(msg, sizeof (msg),
1723 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1724 
1725 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1726 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
1727 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1728 
1729 	if (!avail_spare && !l2cache) {
1730 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1731 		    "only inactive hot spares or cache devices "
1732 		    "can be removed"));
1733 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1734 	}
1735 
1736 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1737 
1738 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1739 		return (0);
1740 
1741 	return (zpool_standard_error(hdl, errno, msg));
1742 }
1743 
1744 /*
1745  * Clear the errors for the pool, or the particular device if specified.
1746  */
1747 int
1748 zpool_clear(zpool_handle_t *zhp, const char *path)
1749 {
1750 	zfs_cmd_t zc = { 0 };
1751 	char msg[1024];
1752 	nvlist_t *tgt;
1753 	boolean_t avail_spare, l2cache;
1754 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1755 
1756 	if (path)
1757 		(void) snprintf(msg, sizeof (msg),
1758 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1759 		    path);
1760 	else
1761 		(void) snprintf(msg, sizeof (msg),
1762 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1763 		    zhp->zpool_name);
1764 
1765 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1766 	if (path) {
1767 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
1768 		    &l2cache)) == 0)
1769 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1770 
1771 		/*
1772 		 * Don't allow error clearing for hot spares.  Do allow
1773 		 * error clearing for l2cache devices.
1774 		 */
1775 		if (avail_spare)
1776 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1777 
1778 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1779 		    &zc.zc_guid) == 0);
1780 	}
1781 
1782 	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
1783 		return (0);
1784 
1785 	return (zpool_standard_error(hdl, errno, msg));
1786 }
1787 
1788 /*
1789  * Similar to zpool_clear(), but takes a GUID (used by fmd).
1790  */
1791 int
1792 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
1793 {
1794 	zfs_cmd_t zc = { 0 };
1795 	char msg[1024];
1796 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1797 
1798 	(void) snprintf(msg, sizeof (msg),
1799 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
1800 	    guid);
1801 
1802 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1803 	zc.zc_guid = guid;
1804 
1805 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1806 		return (0);
1807 
1808 	return (zpool_standard_error(hdl, errno, msg));
1809 }
1810 
1811 /*
1812  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1813  * hierarchy.
1814  */
1815 int
1816 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1817     void *data)
1818 {
1819 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1820 	char (*paths)[MAXPATHLEN];
1821 	size_t size = 4;
1822 	int curr, fd, base, ret = 0;
1823 	DIR *dirp;
1824 	struct dirent *dp;
1825 	struct stat st;
1826 
1827 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1828 		return (errno == ENOENT ? 0 : -1);
1829 
1830 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1831 		int err = errno;
1832 		(void) close(base);
1833 		return (err == ENOENT ? 0 : -1);
1834 	}
1835 
1836 	/*
1837 	 * Oddly this wasn't a directory -- ignore that failure since we
1838 	 * know there are no links lower in the (non-existant) hierarchy.
1839 	 */
1840 	if (!S_ISDIR(st.st_mode)) {
1841 		(void) close(base);
1842 		return (0);
1843 	}
1844 
1845 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1846 		(void) close(base);
1847 		return (-1);
1848 	}
1849 
1850 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1851 	curr = 0;
1852 
1853 	while (curr >= 0) {
1854 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1855 			goto err;
1856 
1857 		if (S_ISDIR(st.st_mode)) {
1858 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1859 				goto err;
1860 
1861 			if ((dirp = fdopendir(fd)) == NULL) {
1862 				(void) close(fd);
1863 				goto err;
1864 			}
1865 
1866 			while ((dp = readdir(dirp)) != NULL) {
1867 				if (dp->d_name[0] == '.')
1868 					continue;
1869 
1870 				if (curr + 1 == size) {
1871 					paths = zfs_realloc(hdl, paths,
1872 					    size * sizeof (paths[0]),
1873 					    size * 2 * sizeof (paths[0]));
1874 					if (paths == NULL) {
1875 						(void) closedir(dirp);
1876 						(void) close(fd);
1877 						goto err;
1878 					}
1879 
1880 					size *= 2;
1881 				}
1882 
1883 				(void) strlcpy(paths[curr + 1], paths[curr],
1884 				    sizeof (paths[curr + 1]));
1885 				(void) strlcat(paths[curr], "/",
1886 				    sizeof (paths[curr]));
1887 				(void) strlcat(paths[curr], dp->d_name,
1888 				    sizeof (paths[curr]));
1889 				curr++;
1890 			}
1891 
1892 			(void) closedir(dirp);
1893 
1894 		} else {
1895 			if ((ret = cb(paths[curr], data)) != 0)
1896 				break;
1897 		}
1898 
1899 		curr--;
1900 	}
1901 
1902 	free(paths);
1903 	(void) close(base);
1904 
1905 	return (ret);
1906 
1907 err:
1908 	free(paths);
1909 	(void) close(base);
1910 	return (-1);
1911 }
1912 
1913 typedef struct zvol_cb {
1914 	zpool_handle_t *zcb_pool;
1915 	boolean_t zcb_create;
1916 } zvol_cb_t;
1917 
1918 /*ARGSUSED*/
1919 static int
1920 do_zvol_create(zfs_handle_t *zhp, void *data)
1921 {
1922 	int ret = 0;
1923 
1924 	if (ZFS_IS_VOLUME(zhp)) {
1925 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
1926 		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
1927 	}
1928 
1929 	if (ret == 0)
1930 		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
1931 
1932 	zfs_close(zhp);
1933 
1934 	return (ret);
1935 }
1936 
1937 /*
1938  * Iterate over all zvols in the pool and make any necessary minor nodes.
1939  */
1940 int
1941 zpool_create_zvol_links(zpool_handle_t *zhp)
1942 {
1943 	zfs_handle_t *zfp;
1944 	int ret;
1945 
1946 	/*
1947 	 * If the pool is unavailable, just return success.
1948 	 */
1949 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
1950 	    zhp->zpool_name)) == NULL)
1951 		return (0);
1952 
1953 	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
1954 
1955 	zfs_close(zfp);
1956 	return (ret);
1957 }
1958 
1959 static int
1960 do_zvol_remove(const char *dataset, void *data)
1961 {
1962 	zpool_handle_t *zhp = data;
1963 
1964 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
1965 }
1966 
1967 /*
1968  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
1969  * by examining the /dev links so that a corrupted pool doesn't impede this
1970  * operation.
1971  */
1972 int
1973 zpool_remove_zvol_links(zpool_handle_t *zhp)
1974 {
1975 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
1976 }
1977 
1978 /*
1979  * Convert from a devid string to a path.
1980  */
1981 static char *
1982 devid_to_path(char *devid_str)
1983 {
1984 	ddi_devid_t devid;
1985 	char *minor;
1986 	char *path;
1987 	devid_nmlist_t *list = NULL;
1988 	int ret;
1989 
1990 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
1991 		return (NULL);
1992 
1993 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
1994 
1995 	devid_str_free(minor);
1996 	devid_free(devid);
1997 
1998 	if (ret != 0)
1999 		return (NULL);
2000 
2001 	if ((path = strdup(list[0].devname)) == NULL)
2002 		return (NULL);
2003 
2004 	devid_free_nmlist(list);
2005 
2006 	return (path);
2007 }
2008 
2009 /*
2010  * Convert from a path to a devid string.
2011  */
2012 static char *
2013 path_to_devid(const char *path)
2014 {
2015 	int fd;
2016 	ddi_devid_t devid;
2017 	char *minor, *ret;
2018 
2019 	if ((fd = open(path, O_RDONLY)) < 0)
2020 		return (NULL);
2021 
2022 	minor = NULL;
2023 	ret = NULL;
2024 	if (devid_get(fd, &devid) == 0) {
2025 		if (devid_get_minor_name(fd, &minor) == 0)
2026 			ret = devid_str_encode(devid, minor);
2027 		if (minor != NULL)
2028 			devid_str_free(minor);
2029 		devid_free(devid);
2030 	}
2031 	(void) close(fd);
2032 
2033 	return (ret);
2034 }
2035 
2036 /*
2037  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2038  * ignore any failure here, since a common case is for an unprivileged user to
2039  * type 'zpool status', and we'll display the correct information anyway.
2040  */
2041 static void
2042 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2043 {
2044 	zfs_cmd_t zc = { 0 };
2045 
2046 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2047 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
2048 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2049 	    &zc.zc_guid) == 0);
2050 
2051 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2052 }
2053 
2054 /*
2055  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
2056  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
2057  * We also check if this is a whole disk, in which case we strip off the
2058  * trailing 's0' slice name.
2059  *
2060  * This routine is also responsible for identifying when disks have been
2061  * reconfigured in a new location.  The kernel will have opened the device by
2062  * devid, but the path will still refer to the old location.  To catch this, we
2063  * first do a path -> devid translation (which is fast for the common case).  If
2064  * the devid matches, we're done.  If not, we do a reverse devid -> path
2065  * translation and issue the appropriate ioctl() to update the path of the vdev.
2066  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2067  * of these checks.
2068  */
2069 char *
2070 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2071 {
2072 	char *path, *devid;
2073 	uint64_t value;
2074 	char buf[64];
2075 	vdev_stat_t *vs;
2076 	uint_t vsc;
2077 
2078 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2079 	    &value) == 0) {
2080 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2081 		    &value) == 0);
2082 		(void) snprintf(buf, sizeof (buf), "%llu",
2083 		    (u_longlong_t)value);
2084 		path = buf;
2085 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2086 
2087 		/*
2088 		 * If the device is dead (faulted, offline, etc) then don't
2089 		 * bother opening it.  Otherwise we may be forcing the user to
2090 		 * open a misbehaving device, which can have undesirable
2091 		 * effects.
2092 		 */
2093 		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2094 		    (uint64_t **)&vs, &vsc) != 0 ||
2095 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
2096 		    zhp != NULL &&
2097 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2098 			/*
2099 			 * Determine if the current path is correct.
2100 			 */
2101 			char *newdevid = path_to_devid(path);
2102 
2103 			if (newdevid == NULL ||
2104 			    strcmp(devid, newdevid) != 0) {
2105 				char *newpath;
2106 
2107 				if ((newpath = devid_to_path(devid)) != NULL) {
2108 					/*
2109 					 * Update the path appropriately.
2110 					 */
2111 					set_path(zhp, nv, newpath);
2112 					if (nvlist_add_string(nv,
2113 					    ZPOOL_CONFIG_PATH, newpath) == 0)
2114 						verify(nvlist_lookup_string(nv,
2115 						    ZPOOL_CONFIG_PATH,
2116 						    &path) == 0);
2117 					free(newpath);
2118 				}
2119 			}
2120 
2121 			if (newdevid)
2122 				devid_str_free(newdevid);
2123 		}
2124 
2125 		if (strncmp(path, "/dev/dsk/", 9) == 0)
2126 			path += 9;
2127 
2128 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2129 		    &value) == 0 && value) {
2130 			char *tmp = zfs_strdup(hdl, path);
2131 			if (tmp == NULL)
2132 				return (NULL);
2133 			tmp[strlen(path) - 2] = '\0';
2134 			return (tmp);
2135 		}
2136 	} else {
2137 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2138 
2139 		/*
2140 		 * If it's a raidz device, we need to stick in the parity level.
2141 		 */
2142 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2143 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2144 			    &value) == 0);
2145 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
2146 			    (u_longlong_t)value);
2147 			path = buf;
2148 		}
2149 	}
2150 
2151 	return (zfs_strdup(hdl, path));
2152 }
2153 
2154 static int
2155 zbookmark_compare(const void *a, const void *b)
2156 {
2157 	return (memcmp(a, b, sizeof (zbookmark_t)));
2158 }
2159 
2160 /*
2161  * Retrieve the persistent error log, uniquify the members, and return to the
2162  * caller.
2163  */
2164 int
2165 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2166 {
2167 	zfs_cmd_t zc = { 0 };
2168 	uint64_t count;
2169 	zbookmark_t *zb = NULL;
2170 	int i;
2171 
2172 	/*
2173 	 * Retrieve the raw error list from the kernel.  If the number of errors
2174 	 * has increased, allocate more space and continue until we get the
2175 	 * entire list.
2176 	 */
2177 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2178 	    &count) == 0);
2179 	if (count == 0)
2180 		return (0);
2181 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2182 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2183 		return (-1);
2184 	zc.zc_nvlist_dst_size = count;
2185 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2186 	for (;;) {
2187 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2188 		    &zc) != 0) {
2189 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
2190 			if (errno == ENOMEM) {
2191 				count = zc.zc_nvlist_dst_size;
2192 				if ((zc.zc_nvlist_dst = (uintptr_t)
2193 				    zfs_alloc(zhp->zpool_hdl, count *
2194 				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
2195 					return (-1);
2196 			} else {
2197 				return (-1);
2198 			}
2199 		} else {
2200 			break;
2201 		}
2202 	}
2203 
2204 	/*
2205 	 * Sort the resulting bookmarks.  This is a little confusing due to the
2206 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
2207 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
2208 	 * _not_ copied as part of the process.  So we point the start of our
2209 	 * array appropriate and decrement the total number of elements.
2210 	 */
2211 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2212 	    zc.zc_nvlist_dst_size;
2213 	count -= zc.zc_nvlist_dst_size;
2214 
2215 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2216 
2217 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2218 
2219 	/*
2220 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
2221 	 */
2222 	for (i = 0; i < count; i++) {
2223 		nvlist_t *nv;
2224 
2225 		/* ignoring zb_blkid and zb_level for now */
2226 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2227 		    zb[i-1].zb_object == zb[i].zb_object)
2228 			continue;
2229 
2230 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2231 			goto nomem;
2232 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2233 		    zb[i].zb_objset) != 0) {
2234 			nvlist_free(nv);
2235 			goto nomem;
2236 		}
2237 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2238 		    zb[i].zb_object) != 0) {
2239 			nvlist_free(nv);
2240 			goto nomem;
2241 		}
2242 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2243 			nvlist_free(nv);
2244 			goto nomem;
2245 		}
2246 		nvlist_free(nv);
2247 	}
2248 
2249 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2250 	return (0);
2251 
2252 nomem:
2253 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2254 	return (no_memory(zhp->zpool_hdl));
2255 }
2256 
2257 /*
2258  * Upgrade a ZFS pool to the latest on-disk version.
2259  */
2260 int
2261 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2262 {
2263 	zfs_cmd_t zc = { 0 };
2264 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2265 
2266 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2267 	zc.zc_cookie = new_version;
2268 
2269 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2270 		return (zpool_standard_error_fmt(hdl, errno,
2271 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2272 		    zhp->zpool_name));
2273 	return (0);
2274 }
2275 
2276 void
2277 zpool_set_history_str(const char *subcommand, int argc, char **argv,
2278     char *history_str)
2279 {
2280 	int i;
2281 
2282 	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2283 	for (i = 1; i < argc; i++) {
2284 		if (strlen(history_str) + 1 + strlen(argv[i]) >
2285 		    HIS_MAX_RECORD_LEN)
2286 			break;
2287 		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2288 		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2289 	}
2290 }
2291 
2292 /*
2293  * Stage command history for logging.
2294  */
2295 int
2296 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2297 {
2298 	if (history_str == NULL)
2299 		return (EINVAL);
2300 
2301 	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2302 		return (EINVAL);
2303 
2304 	if (hdl->libzfs_log_str != NULL)
2305 		free(hdl->libzfs_log_str);
2306 
2307 	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2308 		return (no_memory(hdl));
2309 
2310 	return (0);
2311 }
2312 
2313 /*
2314  * Perform ioctl to get some command history of a pool.
2315  *
2316  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2317  * logical offset of the history buffer to start reading from.
2318  *
2319  * Upon return, 'off' is the next logical offset to read from and
2320  * 'len' is the actual amount of bytes read into 'buf'.
2321  */
2322 static int
2323 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2324 {
2325 	zfs_cmd_t zc = { 0 };
2326 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2327 
2328 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2329 
2330 	zc.zc_history = (uint64_t)(uintptr_t)buf;
2331 	zc.zc_history_len = *len;
2332 	zc.zc_history_offset = *off;
2333 
2334 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2335 		switch (errno) {
2336 		case EPERM:
2337 			return (zfs_error_fmt(hdl, EZFS_PERM,
2338 			    dgettext(TEXT_DOMAIN,
2339 			    "cannot show history for pool '%s'"),
2340 			    zhp->zpool_name));
2341 		case ENOENT:
2342 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2343 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2344 			    "'%s'"), zhp->zpool_name));
2345 		case ENOTSUP:
2346 			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2347 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2348 			    "'%s', pool must be upgraded"), zhp->zpool_name));
2349 		default:
2350 			return (zpool_standard_error_fmt(hdl, errno,
2351 			    dgettext(TEXT_DOMAIN,
2352 			    "cannot get history for '%s'"), zhp->zpool_name));
2353 		}
2354 	}
2355 
2356 	*len = zc.zc_history_len;
2357 	*off = zc.zc_history_offset;
2358 
2359 	return (0);
2360 }
2361 
2362 /*
2363  * Process the buffer of nvlists, unpacking and storing each nvlist record
2364  * into 'records'.  'leftover' is set to the number of bytes that weren't
2365  * processed as there wasn't a complete record.
2366  */
2367 static int
2368 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2369     nvlist_t ***records, uint_t *numrecords)
2370 {
2371 	uint64_t reclen;
2372 	nvlist_t *nv;
2373 	int i;
2374 
2375 	while (bytes_read > sizeof (reclen)) {
2376 
2377 		/* get length of packed record (stored as little endian) */
2378 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2379 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2380 
2381 		if (bytes_read < sizeof (reclen) + reclen)
2382 			break;
2383 
2384 		/* unpack record */
2385 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2386 			return (ENOMEM);
2387 		bytes_read -= sizeof (reclen) + reclen;
2388 		buf += sizeof (reclen) + reclen;
2389 
2390 		/* add record to nvlist array */
2391 		(*numrecords)++;
2392 		if (ISP2(*numrecords + 1)) {
2393 			*records = realloc(*records,
2394 			    *numrecords * 2 * sizeof (nvlist_t *));
2395 		}
2396 		(*records)[*numrecords - 1] = nv;
2397 	}
2398 
2399 	*leftover = bytes_read;
2400 	return (0);
2401 }
2402 
2403 #define	HIS_BUF_LEN	(128*1024)
2404 
2405 /*
2406  * Retrieve the command history of a pool.
2407  */
2408 int
2409 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2410 {
2411 	char buf[HIS_BUF_LEN];
2412 	uint64_t off = 0;
2413 	nvlist_t **records = NULL;
2414 	uint_t numrecords = 0;
2415 	int err, i;
2416 
2417 	do {
2418 		uint64_t bytes_read = sizeof (buf);
2419 		uint64_t leftover;
2420 
2421 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2422 			break;
2423 
2424 		/* if nothing else was read in, we're at EOF, just return */
2425 		if (!bytes_read)
2426 			break;
2427 
2428 		if ((err = zpool_history_unpack(buf, bytes_read,
2429 		    &leftover, &records, &numrecords)) != 0)
2430 			break;
2431 		off -= leftover;
2432 
2433 		/* CONSTCOND */
2434 	} while (1);
2435 
2436 	if (!err) {
2437 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2438 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2439 		    records, numrecords) == 0);
2440 	}
2441 	for (i = 0; i < numrecords; i++)
2442 		nvlist_free(records[i]);
2443 	free(records);
2444 
2445 	return (err);
2446 }
2447 
2448 void
2449 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2450     char *pathname, size_t len)
2451 {
2452 	zfs_cmd_t zc = { 0 };
2453 	boolean_t mounted = B_FALSE;
2454 	char *mntpnt = NULL;
2455 	char dsname[MAXNAMELEN];
2456 
2457 	if (dsobj == 0) {
2458 		/* special case for the MOS */
2459 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
2460 		return;
2461 	}
2462 
2463 	/* get the dataset's name */
2464 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2465 	zc.zc_obj = dsobj;
2466 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
2467 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2468 		/* just write out a path of two object numbers */
2469 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
2470 		    dsobj, obj);
2471 		return;
2472 	}
2473 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2474 
2475 	/* find out if the dataset is mounted */
2476 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2477 
2478 	/* get the corrupted object's path */
2479 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2480 	zc.zc_obj = obj;
2481 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2482 	    &zc) == 0) {
2483 		if (mounted) {
2484 			(void) snprintf(pathname, len, "%s%s", mntpnt,
2485 			    zc.zc_value);
2486 		} else {
2487 			(void) snprintf(pathname, len, "%s:%s",
2488 			    dsname, zc.zc_value);
2489 		}
2490 	} else {
2491 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
2492 	}
2493 	free(mntpnt);
2494 }
2495 
2496 #define	RDISK_ROOT	"/dev/rdsk"
2497 #define	BACKUP_SLICE	"s2"
2498 /*
2499  * Don't start the slice at the default block of 34; many storage
2500  * devices will use a stripe width of 128k, so start there instead.
2501  */
2502 #define	NEW_START_BLOCK	256
2503 
2504 /*
2505  * determine where a partition starts on a disk in the current
2506  * configuration
2507  */
2508 static diskaddr_t
2509 find_start_block(nvlist_t *config)
2510 {
2511 	nvlist_t **child;
2512 	uint_t c, children;
2513 	char *path;
2514 	diskaddr_t sb = MAXOFFSET_T;
2515 	int fd;
2516 	char diskname[MAXPATHLEN];
2517 	uint64_t wholedisk;
2518 
2519 	if (nvlist_lookup_nvlist_array(config,
2520 	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2521 		if (nvlist_lookup_uint64(config,
2522 		    ZPOOL_CONFIG_WHOLE_DISK,
2523 		    &wholedisk) != 0 || !wholedisk) {
2524 			return (MAXOFFSET_T);
2525 		}
2526 		if (nvlist_lookup_string(config,
2527 		    ZPOOL_CONFIG_PATH, &path) != 0) {
2528 			return (MAXOFFSET_T);
2529 		}
2530 
2531 		(void) snprintf(diskname, sizeof (diskname), "%s%s",
2532 		    RDISK_ROOT, strrchr(path, '/'));
2533 		if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2534 			struct dk_gpt *vtoc;
2535 			if (efi_alloc_and_read(fd, &vtoc) >= 0) {
2536 				sb = vtoc->efi_parts[0].p_start;
2537 				efi_free(vtoc);
2538 			}
2539 			(void) close(fd);
2540 		}
2541 		return (sb);
2542 	}
2543 
2544 	for (c = 0; c < children; c++) {
2545 		sb = find_start_block(child[c]);
2546 		if (sb != MAXOFFSET_T) {
2547 			return (sb);
2548 		}
2549 	}
2550 	return (MAXOFFSET_T);
2551 }
2552 
2553 /*
2554  * Label an individual disk.  The name provided is the short name,
2555  * stripped of any leading /dev path.
2556  */
2557 int
2558 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2559 {
2560 	char path[MAXPATHLEN];
2561 	struct dk_gpt *vtoc;
2562 	int fd;
2563 	size_t resv = EFI_MIN_RESV_SIZE;
2564 	uint64_t slice_size;
2565 	diskaddr_t start_block;
2566 	char errbuf[1024];
2567 
2568 	if (zhp) {
2569 		nvlist_t *nvroot;
2570 
2571 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2572 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2573 
2574 		if (zhp->zpool_start_block == 0)
2575 			start_block = find_start_block(nvroot);
2576 		else
2577 			start_block = zhp->zpool_start_block;
2578 		zhp->zpool_start_block = start_block;
2579 	} else {
2580 		/* new pool */
2581 		start_block = NEW_START_BLOCK;
2582 	}
2583 
2584 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2585 	    BACKUP_SLICE);
2586 
2587 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2588 		/*
2589 		 * This shouldn't happen.  We've long since verified that this
2590 		 * is a valid device.
2591 		 */
2592 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2593 		    "label '%s': unable to open device"), name);
2594 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2595 	}
2596 
2597 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2598 		/*
2599 		 * The only way this can fail is if we run out of memory, or we
2600 		 * were unable to read the disk's capacity
2601 		 */
2602 		if (errno == ENOMEM)
2603 			(void) no_memory(hdl);
2604 
2605 		(void) close(fd);
2606 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "cannot "
2607 		    "label '%s': unable to read disk capacity"), name);
2608 
2609 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2610 	}
2611 
2612 	slice_size = vtoc->efi_last_u_lba + 1;
2613 	slice_size -= EFI_MIN_RESV_SIZE;
2614 	if (start_block == MAXOFFSET_T)
2615 		start_block = NEW_START_BLOCK;
2616 	slice_size -= start_block;
2617 
2618 	vtoc->efi_parts[0].p_start = start_block;
2619 	vtoc->efi_parts[0].p_size = slice_size;
2620 
2621 	/*
2622 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
2623 	 * disposable by some EFI utilities (since EFI doesn't have a backup
2624 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
2625 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2626 	 * etc. were all pretty specific.  V_USR is as close to reality as we
2627 	 * can get, in the absence of V_OTHER.
2628 	 */
2629 	vtoc->efi_parts[0].p_tag = V_USR;
2630 	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2631 
2632 	vtoc->efi_parts[8].p_start = slice_size + start_block;
2633 	vtoc->efi_parts[8].p_size = resv;
2634 	vtoc->efi_parts[8].p_tag = V_RESERVED;
2635 
2636 	if (efi_write(fd, vtoc) != 0) {
2637 		/*
2638 		 * Some block drivers (like pcata) may not support EFI
2639 		 * GPT labels.  Print out a helpful error message dir-
2640 		 * ecting the user to manually label the disk and give
2641 		 * a specific slice.
2642 		 */
2643 		(void) close(fd);
2644 		efi_free(vtoc);
2645 
2646 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2647 		    "cannot label '%s': try using fdisk(1M) and then "
2648 		    "provide a specific slice"), name);
2649 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
2650 	}
2651 
2652 	(void) close(fd);
2653 	efi_free(vtoc);
2654 	return (0);
2655 }
2656