xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_pool.c (revision 1d9df23bbf4124f12ca7832d6eaaa600e0aa8eda)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <alloca.h>
30 #include <assert.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <dirent.h>
35 #include <fcntl.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <sys/efi_partition.h>
42 #include <sys/vtoc.h>
43 #include <sys/zfs_ioctl.h>
44 #include <sys/zio.h>
45 #include <strings.h>
46 
47 #include "zfs_namecheck.h"
48 #include "zfs_prop.h"
49 #include "libzfs_impl.h"
50 
51 
52 /*
53  * ====================================================================
54  *   zpool property functions
55  * ====================================================================
56  */
57 
58 static int
59 zpool_get_all_props(zpool_handle_t *zhp)
60 {
61 	zfs_cmd_t zc = { 0 };
62 	libzfs_handle_t *hdl = zhp->zpool_hdl;
63 
64 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
65 
66 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
67 		return (-1);
68 
69 	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
70 		if (errno == ENOMEM) {
71 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
72 				zcmd_free_nvlists(&zc);
73 				return (-1);
74 			}
75 		} else {
76 			zcmd_free_nvlists(&zc);
77 			return (-1);
78 		}
79 	}
80 
81 	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
82 		zcmd_free_nvlists(&zc);
83 		return (-1);
84 	}
85 
86 	zcmd_free_nvlists(&zc);
87 
88 	return (0);
89 }
90 
91 static int
92 zpool_props_refresh(zpool_handle_t *zhp)
93 {
94 	nvlist_t *old_props;
95 
96 	old_props = zhp->zpool_props;
97 
98 	if (zpool_get_all_props(zhp) != 0)
99 		return (-1);
100 
101 	nvlist_free(old_props);
102 	return (0);
103 }
104 
105 static char *
106 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
107     zprop_source_t *src)
108 {
109 	nvlist_t *nv, *nvl;
110 	uint64_t ival;
111 	char *value;
112 	zprop_source_t source;
113 
114 	nvl = zhp->zpool_props;
115 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
116 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
117 		source = ival;
118 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
119 	} else {
120 		source = ZPROP_SRC_DEFAULT;
121 		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
122 			value = "-";
123 	}
124 
125 	if (src)
126 		*src = source;
127 
128 	return (value);
129 }
130 
131 uint64_t
132 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
133 {
134 	nvlist_t *nv, *nvl;
135 	uint64_t value;
136 	zprop_source_t source;
137 
138 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
139 		return (zpool_prop_default_numeric(prop));
140 
141 	nvl = zhp->zpool_props;
142 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
143 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
144 		source = value;
145 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
146 	} else {
147 		source = ZPROP_SRC_DEFAULT;
148 		value = zpool_prop_default_numeric(prop);
149 	}
150 
151 	if (src)
152 		*src = source;
153 
154 	return (value);
155 }
156 
157 /*
158  * Map VDEV STATE to printed strings.
159  */
160 char *
161 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
162 {
163 	switch (state) {
164 	case VDEV_STATE_CLOSED:
165 	case VDEV_STATE_OFFLINE:
166 		return (gettext("OFFLINE"));
167 	case VDEV_STATE_REMOVED:
168 		return (gettext("REMOVED"));
169 	case VDEV_STATE_CANT_OPEN:
170 		if (aux == VDEV_AUX_CORRUPT_DATA)
171 			return (gettext("FAULTED"));
172 		else
173 			return (gettext("UNAVAIL"));
174 	case VDEV_STATE_FAULTED:
175 		return (gettext("FAULTED"));
176 	case VDEV_STATE_DEGRADED:
177 		return (gettext("DEGRADED"));
178 	case VDEV_STATE_HEALTHY:
179 		return (gettext("ONLINE"));
180 	}
181 
182 	return (gettext("UNKNOWN"));
183 }
184 
185 /*
186  * Get a zpool property value for 'prop' and return the value in
187  * a pre-allocated buffer.
188  */
189 int
190 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
191     zprop_source_t *srctype)
192 {
193 	uint64_t intval;
194 	const char *strval;
195 	zprop_source_t src = ZPROP_SRC_NONE;
196 	nvlist_t *nvroot;
197 	vdev_stat_t *vs;
198 	uint_t vsc;
199 
200 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
201 		if (prop == ZPOOL_PROP_NAME)
202 			(void) strlcpy(buf, zpool_get_name(zhp), len);
203 		else if (prop == ZPOOL_PROP_HEALTH)
204 			(void) strlcpy(buf, "FAULTED", len);
205 		else
206 			(void) strlcpy(buf, "-", len);
207 		return (0);
208 	}
209 
210 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
211 	    prop != ZPOOL_PROP_NAME)
212 		return (-1);
213 
214 	switch (zpool_prop_get_type(prop)) {
215 	case PROP_TYPE_STRING:
216 		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
217 		    len);
218 		break;
219 
220 	case PROP_TYPE_NUMBER:
221 		intval = zpool_get_prop_int(zhp, prop, &src);
222 
223 		switch (prop) {
224 		case ZPOOL_PROP_SIZE:
225 		case ZPOOL_PROP_USED:
226 		case ZPOOL_PROP_AVAILABLE:
227 			(void) zfs_nicenum(intval, buf, len);
228 			break;
229 
230 		case ZPOOL_PROP_CAPACITY:
231 			(void) snprintf(buf, len, "%llu%%",
232 			    (u_longlong_t)intval);
233 			break;
234 
235 		case ZPOOL_PROP_HEALTH:
236 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
237 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
238 			verify(nvlist_lookup_uint64_array(nvroot,
239 			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
240 
241 			(void) strlcpy(buf, zpool_state_to_name(intval,
242 			    vs->vs_aux), len);
243 			break;
244 		default:
245 			(void) snprintf(buf, len, "%llu", intval);
246 		}
247 		break;
248 
249 	case PROP_TYPE_INDEX:
250 		intval = zpool_get_prop_int(zhp, prop, &src);
251 		if (zpool_prop_index_to_string(prop, intval, &strval)
252 		    != 0)
253 			return (-1);
254 		(void) strlcpy(buf, strval, len);
255 		break;
256 
257 	default:
258 		abort();
259 	}
260 
261 	if (srctype)
262 		*srctype = src;
263 
264 	return (0);
265 }
266 
267 /*
268  * Check if the bootfs name has the same pool name as it is set to.
269  * Assuming bootfs is a valid dataset name.
270  */
271 static boolean_t
272 bootfs_name_valid(const char *pool, char *bootfs)
273 {
274 	int len = strlen(pool);
275 
276 	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM))
277 		return (B_FALSE);
278 
279 	if (strncmp(pool, bootfs, len) == 0 &&
280 	    (bootfs[len] == '/' || bootfs[len] == '\0'))
281 		return (B_TRUE);
282 
283 	return (B_FALSE);
284 }
285 
286 /*
287  * Given an nvlist of zpool properties to be set, validate that they are
288  * correct, and parse any numeric properties (index, boolean, etc) if they are
289  * specified as strings.
290  */
291 static nvlist_t *
292 zpool_validate_properties(libzfs_handle_t *hdl, const char *poolname,
293     nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
294 {
295 	nvpair_t *elem;
296 	nvlist_t *retprops;
297 	zpool_prop_t prop;
298 	char *strval;
299 	uint64_t intval;
300 	char *slash;
301 	struct stat64 statbuf;
302 
303 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
304 		(void) no_memory(hdl);
305 		return (NULL);
306 	}
307 
308 	elem = NULL;
309 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
310 		const char *propname = nvpair_name(elem);
311 
312 		/*
313 		 * Make sure this property is valid and applies to this type.
314 		 */
315 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
316 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
317 			    "invalid property '%s'"), propname);
318 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
319 			goto error;
320 		}
321 
322 		if (zpool_prop_readonly(prop)) {
323 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
324 			    "is readonly"), propname);
325 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
326 			goto error;
327 		}
328 
329 		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
330 		    &strval, &intval, errbuf) != 0)
331 			goto error;
332 
333 		/*
334 		 * Perform additional checking for specific properties.
335 		 */
336 		switch (prop) {
337 		case ZPOOL_PROP_VERSION:
338 			if (intval < version || intval > SPA_VERSION) {
339 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
340 				    "property '%s' number %d is invalid."),
341 				    propname, intval);
342 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
343 				goto error;
344 			}
345 			break;
346 
347 		case ZPOOL_PROP_BOOTFS:
348 			if (create_or_import) {
349 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
350 				    "property '%s' cannot be set at creation "
351 				    "or import time"), propname);
352 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
353 				goto error;
354 			}
355 
356 			if (version < SPA_VERSION_BOOTFS) {
357 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
358 				    "pool must be upgraded to support "
359 				    "'%s' property"), propname);
360 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
361 				goto error;
362 			}
363 
364 			/*
365 			 * bootfs property value has to be a dataset name and
366 			 * the dataset has to be in the same pool as it sets to.
367 			 */
368 			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
369 			    strval)) {
370 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
371 				    "is an invalid name"), strval);
372 				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
373 				goto error;
374 			}
375 			break;
376 
377 		case ZPOOL_PROP_ALTROOT:
378 			if (!create_or_import) {
379 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
380 				    "property '%s' can only be set during pool "
381 				    "creation or import"), propname);
382 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
383 				goto error;
384 			}
385 
386 			if (strval[0] != '/') {
387 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
388 				    "bad alternate root '%s'"), strval);
389 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
390 				goto error;
391 			}
392 			break;
393 
394 		case ZPOOL_PROP_CACHEFILE:
395 			if (strval[0] == '\0')
396 				break;
397 
398 			if (strcmp(strval, "none") == 0)
399 				break;
400 
401 			if (strval[0] != '/') {
402 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
403 				    "property '%s' must be empty, an "
404 				    "absolute path, or 'none'"), propname);
405 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
406 				goto error;
407 			}
408 
409 			slash = strrchr(strval, '/');
410 
411 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
412 			    strcmp(slash, "/..") == 0) {
413 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
414 				    "'%s' is not a valid file"), strval);
415 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
416 				goto error;
417 			}
418 
419 			*slash = '\0';
420 
421 			if (strval[0] != '\0' &&
422 			    (stat64(strval, &statbuf) != 0 ||
423 			    !S_ISDIR(statbuf.st_mode))) {
424 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
425 				    "'%s' is not a valid directory"),
426 				    strval);
427 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
428 				goto error;
429 			}
430 
431 			*slash = '/';
432 			break;
433 		}
434 	}
435 
436 	return (retprops);
437 error:
438 	nvlist_free(retprops);
439 	return (NULL);
440 }
441 
442 /*
443  * Set zpool property : propname=propval.
444  */
445 int
446 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
447 {
448 	zfs_cmd_t zc = { 0 };
449 	int ret = -1;
450 	char errbuf[1024];
451 	nvlist_t *nvl = NULL;
452 	nvlist_t *realprops;
453 	uint64_t version;
454 
455 	(void) snprintf(errbuf, sizeof (errbuf),
456 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
457 	    zhp->zpool_name);
458 
459 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
460 		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
461 
462 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
463 		return (no_memory(zhp->zpool_hdl));
464 
465 	if (nvlist_add_string(nvl, propname, propval) != 0) {
466 		nvlist_free(nvl);
467 		return (no_memory(zhp->zpool_hdl));
468 	}
469 
470 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
471 	if ((realprops = zpool_validate_properties(zhp->zpool_hdl,
472 	    zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
473 		nvlist_free(nvl);
474 		return (-1);
475 	}
476 
477 	nvlist_free(nvl);
478 	nvl = realprops;
479 
480 	/*
481 	 * Execute the corresponding ioctl() to set this property.
482 	 */
483 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
484 
485 	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
486 		nvlist_free(nvl);
487 		return (-1);
488 	}
489 
490 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
491 
492 	zcmd_free_nvlists(&zc);
493 	nvlist_free(nvl);
494 
495 	if (ret)
496 		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
497 	else
498 		(void) zpool_props_refresh(zhp);
499 
500 	return (ret);
501 }
502 
503 int
504 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
505 {
506 	libzfs_handle_t *hdl = zhp->zpool_hdl;
507 	zprop_list_t *entry;
508 	char buf[ZFS_MAXPROPLEN];
509 
510 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
511 		return (-1);
512 
513 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
514 
515 		if (entry->pl_fixed)
516 			continue;
517 
518 		if (entry->pl_prop != ZPROP_INVAL &&
519 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
520 		    NULL) == 0) {
521 			if (strlen(buf) > entry->pl_width)
522 				entry->pl_width = strlen(buf);
523 		}
524 	}
525 
526 	return (0);
527 }
528 
529 
530 /*
531  * Validate the given pool name, optionally putting an extended error message in
532  * 'buf'.
533  */
534 static boolean_t
535 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
536 {
537 	namecheck_err_t why;
538 	char what;
539 	int ret;
540 
541 	ret = pool_namecheck(pool, &why, &what);
542 
543 	/*
544 	 * The rules for reserved pool names were extended at a later point.
545 	 * But we need to support users with existing pools that may now be
546 	 * invalid.  So we only check for this expanded set of names during a
547 	 * create (or import), and only in userland.
548 	 */
549 	if (ret == 0 && !isopen &&
550 	    (strncmp(pool, "mirror", 6) == 0 ||
551 	    strncmp(pool, "raidz", 5) == 0 ||
552 	    strncmp(pool, "spare", 5) == 0 ||
553 	    strcmp(pool, "log") == 0)) {
554 		zfs_error_aux(hdl,
555 		    dgettext(TEXT_DOMAIN, "name is reserved"));
556 		return (B_FALSE);
557 	}
558 
559 
560 	if (ret != 0) {
561 		if (hdl != NULL) {
562 			switch (why) {
563 			case NAME_ERR_TOOLONG:
564 				zfs_error_aux(hdl,
565 				    dgettext(TEXT_DOMAIN, "name is too long"));
566 				break;
567 
568 			case NAME_ERR_INVALCHAR:
569 				zfs_error_aux(hdl,
570 				    dgettext(TEXT_DOMAIN, "invalid character "
571 				    "'%c' in pool name"), what);
572 				break;
573 
574 			case NAME_ERR_NOLETTER:
575 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
576 				    "name must begin with a letter"));
577 				break;
578 
579 			case NAME_ERR_RESERVED:
580 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
581 				    "name is reserved"));
582 				break;
583 
584 			case NAME_ERR_DISKLIKE:
585 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
586 				    "pool name is reserved"));
587 				break;
588 
589 			case NAME_ERR_LEADING_SLASH:
590 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
591 				    "leading slash in name"));
592 				break;
593 
594 			case NAME_ERR_EMPTY_COMPONENT:
595 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
596 				    "empty component in name"));
597 				break;
598 
599 			case NAME_ERR_TRAILING_SLASH:
600 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
601 				    "trailing slash in name"));
602 				break;
603 
604 			case NAME_ERR_MULTIPLE_AT:
605 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
606 				    "multiple '@' delimiters in name"));
607 				break;
608 
609 			}
610 		}
611 		return (B_FALSE);
612 	}
613 
614 	return (B_TRUE);
615 }
616 
617 /*
618  * Open a handle to the given pool, even if the pool is currently in the FAULTED
619  * state.
620  */
621 zpool_handle_t *
622 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
623 {
624 	zpool_handle_t *zhp;
625 	boolean_t missing;
626 
627 	/*
628 	 * Make sure the pool name is valid.
629 	 */
630 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
631 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
632 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
633 		    pool);
634 		return (NULL);
635 	}
636 
637 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
638 		return (NULL);
639 
640 	zhp->zpool_hdl = hdl;
641 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
642 
643 	if (zpool_refresh_stats(zhp, &missing) != 0) {
644 		zpool_close(zhp);
645 		return (NULL);
646 	}
647 
648 	if (missing) {
649 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
650 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
651 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
652 		zpool_close(zhp);
653 		return (NULL);
654 	}
655 
656 	return (zhp);
657 }
658 
659 /*
660  * Like the above, but silent on error.  Used when iterating over pools (because
661  * the configuration cache may be out of date).
662  */
663 int
664 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
665 {
666 	zpool_handle_t *zhp;
667 	boolean_t missing;
668 
669 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
670 		return (-1);
671 
672 	zhp->zpool_hdl = hdl;
673 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
674 
675 	if (zpool_refresh_stats(zhp, &missing) != 0) {
676 		zpool_close(zhp);
677 		return (-1);
678 	}
679 
680 	if (missing) {
681 		zpool_close(zhp);
682 		*ret = NULL;
683 		return (0);
684 	}
685 
686 	*ret = zhp;
687 	return (0);
688 }
689 
690 /*
691  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
692  * state.
693  */
694 zpool_handle_t *
695 zpool_open(libzfs_handle_t *hdl, const char *pool)
696 {
697 	zpool_handle_t *zhp;
698 
699 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
700 		return (NULL);
701 
702 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
703 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
704 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
705 		zpool_close(zhp);
706 		return (NULL);
707 	}
708 
709 	return (zhp);
710 }
711 
712 /*
713  * Close the handle.  Simply frees the memory associated with the handle.
714  */
715 void
716 zpool_close(zpool_handle_t *zhp)
717 {
718 	if (zhp->zpool_config)
719 		nvlist_free(zhp->zpool_config);
720 	if (zhp->zpool_old_config)
721 		nvlist_free(zhp->zpool_old_config);
722 	if (zhp->zpool_props)
723 		nvlist_free(zhp->zpool_props);
724 	free(zhp);
725 }
726 
727 /*
728  * Return the name of the pool.
729  */
730 const char *
731 zpool_get_name(zpool_handle_t *zhp)
732 {
733 	return (zhp->zpool_name);
734 }
735 
736 
737 /*
738  * Return the state of the pool (ACTIVE or UNAVAILABLE)
739  */
740 int
741 zpool_get_state(zpool_handle_t *zhp)
742 {
743 	return (zhp->zpool_state);
744 }
745 
746 /*
747  * Create the named pool, using the provided vdev list.  It is assumed
748  * that the consumer has already validated the contents of the nvlist, so we
749  * don't have to worry about error semantics.
750  */
751 int
752 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
753     nvlist_t *props)
754 {
755 	zfs_cmd_t zc = { 0 };
756 	char msg[1024];
757 	char *altroot;
758 
759 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
760 	    "cannot create '%s'"), pool);
761 
762 	if (!zpool_name_valid(hdl, B_FALSE, pool))
763 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
764 
765 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
766 		return (-1);
767 
768 	if (props && (props = zpool_validate_properties(hdl, pool, props,
769 	    SPA_VERSION_1, B_TRUE, msg)) == NULL)
770 		return (-1);
771 
772 	if (props && zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
773 		nvlist_free(props);
774 		return (-1);
775 	}
776 
777 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
778 
779 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc) != 0) {
780 
781 		zcmd_free_nvlists(&zc);
782 		nvlist_free(props);
783 
784 		switch (errno) {
785 		case EBUSY:
786 			/*
787 			 * This can happen if the user has specified the same
788 			 * device multiple times.  We can't reliably detect this
789 			 * until we try to add it and see we already have a
790 			 * label.
791 			 */
792 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
793 			    "one or more vdevs refer to the same device"));
794 			return (zfs_error(hdl, EZFS_BADDEV, msg));
795 
796 		case EOVERFLOW:
797 			/*
798 			 * This occurs when one of the devices is below
799 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
800 			 * device was the problem device since there's no
801 			 * reliable way to determine device size from userland.
802 			 */
803 			{
804 				char buf[64];
805 
806 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
807 
808 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
809 				    "one or more devices is less than the "
810 				    "minimum size (%s)"), buf);
811 			}
812 			return (zfs_error(hdl, EZFS_BADDEV, msg));
813 
814 		case ENOSPC:
815 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
816 			    "one or more devices is out of space"));
817 			return (zfs_error(hdl, EZFS_BADDEV, msg));
818 
819 		case ENOTBLK:
820 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
821 			    "cache device must be a disk or disk slice"));
822 			return (zfs_error(hdl, EZFS_BADDEV, msg));
823 
824 		default:
825 			return (zpool_standard_error(hdl, errno, msg));
826 		}
827 	}
828 
829 	/*
830 	 * If this is an alternate root pool, then we automatically set the
831 	 * mountpoint of the root dataset to be '/'.
832 	 */
833 	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
834 	    &altroot) == 0) {
835 		zfs_handle_t *zhp;
836 
837 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
838 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
839 		    "/") == 0);
840 
841 		zfs_close(zhp);
842 	}
843 
844 	zcmd_free_nvlists(&zc);
845 	nvlist_free(props);
846 	return (0);
847 }
848 
849 /*
850  * Destroy the given pool.  It is up to the caller to ensure that there are no
851  * datasets left in the pool.
852  */
853 int
854 zpool_destroy(zpool_handle_t *zhp)
855 {
856 	zfs_cmd_t zc = { 0 };
857 	zfs_handle_t *zfp = NULL;
858 	libzfs_handle_t *hdl = zhp->zpool_hdl;
859 	char msg[1024];
860 
861 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
862 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
863 	    ZFS_TYPE_FILESYSTEM)) == NULL)
864 		return (-1);
865 
866 	if (zpool_remove_zvol_links(zhp) != 0)
867 		return (-1);
868 
869 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
870 
871 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
872 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
873 		    "cannot destroy '%s'"), zhp->zpool_name);
874 
875 		if (errno == EROFS) {
876 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
877 			    "one or more devices is read only"));
878 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
879 		} else {
880 			(void) zpool_standard_error(hdl, errno, msg);
881 		}
882 
883 		if (zfp)
884 			zfs_close(zfp);
885 		return (-1);
886 	}
887 
888 	if (zfp) {
889 		remove_mountpoint(zfp);
890 		zfs_close(zfp);
891 	}
892 
893 	return (0);
894 }
895 
896 /*
897  * Add the given vdevs to the pool.  The caller must have already performed the
898  * necessary verification to ensure that the vdev specification is well-formed.
899  */
900 int
901 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
902 {
903 	zfs_cmd_t zc = { 0 };
904 	int ret;
905 	libzfs_handle_t *hdl = zhp->zpool_hdl;
906 	char msg[1024];
907 	nvlist_t **spares, **l2cache;
908 	uint_t nspares, nl2cache;
909 
910 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
911 	    "cannot add to '%s'"), zhp->zpool_name);
912 
913 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
914 	    SPA_VERSION_SPARES &&
915 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
916 	    &spares, &nspares) == 0) {
917 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
918 		    "upgraded to add hot spares"));
919 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
920 	}
921 
922 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
923 	    SPA_VERSION_L2CACHE &&
924 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
925 	    &l2cache, &nl2cache) == 0) {
926 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
927 		    "upgraded to add cache devices"));
928 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
929 	}
930 
931 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
932 		return (-1);
933 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
934 
935 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
936 		switch (errno) {
937 		case EBUSY:
938 			/*
939 			 * This can happen if the user has specified the same
940 			 * device multiple times.  We can't reliably detect this
941 			 * until we try to add it and see we already have a
942 			 * label.
943 			 */
944 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
945 			    "one or more vdevs refer to the same device"));
946 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
947 			break;
948 
949 		case EOVERFLOW:
950 			/*
951 			 * This occurrs when one of the devices is below
952 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
953 			 * device was the problem device since there's no
954 			 * reliable way to determine device size from userland.
955 			 */
956 			{
957 				char buf[64];
958 
959 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
960 
961 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
962 				    "device is less than the minimum "
963 				    "size (%s)"), buf);
964 			}
965 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
966 			break;
967 
968 		case ENOTSUP:
969 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
970 			    "pool must be upgraded to add these vdevs"));
971 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
972 			break;
973 
974 		case EDOM:
975 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
976 			    "root pool can not have multiple vdevs"
977 			    " or separate logs"));
978 			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
979 			break;
980 
981 		case ENOTBLK:
982 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
983 			    "cache device must be a disk or disk slice"));
984 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
985 			break;
986 
987 		default:
988 			(void) zpool_standard_error(hdl, errno, msg);
989 		}
990 
991 		ret = -1;
992 	} else {
993 		ret = 0;
994 	}
995 
996 	zcmd_free_nvlists(&zc);
997 
998 	return (ret);
999 }
1000 
1001 /*
1002  * Exports the pool from the system.  The caller must ensure that there are no
1003  * mounted datasets in the pool.
1004  */
1005 int
1006 zpool_export(zpool_handle_t *zhp)
1007 {
1008 	zfs_cmd_t zc = { 0 };
1009 
1010 	if (zpool_remove_zvol_links(zhp) != 0)
1011 		return (-1);
1012 
1013 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1014 
1015 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0)
1016 		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1017 		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
1018 		    zhp->zpool_name));
1019 	return (0);
1020 }
1021 
1022 /*
1023  * zpool_import() is a contracted interface. Should be kept the same
1024  * if possible.
1025  *
1026  * Applications should use zpool_import_props() to import a pool with
1027  * new properties value to be set.
1028  */
1029 int
1030 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1031     char *altroot)
1032 {
1033 	nvlist_t *props = NULL;
1034 	int ret;
1035 
1036 	if (altroot != NULL) {
1037 		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1038 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1039 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1040 			    newname));
1041 		}
1042 
1043 		if (nvlist_add_string(props,
1044 		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
1045 			nvlist_free(props);
1046 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1047 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1048 			    newname));
1049 		}
1050 	}
1051 
1052 	ret = zpool_import_props(hdl, config, newname, props);
1053 	if (props)
1054 		nvlist_free(props);
1055 	return (ret);
1056 }
1057 
1058 /*
1059  * Import the given pool using the known configuration and a list of
1060  * properties to be set. The configuration should have come from
1061  * zpool_find_import(). The 'newname' parameters control whether the pool
1062  * is imported with a different name.
1063  */
1064 int
1065 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1066     nvlist_t *props)
1067 {
1068 	zfs_cmd_t zc = { 0 };
1069 	char *thename;
1070 	char *origname;
1071 	int ret;
1072 	char errbuf[1024];
1073 
1074 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1075 	    &origname) == 0);
1076 
1077 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1078 	    "cannot import pool '%s'"), origname);
1079 
1080 	if (newname != NULL) {
1081 		if (!zpool_name_valid(hdl, B_FALSE, newname))
1082 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1083 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1084 			    newname));
1085 		thename = (char *)newname;
1086 	} else {
1087 		thename = origname;
1088 	}
1089 
1090 	if (props) {
1091 		uint64_t version;
1092 
1093 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1094 		    &version) == 0);
1095 
1096 		if ((props = zpool_validate_properties(hdl, origname,
1097 		    props, version, B_TRUE, errbuf)) == NULL) {
1098 			return (-1);
1099 		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1100 			nvlist_free(props);
1101 			return (-1);
1102 		}
1103 	}
1104 
1105 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1106 
1107 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1108 	    &zc.zc_guid) == 0);
1109 
1110 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1111 		nvlist_free(props);
1112 		return (-1);
1113 	}
1114 
1115 	ret = 0;
1116 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
1117 		char desc[1024];
1118 		if (newname == NULL)
1119 			(void) snprintf(desc, sizeof (desc),
1120 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1121 			    thename);
1122 		else
1123 			(void) snprintf(desc, sizeof (desc),
1124 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1125 			    origname, thename);
1126 
1127 		switch (errno) {
1128 		case ENOTSUP:
1129 			/*
1130 			 * Unsupported version.
1131 			 */
1132 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1133 			break;
1134 
1135 		case EINVAL:
1136 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1137 			break;
1138 
1139 		default:
1140 			(void) zpool_standard_error(hdl, errno, desc);
1141 		}
1142 
1143 		ret = -1;
1144 	} else {
1145 		zpool_handle_t *zhp;
1146 
1147 		/*
1148 		 * This should never fail, but play it safe anyway.
1149 		 */
1150 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
1151 			ret = -1;
1152 		} else if (zhp != NULL) {
1153 			ret = zpool_create_zvol_links(zhp);
1154 			zpool_close(zhp);
1155 		}
1156 
1157 	}
1158 
1159 	zcmd_free_nvlists(&zc);
1160 	nvlist_free(props);
1161 
1162 	return (ret);
1163 }
1164 
1165 /*
1166  * Scrub the pool.
1167  */
1168 int
1169 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
1170 {
1171 	zfs_cmd_t zc = { 0 };
1172 	char msg[1024];
1173 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1174 
1175 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1176 	zc.zc_cookie = type;
1177 
1178 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
1179 		return (0);
1180 
1181 	(void) snprintf(msg, sizeof (msg),
1182 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1183 
1184 	if (errno == EBUSY)
1185 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
1186 	else
1187 		return (zpool_standard_error(hdl, errno, msg));
1188 }
1189 
1190 /*
1191  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1192  * spare; but FALSE if its an INUSE spare.
1193  */
1194 static nvlist_t *
1195 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
1196     boolean_t *avail_spare, boolean_t *l2cache)
1197 {
1198 	uint_t c, children;
1199 	nvlist_t **child;
1200 	uint64_t theguid, present;
1201 	char *path;
1202 	uint64_t wholedisk = 0;
1203 	nvlist_t *ret;
1204 
1205 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
1206 
1207 	if (search == NULL &&
1208 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
1209 		/*
1210 		 * If the device has never been present since import, the only
1211 		 * reliable way to match the vdev is by GUID.
1212 		 */
1213 		if (theguid == guid)
1214 			return (nv);
1215 	} else if (search != NULL &&
1216 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1217 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1218 		    &wholedisk);
1219 		if (wholedisk) {
1220 			/*
1221 			 * For whole disks, the internal path has 's0', but the
1222 			 * path passed in by the user doesn't.
1223 			 */
1224 			if (strlen(search) == strlen(path) - 2 &&
1225 			    strncmp(search, path, strlen(search)) == 0)
1226 				return (nv);
1227 		} else if (strcmp(search, path) == 0) {
1228 			return (nv);
1229 		}
1230 	}
1231 
1232 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1233 	    &child, &children) != 0)
1234 		return (NULL);
1235 
1236 	for (c = 0; c < children; c++)
1237 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1238 		    avail_spare, l2cache)) != NULL)
1239 			return (ret);
1240 
1241 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1242 	    &child, &children) == 0) {
1243 		for (c = 0; c < children; c++) {
1244 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1245 			    avail_spare, l2cache)) != NULL) {
1246 				*avail_spare = B_TRUE;
1247 				return (ret);
1248 			}
1249 		}
1250 	}
1251 
1252 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1253 	    &child, &children) == 0) {
1254 		for (c = 0; c < children; c++) {
1255 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1256 			    avail_spare, l2cache)) != NULL) {
1257 				*l2cache = B_TRUE;
1258 				return (ret);
1259 			}
1260 		}
1261 	}
1262 
1263 	return (NULL);
1264 }
1265 
1266 nvlist_t *
1267 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1268     boolean_t *l2cache)
1269 {
1270 	char buf[MAXPATHLEN];
1271 	const char *search;
1272 	char *end;
1273 	nvlist_t *nvroot;
1274 	uint64_t guid;
1275 
1276 	guid = strtoull(path, &end, 10);
1277 	if (guid != 0 && *end == '\0') {
1278 		search = NULL;
1279 	} else if (path[0] != '/') {
1280 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
1281 		search = buf;
1282 	} else {
1283 		search = path;
1284 	}
1285 
1286 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1287 	    &nvroot) == 0);
1288 
1289 	*avail_spare = B_FALSE;
1290 	*l2cache = B_FALSE;
1291 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
1292 	    l2cache));
1293 }
1294 
1295 /*
1296  * Returns TRUE if the given guid corresponds to the given type.
1297  * This is used to check for hot spares (INUSE or not), and level 2 cache
1298  * devices.
1299  */
1300 static boolean_t
1301 is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1302 {
1303 	uint64_t target_guid;
1304 	nvlist_t *nvroot;
1305 	nvlist_t **list;
1306 	uint_t count;
1307 	int i;
1308 
1309 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1310 	    &nvroot) == 0);
1311 	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1312 		for (i = 0; i < count; i++) {
1313 			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1314 			    &target_guid) == 0);
1315 			if (guid == target_guid)
1316 				return (B_TRUE);
1317 		}
1318 	}
1319 
1320 	return (B_FALSE);
1321 }
1322 
1323 /*
1324  * Bring the specified vdev online.   The 'flags' parameter is a set of the
1325  * ZFS_ONLINE_* flags.
1326  */
1327 int
1328 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1329     vdev_state_t *newstate)
1330 {
1331 	zfs_cmd_t zc = { 0 };
1332 	char msg[1024];
1333 	nvlist_t *tgt;
1334 	boolean_t avail_spare, l2cache;
1335 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1336 
1337 	(void) snprintf(msg, sizeof (msg),
1338 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1339 
1340 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1341 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
1342 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1343 
1344 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1345 
1346 	if (avail_spare ||
1347 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1348 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1349 
1350 	if (l2cache ||
1351 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_L2CACHE) == B_TRUE)
1352 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1353 
1354 	zc.zc_cookie = VDEV_STATE_ONLINE;
1355 	zc.zc_obj = flags;
1356 
1357 
1358 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1359 		return (zpool_standard_error(hdl, errno, msg));
1360 
1361 	*newstate = zc.zc_cookie;
1362 	return (0);
1363 }
1364 
1365 /*
1366  * Take the specified vdev offline
1367  */
1368 int
1369 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1370 {
1371 	zfs_cmd_t zc = { 0 };
1372 	char msg[1024];
1373 	nvlist_t *tgt;
1374 	boolean_t avail_spare, l2cache;
1375 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1376 
1377 	(void) snprintf(msg, sizeof (msg),
1378 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1379 
1380 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1381 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == NULL)
1382 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1383 
1384 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1385 
1386 	if (avail_spare ||
1387 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1388 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1389 
1390 	if (l2cache ||
1391 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_L2CACHE) == B_TRUE)
1392 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1393 
1394 	zc.zc_cookie = VDEV_STATE_OFFLINE;
1395 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1396 
1397 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1398 		return (0);
1399 
1400 	switch (errno) {
1401 	case EBUSY:
1402 
1403 		/*
1404 		 * There are no other replicas of this device.
1405 		 */
1406 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1407 
1408 	default:
1409 		return (zpool_standard_error(hdl, errno, msg));
1410 	}
1411 }
1412 
1413 /*
1414  * Mark the given vdev faulted.
1415  */
1416 int
1417 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1418 {
1419 	zfs_cmd_t zc = { 0 };
1420 	char msg[1024];
1421 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1422 
1423 	(void) snprintf(msg, sizeof (msg),
1424 	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
1425 
1426 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1427 	zc.zc_guid = guid;
1428 	zc.zc_cookie = VDEV_STATE_FAULTED;
1429 
1430 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1431 		return (0);
1432 
1433 	switch (errno) {
1434 	case EBUSY:
1435 
1436 		/*
1437 		 * There are no other replicas of this device.
1438 		 */
1439 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1440 
1441 	default:
1442 		return (zpool_standard_error(hdl, errno, msg));
1443 	}
1444 
1445 }
1446 
1447 /*
1448  * Mark the given vdev degraded.
1449  */
1450 int
1451 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1452 {
1453 	zfs_cmd_t zc = { 0 };
1454 	char msg[1024];
1455 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1456 
1457 	(void) snprintf(msg, sizeof (msg),
1458 	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
1459 
1460 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1461 	zc.zc_guid = guid;
1462 	zc.zc_cookie = VDEV_STATE_DEGRADED;
1463 
1464 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1465 		return (0);
1466 
1467 	return (zpool_standard_error(hdl, errno, msg));
1468 }
1469 
1470 /*
1471  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
1472  * a hot spare.
1473  */
1474 static boolean_t
1475 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1476 {
1477 	nvlist_t **child;
1478 	uint_t c, children;
1479 	char *type;
1480 
1481 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1482 	    &children) == 0) {
1483 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1484 		    &type) == 0);
1485 
1486 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1487 		    children == 2 && child[which] == tgt)
1488 			return (B_TRUE);
1489 
1490 		for (c = 0; c < children; c++)
1491 			if (is_replacing_spare(child[c], tgt, which))
1492 				return (B_TRUE);
1493 	}
1494 
1495 	return (B_FALSE);
1496 }
1497 
1498 /*
1499  * Attach new_disk (fully described by nvroot) to old_disk.
1500  * If 'replacing' is specified, the new disk will replace the old one.
1501  */
1502 int
1503 zpool_vdev_attach(zpool_handle_t *zhp,
1504     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1505 {
1506 	zfs_cmd_t zc = { 0 };
1507 	char msg[1024];
1508 	int ret;
1509 	nvlist_t *tgt;
1510 	boolean_t avail_spare, l2cache;
1511 	uint64_t val, is_log;
1512 	char *path;
1513 	nvlist_t **child;
1514 	uint_t children;
1515 	nvlist_t *config_root;
1516 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1517 
1518 	if (replacing)
1519 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1520 		    "cannot replace %s with %s"), old_disk, new_disk);
1521 	else
1522 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1523 		    "cannot attach %s to %s"), new_disk, old_disk);
1524 
1525 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1526 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache)) == 0)
1527 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1528 
1529 	if (avail_spare)
1530 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1531 
1532 	if (l2cache)
1533 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1534 
1535 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1536 	zc.zc_cookie = replacing;
1537 
1538 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1539 	    &child, &children) != 0 || children != 1) {
1540 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1541 		    "new device must be a single disk"));
1542 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1543 	}
1544 
1545 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1546 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1547 
1548 	/*
1549 	 * If the target is a hot spare that has been swapped in, we can only
1550 	 * replace it with another hot spare.
1551 	 */
1552 	if (replacing &&
1553 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1554 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1555 	    (zpool_find_vdev(zhp, path, &avail_spare, &l2cache) == NULL ||
1556 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
1557 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1558 		    "can only be replaced by another hot spare"));
1559 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1560 	}
1561 
1562 	/*
1563 	 * If we are attempting to replace a spare, it canot be applied to an
1564 	 * already spared device.
1565 	 */
1566 	if (replacing &&
1567 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1568 	    zpool_find_vdev(zhp, path, &avail_spare, &l2cache) != NULL &&
1569 	    avail_spare && is_replacing_spare(config_root, tgt, 0)) {
1570 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1571 		    "device has already been replaced with a spare"));
1572 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1573 	}
1574 
1575 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1576 		return (-1);
1577 
1578 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1579 
1580 	zcmd_free_nvlists(&zc);
1581 
1582 	if (ret == 0)
1583 		return (0);
1584 
1585 	switch (errno) {
1586 	case ENOTSUP:
1587 		/*
1588 		 * Can't attach to or replace this type of vdev.
1589 		 */
1590 		if (replacing) {
1591 			is_log = B_FALSE;
1592 			(void) nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_LOG,
1593 			    &is_log);
1594 			if (is_log)
1595 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1596 				    "cannot replace a log with a spare"));
1597 			else
1598 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1599 				    "cannot replace a replacing device"));
1600 		} else {
1601 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1602 			    "can only attach to mirrors and top-level "
1603 			    "disks"));
1604 		}
1605 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1606 		break;
1607 
1608 	case EINVAL:
1609 		/*
1610 		 * The new device must be a single disk.
1611 		 */
1612 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1613 		    "new device must be a single disk"));
1614 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1615 		break;
1616 
1617 	case EBUSY:
1618 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1619 		    new_disk);
1620 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1621 		break;
1622 
1623 	case EOVERFLOW:
1624 		/*
1625 		 * The new device is too small.
1626 		 */
1627 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1628 		    "device is too small"));
1629 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1630 		break;
1631 
1632 	case EDOM:
1633 		/*
1634 		 * The new device has a different alignment requirement.
1635 		 */
1636 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1637 		    "devices have different sector alignment"));
1638 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1639 		break;
1640 
1641 	case ENAMETOOLONG:
1642 		/*
1643 		 * The resulting top-level vdev spec won't fit in the label.
1644 		 */
1645 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1646 		break;
1647 
1648 	default:
1649 		(void) zpool_standard_error(hdl, errno, msg);
1650 	}
1651 
1652 	return (-1);
1653 }
1654 
1655 /*
1656  * Detach the specified device.
1657  */
1658 int
1659 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1660 {
1661 	zfs_cmd_t zc = { 0 };
1662 	char msg[1024];
1663 	nvlist_t *tgt;
1664 	boolean_t avail_spare, l2cache;
1665 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1666 
1667 	(void) snprintf(msg, sizeof (msg),
1668 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1669 
1670 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1671 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
1672 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1673 
1674 	if (avail_spare)
1675 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1676 
1677 	if (l2cache)
1678 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1679 
1680 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1681 
1682 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1683 		return (0);
1684 
1685 	switch (errno) {
1686 
1687 	case ENOTSUP:
1688 		/*
1689 		 * Can't detach from this type of vdev.
1690 		 */
1691 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1692 		    "applicable to mirror and replacing vdevs"));
1693 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1694 		break;
1695 
1696 	case EBUSY:
1697 		/*
1698 		 * There are no other replicas of this device.
1699 		 */
1700 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1701 		break;
1702 
1703 	default:
1704 		(void) zpool_standard_error(hdl, errno, msg);
1705 	}
1706 
1707 	return (-1);
1708 }
1709 
1710 /*
1711  * Remove the given device.  Currently, this is supported only for hot spares
1712  * and level 2 cache devices.
1713  */
1714 int
1715 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1716 {
1717 	zfs_cmd_t zc = { 0 };
1718 	char msg[1024];
1719 	nvlist_t *tgt;
1720 	boolean_t avail_spare, l2cache;
1721 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1722 
1723 	(void) snprintf(msg, sizeof (msg),
1724 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1725 
1726 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1727 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache)) == 0)
1728 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1729 
1730 	if (!avail_spare && !l2cache) {
1731 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1732 		    "only inactive hot spares or cache devices "
1733 		    "can be removed"));
1734 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1735 	}
1736 
1737 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1738 
1739 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1740 		return (0);
1741 
1742 	return (zpool_standard_error(hdl, errno, msg));
1743 }
1744 
1745 /*
1746  * Clear the errors for the pool, or the particular device if specified.
1747  */
1748 int
1749 zpool_clear(zpool_handle_t *zhp, const char *path)
1750 {
1751 	zfs_cmd_t zc = { 0 };
1752 	char msg[1024];
1753 	nvlist_t *tgt;
1754 	boolean_t avail_spare, l2cache;
1755 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1756 
1757 	if (path)
1758 		(void) snprintf(msg, sizeof (msg),
1759 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1760 		    path);
1761 	else
1762 		(void) snprintf(msg, sizeof (msg),
1763 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1764 		    zhp->zpool_name);
1765 
1766 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1767 	if (path) {
1768 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
1769 		    &l2cache)) == 0)
1770 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1771 
1772 		/*
1773 		 * Don't allow error clearing for hot spares.  Do allow
1774 		 * error clearing for l2cache devices.
1775 		 */
1776 		if (avail_spare)
1777 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1778 
1779 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1780 		    &zc.zc_guid) == 0);
1781 	}
1782 
1783 	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
1784 		return (0);
1785 
1786 	return (zpool_standard_error(hdl, errno, msg));
1787 }
1788 
1789 /*
1790  * Similar to zpool_clear(), but takes a GUID (used by fmd).
1791  */
1792 int
1793 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
1794 {
1795 	zfs_cmd_t zc = { 0 };
1796 	char msg[1024];
1797 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1798 
1799 	(void) snprintf(msg, sizeof (msg),
1800 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
1801 	    guid);
1802 
1803 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1804 	zc.zc_guid = guid;
1805 
1806 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1807 		return (0);
1808 
1809 	return (zpool_standard_error(hdl, errno, msg));
1810 }
1811 
1812 /*
1813  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1814  * hierarchy.
1815  */
1816 int
1817 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1818     void *data)
1819 {
1820 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1821 	char (*paths)[MAXPATHLEN];
1822 	size_t size = 4;
1823 	int curr, fd, base, ret = 0;
1824 	DIR *dirp;
1825 	struct dirent *dp;
1826 	struct stat st;
1827 
1828 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1829 		return (errno == ENOENT ? 0 : -1);
1830 
1831 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1832 		int err = errno;
1833 		(void) close(base);
1834 		return (err == ENOENT ? 0 : -1);
1835 	}
1836 
1837 	/*
1838 	 * Oddly this wasn't a directory -- ignore that failure since we
1839 	 * know there are no links lower in the (non-existant) hierarchy.
1840 	 */
1841 	if (!S_ISDIR(st.st_mode)) {
1842 		(void) close(base);
1843 		return (0);
1844 	}
1845 
1846 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1847 		(void) close(base);
1848 		return (-1);
1849 	}
1850 
1851 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1852 	curr = 0;
1853 
1854 	while (curr >= 0) {
1855 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1856 			goto err;
1857 
1858 		if (S_ISDIR(st.st_mode)) {
1859 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1860 				goto err;
1861 
1862 			if ((dirp = fdopendir(fd)) == NULL) {
1863 				(void) close(fd);
1864 				goto err;
1865 			}
1866 
1867 			while ((dp = readdir(dirp)) != NULL) {
1868 				if (dp->d_name[0] == '.')
1869 					continue;
1870 
1871 				if (curr + 1 == size) {
1872 					paths = zfs_realloc(hdl, paths,
1873 					    size * sizeof (paths[0]),
1874 					    size * 2 * sizeof (paths[0]));
1875 					if (paths == NULL) {
1876 						(void) closedir(dirp);
1877 						(void) close(fd);
1878 						goto err;
1879 					}
1880 
1881 					size *= 2;
1882 				}
1883 
1884 				(void) strlcpy(paths[curr + 1], paths[curr],
1885 				    sizeof (paths[curr + 1]));
1886 				(void) strlcat(paths[curr], "/",
1887 				    sizeof (paths[curr]));
1888 				(void) strlcat(paths[curr], dp->d_name,
1889 				    sizeof (paths[curr]));
1890 				curr++;
1891 			}
1892 
1893 			(void) closedir(dirp);
1894 
1895 		} else {
1896 			if ((ret = cb(paths[curr], data)) != 0)
1897 				break;
1898 		}
1899 
1900 		curr--;
1901 	}
1902 
1903 	free(paths);
1904 	(void) close(base);
1905 
1906 	return (ret);
1907 
1908 err:
1909 	free(paths);
1910 	(void) close(base);
1911 	return (-1);
1912 }
1913 
1914 typedef struct zvol_cb {
1915 	zpool_handle_t *zcb_pool;
1916 	boolean_t zcb_create;
1917 } zvol_cb_t;
1918 
1919 /*ARGSUSED*/
1920 static int
1921 do_zvol_create(zfs_handle_t *zhp, void *data)
1922 {
1923 	int ret = 0;
1924 
1925 	if (ZFS_IS_VOLUME(zhp)) {
1926 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
1927 		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
1928 	}
1929 
1930 	if (ret == 0)
1931 		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
1932 
1933 	zfs_close(zhp);
1934 
1935 	return (ret);
1936 }
1937 
1938 /*
1939  * Iterate over all zvols in the pool and make any necessary minor nodes.
1940  */
1941 int
1942 zpool_create_zvol_links(zpool_handle_t *zhp)
1943 {
1944 	zfs_handle_t *zfp;
1945 	int ret;
1946 
1947 	/*
1948 	 * If the pool is unavailable, just return success.
1949 	 */
1950 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
1951 	    zhp->zpool_name)) == NULL)
1952 		return (0);
1953 
1954 	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
1955 
1956 	zfs_close(zfp);
1957 	return (ret);
1958 }
1959 
1960 static int
1961 do_zvol_remove(const char *dataset, void *data)
1962 {
1963 	zpool_handle_t *zhp = data;
1964 
1965 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
1966 }
1967 
1968 /*
1969  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
1970  * by examining the /dev links so that a corrupted pool doesn't impede this
1971  * operation.
1972  */
1973 int
1974 zpool_remove_zvol_links(zpool_handle_t *zhp)
1975 {
1976 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
1977 }
1978 
1979 /*
1980  * Convert from a devid string to a path.
1981  */
1982 static char *
1983 devid_to_path(char *devid_str)
1984 {
1985 	ddi_devid_t devid;
1986 	char *minor;
1987 	char *path;
1988 	devid_nmlist_t *list = NULL;
1989 	int ret;
1990 
1991 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
1992 		return (NULL);
1993 
1994 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
1995 
1996 	devid_str_free(minor);
1997 	devid_free(devid);
1998 
1999 	if (ret != 0)
2000 		return (NULL);
2001 
2002 	if ((path = strdup(list[0].devname)) == NULL)
2003 		return (NULL);
2004 
2005 	devid_free_nmlist(list);
2006 
2007 	return (path);
2008 }
2009 
2010 /*
2011  * Convert from a path to a devid string.
2012  */
2013 static char *
2014 path_to_devid(const char *path)
2015 {
2016 	int fd;
2017 	ddi_devid_t devid;
2018 	char *minor, *ret;
2019 
2020 	if ((fd = open(path, O_RDONLY)) < 0)
2021 		return (NULL);
2022 
2023 	minor = NULL;
2024 	ret = NULL;
2025 	if (devid_get(fd, &devid) == 0) {
2026 		if (devid_get_minor_name(fd, &minor) == 0)
2027 			ret = devid_str_encode(devid, minor);
2028 		if (minor != NULL)
2029 			devid_str_free(minor);
2030 		devid_free(devid);
2031 	}
2032 	(void) close(fd);
2033 
2034 	return (ret);
2035 }
2036 
2037 /*
2038  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2039  * ignore any failure here, since a common case is for an unprivileged user to
2040  * type 'zpool status', and we'll display the correct information anyway.
2041  */
2042 static void
2043 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2044 {
2045 	zfs_cmd_t zc = { 0 };
2046 
2047 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2048 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
2049 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2050 	    &zc.zc_guid) == 0);
2051 
2052 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2053 }
2054 
2055 /*
2056  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
2057  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
2058  * We also check if this is a whole disk, in which case we strip off the
2059  * trailing 's0' slice name.
2060  *
2061  * This routine is also responsible for identifying when disks have been
2062  * reconfigured in a new location.  The kernel will have opened the device by
2063  * devid, but the path will still refer to the old location.  To catch this, we
2064  * first do a path -> devid translation (which is fast for the common case).  If
2065  * the devid matches, we're done.  If not, we do a reverse devid -> path
2066  * translation and issue the appropriate ioctl() to update the path of the vdev.
2067  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2068  * of these checks.
2069  */
2070 char *
2071 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2072 {
2073 	char *path, *devid;
2074 	uint64_t value;
2075 	char buf[64];
2076 	vdev_stat_t *vs;
2077 	uint_t vsc;
2078 
2079 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2080 	    &value) == 0) {
2081 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2082 		    &value) == 0);
2083 		(void) snprintf(buf, sizeof (buf), "%llu",
2084 		    (u_longlong_t)value);
2085 		path = buf;
2086 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2087 
2088 		/*
2089 		 * If the device is dead (faulted, offline, etc) then don't
2090 		 * bother opening it.  Otherwise we may be forcing the user to
2091 		 * open a misbehaving device, which can have undesirable
2092 		 * effects.
2093 		 */
2094 		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2095 		    (uint64_t **)&vs, &vsc) != 0 ||
2096 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
2097 		    zhp != NULL &&
2098 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2099 			/*
2100 			 * Determine if the current path is correct.
2101 			 */
2102 			char *newdevid = path_to_devid(path);
2103 
2104 			if (newdevid == NULL ||
2105 			    strcmp(devid, newdevid) != 0) {
2106 				char *newpath;
2107 
2108 				if ((newpath = devid_to_path(devid)) != NULL) {
2109 					/*
2110 					 * Update the path appropriately.
2111 					 */
2112 					set_path(zhp, nv, newpath);
2113 					if (nvlist_add_string(nv,
2114 					    ZPOOL_CONFIG_PATH, newpath) == 0)
2115 						verify(nvlist_lookup_string(nv,
2116 						    ZPOOL_CONFIG_PATH,
2117 						    &path) == 0);
2118 					free(newpath);
2119 				}
2120 			}
2121 
2122 			if (newdevid)
2123 				devid_str_free(newdevid);
2124 		}
2125 
2126 		if (strncmp(path, "/dev/dsk/", 9) == 0)
2127 			path += 9;
2128 
2129 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2130 		    &value) == 0 && value) {
2131 			char *tmp = zfs_strdup(hdl, path);
2132 			if (tmp == NULL)
2133 				return (NULL);
2134 			tmp[strlen(path) - 2] = '\0';
2135 			return (tmp);
2136 		}
2137 	} else {
2138 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2139 
2140 		/*
2141 		 * If it's a raidz device, we need to stick in the parity level.
2142 		 */
2143 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2144 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2145 			    &value) == 0);
2146 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
2147 			    (u_longlong_t)value);
2148 			path = buf;
2149 		}
2150 	}
2151 
2152 	return (zfs_strdup(hdl, path));
2153 }
2154 
2155 static int
2156 zbookmark_compare(const void *a, const void *b)
2157 {
2158 	return (memcmp(a, b, sizeof (zbookmark_t)));
2159 }
2160 
2161 /*
2162  * Retrieve the persistent error log, uniquify the members, and return to the
2163  * caller.
2164  */
2165 int
2166 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2167 {
2168 	zfs_cmd_t zc = { 0 };
2169 	uint64_t count;
2170 	zbookmark_t *zb = NULL;
2171 	int i;
2172 
2173 	/*
2174 	 * Retrieve the raw error list from the kernel.  If the number of errors
2175 	 * has increased, allocate more space and continue until we get the
2176 	 * entire list.
2177 	 */
2178 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2179 	    &count) == 0);
2180 	if (count == 0)
2181 		return (0);
2182 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2183 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2184 		return (-1);
2185 	zc.zc_nvlist_dst_size = count;
2186 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2187 	for (;;) {
2188 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2189 		    &zc) != 0) {
2190 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
2191 			if (errno == ENOMEM) {
2192 				count = zc.zc_nvlist_dst_size;
2193 				if ((zc.zc_nvlist_dst = (uintptr_t)
2194 				    zfs_alloc(zhp->zpool_hdl, count *
2195 				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
2196 					return (-1);
2197 			} else {
2198 				return (-1);
2199 			}
2200 		} else {
2201 			break;
2202 		}
2203 	}
2204 
2205 	/*
2206 	 * Sort the resulting bookmarks.  This is a little confusing due to the
2207 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
2208 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
2209 	 * _not_ copied as part of the process.  So we point the start of our
2210 	 * array appropriate and decrement the total number of elements.
2211 	 */
2212 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2213 	    zc.zc_nvlist_dst_size;
2214 	count -= zc.zc_nvlist_dst_size;
2215 
2216 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2217 
2218 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2219 
2220 	/*
2221 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
2222 	 */
2223 	for (i = 0; i < count; i++) {
2224 		nvlist_t *nv;
2225 
2226 		/* ignoring zb_blkid and zb_level for now */
2227 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2228 		    zb[i-1].zb_object == zb[i].zb_object)
2229 			continue;
2230 
2231 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2232 			goto nomem;
2233 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2234 		    zb[i].zb_objset) != 0) {
2235 			nvlist_free(nv);
2236 			goto nomem;
2237 		}
2238 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2239 		    zb[i].zb_object) != 0) {
2240 			nvlist_free(nv);
2241 			goto nomem;
2242 		}
2243 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2244 			nvlist_free(nv);
2245 			goto nomem;
2246 		}
2247 		nvlist_free(nv);
2248 	}
2249 
2250 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2251 	return (0);
2252 
2253 nomem:
2254 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2255 	return (no_memory(zhp->zpool_hdl));
2256 }
2257 
2258 /*
2259  * Upgrade a ZFS pool to the latest on-disk version.
2260  */
2261 int
2262 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2263 {
2264 	zfs_cmd_t zc = { 0 };
2265 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2266 
2267 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2268 	zc.zc_cookie = new_version;
2269 
2270 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2271 		return (zpool_standard_error_fmt(hdl, errno,
2272 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2273 		    zhp->zpool_name));
2274 	return (0);
2275 }
2276 
2277 void
2278 zpool_set_history_str(const char *subcommand, int argc, char **argv,
2279     char *history_str)
2280 {
2281 	int i;
2282 
2283 	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2284 	for (i = 1; i < argc; i++) {
2285 		if (strlen(history_str) + 1 + strlen(argv[i]) >
2286 		    HIS_MAX_RECORD_LEN)
2287 			break;
2288 		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2289 		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2290 	}
2291 }
2292 
2293 /*
2294  * Stage command history for logging.
2295  */
2296 int
2297 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2298 {
2299 	if (history_str == NULL)
2300 		return (EINVAL);
2301 
2302 	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2303 		return (EINVAL);
2304 
2305 	if (hdl->libzfs_log_str != NULL)
2306 		free(hdl->libzfs_log_str);
2307 
2308 	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2309 		return (no_memory(hdl));
2310 
2311 	return (0);
2312 }
2313 
2314 /*
2315  * Perform ioctl to get some command history of a pool.
2316  *
2317  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2318  * logical offset of the history buffer to start reading from.
2319  *
2320  * Upon return, 'off' is the next logical offset to read from and
2321  * 'len' is the actual amount of bytes read into 'buf'.
2322  */
2323 static int
2324 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2325 {
2326 	zfs_cmd_t zc = { 0 };
2327 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2328 
2329 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2330 
2331 	zc.zc_history = (uint64_t)(uintptr_t)buf;
2332 	zc.zc_history_len = *len;
2333 	zc.zc_history_offset = *off;
2334 
2335 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2336 		switch (errno) {
2337 		case EPERM:
2338 			return (zfs_error_fmt(hdl, EZFS_PERM,
2339 			    dgettext(TEXT_DOMAIN,
2340 			    "cannot show history for pool '%s'"),
2341 			    zhp->zpool_name));
2342 		case ENOENT:
2343 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2344 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2345 			    "'%s'"), zhp->zpool_name));
2346 		case ENOTSUP:
2347 			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2348 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2349 			    "'%s', pool must be upgraded"), zhp->zpool_name));
2350 		default:
2351 			return (zpool_standard_error_fmt(hdl, errno,
2352 			    dgettext(TEXT_DOMAIN,
2353 			    "cannot get history for '%s'"), zhp->zpool_name));
2354 		}
2355 	}
2356 
2357 	*len = zc.zc_history_len;
2358 	*off = zc.zc_history_offset;
2359 
2360 	return (0);
2361 }
2362 
2363 /*
2364  * Process the buffer of nvlists, unpacking and storing each nvlist record
2365  * into 'records'.  'leftover' is set to the number of bytes that weren't
2366  * processed as there wasn't a complete record.
2367  */
2368 static int
2369 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2370     nvlist_t ***records, uint_t *numrecords)
2371 {
2372 	uint64_t reclen;
2373 	nvlist_t *nv;
2374 	int i;
2375 
2376 	while (bytes_read > sizeof (reclen)) {
2377 
2378 		/* get length of packed record (stored as little endian) */
2379 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2380 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2381 
2382 		if (bytes_read < sizeof (reclen) + reclen)
2383 			break;
2384 
2385 		/* unpack record */
2386 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2387 			return (ENOMEM);
2388 		bytes_read -= sizeof (reclen) + reclen;
2389 		buf += sizeof (reclen) + reclen;
2390 
2391 		/* add record to nvlist array */
2392 		(*numrecords)++;
2393 		if (ISP2(*numrecords + 1)) {
2394 			*records = realloc(*records,
2395 			    *numrecords * 2 * sizeof (nvlist_t *));
2396 		}
2397 		(*records)[*numrecords - 1] = nv;
2398 	}
2399 
2400 	*leftover = bytes_read;
2401 	return (0);
2402 }
2403 
2404 #define	HIS_BUF_LEN	(128*1024)
2405 
2406 /*
2407  * Retrieve the command history of a pool.
2408  */
2409 int
2410 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2411 {
2412 	char buf[HIS_BUF_LEN];
2413 	uint64_t off = 0;
2414 	nvlist_t **records = NULL;
2415 	uint_t numrecords = 0;
2416 	int err, i;
2417 
2418 	do {
2419 		uint64_t bytes_read = sizeof (buf);
2420 		uint64_t leftover;
2421 
2422 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2423 			break;
2424 
2425 		/* if nothing else was read in, we're at EOF, just return */
2426 		if (!bytes_read)
2427 			break;
2428 
2429 		if ((err = zpool_history_unpack(buf, bytes_read,
2430 		    &leftover, &records, &numrecords)) != 0)
2431 			break;
2432 		off -= leftover;
2433 
2434 		/* CONSTCOND */
2435 	} while (1);
2436 
2437 	if (!err) {
2438 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2439 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2440 		    records, numrecords) == 0);
2441 	}
2442 	for (i = 0; i < numrecords; i++)
2443 		nvlist_free(records[i]);
2444 	free(records);
2445 
2446 	return (err);
2447 }
2448 
2449 void
2450 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2451     char *pathname, size_t len)
2452 {
2453 	zfs_cmd_t zc = { 0 };
2454 	boolean_t mounted = B_FALSE;
2455 	char *mntpnt = NULL;
2456 	char dsname[MAXNAMELEN];
2457 
2458 	if (dsobj == 0) {
2459 		/* special case for the MOS */
2460 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
2461 		return;
2462 	}
2463 
2464 	/* get the dataset's name */
2465 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2466 	zc.zc_obj = dsobj;
2467 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
2468 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2469 		/* just write out a path of two object numbers */
2470 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
2471 		    dsobj, obj);
2472 		return;
2473 	}
2474 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2475 
2476 	/* find out if the dataset is mounted */
2477 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2478 
2479 	/* get the corrupted object's path */
2480 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2481 	zc.zc_obj = obj;
2482 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2483 	    &zc) == 0) {
2484 		if (mounted) {
2485 			(void) snprintf(pathname, len, "%s%s", mntpnt,
2486 			    zc.zc_value);
2487 		} else {
2488 			(void) snprintf(pathname, len, "%s:%s",
2489 			    dsname, zc.zc_value);
2490 		}
2491 	} else {
2492 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
2493 	}
2494 	free(mntpnt);
2495 }
2496 
2497 #define	RDISK_ROOT	"/dev/rdsk"
2498 #define	BACKUP_SLICE	"s2"
2499 /*
2500  * Don't start the slice at the default block of 34; many storage
2501  * devices will use a stripe width of 128k, so start there instead.
2502  */
2503 #define	NEW_START_BLOCK	256
2504 
2505 /*
2506  * determine where a partition starts on a disk in the current
2507  * configuration
2508  */
2509 static diskaddr_t
2510 find_start_block(nvlist_t *config)
2511 {
2512 	nvlist_t **child;
2513 	uint_t c, children;
2514 	char *path;
2515 	diskaddr_t sb = MAXOFFSET_T;
2516 	int fd;
2517 	char diskname[MAXPATHLEN];
2518 	uint64_t wholedisk;
2519 
2520 	if (nvlist_lookup_nvlist_array(config,
2521 	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2522 		if (nvlist_lookup_uint64(config,
2523 		    ZPOOL_CONFIG_WHOLE_DISK,
2524 		    &wholedisk) != 0 || !wholedisk) {
2525 			return (MAXOFFSET_T);
2526 		}
2527 		if (nvlist_lookup_string(config,
2528 		    ZPOOL_CONFIG_PATH, &path) != 0) {
2529 			return (MAXOFFSET_T);
2530 		}
2531 
2532 		(void) snprintf(diskname, sizeof (diskname), "%s%s",
2533 		    RDISK_ROOT, strrchr(path, '/'));
2534 		if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2535 			struct dk_gpt *vtoc;
2536 			if (efi_alloc_and_read(fd, &vtoc) >= 0) {
2537 				sb = vtoc->efi_parts[0].p_start;
2538 				efi_free(vtoc);
2539 			}
2540 			(void) close(fd);
2541 		}
2542 		return (sb);
2543 	}
2544 
2545 	for (c = 0; c < children; c++) {
2546 		sb = find_start_block(child[c]);
2547 		if (sb != MAXOFFSET_T) {
2548 			return (sb);
2549 		}
2550 	}
2551 	return (MAXOFFSET_T);
2552 }
2553 
2554 /*
2555  * Label an individual disk.  The name provided is the short name,
2556  * stripped of any leading /dev path.
2557  */
2558 int
2559 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2560 {
2561 	char path[MAXPATHLEN];
2562 	struct dk_gpt *vtoc;
2563 	int fd;
2564 	size_t resv = EFI_MIN_RESV_SIZE;
2565 	uint64_t slice_size;
2566 	diskaddr_t start_block;
2567 	char errbuf[1024];
2568 
2569 	/* prepare an error message just in case */
2570 	(void) snprintf(errbuf, sizeof (errbuf),
2571 	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
2572 
2573 	if (zhp) {
2574 		nvlist_t *nvroot;
2575 
2576 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2577 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2578 
2579 		if (zhp->zpool_start_block == 0)
2580 			start_block = find_start_block(nvroot);
2581 		else
2582 			start_block = zhp->zpool_start_block;
2583 		zhp->zpool_start_block = start_block;
2584 	} else {
2585 		/* new pool */
2586 		start_block = NEW_START_BLOCK;
2587 	}
2588 
2589 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2590 	    BACKUP_SLICE);
2591 
2592 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2593 		/*
2594 		 * This shouldn't happen.  We've long since verified that this
2595 		 * is a valid device.
2596 		 */
2597 		zfs_error_aux(hdl,
2598 		    dgettext(TEXT_DOMAIN, "unable to open device"));
2599 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2600 	}
2601 
2602 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2603 		/*
2604 		 * The only way this can fail is if we run out of memory, or we
2605 		 * were unable to read the disk's capacity
2606 		 */
2607 		if (errno == ENOMEM)
2608 			(void) no_memory(hdl);
2609 
2610 		(void) close(fd);
2611 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2612 		    "unable to read disk capacity"), name);
2613 
2614 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2615 	}
2616 
2617 	slice_size = vtoc->efi_last_u_lba + 1;
2618 	slice_size -= EFI_MIN_RESV_SIZE;
2619 	if (start_block == MAXOFFSET_T)
2620 		start_block = NEW_START_BLOCK;
2621 	slice_size -= start_block;
2622 
2623 	vtoc->efi_parts[0].p_start = start_block;
2624 	vtoc->efi_parts[0].p_size = slice_size;
2625 
2626 	/*
2627 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
2628 	 * disposable by some EFI utilities (since EFI doesn't have a backup
2629 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
2630 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2631 	 * etc. were all pretty specific.  V_USR is as close to reality as we
2632 	 * can get, in the absence of V_OTHER.
2633 	 */
2634 	vtoc->efi_parts[0].p_tag = V_USR;
2635 	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2636 
2637 	vtoc->efi_parts[8].p_start = slice_size + start_block;
2638 	vtoc->efi_parts[8].p_size = resv;
2639 	vtoc->efi_parts[8].p_tag = V_RESERVED;
2640 
2641 	if (efi_write(fd, vtoc) != 0) {
2642 		/*
2643 		 * Some block drivers (like pcata) may not support EFI
2644 		 * GPT labels.  Print out a helpful error message dir-
2645 		 * ecting the user to manually label the disk and give
2646 		 * a specific slice.
2647 		 */
2648 		(void) close(fd);
2649 		efi_free(vtoc);
2650 
2651 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2652 		    "try using fdisk(1M) and then provide a specific slice"));
2653 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
2654 	}
2655 
2656 	(void) close(fd);
2657 	efi_free(vtoc);
2658 	return (0);
2659 }
2660