xref: /titanic_50/usr/src/lib/libzfs/common/libzfs_pool.c (revision 508aff1a85ed04f187fd074799bcaefd630490f1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <alloca.h>
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <devid.h>
32 #include <dirent.h>
33 #include <fcntl.h>
34 #include <libintl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <unistd.h>
39 #include <zone.h>
40 #include <sys/efi_partition.h>
41 #include <sys/vtoc.h>
42 #include <sys/zfs_ioctl.h>
43 #include <sys/zio.h>
44 #include <strings.h>
45 
46 #include "zfs_namecheck.h"
47 #include "zfs_prop.h"
48 #include "libzfs_impl.h"
49 
50 static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
51 
52 /*
53  * ====================================================================
54  *   zpool property functions
55  * ====================================================================
56  */
57 
58 static int
59 zpool_get_all_props(zpool_handle_t *zhp)
60 {
61 	zfs_cmd_t zc = { 0 };
62 	libzfs_handle_t *hdl = zhp->zpool_hdl;
63 
64 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
65 
66 	if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
67 		return (-1);
68 
69 	while (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_PROPS, &zc) != 0) {
70 		if (errno == ENOMEM) {
71 			if (zcmd_expand_dst_nvlist(hdl, &zc) != 0) {
72 				zcmd_free_nvlists(&zc);
73 				return (-1);
74 			}
75 		} else {
76 			zcmd_free_nvlists(&zc);
77 			return (-1);
78 		}
79 	}
80 
81 	if (zcmd_read_dst_nvlist(hdl, &zc, &zhp->zpool_props) != 0) {
82 		zcmd_free_nvlists(&zc);
83 		return (-1);
84 	}
85 
86 	zcmd_free_nvlists(&zc);
87 
88 	return (0);
89 }
90 
91 static int
92 zpool_props_refresh(zpool_handle_t *zhp)
93 {
94 	nvlist_t *old_props;
95 
96 	old_props = zhp->zpool_props;
97 
98 	if (zpool_get_all_props(zhp) != 0)
99 		return (-1);
100 
101 	nvlist_free(old_props);
102 	return (0);
103 }
104 
105 static char *
106 zpool_get_prop_string(zpool_handle_t *zhp, zpool_prop_t prop,
107     zprop_source_t *src)
108 {
109 	nvlist_t *nv, *nvl;
110 	uint64_t ival;
111 	char *value;
112 	zprop_source_t source;
113 
114 	nvl = zhp->zpool_props;
115 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
116 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &ival) == 0);
117 		source = ival;
118 		verify(nvlist_lookup_string(nv, ZPROP_VALUE, &value) == 0);
119 	} else {
120 		source = ZPROP_SRC_DEFAULT;
121 		if ((value = (char *)zpool_prop_default_string(prop)) == NULL)
122 			value = "-";
123 	}
124 
125 	if (src)
126 		*src = source;
127 
128 	return (value);
129 }
130 
131 uint64_t
132 zpool_get_prop_int(zpool_handle_t *zhp, zpool_prop_t prop, zprop_source_t *src)
133 {
134 	nvlist_t *nv, *nvl;
135 	uint64_t value;
136 	zprop_source_t source;
137 
138 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp)) {
139 		/*
140 		 * zpool_get_all_props() has most likely failed because
141 		 * the pool is faulted, but if all we need is the top level
142 		 * vdev's guid then get it from the zhp config nvlist.
143 		 */
144 		if ((prop == ZPOOL_PROP_GUID) &&
145 		    (nvlist_lookup_nvlist(zhp->zpool_config,
146 		    ZPOOL_CONFIG_VDEV_TREE, &nv) == 0) &&
147 		    (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &value)
148 		    == 0)) {
149 			return (value);
150 		}
151 		return (zpool_prop_default_numeric(prop));
152 	}
153 
154 	nvl = zhp->zpool_props;
155 	if (nvlist_lookup_nvlist(nvl, zpool_prop_to_name(prop), &nv) == 0) {
156 		verify(nvlist_lookup_uint64(nv, ZPROP_SOURCE, &value) == 0);
157 		source = value;
158 		verify(nvlist_lookup_uint64(nv, ZPROP_VALUE, &value) == 0);
159 	} else {
160 		source = ZPROP_SRC_DEFAULT;
161 		value = zpool_prop_default_numeric(prop);
162 	}
163 
164 	if (src)
165 		*src = source;
166 
167 	return (value);
168 }
169 
170 /*
171  * Map VDEV STATE to printed strings.
172  */
173 char *
174 zpool_state_to_name(vdev_state_t state, vdev_aux_t aux)
175 {
176 	switch (state) {
177 	case VDEV_STATE_CLOSED:
178 	case VDEV_STATE_OFFLINE:
179 		return (gettext("OFFLINE"));
180 	case VDEV_STATE_REMOVED:
181 		return (gettext("REMOVED"));
182 	case VDEV_STATE_CANT_OPEN:
183 		if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
184 			return (gettext("FAULTED"));
185 		else
186 			return (gettext("UNAVAIL"));
187 	case VDEV_STATE_FAULTED:
188 		return (gettext("FAULTED"));
189 	case VDEV_STATE_DEGRADED:
190 		return (gettext("DEGRADED"));
191 	case VDEV_STATE_HEALTHY:
192 		return (gettext("ONLINE"));
193 	}
194 
195 	return (gettext("UNKNOWN"));
196 }
197 
198 /*
199  * Get a zpool property value for 'prop' and return the value in
200  * a pre-allocated buffer.
201  */
202 int
203 zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
204     zprop_source_t *srctype)
205 {
206 	uint64_t intval;
207 	const char *strval;
208 	zprop_source_t src = ZPROP_SRC_NONE;
209 	nvlist_t *nvroot;
210 	vdev_stat_t *vs;
211 	uint_t vsc;
212 
213 	if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
214 		if (prop == ZPOOL_PROP_NAME)
215 			(void) strlcpy(buf, zpool_get_name(zhp), len);
216 		else if (prop == ZPOOL_PROP_HEALTH)
217 			(void) strlcpy(buf, "FAULTED", len);
218 		else
219 			(void) strlcpy(buf, "-", len);
220 		return (0);
221 	}
222 
223 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp) &&
224 	    prop != ZPOOL_PROP_NAME)
225 		return (-1);
226 
227 	switch (zpool_prop_get_type(prop)) {
228 	case PROP_TYPE_STRING:
229 		(void) strlcpy(buf, zpool_get_prop_string(zhp, prop, &src),
230 		    len);
231 		break;
232 
233 	case PROP_TYPE_NUMBER:
234 		intval = zpool_get_prop_int(zhp, prop, &src);
235 
236 		switch (prop) {
237 		case ZPOOL_PROP_SIZE:
238 		case ZPOOL_PROP_USED:
239 		case ZPOOL_PROP_AVAILABLE:
240 			(void) zfs_nicenum(intval, buf, len);
241 			break;
242 
243 		case ZPOOL_PROP_CAPACITY:
244 			(void) snprintf(buf, len, "%llu%%",
245 			    (u_longlong_t)intval);
246 			break;
247 
248 		case ZPOOL_PROP_HEALTH:
249 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
250 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
251 			verify(nvlist_lookup_uint64_array(nvroot,
252 			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
253 
254 			(void) strlcpy(buf, zpool_state_to_name(intval,
255 			    vs->vs_aux), len);
256 			break;
257 		default:
258 			(void) snprintf(buf, len, "%llu", intval);
259 		}
260 		break;
261 
262 	case PROP_TYPE_INDEX:
263 		intval = zpool_get_prop_int(zhp, prop, &src);
264 		if (zpool_prop_index_to_string(prop, intval, &strval)
265 		    != 0)
266 			return (-1);
267 		(void) strlcpy(buf, strval, len);
268 		break;
269 
270 	default:
271 		abort();
272 	}
273 
274 	if (srctype)
275 		*srctype = src;
276 
277 	return (0);
278 }
279 
280 /*
281  * Check if the bootfs name has the same pool name as it is set to.
282  * Assuming bootfs is a valid dataset name.
283  */
284 static boolean_t
285 bootfs_name_valid(const char *pool, char *bootfs)
286 {
287 	int len = strlen(pool);
288 
289 	if (!zfs_name_valid(bootfs, ZFS_TYPE_FILESYSTEM|ZFS_TYPE_SNAPSHOT))
290 		return (B_FALSE);
291 
292 	if (strncmp(pool, bootfs, len) == 0 &&
293 	    (bootfs[len] == '/' || bootfs[len] == '\0'))
294 		return (B_TRUE);
295 
296 	return (B_FALSE);
297 }
298 
299 /*
300  * Inspect the configuration to determine if any of the devices contain
301  * an EFI label.
302  */
303 static boolean_t
304 pool_uses_efi(nvlist_t *config)
305 {
306 	nvlist_t **child;
307 	uint_t c, children;
308 
309 	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
310 	    &child, &children) != 0)
311 		return (read_efi_label(config, NULL) >= 0);
312 
313 	for (c = 0; c < children; c++) {
314 		if (pool_uses_efi(child[c]))
315 			return (B_TRUE);
316 	}
317 	return (B_FALSE);
318 }
319 
320 /*
321  * Given an nvlist of zpool properties to be set, validate that they are
322  * correct, and parse any numeric properties (index, boolean, etc) if they are
323  * specified as strings.
324  */
325 static nvlist_t *
326 zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
327     nvlist_t *props, uint64_t version, boolean_t create_or_import, char *errbuf)
328 {
329 	nvpair_t *elem;
330 	nvlist_t *retprops;
331 	zpool_prop_t prop;
332 	char *strval;
333 	uint64_t intval;
334 	char *slash;
335 	struct stat64 statbuf;
336 	zpool_handle_t *zhp;
337 	nvlist_t *nvroot;
338 
339 	if (nvlist_alloc(&retprops, NV_UNIQUE_NAME, 0) != 0) {
340 		(void) no_memory(hdl);
341 		return (NULL);
342 	}
343 
344 	elem = NULL;
345 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
346 		const char *propname = nvpair_name(elem);
347 
348 		/*
349 		 * Make sure this property is valid and applies to this type.
350 		 */
351 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
352 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
353 			    "invalid property '%s'"), propname);
354 			(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
355 			goto error;
356 		}
357 
358 		if (zpool_prop_readonly(prop)) {
359 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
360 			    "is readonly"), propname);
361 			(void) zfs_error(hdl, EZFS_PROPREADONLY, errbuf);
362 			goto error;
363 		}
364 
365 		if (zprop_parse_value(hdl, elem, prop, ZFS_TYPE_POOL, retprops,
366 		    &strval, &intval, errbuf) != 0)
367 			goto error;
368 
369 		/*
370 		 * Perform additional checking for specific properties.
371 		 */
372 		switch (prop) {
373 		case ZPOOL_PROP_VERSION:
374 			if (intval < version || intval > SPA_VERSION) {
375 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
376 				    "property '%s' number %d is invalid."),
377 				    propname, intval);
378 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
379 				goto error;
380 			}
381 			break;
382 
383 		case ZPOOL_PROP_BOOTFS:
384 			if (create_or_import) {
385 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
386 				    "property '%s' cannot be set at creation "
387 				    "or import time"), propname);
388 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
389 				goto error;
390 			}
391 
392 			if (version < SPA_VERSION_BOOTFS) {
393 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
394 				    "pool must be upgraded to support "
395 				    "'%s' property"), propname);
396 				(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
397 				goto error;
398 			}
399 
400 			/*
401 			 * bootfs property value has to be a dataset name and
402 			 * the dataset has to be in the same pool as it sets to.
403 			 */
404 			if (strval[0] != '\0' && !bootfs_name_valid(poolname,
405 			    strval)) {
406 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "'%s' "
407 				    "is an invalid name"), strval);
408 				(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
409 				goto error;
410 			}
411 
412 			if ((zhp = zpool_open_canfail(hdl, poolname)) == NULL) {
413 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
414 				    "could not open pool '%s'"), poolname);
415 				(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
416 				goto error;
417 			}
418 			verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
419 			    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
420 
421 			/*
422 			 * bootfs property cannot be set on a disk which has
423 			 * been EFI labeled.
424 			 */
425 			if (pool_uses_efi(nvroot)) {
426 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
427 				    "property '%s' not supported on "
428 				    "EFI labeled devices"), propname);
429 				(void) zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf);
430 				zpool_close(zhp);
431 				goto error;
432 			}
433 			zpool_close(zhp);
434 			break;
435 
436 		case ZPOOL_PROP_ALTROOT:
437 			if (!create_or_import) {
438 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
439 				    "property '%s' can only be set during pool "
440 				    "creation or import"), propname);
441 				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
442 				goto error;
443 			}
444 
445 			if (strval[0] != '/') {
446 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
447 				    "bad alternate root '%s'"), strval);
448 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
449 				goto error;
450 			}
451 			break;
452 
453 		case ZPOOL_PROP_CACHEFILE:
454 			if (strval[0] == '\0')
455 				break;
456 
457 			if (strcmp(strval, "none") == 0)
458 				break;
459 
460 			if (strval[0] != '/') {
461 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
462 				    "property '%s' must be empty, an "
463 				    "absolute path, or 'none'"), propname);
464 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
465 				goto error;
466 			}
467 
468 			slash = strrchr(strval, '/');
469 
470 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
471 			    strcmp(slash, "/..") == 0) {
472 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
473 				    "'%s' is not a valid file"), strval);
474 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
475 				goto error;
476 			}
477 
478 			*slash = '\0';
479 
480 			if (strval[0] != '\0' &&
481 			    (stat64(strval, &statbuf) != 0 ||
482 			    !S_ISDIR(statbuf.st_mode))) {
483 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
484 				    "'%s' is not a valid directory"),
485 				    strval);
486 				(void) zfs_error(hdl, EZFS_BADPATH, errbuf);
487 				goto error;
488 			}
489 
490 			*slash = '/';
491 			break;
492 		}
493 	}
494 
495 	return (retprops);
496 error:
497 	nvlist_free(retprops);
498 	return (NULL);
499 }
500 
501 /*
502  * Set zpool property : propname=propval.
503  */
504 int
505 zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
506 {
507 	zfs_cmd_t zc = { 0 };
508 	int ret = -1;
509 	char errbuf[1024];
510 	nvlist_t *nvl = NULL;
511 	nvlist_t *realprops;
512 	uint64_t version;
513 
514 	(void) snprintf(errbuf, sizeof (errbuf),
515 	    dgettext(TEXT_DOMAIN, "cannot set property for '%s'"),
516 	    zhp->zpool_name);
517 
518 	if (zhp->zpool_props == NULL && zpool_get_all_props(zhp))
519 		return (zfs_error(zhp->zpool_hdl, EZFS_POOLPROPS, errbuf));
520 
521 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
522 		return (no_memory(zhp->zpool_hdl));
523 
524 	if (nvlist_add_string(nvl, propname, propval) != 0) {
525 		nvlist_free(nvl);
526 		return (no_memory(zhp->zpool_hdl));
527 	}
528 
529 	version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
530 	if ((realprops = zpool_valid_proplist(zhp->zpool_hdl,
531 	    zhp->zpool_name, nvl, version, B_FALSE, errbuf)) == NULL) {
532 		nvlist_free(nvl);
533 		return (-1);
534 	}
535 
536 	nvlist_free(nvl);
537 	nvl = realprops;
538 
539 	/*
540 	 * Execute the corresponding ioctl() to set this property.
541 	 */
542 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
543 
544 	if (zcmd_write_src_nvlist(zhp->zpool_hdl, &zc, nvl) != 0) {
545 		nvlist_free(nvl);
546 		return (-1);
547 	}
548 
549 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SET_PROPS, &zc);
550 
551 	zcmd_free_nvlists(&zc);
552 	nvlist_free(nvl);
553 
554 	if (ret)
555 		(void) zpool_standard_error(zhp->zpool_hdl, errno, errbuf);
556 	else
557 		(void) zpool_props_refresh(zhp);
558 
559 	return (ret);
560 }
561 
562 int
563 zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
564 {
565 	libzfs_handle_t *hdl = zhp->zpool_hdl;
566 	zprop_list_t *entry;
567 	char buf[ZFS_MAXPROPLEN];
568 
569 	if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
570 		return (-1);
571 
572 	for (entry = *plp; entry != NULL; entry = entry->pl_next) {
573 
574 		if (entry->pl_fixed)
575 			continue;
576 
577 		if (entry->pl_prop != ZPROP_INVAL &&
578 		    zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
579 		    NULL) == 0) {
580 			if (strlen(buf) > entry->pl_width)
581 				entry->pl_width = strlen(buf);
582 		}
583 	}
584 
585 	return (0);
586 }
587 
588 
589 /*
590  * Validate the given pool name, optionally putting an extended error message in
591  * 'buf'.
592  */
593 boolean_t
594 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
595 {
596 	namecheck_err_t why;
597 	char what;
598 	int ret;
599 
600 	ret = pool_namecheck(pool, &why, &what);
601 
602 	/*
603 	 * The rules for reserved pool names were extended at a later point.
604 	 * But we need to support users with existing pools that may now be
605 	 * invalid.  So we only check for this expanded set of names during a
606 	 * create (or import), and only in userland.
607 	 */
608 	if (ret == 0 && !isopen &&
609 	    (strncmp(pool, "mirror", 6) == 0 ||
610 	    strncmp(pool, "raidz", 5) == 0 ||
611 	    strncmp(pool, "spare", 5) == 0 ||
612 	    strcmp(pool, "log") == 0)) {
613 		if (hdl != NULL)
614 			zfs_error_aux(hdl,
615 			    dgettext(TEXT_DOMAIN, "name is reserved"));
616 		return (B_FALSE);
617 	}
618 
619 
620 	if (ret != 0) {
621 		if (hdl != NULL) {
622 			switch (why) {
623 			case NAME_ERR_TOOLONG:
624 				zfs_error_aux(hdl,
625 				    dgettext(TEXT_DOMAIN, "name is too long"));
626 				break;
627 
628 			case NAME_ERR_INVALCHAR:
629 				zfs_error_aux(hdl,
630 				    dgettext(TEXT_DOMAIN, "invalid character "
631 				    "'%c' in pool name"), what);
632 				break;
633 
634 			case NAME_ERR_NOLETTER:
635 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
636 				    "name must begin with a letter"));
637 				break;
638 
639 			case NAME_ERR_RESERVED:
640 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
641 				    "name is reserved"));
642 				break;
643 
644 			case NAME_ERR_DISKLIKE:
645 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
646 				    "pool name is reserved"));
647 				break;
648 
649 			case NAME_ERR_LEADING_SLASH:
650 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
651 				    "leading slash in name"));
652 				break;
653 
654 			case NAME_ERR_EMPTY_COMPONENT:
655 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
656 				    "empty component in name"));
657 				break;
658 
659 			case NAME_ERR_TRAILING_SLASH:
660 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
661 				    "trailing slash in name"));
662 				break;
663 
664 			case NAME_ERR_MULTIPLE_AT:
665 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
666 				    "multiple '@' delimiters in name"));
667 				break;
668 
669 			}
670 		}
671 		return (B_FALSE);
672 	}
673 
674 	return (B_TRUE);
675 }
676 
677 /*
678  * Open a handle to the given pool, even if the pool is currently in the FAULTED
679  * state.
680  */
681 zpool_handle_t *
682 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
683 {
684 	zpool_handle_t *zhp;
685 	boolean_t missing;
686 
687 	/*
688 	 * Make sure the pool name is valid.
689 	 */
690 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
691 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
692 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
693 		    pool);
694 		return (NULL);
695 	}
696 
697 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
698 		return (NULL);
699 
700 	zhp->zpool_hdl = hdl;
701 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
702 
703 	if (zpool_refresh_stats(zhp, &missing) != 0) {
704 		zpool_close(zhp);
705 		return (NULL);
706 	}
707 
708 	if (missing) {
709 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "no such pool"));
710 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
711 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), pool);
712 		zpool_close(zhp);
713 		return (NULL);
714 	}
715 
716 	return (zhp);
717 }
718 
719 /*
720  * Like the above, but silent on error.  Used when iterating over pools (because
721  * the configuration cache may be out of date).
722  */
723 int
724 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
725 {
726 	zpool_handle_t *zhp;
727 	boolean_t missing;
728 
729 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
730 		return (-1);
731 
732 	zhp->zpool_hdl = hdl;
733 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
734 
735 	if (zpool_refresh_stats(zhp, &missing) != 0) {
736 		zpool_close(zhp);
737 		return (-1);
738 	}
739 
740 	if (missing) {
741 		zpool_close(zhp);
742 		*ret = NULL;
743 		return (0);
744 	}
745 
746 	*ret = zhp;
747 	return (0);
748 }
749 
750 /*
751  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
752  * state.
753  */
754 zpool_handle_t *
755 zpool_open(libzfs_handle_t *hdl, const char *pool)
756 {
757 	zpool_handle_t *zhp;
758 
759 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
760 		return (NULL);
761 
762 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
763 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
764 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
765 		zpool_close(zhp);
766 		return (NULL);
767 	}
768 
769 	return (zhp);
770 }
771 
772 /*
773  * Close the handle.  Simply frees the memory associated with the handle.
774  */
775 void
776 zpool_close(zpool_handle_t *zhp)
777 {
778 	if (zhp->zpool_config)
779 		nvlist_free(zhp->zpool_config);
780 	if (zhp->zpool_old_config)
781 		nvlist_free(zhp->zpool_old_config);
782 	if (zhp->zpool_props)
783 		nvlist_free(zhp->zpool_props);
784 	free(zhp);
785 }
786 
787 /*
788  * Return the name of the pool.
789  */
790 const char *
791 zpool_get_name(zpool_handle_t *zhp)
792 {
793 	return (zhp->zpool_name);
794 }
795 
796 
797 /*
798  * Return the state of the pool (ACTIVE or UNAVAILABLE)
799  */
800 int
801 zpool_get_state(zpool_handle_t *zhp)
802 {
803 	return (zhp->zpool_state);
804 }
805 
806 /*
807  * Create the named pool, using the provided vdev list.  It is assumed
808  * that the consumer has already validated the contents of the nvlist, so we
809  * don't have to worry about error semantics.
810  */
811 int
812 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
813     nvlist_t *props, nvlist_t *fsprops)
814 {
815 	zfs_cmd_t zc = { 0 };
816 	nvlist_t *zc_fsprops = NULL;
817 	nvlist_t *zc_props = NULL;
818 	char msg[1024];
819 	char *altroot;
820 	int ret = -1;
821 
822 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
823 	    "cannot create '%s'"), pool);
824 
825 	if (!zpool_name_valid(hdl, B_FALSE, pool))
826 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
827 
828 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
829 		return (-1);
830 
831 	if (props) {
832 		if ((zc_props = zpool_valid_proplist(hdl, pool, props,
833 		    SPA_VERSION_1, B_TRUE, msg)) == NULL) {
834 			goto create_failed;
835 		}
836 	}
837 
838 	if (fsprops) {
839 		uint64_t zoned;
840 		char *zonestr;
841 
842 		zoned = ((nvlist_lookup_string(fsprops,
843 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zonestr) == 0) &&
844 		    strcmp(zonestr, "on") == 0);
845 
846 		if ((zc_fsprops = zfs_valid_proplist(hdl,
847 		    ZFS_TYPE_FILESYSTEM, fsprops, zoned, NULL, msg)) == NULL) {
848 			goto create_failed;
849 		}
850 		if (!zc_props &&
851 		    (nvlist_alloc(&zc_props, NV_UNIQUE_NAME, 0) != 0)) {
852 			goto create_failed;
853 		}
854 		if (nvlist_add_nvlist(zc_props,
855 		    ZPOOL_ROOTFS_PROPS, zc_fsprops) != 0) {
856 			goto create_failed;
857 		}
858 	}
859 
860 	if (zc_props && zcmd_write_src_nvlist(hdl, &zc, zc_props) != 0)
861 		goto create_failed;
862 
863 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
864 
865 	if ((ret = zfs_ioctl(hdl, ZFS_IOC_POOL_CREATE, &zc)) != 0) {
866 
867 		zcmd_free_nvlists(&zc);
868 		nvlist_free(zc_props);
869 		nvlist_free(zc_fsprops);
870 
871 		switch (errno) {
872 		case EBUSY:
873 			/*
874 			 * This can happen if the user has specified the same
875 			 * device multiple times.  We can't reliably detect this
876 			 * until we try to add it and see we already have a
877 			 * label.
878 			 */
879 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
880 			    "one or more vdevs refer to the same device"));
881 			return (zfs_error(hdl, EZFS_BADDEV, msg));
882 
883 		case EOVERFLOW:
884 			/*
885 			 * This occurs when one of the devices is below
886 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
887 			 * device was the problem device since there's no
888 			 * reliable way to determine device size from userland.
889 			 */
890 			{
891 				char buf[64];
892 
893 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
894 
895 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
896 				    "one or more devices is less than the "
897 				    "minimum size (%s)"), buf);
898 			}
899 			return (zfs_error(hdl, EZFS_BADDEV, msg));
900 
901 		case ENOSPC:
902 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
903 			    "one or more devices is out of space"));
904 			return (zfs_error(hdl, EZFS_BADDEV, msg));
905 
906 		case ENOTBLK:
907 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
908 			    "cache device must be a disk or disk slice"));
909 			return (zfs_error(hdl, EZFS_BADDEV, msg));
910 
911 		default:
912 			return (zpool_standard_error(hdl, errno, msg));
913 		}
914 	}
915 
916 	/*
917 	 * If this is an alternate root pool, then we automatically set the
918 	 * mountpoint of the root dataset to be '/'.
919 	 */
920 	if (nvlist_lookup_string(props, zpool_prop_to_name(ZPOOL_PROP_ALTROOT),
921 	    &altroot) == 0) {
922 		zfs_handle_t *zhp;
923 
924 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_DATASET)) != NULL);
925 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
926 		    "/") == 0);
927 
928 		zfs_close(zhp);
929 	}
930 
931 create_failed:
932 	zcmd_free_nvlists(&zc);
933 	nvlist_free(zc_props);
934 	nvlist_free(zc_fsprops);
935 	return (ret);
936 }
937 
938 /*
939  * Destroy the given pool.  It is up to the caller to ensure that there are no
940  * datasets left in the pool.
941  */
942 int
943 zpool_destroy(zpool_handle_t *zhp)
944 {
945 	zfs_cmd_t zc = { 0 };
946 	zfs_handle_t *zfp = NULL;
947 	libzfs_handle_t *hdl = zhp->zpool_hdl;
948 	char msg[1024];
949 
950 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
951 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
952 	    ZFS_TYPE_FILESYSTEM)) == NULL)
953 		return (-1);
954 
955 	if (zpool_remove_zvol_links(zhp) != 0)
956 		return (-1);
957 
958 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
959 
960 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
961 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
962 		    "cannot destroy '%s'"), zhp->zpool_name);
963 
964 		if (errno == EROFS) {
965 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
966 			    "one or more devices is read only"));
967 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
968 		} else {
969 			(void) zpool_standard_error(hdl, errno, msg);
970 		}
971 
972 		if (zfp)
973 			zfs_close(zfp);
974 		return (-1);
975 	}
976 
977 	if (zfp) {
978 		remove_mountpoint(zfp);
979 		zfs_close(zfp);
980 	}
981 
982 	return (0);
983 }
984 
985 /*
986  * Add the given vdevs to the pool.  The caller must have already performed the
987  * necessary verification to ensure that the vdev specification is well-formed.
988  */
989 int
990 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
991 {
992 	zfs_cmd_t zc = { 0 };
993 	int ret;
994 	libzfs_handle_t *hdl = zhp->zpool_hdl;
995 	char msg[1024];
996 	nvlist_t **spares, **l2cache;
997 	uint_t nspares, nl2cache;
998 
999 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1000 	    "cannot add to '%s'"), zhp->zpool_name);
1001 
1002 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1003 	    SPA_VERSION_SPARES &&
1004 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
1005 	    &spares, &nspares) == 0) {
1006 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1007 		    "upgraded to add hot spares"));
1008 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1009 	}
1010 
1011 	if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
1012 	    SPA_VERSION_L2CACHE &&
1013 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
1014 	    &l2cache, &nl2cache) == 0) {
1015 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
1016 		    "upgraded to add cache devices"));
1017 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
1018 	}
1019 
1020 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1021 		return (-1);
1022 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1023 
1024 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ADD, &zc) != 0) {
1025 		switch (errno) {
1026 		case EBUSY:
1027 			/*
1028 			 * This can happen if the user has specified the same
1029 			 * device multiple times.  We can't reliably detect this
1030 			 * until we try to add it and see we already have a
1031 			 * label.
1032 			 */
1033 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1034 			    "one or more vdevs refer to the same device"));
1035 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1036 			break;
1037 
1038 		case EOVERFLOW:
1039 			/*
1040 			 * This occurrs when one of the devices is below
1041 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
1042 			 * device was the problem device since there's no
1043 			 * reliable way to determine device size from userland.
1044 			 */
1045 			{
1046 				char buf[64];
1047 
1048 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
1049 
1050 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1051 				    "device is less than the minimum "
1052 				    "size (%s)"), buf);
1053 			}
1054 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1055 			break;
1056 
1057 		case ENOTSUP:
1058 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1059 			    "pool must be upgraded to add these vdevs"));
1060 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
1061 			break;
1062 
1063 		case EDOM:
1064 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1065 			    "root pool can not have multiple vdevs"
1066 			    " or separate logs"));
1067 			(void) zfs_error(hdl, EZFS_POOL_NOTSUP, msg);
1068 			break;
1069 
1070 		case ENOTBLK:
1071 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1072 			    "cache device must be a disk or disk slice"));
1073 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
1074 			break;
1075 
1076 		default:
1077 			(void) zpool_standard_error(hdl, errno, msg);
1078 		}
1079 
1080 		ret = -1;
1081 	} else {
1082 		ret = 0;
1083 	}
1084 
1085 	zcmd_free_nvlists(&zc);
1086 
1087 	return (ret);
1088 }
1089 
1090 /*
1091  * Exports the pool from the system.  The caller must ensure that there are no
1092  * mounted datasets in the pool.
1093  */
1094 int
1095 zpool_export(zpool_handle_t *zhp, boolean_t force)
1096 {
1097 	zfs_cmd_t zc = { 0 };
1098 	char msg[1024];
1099 
1100 	if (zpool_remove_zvol_links(zhp) != 0)
1101 		return (-1);
1102 
1103 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1104 	    "cannot export '%s'"), zhp->zpool_name);
1105 
1106 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1107 	zc.zc_cookie = force;
1108 
1109 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_EXPORT, &zc) != 0) {
1110 		switch (errno) {
1111 		case EXDEV:
1112 			zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
1113 			    "use '-f' to override the following errors:\n"
1114 			    "'%s' has an active shared spare which could be"
1115 			    " used by other pools once '%s' is exported."),
1116 			    zhp->zpool_name, zhp->zpool_name);
1117 			return (zfs_error(zhp->zpool_hdl, EZFS_ACTIVE_SPARE,
1118 			    msg));
1119 		default:
1120 			return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
1121 			    msg));
1122 		}
1123 	}
1124 
1125 	return (0);
1126 }
1127 
1128 /*
1129  * zpool_import() is a contracted interface. Should be kept the same
1130  * if possible.
1131  *
1132  * Applications should use zpool_import_props() to import a pool with
1133  * new properties value to be set.
1134  */
1135 int
1136 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1137     char *altroot)
1138 {
1139 	nvlist_t *props = NULL;
1140 	int ret;
1141 
1142 	if (altroot != NULL) {
1143 		if (nvlist_alloc(&props, NV_UNIQUE_NAME, 0) != 0) {
1144 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1145 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1146 			    newname));
1147 		}
1148 
1149 		if (nvlist_add_string(props,
1150 		    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), altroot) != 0) {
1151 			nvlist_free(props);
1152 			return (zfs_error_fmt(hdl, EZFS_NOMEM,
1153 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1154 			    newname));
1155 		}
1156 	}
1157 
1158 	ret = zpool_import_props(hdl, config, newname, props, B_FALSE);
1159 	if (props)
1160 		nvlist_free(props);
1161 	return (ret);
1162 }
1163 
1164 /*
1165  * Import the given pool using the known configuration and a list of
1166  * properties to be set. The configuration should have come from
1167  * zpool_find_import(). The 'newname' parameters control whether the pool
1168  * is imported with a different name.
1169  */
1170 int
1171 zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
1172     nvlist_t *props, boolean_t importfaulted)
1173 {
1174 	zfs_cmd_t zc = { 0 };
1175 	char *thename;
1176 	char *origname;
1177 	int ret;
1178 	char errbuf[1024];
1179 
1180 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
1181 	    &origname) == 0);
1182 
1183 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1184 	    "cannot import pool '%s'"), origname);
1185 
1186 	if (newname != NULL) {
1187 		if (!zpool_name_valid(hdl, B_FALSE, newname))
1188 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
1189 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1190 			    newname));
1191 		thename = (char *)newname;
1192 	} else {
1193 		thename = origname;
1194 	}
1195 
1196 	if (props) {
1197 		uint64_t version;
1198 
1199 		verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
1200 		    &version) == 0);
1201 
1202 		if ((props = zpool_valid_proplist(hdl, origname,
1203 		    props, version, B_TRUE, errbuf)) == NULL) {
1204 			return (-1);
1205 		} else if (zcmd_write_src_nvlist(hdl, &zc, props) != 0) {
1206 			nvlist_free(props);
1207 			return (-1);
1208 		}
1209 	}
1210 
1211 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
1212 
1213 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
1214 	    &zc.zc_guid) == 0);
1215 
1216 	if (zcmd_write_conf_nvlist(hdl, &zc, config) != 0) {
1217 		nvlist_free(props);
1218 		return (-1);
1219 	}
1220 
1221 	zc.zc_cookie = (uint64_t)importfaulted;
1222 	ret = 0;
1223 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
1224 		char desc[1024];
1225 		if (newname == NULL)
1226 			(void) snprintf(desc, sizeof (desc),
1227 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
1228 			    thename);
1229 		else
1230 			(void) snprintf(desc, sizeof (desc),
1231 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
1232 			    origname, thename);
1233 
1234 		switch (errno) {
1235 		case ENOTSUP:
1236 			/*
1237 			 * Unsupported version.
1238 			 */
1239 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
1240 			break;
1241 
1242 		case EINVAL:
1243 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
1244 			break;
1245 
1246 		default:
1247 			(void) zpool_standard_error(hdl, errno, desc);
1248 		}
1249 
1250 		ret = -1;
1251 	} else {
1252 		zpool_handle_t *zhp;
1253 
1254 		/*
1255 		 * This should never fail, but play it safe anyway.
1256 		 */
1257 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
1258 			ret = -1;
1259 		} else if (zhp != NULL) {
1260 			ret = zpool_create_zvol_links(zhp);
1261 			zpool_close(zhp);
1262 		}
1263 
1264 	}
1265 
1266 	zcmd_free_nvlists(&zc);
1267 	nvlist_free(props);
1268 
1269 	return (ret);
1270 }
1271 
1272 /*
1273  * Scrub the pool.
1274  */
1275 int
1276 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
1277 {
1278 	zfs_cmd_t zc = { 0 };
1279 	char msg[1024];
1280 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1281 
1282 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1283 	zc.zc_cookie = type;
1284 
1285 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_POOL_SCRUB, &zc) == 0)
1286 		return (0);
1287 
1288 	(void) snprintf(msg, sizeof (msg),
1289 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
1290 
1291 	if (errno == EBUSY)
1292 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
1293 	else
1294 		return (zpool_standard_error(hdl, errno, msg));
1295 }
1296 
1297 /*
1298  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
1299  * spare; but FALSE if its an INUSE spare.
1300  */
1301 static nvlist_t *
1302 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
1303     boolean_t *avail_spare, boolean_t *l2cache, boolean_t *log)
1304 {
1305 	uint_t c, children;
1306 	nvlist_t **child;
1307 	uint64_t theguid, present;
1308 	char *path;
1309 	uint64_t wholedisk = 0;
1310 	nvlist_t *ret;
1311 	uint64_t is_log;
1312 
1313 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
1314 
1315 	if (search == NULL &&
1316 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
1317 		/*
1318 		 * If the device has never been present since import, the only
1319 		 * reliable way to match the vdev is by GUID.
1320 		 */
1321 		if (theguid == guid)
1322 			return (nv);
1323 	} else if (search != NULL &&
1324 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1325 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1326 		    &wholedisk);
1327 		if (wholedisk) {
1328 			/*
1329 			 * For whole disks, the internal path has 's0', but the
1330 			 * path passed in by the user doesn't.
1331 			 */
1332 			if (strlen(search) == strlen(path) - 2 &&
1333 			    strncmp(search, path, strlen(search)) == 0)
1334 				return (nv);
1335 		} else if (strcmp(search, path) == 0) {
1336 			return (nv);
1337 		}
1338 	}
1339 
1340 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
1341 	    &child, &children) != 0)
1342 		return (NULL);
1343 
1344 	for (c = 0; c < children; c++) {
1345 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1346 		    avail_spare, l2cache, NULL)) != NULL) {
1347 			/*
1348 			 * The 'is_log' value is only set for the toplevel
1349 			 * vdev, not the leaf vdevs.  So we always lookup the
1350 			 * log device from the root of the vdev tree (where
1351 			 * 'log' is non-NULL).
1352 			 */
1353 			if (log != NULL &&
1354 			    nvlist_lookup_uint64(child[c],
1355 			    ZPOOL_CONFIG_IS_LOG, &is_log) == 0 &&
1356 			    is_log) {
1357 				*log = B_TRUE;
1358 			}
1359 			return (ret);
1360 		}
1361 	}
1362 
1363 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
1364 	    &child, &children) == 0) {
1365 		for (c = 0; c < children; c++) {
1366 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1367 			    avail_spare, l2cache, NULL)) != NULL) {
1368 				*avail_spare = B_TRUE;
1369 				return (ret);
1370 			}
1371 		}
1372 	}
1373 
1374 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
1375 	    &child, &children) == 0) {
1376 		for (c = 0; c < children; c++) {
1377 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
1378 			    avail_spare, l2cache, NULL)) != NULL) {
1379 				*l2cache = B_TRUE;
1380 				return (ret);
1381 			}
1382 		}
1383 	}
1384 
1385 	return (NULL);
1386 }
1387 
1388 nvlist_t *
1389 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
1390     boolean_t *l2cache, boolean_t *log)
1391 {
1392 	char buf[MAXPATHLEN];
1393 	const char *search;
1394 	char *end;
1395 	nvlist_t *nvroot;
1396 	uint64_t guid;
1397 
1398 	guid = strtoull(path, &end, 10);
1399 	if (guid != 0 && *end == '\0') {
1400 		search = NULL;
1401 	} else if (path[0] != '/') {
1402 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
1403 		search = buf;
1404 	} else {
1405 		search = path;
1406 	}
1407 
1408 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1409 	    &nvroot) == 0);
1410 
1411 	*avail_spare = B_FALSE;
1412 	*l2cache = B_FALSE;
1413 	if (log != NULL)
1414 		*log = B_FALSE;
1415 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare,
1416 	    l2cache, log));
1417 }
1418 
1419 /*
1420  * Returns TRUE if the given guid corresponds to the given type.
1421  * This is used to check for hot spares (INUSE or not), and level 2 cache
1422  * devices.
1423  */
1424 static boolean_t
1425 is_guid_type(zpool_handle_t *zhp, uint64_t guid, const char *type)
1426 {
1427 	uint64_t target_guid;
1428 	nvlist_t *nvroot;
1429 	nvlist_t **list;
1430 	uint_t count;
1431 	int i;
1432 
1433 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
1434 	    &nvroot) == 0);
1435 	if (nvlist_lookup_nvlist_array(nvroot, type, &list, &count) == 0) {
1436 		for (i = 0; i < count; i++) {
1437 			verify(nvlist_lookup_uint64(list[i], ZPOOL_CONFIG_GUID,
1438 			    &target_guid) == 0);
1439 			if (guid == target_guid)
1440 				return (B_TRUE);
1441 		}
1442 	}
1443 
1444 	return (B_FALSE);
1445 }
1446 
1447 /*
1448  * Bring the specified vdev online.   The 'flags' parameter is a set of the
1449  * ZFS_ONLINE_* flags.
1450  */
1451 int
1452 zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
1453     vdev_state_t *newstate)
1454 {
1455 	zfs_cmd_t zc = { 0 };
1456 	char msg[1024];
1457 	nvlist_t *tgt;
1458 	boolean_t avail_spare, l2cache;
1459 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1460 
1461 	(void) snprintf(msg, sizeof (msg),
1462 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
1463 
1464 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1465 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1466 	    NULL)) == NULL)
1467 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1468 
1469 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1470 
1471 	if (avail_spare ||
1472 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1473 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1474 
1475 	zc.zc_cookie = VDEV_STATE_ONLINE;
1476 	zc.zc_obj = flags;
1477 
1478 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) != 0)
1479 		return (zpool_standard_error(hdl, errno, msg));
1480 
1481 	*newstate = zc.zc_cookie;
1482 	return (0);
1483 }
1484 
1485 /*
1486  * Take the specified vdev offline
1487  */
1488 int
1489 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
1490 {
1491 	zfs_cmd_t zc = { 0 };
1492 	char msg[1024];
1493 	nvlist_t *tgt;
1494 	boolean_t avail_spare, l2cache;
1495 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1496 
1497 	(void) snprintf(msg, sizeof (msg),
1498 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
1499 
1500 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1501 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1502 	    NULL)) == NULL)
1503 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1504 
1505 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1506 
1507 	if (avail_spare ||
1508 	    is_guid_type(zhp, zc.zc_guid, ZPOOL_CONFIG_SPARES) == B_TRUE)
1509 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1510 
1511 	zc.zc_cookie = VDEV_STATE_OFFLINE;
1512 	zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
1513 
1514 	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1515 		return (0);
1516 
1517 	switch (errno) {
1518 	case EBUSY:
1519 
1520 		/*
1521 		 * There are no other replicas of this device.
1522 		 */
1523 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1524 
1525 	default:
1526 		return (zpool_standard_error(hdl, errno, msg));
1527 	}
1528 }
1529 
1530 /*
1531  * Mark the given vdev faulted.
1532  */
1533 int
1534 zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
1535 {
1536 	zfs_cmd_t zc = { 0 };
1537 	char msg[1024];
1538 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1539 
1540 	(void) snprintf(msg, sizeof (msg),
1541 	    dgettext(TEXT_DOMAIN, "cannot fault %llu"), guid);
1542 
1543 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1544 	zc.zc_guid = guid;
1545 	zc.zc_cookie = VDEV_STATE_FAULTED;
1546 
1547 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1548 		return (0);
1549 
1550 	switch (errno) {
1551 	case EBUSY:
1552 
1553 		/*
1554 		 * There are no other replicas of this device.
1555 		 */
1556 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
1557 
1558 	default:
1559 		return (zpool_standard_error(hdl, errno, msg));
1560 	}
1561 
1562 }
1563 
1564 /*
1565  * Mark the given vdev degraded.
1566  */
1567 int
1568 zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid)
1569 {
1570 	zfs_cmd_t zc = { 0 };
1571 	char msg[1024];
1572 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1573 
1574 	(void) snprintf(msg, sizeof (msg),
1575 	    dgettext(TEXT_DOMAIN, "cannot degrade %llu"), guid);
1576 
1577 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1578 	zc.zc_guid = guid;
1579 	zc.zc_cookie = VDEV_STATE_DEGRADED;
1580 
1581 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
1582 		return (0);
1583 
1584 	return (zpool_standard_error(hdl, errno, msg));
1585 }
1586 
1587 /*
1588  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
1589  * a hot spare.
1590  */
1591 static boolean_t
1592 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
1593 {
1594 	nvlist_t **child;
1595 	uint_t c, children;
1596 	char *type;
1597 
1598 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
1599 	    &children) == 0) {
1600 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
1601 		    &type) == 0);
1602 
1603 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
1604 		    children == 2 && child[which] == tgt)
1605 			return (B_TRUE);
1606 
1607 		for (c = 0; c < children; c++)
1608 			if (is_replacing_spare(child[c], tgt, which))
1609 				return (B_TRUE);
1610 	}
1611 
1612 	return (B_FALSE);
1613 }
1614 
1615 /*
1616  * Attach new_disk (fully described by nvroot) to old_disk.
1617  * If 'replacing' is specified, the new disk will replace the old one.
1618  */
1619 int
1620 zpool_vdev_attach(zpool_handle_t *zhp,
1621     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
1622 {
1623 	zfs_cmd_t zc = { 0 };
1624 	char msg[1024];
1625 	int ret;
1626 	nvlist_t *tgt;
1627 	boolean_t avail_spare, l2cache, islog;
1628 	uint64_t val;
1629 	char *path, *newname;
1630 	nvlist_t **child;
1631 	uint_t children;
1632 	nvlist_t *config_root;
1633 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1634 
1635 	if (replacing)
1636 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1637 		    "cannot replace %s with %s"), old_disk, new_disk);
1638 	else
1639 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
1640 		    "cannot attach %s to %s"), new_disk, old_disk);
1641 
1642 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1643 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
1644 	    &islog)) == 0)
1645 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1646 
1647 	if (avail_spare)
1648 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1649 
1650 	if (l2cache)
1651 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1652 
1653 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1654 	zc.zc_cookie = replacing;
1655 
1656 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1657 	    &child, &children) != 0 || children != 1) {
1658 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1659 		    "new device must be a single disk"));
1660 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1661 	}
1662 
1663 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1664 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1665 
1666 	if ((newname = zpool_vdev_name(NULL, NULL, child[0])) == NULL)
1667 		return (-1);
1668 
1669 	/*
1670 	 * If the target is a hot spare that has been swapped in, we can only
1671 	 * replace it with another hot spare.
1672 	 */
1673 	if (replacing &&
1674 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1675 	    (zpool_find_vdev(zhp, newname, &avail_spare, &l2cache,
1676 	    NULL) == NULL || !avail_spare) &&
1677 	    is_replacing_spare(config_root, tgt, 1)) {
1678 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1679 		    "can only be replaced by another hot spare"));
1680 		free(newname);
1681 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1682 	}
1683 
1684 	/*
1685 	 * If we are attempting to replace a spare, it canot be applied to an
1686 	 * already spared device.
1687 	 */
1688 	if (replacing &&
1689 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1690 	    zpool_find_vdev(zhp, newname, &avail_spare,
1691 	    &l2cache, NULL) != NULL && avail_spare &&
1692 	    is_replacing_spare(config_root, tgt, 0)) {
1693 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1694 		    "device has already been replaced with a spare"));
1695 		free(newname);
1696 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1697 	}
1698 
1699 	free(newname);
1700 
1701 	if (zcmd_write_conf_nvlist(hdl, &zc, nvroot) != 0)
1702 		return (-1);
1703 
1704 	ret = zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_ATTACH, &zc);
1705 
1706 	zcmd_free_nvlists(&zc);
1707 
1708 	if (ret == 0)
1709 		return (0);
1710 
1711 	switch (errno) {
1712 	case ENOTSUP:
1713 		/*
1714 		 * Can't attach to or replace this type of vdev.
1715 		 */
1716 		if (replacing) {
1717 			if (islog)
1718 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1719 				    "cannot replace a log with a spare"));
1720 			else
1721 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1722 				    "cannot replace a replacing device"));
1723 		} else {
1724 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1725 			    "can only attach to mirrors and top-level "
1726 			    "disks"));
1727 		}
1728 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1729 		break;
1730 
1731 	case EINVAL:
1732 		/*
1733 		 * The new device must be a single disk.
1734 		 */
1735 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1736 		    "new device must be a single disk"));
1737 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1738 		break;
1739 
1740 	case EBUSY:
1741 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1742 		    new_disk);
1743 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1744 		break;
1745 
1746 	case EOVERFLOW:
1747 		/*
1748 		 * The new device is too small.
1749 		 */
1750 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1751 		    "device is too small"));
1752 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1753 		break;
1754 
1755 	case EDOM:
1756 		/*
1757 		 * The new device has a different alignment requirement.
1758 		 */
1759 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1760 		    "devices have different sector alignment"));
1761 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1762 		break;
1763 
1764 	case ENAMETOOLONG:
1765 		/*
1766 		 * The resulting top-level vdev spec won't fit in the label.
1767 		 */
1768 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1769 		break;
1770 
1771 	default:
1772 		(void) zpool_standard_error(hdl, errno, msg);
1773 	}
1774 
1775 	return (-1);
1776 }
1777 
1778 /*
1779  * Detach the specified device.
1780  */
1781 int
1782 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1783 {
1784 	zfs_cmd_t zc = { 0 };
1785 	char msg[1024];
1786 	nvlist_t *tgt;
1787 	boolean_t avail_spare, l2cache;
1788 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1789 
1790 	(void) snprintf(msg, sizeof (msg),
1791 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1792 
1793 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1794 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1795 	    NULL)) == 0)
1796 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1797 
1798 	if (avail_spare)
1799 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1800 
1801 	if (l2cache)
1802 		return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
1803 
1804 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1805 
1806 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1807 		return (0);
1808 
1809 	switch (errno) {
1810 
1811 	case ENOTSUP:
1812 		/*
1813 		 * Can't detach from this type of vdev.
1814 		 */
1815 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1816 		    "applicable to mirror and replacing vdevs"));
1817 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1818 		break;
1819 
1820 	case EBUSY:
1821 		/*
1822 		 * There are no other replicas of this device.
1823 		 */
1824 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1825 		break;
1826 
1827 	default:
1828 		(void) zpool_standard_error(hdl, errno, msg);
1829 	}
1830 
1831 	return (-1);
1832 }
1833 
1834 /*
1835  * Remove the given device.  Currently, this is supported only for hot spares
1836  * and level 2 cache devices.
1837  */
1838 int
1839 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1840 {
1841 	zfs_cmd_t zc = { 0 };
1842 	char msg[1024];
1843 	nvlist_t *tgt;
1844 	boolean_t avail_spare, l2cache;
1845 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1846 
1847 	(void) snprintf(msg, sizeof (msg),
1848 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1849 
1850 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1851 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
1852 	    NULL)) == 0)
1853 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1854 
1855 	if (!avail_spare && !l2cache) {
1856 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1857 		    "only inactive hot spares or cache devices "
1858 		    "can be removed"));
1859 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1860 	}
1861 
1862 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1863 
1864 	if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1865 		return (0);
1866 
1867 	return (zpool_standard_error(hdl, errno, msg));
1868 }
1869 
1870 /*
1871  * Clear the errors for the pool, or the particular device if specified.
1872  */
1873 int
1874 zpool_clear(zpool_handle_t *zhp, const char *path)
1875 {
1876 	zfs_cmd_t zc = { 0 };
1877 	char msg[1024];
1878 	nvlist_t *tgt;
1879 	boolean_t avail_spare, l2cache;
1880 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1881 
1882 	if (path)
1883 		(void) snprintf(msg, sizeof (msg),
1884 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1885 		    path);
1886 	else
1887 		(void) snprintf(msg, sizeof (msg),
1888 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1889 		    zhp->zpool_name);
1890 
1891 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1892 	if (path) {
1893 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
1894 		    &l2cache, NULL)) == 0)
1895 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1896 
1897 		/*
1898 		 * Don't allow error clearing for hot spares.  Do allow
1899 		 * error clearing for l2cache devices.
1900 		 */
1901 		if (avail_spare)
1902 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1903 
1904 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1905 		    &zc.zc_guid) == 0);
1906 	}
1907 
1908 	if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
1909 		return (0);
1910 
1911 	return (zpool_standard_error(hdl, errno, msg));
1912 }
1913 
1914 /*
1915  * Similar to zpool_clear(), but takes a GUID (used by fmd).
1916  */
1917 int
1918 zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
1919 {
1920 	zfs_cmd_t zc = { 0 };
1921 	char msg[1024];
1922 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1923 
1924 	(void) snprintf(msg, sizeof (msg),
1925 	    dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
1926 	    guid);
1927 
1928 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1929 	zc.zc_guid = guid;
1930 
1931 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1932 		return (0);
1933 
1934 	return (zpool_standard_error(hdl, errno, msg));
1935 }
1936 
1937 /*
1938  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1939  * hierarchy.
1940  */
1941 int
1942 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1943     void *data)
1944 {
1945 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1946 	char (*paths)[MAXPATHLEN];
1947 	size_t size = 4;
1948 	int curr, fd, base, ret = 0;
1949 	DIR *dirp;
1950 	struct dirent *dp;
1951 	struct stat st;
1952 
1953 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1954 		return (errno == ENOENT ? 0 : -1);
1955 
1956 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1957 		int err = errno;
1958 		(void) close(base);
1959 		return (err == ENOENT ? 0 : -1);
1960 	}
1961 
1962 	/*
1963 	 * Oddly this wasn't a directory -- ignore that failure since we
1964 	 * know there are no links lower in the (non-existant) hierarchy.
1965 	 */
1966 	if (!S_ISDIR(st.st_mode)) {
1967 		(void) close(base);
1968 		return (0);
1969 	}
1970 
1971 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1972 		(void) close(base);
1973 		return (-1);
1974 	}
1975 
1976 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1977 	curr = 0;
1978 
1979 	while (curr >= 0) {
1980 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1981 			goto err;
1982 
1983 		if (S_ISDIR(st.st_mode)) {
1984 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1985 				goto err;
1986 
1987 			if ((dirp = fdopendir(fd)) == NULL) {
1988 				(void) close(fd);
1989 				goto err;
1990 			}
1991 
1992 			while ((dp = readdir(dirp)) != NULL) {
1993 				if (dp->d_name[0] == '.')
1994 					continue;
1995 
1996 				if (curr + 1 == size) {
1997 					paths = zfs_realloc(hdl, paths,
1998 					    size * sizeof (paths[0]),
1999 					    size * 2 * sizeof (paths[0]));
2000 					if (paths == NULL) {
2001 						(void) closedir(dirp);
2002 						(void) close(fd);
2003 						goto err;
2004 					}
2005 
2006 					size *= 2;
2007 				}
2008 
2009 				(void) strlcpy(paths[curr + 1], paths[curr],
2010 				    sizeof (paths[curr + 1]));
2011 				(void) strlcat(paths[curr], "/",
2012 				    sizeof (paths[curr]));
2013 				(void) strlcat(paths[curr], dp->d_name,
2014 				    sizeof (paths[curr]));
2015 				curr++;
2016 			}
2017 
2018 			(void) closedir(dirp);
2019 
2020 		} else {
2021 			if ((ret = cb(paths[curr], data)) != 0)
2022 				break;
2023 		}
2024 
2025 		curr--;
2026 	}
2027 
2028 	free(paths);
2029 	(void) close(base);
2030 
2031 	return (ret);
2032 
2033 err:
2034 	free(paths);
2035 	(void) close(base);
2036 	return (-1);
2037 }
2038 
2039 typedef struct zvol_cb {
2040 	zpool_handle_t *zcb_pool;
2041 	boolean_t zcb_create;
2042 } zvol_cb_t;
2043 
2044 /*ARGSUSED*/
2045 static int
2046 do_zvol_create(zfs_handle_t *zhp, void *data)
2047 {
2048 	int ret = 0;
2049 
2050 	if (ZFS_IS_VOLUME(zhp)) {
2051 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
2052 		ret = zfs_iter_snapshots(zhp, do_zvol_create, NULL);
2053 	}
2054 
2055 	if (ret == 0)
2056 		ret = zfs_iter_filesystems(zhp, do_zvol_create, NULL);
2057 
2058 	zfs_close(zhp);
2059 
2060 	return (ret);
2061 }
2062 
2063 /*
2064  * Iterate over all zvols in the pool and make any necessary minor nodes.
2065  */
2066 int
2067 zpool_create_zvol_links(zpool_handle_t *zhp)
2068 {
2069 	zfs_handle_t *zfp;
2070 	int ret;
2071 
2072 	/*
2073 	 * If the pool is unavailable, just return success.
2074 	 */
2075 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
2076 	    zhp->zpool_name)) == NULL)
2077 		return (0);
2078 
2079 	ret = zfs_iter_filesystems(zfp, do_zvol_create, NULL);
2080 
2081 	zfs_close(zfp);
2082 	return (ret);
2083 }
2084 
2085 static int
2086 do_zvol_remove(const char *dataset, void *data)
2087 {
2088 	zpool_handle_t *zhp = data;
2089 
2090 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
2091 }
2092 
2093 /*
2094  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
2095  * by examining the /dev links so that a corrupted pool doesn't impede this
2096  * operation.
2097  */
2098 int
2099 zpool_remove_zvol_links(zpool_handle_t *zhp)
2100 {
2101 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
2102 }
2103 
2104 /*
2105  * Convert from a devid string to a path.
2106  */
2107 static char *
2108 devid_to_path(char *devid_str)
2109 {
2110 	ddi_devid_t devid;
2111 	char *minor;
2112 	char *path;
2113 	devid_nmlist_t *list = NULL;
2114 	int ret;
2115 
2116 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
2117 		return (NULL);
2118 
2119 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
2120 
2121 	devid_str_free(minor);
2122 	devid_free(devid);
2123 
2124 	if (ret != 0)
2125 		return (NULL);
2126 
2127 	if ((path = strdup(list[0].devname)) == NULL)
2128 		return (NULL);
2129 
2130 	devid_free_nmlist(list);
2131 
2132 	return (path);
2133 }
2134 
2135 /*
2136  * Convert from a path to a devid string.
2137  */
2138 static char *
2139 path_to_devid(const char *path)
2140 {
2141 	int fd;
2142 	ddi_devid_t devid;
2143 	char *minor, *ret;
2144 
2145 	if ((fd = open(path, O_RDONLY)) < 0)
2146 		return (NULL);
2147 
2148 	minor = NULL;
2149 	ret = NULL;
2150 	if (devid_get(fd, &devid) == 0) {
2151 		if (devid_get_minor_name(fd, &minor) == 0)
2152 			ret = devid_str_encode(devid, minor);
2153 		if (minor != NULL)
2154 			devid_str_free(minor);
2155 		devid_free(devid);
2156 	}
2157 	(void) close(fd);
2158 
2159 	return (ret);
2160 }
2161 
2162 /*
2163  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
2164  * ignore any failure here, since a common case is for an unprivileged user to
2165  * type 'zpool status', and we'll display the correct information anyway.
2166  */
2167 static void
2168 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
2169 {
2170 	zfs_cmd_t zc = { 0 };
2171 
2172 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2173 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
2174 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2175 	    &zc.zc_guid) == 0);
2176 
2177 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
2178 }
2179 
2180 /*
2181  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
2182  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
2183  * We also check if this is a whole disk, in which case we strip off the
2184  * trailing 's0' slice name.
2185  *
2186  * This routine is also responsible for identifying when disks have been
2187  * reconfigured in a new location.  The kernel will have opened the device by
2188  * devid, but the path will still refer to the old location.  To catch this, we
2189  * first do a path -> devid translation (which is fast for the common case).  If
2190  * the devid matches, we're done.  If not, we do a reverse devid -> path
2191  * translation and issue the appropriate ioctl() to update the path of the vdev.
2192  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
2193  * of these checks.
2194  */
2195 char *
2196 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
2197 {
2198 	char *path, *devid;
2199 	uint64_t value;
2200 	char buf[64];
2201 	vdev_stat_t *vs;
2202 	uint_t vsc;
2203 
2204 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
2205 	    &value) == 0) {
2206 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
2207 		    &value) == 0);
2208 		(void) snprintf(buf, sizeof (buf), "%llu",
2209 		    (u_longlong_t)value);
2210 		path = buf;
2211 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
2212 
2213 		/*
2214 		 * If the device is dead (faulted, offline, etc) then don't
2215 		 * bother opening it.  Otherwise we may be forcing the user to
2216 		 * open a misbehaving device, which can have undesirable
2217 		 * effects.
2218 		 */
2219 		if ((nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
2220 		    (uint64_t **)&vs, &vsc) != 0 ||
2221 		    vs->vs_state >= VDEV_STATE_DEGRADED) &&
2222 		    zhp != NULL &&
2223 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
2224 			/*
2225 			 * Determine if the current path is correct.
2226 			 */
2227 			char *newdevid = path_to_devid(path);
2228 
2229 			if (newdevid == NULL ||
2230 			    strcmp(devid, newdevid) != 0) {
2231 				char *newpath;
2232 
2233 				if ((newpath = devid_to_path(devid)) != NULL) {
2234 					/*
2235 					 * Update the path appropriately.
2236 					 */
2237 					set_path(zhp, nv, newpath);
2238 					if (nvlist_add_string(nv,
2239 					    ZPOOL_CONFIG_PATH, newpath) == 0)
2240 						verify(nvlist_lookup_string(nv,
2241 						    ZPOOL_CONFIG_PATH,
2242 						    &path) == 0);
2243 					free(newpath);
2244 				}
2245 			}
2246 
2247 			if (newdevid)
2248 				devid_str_free(newdevid);
2249 		}
2250 
2251 		if (strncmp(path, "/dev/dsk/", 9) == 0)
2252 			path += 9;
2253 
2254 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
2255 		    &value) == 0 && value) {
2256 			char *tmp = zfs_strdup(hdl, path);
2257 			if (tmp == NULL)
2258 				return (NULL);
2259 			tmp[strlen(path) - 2] = '\0';
2260 			return (tmp);
2261 		}
2262 	} else {
2263 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
2264 
2265 		/*
2266 		 * If it's a raidz device, we need to stick in the parity level.
2267 		 */
2268 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
2269 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
2270 			    &value) == 0);
2271 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
2272 			    (u_longlong_t)value);
2273 			path = buf;
2274 		}
2275 	}
2276 
2277 	return (zfs_strdup(hdl, path));
2278 }
2279 
2280 static int
2281 zbookmark_compare(const void *a, const void *b)
2282 {
2283 	return (memcmp(a, b, sizeof (zbookmark_t)));
2284 }
2285 
2286 /*
2287  * Retrieve the persistent error log, uniquify the members, and return to the
2288  * caller.
2289  */
2290 int
2291 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
2292 {
2293 	zfs_cmd_t zc = { 0 };
2294 	uint64_t count;
2295 	zbookmark_t *zb = NULL;
2296 	int i;
2297 
2298 	/*
2299 	 * Retrieve the raw error list from the kernel.  If the number of errors
2300 	 * has increased, allocate more space and continue until we get the
2301 	 * entire list.
2302 	 */
2303 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
2304 	    &count) == 0);
2305 	if (count == 0)
2306 		return (0);
2307 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
2308 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
2309 		return (-1);
2310 	zc.zc_nvlist_dst_size = count;
2311 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2312 	for (;;) {
2313 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
2314 		    &zc) != 0) {
2315 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
2316 			if (errno == ENOMEM) {
2317 				count = zc.zc_nvlist_dst_size;
2318 				if ((zc.zc_nvlist_dst = (uintptr_t)
2319 				    zfs_alloc(zhp->zpool_hdl, count *
2320 				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
2321 					return (-1);
2322 			} else {
2323 				return (-1);
2324 			}
2325 		} else {
2326 			break;
2327 		}
2328 	}
2329 
2330 	/*
2331 	 * Sort the resulting bookmarks.  This is a little confusing due to the
2332 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
2333 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
2334 	 * _not_ copied as part of the process.  So we point the start of our
2335 	 * array appropriate and decrement the total number of elements.
2336 	 */
2337 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
2338 	    zc.zc_nvlist_dst_size;
2339 	count -= zc.zc_nvlist_dst_size;
2340 
2341 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
2342 
2343 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
2344 
2345 	/*
2346 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
2347 	 */
2348 	for (i = 0; i < count; i++) {
2349 		nvlist_t *nv;
2350 
2351 		/* ignoring zb_blkid and zb_level for now */
2352 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
2353 		    zb[i-1].zb_object == zb[i].zb_object)
2354 			continue;
2355 
2356 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
2357 			goto nomem;
2358 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
2359 		    zb[i].zb_objset) != 0) {
2360 			nvlist_free(nv);
2361 			goto nomem;
2362 		}
2363 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
2364 		    zb[i].zb_object) != 0) {
2365 			nvlist_free(nv);
2366 			goto nomem;
2367 		}
2368 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
2369 			nvlist_free(nv);
2370 			goto nomem;
2371 		}
2372 		nvlist_free(nv);
2373 	}
2374 
2375 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2376 	return (0);
2377 
2378 nomem:
2379 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
2380 	return (no_memory(zhp->zpool_hdl));
2381 }
2382 
2383 /*
2384  * Upgrade a ZFS pool to the latest on-disk version.
2385  */
2386 int
2387 zpool_upgrade(zpool_handle_t *zhp, uint64_t new_version)
2388 {
2389 	zfs_cmd_t zc = { 0 };
2390 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2391 
2392 	(void) strcpy(zc.zc_name, zhp->zpool_name);
2393 	zc.zc_cookie = new_version;
2394 
2395 	if (zfs_ioctl(hdl, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
2396 		return (zpool_standard_error_fmt(hdl, errno,
2397 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
2398 		    zhp->zpool_name));
2399 	return (0);
2400 }
2401 
2402 void
2403 zpool_set_history_str(const char *subcommand, int argc, char **argv,
2404     char *history_str)
2405 {
2406 	int i;
2407 
2408 	(void) strlcpy(history_str, subcommand, HIS_MAX_RECORD_LEN);
2409 	for (i = 1; i < argc; i++) {
2410 		if (strlen(history_str) + 1 + strlen(argv[i]) >
2411 		    HIS_MAX_RECORD_LEN)
2412 			break;
2413 		(void) strlcat(history_str, " ", HIS_MAX_RECORD_LEN);
2414 		(void) strlcat(history_str, argv[i], HIS_MAX_RECORD_LEN);
2415 	}
2416 }
2417 
2418 /*
2419  * Stage command history for logging.
2420  */
2421 int
2422 zpool_stage_history(libzfs_handle_t *hdl, const char *history_str)
2423 {
2424 	if (history_str == NULL)
2425 		return (EINVAL);
2426 
2427 	if (strlen(history_str) > HIS_MAX_RECORD_LEN)
2428 		return (EINVAL);
2429 
2430 	if (hdl->libzfs_log_str != NULL)
2431 		free(hdl->libzfs_log_str);
2432 
2433 	if ((hdl->libzfs_log_str = strdup(history_str)) == NULL)
2434 		return (no_memory(hdl));
2435 
2436 	return (0);
2437 }
2438 
2439 /*
2440  * Perform ioctl to get some command history of a pool.
2441  *
2442  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
2443  * logical offset of the history buffer to start reading from.
2444  *
2445  * Upon return, 'off' is the next logical offset to read from and
2446  * 'len' is the actual amount of bytes read into 'buf'.
2447  */
2448 static int
2449 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
2450 {
2451 	zfs_cmd_t zc = { 0 };
2452 	libzfs_handle_t *hdl = zhp->zpool_hdl;
2453 
2454 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2455 
2456 	zc.zc_history = (uint64_t)(uintptr_t)buf;
2457 	zc.zc_history_len = *len;
2458 	zc.zc_history_offset = *off;
2459 
2460 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
2461 		switch (errno) {
2462 		case EPERM:
2463 			return (zfs_error_fmt(hdl, EZFS_PERM,
2464 			    dgettext(TEXT_DOMAIN,
2465 			    "cannot show history for pool '%s'"),
2466 			    zhp->zpool_name));
2467 		case ENOENT:
2468 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
2469 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2470 			    "'%s'"), zhp->zpool_name));
2471 		case ENOTSUP:
2472 			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
2473 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
2474 			    "'%s', pool must be upgraded"), zhp->zpool_name));
2475 		default:
2476 			return (zpool_standard_error_fmt(hdl, errno,
2477 			    dgettext(TEXT_DOMAIN,
2478 			    "cannot get history for '%s'"), zhp->zpool_name));
2479 		}
2480 	}
2481 
2482 	*len = zc.zc_history_len;
2483 	*off = zc.zc_history_offset;
2484 
2485 	return (0);
2486 }
2487 
2488 /*
2489  * Process the buffer of nvlists, unpacking and storing each nvlist record
2490  * into 'records'.  'leftover' is set to the number of bytes that weren't
2491  * processed as there wasn't a complete record.
2492  */
2493 static int
2494 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
2495     nvlist_t ***records, uint_t *numrecords)
2496 {
2497 	uint64_t reclen;
2498 	nvlist_t *nv;
2499 	int i;
2500 
2501 	while (bytes_read > sizeof (reclen)) {
2502 
2503 		/* get length of packed record (stored as little endian) */
2504 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
2505 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
2506 
2507 		if (bytes_read < sizeof (reclen) + reclen)
2508 			break;
2509 
2510 		/* unpack record */
2511 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
2512 			return (ENOMEM);
2513 		bytes_read -= sizeof (reclen) + reclen;
2514 		buf += sizeof (reclen) + reclen;
2515 
2516 		/* add record to nvlist array */
2517 		(*numrecords)++;
2518 		if (ISP2(*numrecords + 1)) {
2519 			*records = realloc(*records,
2520 			    *numrecords * 2 * sizeof (nvlist_t *));
2521 		}
2522 		(*records)[*numrecords - 1] = nv;
2523 	}
2524 
2525 	*leftover = bytes_read;
2526 	return (0);
2527 }
2528 
2529 #define	HIS_BUF_LEN	(128*1024)
2530 
2531 /*
2532  * Retrieve the command history of a pool.
2533  */
2534 int
2535 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
2536 {
2537 	char buf[HIS_BUF_LEN];
2538 	uint64_t off = 0;
2539 	nvlist_t **records = NULL;
2540 	uint_t numrecords = 0;
2541 	int err, i;
2542 
2543 	do {
2544 		uint64_t bytes_read = sizeof (buf);
2545 		uint64_t leftover;
2546 
2547 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
2548 			break;
2549 
2550 		/* if nothing else was read in, we're at EOF, just return */
2551 		if (!bytes_read)
2552 			break;
2553 
2554 		if ((err = zpool_history_unpack(buf, bytes_read,
2555 		    &leftover, &records, &numrecords)) != 0)
2556 			break;
2557 		off -= leftover;
2558 
2559 		/* CONSTCOND */
2560 	} while (1);
2561 
2562 	if (!err) {
2563 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
2564 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
2565 		    records, numrecords) == 0);
2566 	}
2567 	for (i = 0; i < numrecords; i++)
2568 		nvlist_free(records[i]);
2569 	free(records);
2570 
2571 	return (err);
2572 }
2573 
2574 void
2575 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
2576     char *pathname, size_t len)
2577 {
2578 	zfs_cmd_t zc = { 0 };
2579 	boolean_t mounted = B_FALSE;
2580 	char *mntpnt = NULL;
2581 	char dsname[MAXNAMELEN];
2582 
2583 	if (dsobj == 0) {
2584 		/* special case for the MOS */
2585 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
2586 		return;
2587 	}
2588 
2589 	/* get the dataset's name */
2590 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
2591 	zc.zc_obj = dsobj;
2592 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
2593 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
2594 		/* just write out a path of two object numbers */
2595 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
2596 		    dsobj, obj);
2597 		return;
2598 	}
2599 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
2600 
2601 	/* find out if the dataset is mounted */
2602 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
2603 
2604 	/* get the corrupted object's path */
2605 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
2606 	zc.zc_obj = obj;
2607 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
2608 	    &zc) == 0) {
2609 		if (mounted) {
2610 			(void) snprintf(pathname, len, "%s%s", mntpnt,
2611 			    zc.zc_value);
2612 		} else {
2613 			(void) snprintf(pathname, len, "%s:%s",
2614 			    dsname, zc.zc_value);
2615 		}
2616 	} else {
2617 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
2618 	}
2619 	free(mntpnt);
2620 }
2621 
2622 #define	RDISK_ROOT	"/dev/rdsk"
2623 #define	BACKUP_SLICE	"s2"
2624 /*
2625  * Don't start the slice at the default block of 34; many storage
2626  * devices will use a stripe width of 128k, so start there instead.
2627  */
2628 #define	NEW_START_BLOCK	256
2629 
2630 /*
2631  * Read the EFI label from the config, if a label does not exist then
2632  * pass back the error to the caller. If the caller has passed a non-NULL
2633  * diskaddr argument then we set it to the starting address of the EFI
2634  * partition.
2635  */
2636 static int
2637 read_efi_label(nvlist_t *config, diskaddr_t *sb)
2638 {
2639 	char *path;
2640 	int fd;
2641 	char diskname[MAXPATHLEN];
2642 	int err = -1;
2643 
2644 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
2645 		return (err);
2646 
2647 	(void) snprintf(diskname, sizeof (diskname), "%s%s", RDISK_ROOT,
2648 	    strrchr(path, '/'));
2649 	if ((fd = open(diskname, O_RDONLY|O_NDELAY)) >= 0) {
2650 		struct dk_gpt *vtoc;
2651 
2652 		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
2653 			if (sb != NULL)
2654 				*sb = vtoc->efi_parts[0].p_start;
2655 			efi_free(vtoc);
2656 		}
2657 		(void) close(fd);
2658 	}
2659 	return (err);
2660 }
2661 
2662 /*
2663  * determine where a partition starts on a disk in the current
2664  * configuration
2665  */
2666 static diskaddr_t
2667 find_start_block(nvlist_t *config)
2668 {
2669 	nvlist_t **child;
2670 	uint_t c, children;
2671 	diskaddr_t sb = MAXOFFSET_T;
2672 	uint64_t wholedisk;
2673 
2674 	if (nvlist_lookup_nvlist_array(config,
2675 	    ZPOOL_CONFIG_CHILDREN, &child, &children) != 0) {
2676 		if (nvlist_lookup_uint64(config,
2677 		    ZPOOL_CONFIG_WHOLE_DISK,
2678 		    &wholedisk) != 0 || !wholedisk) {
2679 			return (MAXOFFSET_T);
2680 		}
2681 		if (read_efi_label(config, &sb) < 0)
2682 			sb = MAXOFFSET_T;
2683 		return (sb);
2684 	}
2685 
2686 	for (c = 0; c < children; c++) {
2687 		sb = find_start_block(child[c]);
2688 		if (sb != MAXOFFSET_T) {
2689 			return (sb);
2690 		}
2691 	}
2692 	return (MAXOFFSET_T);
2693 }
2694 
2695 /*
2696  * Label an individual disk.  The name provided is the short name,
2697  * stripped of any leading /dev path.
2698  */
2699 int
2700 zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, char *name)
2701 {
2702 	char path[MAXPATHLEN];
2703 	struct dk_gpt *vtoc;
2704 	int fd;
2705 	size_t resv = EFI_MIN_RESV_SIZE;
2706 	uint64_t slice_size;
2707 	diskaddr_t start_block;
2708 	char errbuf[1024];
2709 
2710 	/* prepare an error message just in case */
2711 	(void) snprintf(errbuf, sizeof (errbuf),
2712 	    dgettext(TEXT_DOMAIN, "cannot label '%s'"), name);
2713 
2714 	if (zhp) {
2715 		nvlist_t *nvroot;
2716 
2717 		verify(nvlist_lookup_nvlist(zhp->zpool_config,
2718 		    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2719 
2720 		if (zhp->zpool_start_block == 0)
2721 			start_block = find_start_block(nvroot);
2722 		else
2723 			start_block = zhp->zpool_start_block;
2724 		zhp->zpool_start_block = start_block;
2725 	} else {
2726 		/* new pool */
2727 		start_block = NEW_START_BLOCK;
2728 	}
2729 
2730 	(void) snprintf(path, sizeof (path), "%s/%s%s", RDISK_ROOT, name,
2731 	    BACKUP_SLICE);
2732 
2733 	if ((fd = open(path, O_RDWR | O_NDELAY)) < 0) {
2734 		/*
2735 		 * This shouldn't happen.  We've long since verified that this
2736 		 * is a valid device.
2737 		 */
2738 		zfs_error_aux(hdl,
2739 		    dgettext(TEXT_DOMAIN, "unable to open device"));
2740 		return (zfs_error(hdl, EZFS_OPENFAILED, errbuf));
2741 	}
2742 
2743 	if (efi_alloc_and_init(fd, EFI_NUMPAR, &vtoc) != 0) {
2744 		/*
2745 		 * The only way this can fail is if we run out of memory, or we
2746 		 * were unable to read the disk's capacity
2747 		 */
2748 		if (errno == ENOMEM)
2749 			(void) no_memory(hdl);
2750 
2751 		(void) close(fd);
2752 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2753 		    "unable to read disk capacity"), name);
2754 
2755 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
2756 	}
2757 
2758 	slice_size = vtoc->efi_last_u_lba + 1;
2759 	slice_size -= EFI_MIN_RESV_SIZE;
2760 	if (start_block == MAXOFFSET_T)
2761 		start_block = NEW_START_BLOCK;
2762 	slice_size -= start_block;
2763 
2764 	vtoc->efi_parts[0].p_start = start_block;
2765 	vtoc->efi_parts[0].p_size = slice_size;
2766 
2767 	/*
2768 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
2769 	 * disposable by some EFI utilities (since EFI doesn't have a backup
2770 	 * slice).  V_UNASSIGNED is supposed to be used only for zero size
2771 	 * partitions, and efi_write() will fail if we use it.  V_ROOT, V_BOOT,
2772 	 * etc. were all pretty specific.  V_USR is as close to reality as we
2773 	 * can get, in the absence of V_OTHER.
2774 	 */
2775 	vtoc->efi_parts[0].p_tag = V_USR;
2776 	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
2777 
2778 	vtoc->efi_parts[8].p_start = slice_size + start_block;
2779 	vtoc->efi_parts[8].p_size = resv;
2780 	vtoc->efi_parts[8].p_tag = V_RESERVED;
2781 
2782 	if (efi_write(fd, vtoc) != 0) {
2783 		/*
2784 		 * Some block drivers (like pcata) may not support EFI
2785 		 * GPT labels.  Print out a helpful error message dir-
2786 		 * ecting the user to manually label the disk and give
2787 		 * a specific slice.
2788 		 */
2789 		(void) close(fd);
2790 		efi_free(vtoc);
2791 
2792 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2793 		    "try using fdisk(1M) and then provide a specific slice"));
2794 		return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
2795 	}
2796 
2797 	(void) close(fd);
2798 	efi_free(vtoc);
2799 	return (0);
2800 }
2801 
2802 static boolean_t
2803 supported_dump_vdev_type(libzfs_handle_t *hdl, nvlist_t *config, char *errbuf)
2804 {
2805 	char *type;
2806 	nvlist_t **child;
2807 	uint_t children, c;
2808 
2809 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_TYPE, &type) == 0);
2810 	if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
2811 	    strcmp(type, VDEV_TYPE_FILE) == 0 ||
2812 	    strcmp(type, VDEV_TYPE_LOG) == 0 ||
2813 	    strcmp(type, VDEV_TYPE_MISSING) == 0) {
2814 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2815 		    "vdev type '%s' is not supported"), type);
2816 		(void) zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf);
2817 		return (B_FALSE);
2818 	}
2819 	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
2820 	    &child, &children) == 0) {
2821 		for (c = 0; c < children; c++) {
2822 			if (!supported_dump_vdev_type(hdl, child[c], errbuf))
2823 				return (B_FALSE);
2824 		}
2825 	}
2826 	return (B_TRUE);
2827 }
2828 
2829 /*
2830  * check if this zvol is allowable for use as a dump device; zero if
2831  * it is, > 0 if it isn't, < 0 if it isn't a zvol
2832  */
2833 int
2834 zvol_check_dump_config(char *arg)
2835 {
2836 	zpool_handle_t *zhp = NULL;
2837 	nvlist_t *config, *nvroot;
2838 	char *p, *volname;
2839 	nvlist_t **top;
2840 	uint_t toplevels;
2841 	libzfs_handle_t *hdl;
2842 	char errbuf[1024];
2843 	char poolname[ZPOOL_MAXNAMELEN];
2844 	int pathlen = strlen(ZVOL_FULL_DEV_DIR);
2845 	int ret = 1;
2846 
2847 	if (strncmp(arg, ZVOL_FULL_DEV_DIR, pathlen)) {
2848 		return (-1);
2849 	}
2850 
2851 	(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2852 	    "dump is not supported on device '%s'"), arg);
2853 
2854 	if ((hdl = libzfs_init()) == NULL)
2855 		return (1);
2856 	libzfs_print_on_error(hdl, B_TRUE);
2857 
2858 	volname = arg + pathlen;
2859 
2860 	/* check the configuration of the pool */
2861 	if ((p = strchr(volname, '/')) == NULL) {
2862 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2863 		    "malformed dataset name"));
2864 		(void) zfs_error(hdl, EZFS_INVALIDNAME, errbuf);
2865 		return (1);
2866 	} else if (p - volname >= ZFS_MAXNAMELEN) {
2867 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2868 		    "dataset name is too long"));
2869 		(void) zfs_error(hdl, EZFS_NAMETOOLONG, errbuf);
2870 		return (1);
2871 	} else {
2872 		(void) strncpy(poolname, volname, p - volname);
2873 		poolname[p - volname] = '\0';
2874 	}
2875 
2876 	if ((zhp = zpool_open(hdl, poolname)) == NULL) {
2877 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2878 		    "could not open pool '%s'"), poolname);
2879 		(void) zfs_error(hdl, EZFS_OPENFAILED, errbuf);
2880 		goto out;
2881 	}
2882 	config = zpool_get_config(zhp, NULL);
2883 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
2884 	    &nvroot) != 0) {
2885 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2886 		    "could not obtain vdev configuration for  '%s'"), poolname);
2887 		(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
2888 		goto out;
2889 	}
2890 
2891 	verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2892 	    &top, &toplevels) == 0);
2893 	if (toplevels != 1) {
2894 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2895 		    "'%s' has multiple top level vdevs"), poolname);
2896 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
2897 		goto out;
2898 	}
2899 
2900 	if (!supported_dump_vdev_type(hdl, top[0], errbuf)) {
2901 		goto out;
2902 	}
2903 	ret = 0;
2904 
2905 out:
2906 	if (zhp)
2907 		zpool_close(zhp);
2908 	libzfs_fini(hdl);
2909 	return (ret);
2910 }
2911