xref: /illumos-gate/usr/src/lib/libzfs/common/libzfs_pool.c (revision 051d39bbeea3e1b0fd8395dc97be34acb3241891)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <alloca.h>
30 #include <assert.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <dirent.h>
35 #include <fcntl.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/zio.h>
43 #include <strings.h>
44 
45 #include "zfs_namecheck.h"
46 #include "libzfs_impl.h"
47 
48 /*
49  * Validate the given pool name, optionally putting an extended error message in
50  * 'buf'.
51  */
52 static boolean_t
53 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
54 {
55 	namecheck_err_t why;
56 	char what;
57 	int ret;
58 
59 	ret = pool_namecheck(pool, &why, &what);
60 
61 	/*
62 	 * The rules for reserved pool names were extended at a later point.
63 	 * But we need to support users with existing pools that may now be
64 	 * invalid.  So we only check for this expanded set of names during a
65 	 * create (or import), and only in userland.
66 	 */
67 	if (ret == 0 && !isopen &&
68 	    (strncmp(pool, "mirror", 6) == 0 ||
69 	    strncmp(pool, "raidz", 5) == 0 ||
70 	    strncmp(pool, "spare", 5) == 0)) {
71 		zfs_error_aux(hdl,
72 		    dgettext(TEXT_DOMAIN, "name is reserved"));
73 		return (B_FALSE);
74 	}
75 
76 
77 	if (ret != 0) {
78 		if (hdl != NULL) {
79 			switch (why) {
80 			case NAME_ERR_TOOLONG:
81 				zfs_error_aux(hdl,
82 				    dgettext(TEXT_DOMAIN, "name is too long"));
83 				break;
84 
85 			case NAME_ERR_INVALCHAR:
86 				zfs_error_aux(hdl,
87 				    dgettext(TEXT_DOMAIN, "invalid character "
88 				    "'%c' in pool name"), what);
89 				break;
90 
91 			case NAME_ERR_NOLETTER:
92 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
93 				    "name must begin with a letter"));
94 				break;
95 
96 			case NAME_ERR_RESERVED:
97 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
98 				    "name is reserved"));
99 				break;
100 
101 			case NAME_ERR_DISKLIKE:
102 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
103 				    "pool name is reserved"));
104 				break;
105 
106 			case NAME_ERR_LEADING_SLASH:
107 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
108 				    "leading slash in name"));
109 				break;
110 
111 			case NAME_ERR_EMPTY_COMPONENT:
112 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
113 				    "empty component in name"));
114 				break;
115 
116 			case NAME_ERR_TRAILING_SLASH:
117 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
118 				    "trailing slash in name"));
119 				break;
120 
121 			case NAME_ERR_MULTIPLE_AT:
122 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
123 				    "multiple '@' delimiters in name"));
124 				break;
125 
126 			}
127 		}
128 		return (B_FALSE);
129 	}
130 
131 	return (B_TRUE);
132 }
133 
134 /*
135  * Set the pool-wide health based on the vdev state of the root vdev.
136  */
137 int
138 set_pool_health(nvlist_t *config)
139 {
140 	nvlist_t *nvroot;
141 	vdev_stat_t *vs;
142 	uint_t vsc;
143 	char *health;
144 
145 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
146 	    &nvroot) == 0);
147 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
148 	    (uint64_t **)&vs, &vsc) == 0);
149 
150 	switch (vs->vs_state) {
151 
152 	case VDEV_STATE_CLOSED:
153 	case VDEV_STATE_CANT_OPEN:
154 	case VDEV_STATE_OFFLINE:
155 		health = dgettext(TEXT_DOMAIN, "FAULTED");
156 		break;
157 
158 	case VDEV_STATE_DEGRADED:
159 		health = dgettext(TEXT_DOMAIN, "DEGRADED");
160 		break;
161 
162 	case VDEV_STATE_HEALTHY:
163 		health = dgettext(TEXT_DOMAIN, "ONLINE");
164 		break;
165 
166 	default:
167 		abort();
168 	}
169 
170 	return (nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH, health));
171 }
172 
173 /*
174  * Open a handle to the given pool, even if the pool is currently in the FAULTED
175  * state.
176  */
177 zpool_handle_t *
178 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
179 {
180 	zpool_handle_t *zhp;
181 	boolean_t missing;
182 
183 	/*
184 	 * Make sure the pool name is valid.
185 	 */
186 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
187 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
188 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
189 		    pool);
190 		return (NULL);
191 	}
192 
193 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
194 		return (NULL);
195 
196 	zhp->zpool_hdl = hdl;
197 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
198 
199 	if (zpool_refresh_stats(zhp, &missing) != 0) {
200 		zpool_close(zhp);
201 		return (NULL);
202 	}
203 
204 	if (missing) {
205 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
206 		    "no such pool"));
207 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
208 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
209 		    pool);
210 		zpool_close(zhp);
211 		return (NULL);
212 	}
213 
214 	return (zhp);
215 }
216 
217 /*
218  * Like the above, but silent on error.  Used when iterating over pools (because
219  * the configuration cache may be out of date).
220  */
221 int
222 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
223 {
224 	zpool_handle_t *zhp;
225 	boolean_t missing;
226 
227 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
228 		return (-1);
229 
230 	zhp->zpool_hdl = hdl;
231 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
232 
233 	if (zpool_refresh_stats(zhp, &missing) != 0) {
234 		zpool_close(zhp);
235 		return (-1);
236 	}
237 
238 	if (missing) {
239 		zpool_close(zhp);
240 		*ret = NULL;
241 		return (0);
242 	}
243 
244 	*ret = zhp;
245 	return (0);
246 }
247 
248 /*
249  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
250  * state.
251  */
252 zpool_handle_t *
253 zpool_open(libzfs_handle_t *hdl, const char *pool)
254 {
255 	zpool_handle_t *zhp;
256 
257 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
258 		return (NULL);
259 
260 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
261 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
262 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
263 		zpool_close(zhp);
264 		return (NULL);
265 	}
266 
267 	return (zhp);
268 }
269 
270 /*
271  * Close the handle.  Simply frees the memory associated with the handle.
272  */
273 void
274 zpool_close(zpool_handle_t *zhp)
275 {
276 	if (zhp->zpool_config)
277 		nvlist_free(zhp->zpool_config);
278 	if (zhp->zpool_old_config)
279 		nvlist_free(zhp->zpool_old_config);
280 	if (zhp->zpool_error_log) {
281 		int i;
282 		for (i = 0; i < zhp->zpool_error_count; i++)
283 			nvlist_free(zhp->zpool_error_log[i]);
284 		free(zhp->zpool_error_log);
285 	}
286 	free(zhp);
287 }
288 
289 /*
290  * Return the name of the pool.
291  */
292 const char *
293 zpool_get_name(zpool_handle_t *zhp)
294 {
295 	return (zhp->zpool_name);
296 }
297 
298 /*
299  * Return the GUID of the pool.
300  */
301 uint64_t
302 zpool_get_guid(zpool_handle_t *zhp)
303 {
304 	uint64_t guid;
305 
306 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
307 	    &guid) == 0);
308 	return (guid);
309 }
310 
311 /*
312  * Return the version of the pool.
313  */
314 uint64_t
315 zpool_get_version(zpool_handle_t *zhp)
316 {
317 	uint64_t version;
318 
319 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
320 	    &version) == 0);
321 
322 	return (version);
323 }
324 
325 /*
326  * Return the amount of space currently consumed by the pool.
327  */
328 uint64_t
329 zpool_get_space_used(zpool_handle_t *zhp)
330 {
331 	nvlist_t *nvroot;
332 	vdev_stat_t *vs;
333 	uint_t vsc;
334 
335 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
336 	    &nvroot) == 0);
337 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
338 	    (uint64_t **)&vs, &vsc) == 0);
339 
340 	return (vs->vs_alloc);
341 }
342 
343 /*
344  * Return the total space in the pool.
345  */
346 uint64_t
347 zpool_get_space_total(zpool_handle_t *zhp)
348 {
349 	nvlist_t *nvroot;
350 	vdev_stat_t *vs;
351 	uint_t vsc;
352 
353 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
354 	    &nvroot) == 0);
355 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
356 	    (uint64_t **)&vs, &vsc) == 0);
357 
358 	return (vs->vs_space);
359 }
360 
361 /*
362  * Return the alternate root for this pool, if any.
363  */
364 int
365 zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
366 {
367 	zfs_cmd_t zc = { 0 };
368 
369 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
370 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
371 	    zc.zc_value[0] == '\0')
372 		return (-1);
373 
374 	(void) strlcpy(buf, zc.zc_value, buflen);
375 
376 	return (0);
377 }
378 
379 /*
380  * Return the state of the pool (ACTIVE or UNAVAILABLE)
381  */
382 int
383 zpool_get_state(zpool_handle_t *zhp)
384 {
385 	return (zhp->zpool_state);
386 }
387 
388 /*
389  * Create the named pool, using the provided vdev list.  It is assumed
390  * that the consumer has already validated the contents of the nvlist, so we
391  * don't have to worry about error semantics.
392  */
393 int
394 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
395     const char *altroot)
396 {
397 	zfs_cmd_t zc = { 0 };
398 	char msg[1024];
399 
400 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
401 	    "cannot create '%s'"), pool);
402 
403 	if (!zpool_name_valid(hdl, B_FALSE, pool))
404 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
405 
406 	if (altroot != NULL && altroot[0] != '/')
407 		return (zfs_error_fmt(hdl, EZFS_BADPATH,
408 		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
409 
410 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
411 		return (-1);
412 
413 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
414 
415 	if (altroot != NULL)
416 		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
417 
418 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
419 		zcmd_free_nvlists(&zc);
420 
421 		switch (errno) {
422 		case EBUSY:
423 			/*
424 			 * This can happen if the user has specified the same
425 			 * device multiple times.  We can't reliably detect this
426 			 * until we try to add it and see we already have a
427 			 * label.
428 			 */
429 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
430 			    "one or more vdevs refer to the same device"));
431 			return (zfs_error(hdl, EZFS_BADDEV, msg));
432 
433 		case EOVERFLOW:
434 			/*
435 			 * This occurs when one of the devices is below
436 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
437 			 * device was the problem device since there's no
438 			 * reliable way to determine device size from userland.
439 			 */
440 			{
441 				char buf[64];
442 
443 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
444 
445 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
446 				    "one or more devices is less than the "
447 				    "minimum size (%s)"), buf);
448 			}
449 			return (zfs_error(hdl, EZFS_BADDEV, msg));
450 
451 		case ENOSPC:
452 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
453 			    "one or more devices is out of space"));
454 			return (zfs_error(hdl, EZFS_BADDEV, msg));
455 
456 		default:
457 			return (zpool_standard_error(hdl, errno, msg));
458 		}
459 	}
460 
461 	zcmd_free_nvlists(&zc);
462 
463 	/*
464 	 * If this is an alternate root pool, then we automatically set the
465 	 * mountpoint of the root dataset to be '/'.
466 	 */
467 	if (altroot != NULL) {
468 		zfs_handle_t *zhp;
469 
470 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
471 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
472 		    "/") == 0);
473 
474 		zfs_close(zhp);
475 	}
476 
477 	return (0);
478 }
479 
480 /*
481  * Destroy the given pool.  It is up to the caller to ensure that there are no
482  * datasets left in the pool.
483  */
484 int
485 zpool_destroy(zpool_handle_t *zhp)
486 {
487 	zfs_cmd_t zc = { 0 };
488 	zfs_handle_t *zfp = NULL;
489 	libzfs_handle_t *hdl = zhp->zpool_hdl;
490 	char msg[1024];
491 
492 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
493 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
494 	    ZFS_TYPE_FILESYSTEM)) == NULL)
495 		return (-1);
496 
497 	if (zpool_remove_zvol_links(zhp) != 0)
498 		return (-1);
499 
500 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
501 
502 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
503 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
504 		    "cannot destroy '%s'"), zhp->zpool_name);
505 
506 		if (errno == EROFS) {
507 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
508 			    "one or more devices is read only"));
509 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
510 		} else {
511 			(void) zpool_standard_error(hdl, errno, msg);
512 		}
513 
514 		if (zfp)
515 			zfs_close(zfp);
516 		return (-1);
517 	}
518 
519 	if (zfp) {
520 		remove_mountpoint(zfp);
521 		zfs_close(zfp);
522 	}
523 
524 	return (0);
525 }
526 
527 /*
528  * Add the given vdevs to the pool.  The caller must have already performed the
529  * necessary verification to ensure that the vdev specification is well-formed.
530  */
531 int
532 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
533 {
534 	zfs_cmd_t zc = { 0 };
535 	int ret;
536 	libzfs_handle_t *hdl = zhp->zpool_hdl;
537 	char msg[1024];
538 	nvlist_t **spares;
539 	uint_t nspares;
540 
541 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
542 	    "cannot add to '%s'"), zhp->zpool_name);
543 
544 	if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
545 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
546 	    &spares, &nspares) == 0) {
547 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
548 		    "upgraded to add hot spares"));
549 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
550 	}
551 
552 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
553 		return (-1);
554 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
555 
556 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
557 		switch (errno) {
558 		case EBUSY:
559 			/*
560 			 * This can happen if the user has specified the same
561 			 * device multiple times.  We can't reliably detect this
562 			 * until we try to add it and see we already have a
563 			 * label.
564 			 */
565 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
566 			    "one or more vdevs refer to the same device"));
567 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
568 			break;
569 
570 		case EOVERFLOW:
571 			/*
572 			 * This occurrs when one of the devices is below
573 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
574 			 * device was the problem device since there's no
575 			 * reliable way to determine device size from userland.
576 			 */
577 			{
578 				char buf[64];
579 
580 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
581 
582 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
583 				    "device is less than the minimum "
584 				    "size (%s)"), buf);
585 			}
586 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
587 			break;
588 
589 		case ENOTSUP:
590 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
591 			    "pool must be upgraded to add raidz2 vdevs"));
592 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
593 			break;
594 
595 		default:
596 			(void) zpool_standard_error(hdl, errno, msg);
597 		}
598 
599 		ret = -1;
600 	} else {
601 		ret = 0;
602 	}
603 
604 	zcmd_free_nvlists(&zc);
605 
606 	return (ret);
607 }
608 
609 /*
610  * Exports the pool from the system.  The caller must ensure that there are no
611  * mounted datasets in the pool.
612  */
613 int
614 zpool_export(zpool_handle_t *zhp)
615 {
616 	zfs_cmd_t zc = { 0 };
617 
618 	if (zpool_remove_zvol_links(zhp) != 0)
619 		return (-1);
620 
621 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
622 
623 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0)
624 		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
625 		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
626 		    zhp->zpool_name));
627 
628 	return (0);
629 }
630 
631 /*
632  * Import the given pool using the known configuration.  The configuration
633  * should have come from zpool_find_import().  The 'newname' and 'altroot'
634  * parameters control whether the pool is imported with a different name or with
635  * an alternate root, respectively.
636  */
637 int
638 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
639     const char *altroot)
640 {
641 	zfs_cmd_t zc = { 0 };
642 	char *thename;
643 	char *origname;
644 	int ret;
645 
646 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
647 	    &origname) == 0);
648 
649 	if (newname != NULL) {
650 		if (!zpool_name_valid(hdl, B_FALSE, newname))
651 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
652 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
653 			    newname));
654 		thename = (char *)newname;
655 	} else {
656 		thename = origname;
657 	}
658 
659 	if (altroot != NULL && altroot[0] != '/')
660 		return (zfs_error_fmt(hdl, EZFS_BADPATH,
661 		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
662 		    altroot));
663 
664 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
665 
666 	if (altroot != NULL)
667 		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
668 	else
669 		zc.zc_value[0] = '\0';
670 
671 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
672 	    &zc.zc_guid) == 0);
673 
674 	if (zcmd_write_src_nvlist(hdl, &zc, config, NULL) != 0)
675 		return (-1);
676 
677 	ret = 0;
678 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
679 		char desc[1024];
680 		if (newname == NULL)
681 			(void) snprintf(desc, sizeof (desc),
682 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
683 			    thename);
684 		else
685 			(void) snprintf(desc, sizeof (desc),
686 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
687 			    origname, thename);
688 
689 		switch (errno) {
690 		case ENOTSUP:
691 			/*
692 			 * Unsupported version.
693 			 */
694 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
695 			break;
696 
697 		case EINVAL:
698 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
699 			break;
700 
701 		default:
702 			(void) zpool_standard_error(hdl, errno, desc);
703 		}
704 
705 		ret = -1;
706 	} else {
707 		zpool_handle_t *zhp;
708 		/*
709 		 * This should never fail, but play it safe anyway.
710 		 */
711 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
712 			ret = -1;
713 		} else if (zhp != NULL) {
714 			ret = zpool_create_zvol_links(zhp);
715 			zpool_close(zhp);
716 		}
717 	}
718 
719 	zcmd_free_nvlists(&zc);
720 	return (ret);
721 }
722 
723 /*
724  * Scrub the pool.
725  */
726 int
727 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
728 {
729 	zfs_cmd_t zc = { 0 };
730 	char msg[1024];
731 	libzfs_handle_t *hdl = zhp->zpool_hdl;
732 
733 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
734 	zc.zc_cookie = type;
735 
736 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
737 		return (0);
738 
739 	(void) snprintf(msg, sizeof (msg),
740 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
741 
742 	if (errno == EBUSY)
743 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
744 	else
745 		return (zpool_standard_error(hdl, errno, msg));
746 }
747 
748 /*
749  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
750  * spare; but FALSE if its an INUSE spare.
751  */
752 static nvlist_t *
753 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
754     boolean_t *avail_spare)
755 {
756 	uint_t c, children;
757 	nvlist_t **child;
758 	uint64_t theguid, present;
759 	char *path;
760 	uint64_t wholedisk = 0;
761 	nvlist_t *ret;
762 
763 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
764 
765 	if (search == NULL &&
766 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
767 		/*
768 		 * If the device has never been present since import, the only
769 		 * reliable way to match the vdev is by GUID.
770 		 */
771 		if (theguid == guid)
772 			return (nv);
773 	} else if (search != NULL &&
774 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
775 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
776 		    &wholedisk);
777 		if (wholedisk) {
778 			/*
779 			 * For whole disks, the internal path has 's0', but the
780 			 * path passed in by the user doesn't.
781 			 */
782 			if (strlen(search) == strlen(path) - 2 &&
783 			    strncmp(search, path, strlen(search)) == 0)
784 				return (nv);
785 		} else if (strcmp(search, path) == 0) {
786 			return (nv);
787 		}
788 	}
789 
790 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
791 	    &child, &children) != 0)
792 		return (NULL);
793 
794 	for (c = 0; c < children; c++)
795 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
796 		    avail_spare)) != NULL)
797 			return (ret);
798 
799 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
800 	    &child, &children) == 0) {
801 		for (c = 0; c < children; c++) {
802 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
803 			    avail_spare)) != NULL) {
804 				*avail_spare = B_TRUE;
805 				return (ret);
806 			}
807 		}
808 	}
809 
810 	return (NULL);
811 }
812 
813 nvlist_t *
814 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare)
815 {
816 	char buf[MAXPATHLEN];
817 	const char *search;
818 	char *end;
819 	nvlist_t *nvroot;
820 	uint64_t guid;
821 
822 	guid = strtoull(path, &end, 10);
823 	if (guid != 0 && *end == '\0') {
824 		search = NULL;
825 	} else if (path[0] != '/') {
826 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
827 		search = buf;
828 	} else {
829 		search = path;
830 	}
831 
832 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
833 	    &nvroot) == 0);
834 
835 	*avail_spare = B_FALSE;
836 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare));
837 }
838 
839 /*
840  * Returns TRUE if the given guid corresponds to a spare (INUSE or not).
841  */
842 static boolean_t
843 is_spare(zpool_handle_t *zhp, uint64_t guid)
844 {
845 	uint64_t spare_guid;
846 	nvlist_t *nvroot;
847 	nvlist_t **spares;
848 	uint_t nspares;
849 	int i;
850 
851 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
852 	    &nvroot) == 0);
853 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
854 	    &spares, &nspares) == 0) {
855 		for (i = 0; i < nspares; i++) {
856 			verify(nvlist_lookup_uint64(spares[i],
857 			    ZPOOL_CONFIG_GUID, &spare_guid) == 0);
858 			if (guid == spare_guid)
859 				return (B_TRUE);
860 		}
861 	}
862 
863 	return (B_FALSE);
864 }
865 
866 /*
867  * Bring the specified vdev online
868  */
869 int
870 zpool_vdev_online(zpool_handle_t *zhp, const char *path)
871 {
872 	zfs_cmd_t zc = { 0 };
873 	char msg[1024];
874 	nvlist_t *tgt;
875 	boolean_t avail_spare;
876 	libzfs_handle_t *hdl = zhp->zpool_hdl;
877 
878 	(void) snprintf(msg, sizeof (msg),
879 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
880 
881 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
882 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
883 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
884 
885 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
886 
887 	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
888 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
889 
890 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0)
891 		return (0);
892 
893 	return (zpool_standard_error(hdl, errno, msg));
894 }
895 
896 /*
897  * Take the specified vdev offline
898  */
899 int
900 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
901 {
902 	zfs_cmd_t zc = { 0 };
903 	char msg[1024];
904 	nvlist_t *tgt;
905 	boolean_t avail_spare;
906 	libzfs_handle_t *hdl = zhp->zpool_hdl;
907 
908 	(void) snprintf(msg, sizeof (msg),
909 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
910 
911 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
912 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
913 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
914 
915 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
916 
917 	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
918 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
919 
920 	zc.zc_cookie = istmp;
921 
922 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
923 		return (0);
924 
925 	switch (errno) {
926 	case EBUSY:
927 
928 		/*
929 		 * There are no other replicas of this device.
930 		 */
931 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
932 
933 	default:
934 		return (zpool_standard_error(hdl, errno, msg));
935 	}
936 }
937 
938 /*
939  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
940  * a hot spare.
941  */
942 static boolean_t
943 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
944 {
945 	nvlist_t **child;
946 	uint_t c, children;
947 	char *type;
948 
949 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
950 	    &children) == 0) {
951 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
952 		    &type) == 0);
953 
954 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
955 		    children == 2 && child[which] == tgt)
956 			return (B_TRUE);
957 
958 		for (c = 0; c < children; c++)
959 			if (is_replacing_spare(child[c], tgt, which))
960 				return (B_TRUE);
961 	}
962 
963 	return (B_FALSE);
964 }
965 
966 /*
967  * Attach new_disk (fully described by nvroot) to old_disk.
968  * If 'replacing' is specified, tne new disk will replace the old one.
969  */
970 int
971 zpool_vdev_attach(zpool_handle_t *zhp,
972     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
973 {
974 	zfs_cmd_t zc = { 0 };
975 	char msg[1024];
976 	int ret;
977 	nvlist_t *tgt;
978 	boolean_t avail_spare;
979 	uint64_t val;
980 	char *path;
981 	nvlist_t **child;
982 	uint_t children;
983 	nvlist_t *config_root;
984 	libzfs_handle_t *hdl = zhp->zpool_hdl;
985 
986 	if (replacing)
987 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
988 		    "cannot replace %s with %s"), old_disk, new_disk);
989 	else
990 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
991 		    "cannot attach %s to %s"), new_disk, old_disk);
992 
993 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
994 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
995 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
996 
997 	if (avail_spare)
998 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
999 
1000 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1001 	zc.zc_cookie = replacing;
1002 
1003 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1004 	    &child, &children) != 0 || children != 1) {
1005 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1006 		    "new device must be a single disk"));
1007 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1008 	}
1009 
1010 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1011 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1012 
1013 	/*
1014 	 * If the target is a hot spare that has been swapped in, we can only
1015 	 * replace it with another hot spare.
1016 	 */
1017 	if (replacing &&
1018 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1019 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1020 	    (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
1021 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
1022 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1023 		    "can only be replaced by another hot spare"));
1024 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1025 	}
1026 
1027 	/*
1028 	 * If we are attempting to replace a spare, it canot be applied to an
1029 	 * already spared device.
1030 	 */
1031 	if (replacing &&
1032 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1033 	    zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
1034 	    is_replacing_spare(config_root, tgt, 0)) {
1035 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1036 		    "device has already been replaced with a spare"));
1037 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1038 	}
1039 
1040 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
1041 		return (-1);
1042 
1043 	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
1044 
1045 	zcmd_free_nvlists(&zc);
1046 
1047 	if (ret == 0)
1048 		return (0);
1049 
1050 	switch (errno) {
1051 	case ENOTSUP:
1052 		/*
1053 		 * Can't attach to or replace this type of vdev.
1054 		 */
1055 		if (replacing)
1056 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1057 			    "cannot replace a replacing device"));
1058 		else
1059 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1060 			    "can only attach to mirrors and top-level "
1061 			    "disks"));
1062 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1063 		break;
1064 
1065 	case EINVAL:
1066 		/*
1067 		 * The new device must be a single disk.
1068 		 */
1069 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1070 		    "new device must be a single disk"));
1071 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1072 		break;
1073 
1074 	case EBUSY:
1075 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1076 		    new_disk);
1077 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1078 		break;
1079 
1080 	case EOVERFLOW:
1081 		/*
1082 		 * The new device is too small.
1083 		 */
1084 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1085 		    "device is too small"));
1086 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1087 		break;
1088 
1089 	case EDOM:
1090 		/*
1091 		 * The new device has a different alignment requirement.
1092 		 */
1093 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1094 		    "devices have different sector alignment"));
1095 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1096 		break;
1097 
1098 	case ENAMETOOLONG:
1099 		/*
1100 		 * The resulting top-level vdev spec won't fit in the label.
1101 		 */
1102 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1103 		break;
1104 
1105 	default:
1106 		(void) zpool_standard_error(hdl, errno, msg);
1107 	}
1108 
1109 	return (-1);
1110 }
1111 
1112 /*
1113  * Detach the specified device.
1114  */
1115 int
1116 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1117 {
1118 	zfs_cmd_t zc = { 0 };
1119 	char msg[1024];
1120 	nvlist_t *tgt;
1121 	boolean_t avail_spare;
1122 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1123 
1124 	(void) snprintf(msg, sizeof (msg),
1125 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1126 
1127 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1128 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1129 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1130 
1131 	if (avail_spare)
1132 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1133 
1134 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1135 
1136 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1137 		return (0);
1138 
1139 	switch (errno) {
1140 
1141 	case ENOTSUP:
1142 		/*
1143 		 * Can't detach from this type of vdev.
1144 		 */
1145 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1146 		    "applicable to mirror and replacing vdevs"));
1147 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1148 		break;
1149 
1150 	case EBUSY:
1151 		/*
1152 		 * There are no other replicas of this device.
1153 		 */
1154 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1155 		break;
1156 
1157 	default:
1158 		(void) zpool_standard_error(hdl, errno, msg);
1159 	}
1160 
1161 	return (-1);
1162 }
1163 
1164 /*
1165  * Remove the given device.  Currently, this is supported only for hot spares.
1166  */
1167 int
1168 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1169 {
1170 	zfs_cmd_t zc = { 0 };
1171 	char msg[1024];
1172 	nvlist_t *tgt;
1173 	boolean_t avail_spare;
1174 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1175 
1176 	(void) snprintf(msg, sizeof (msg),
1177 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1178 
1179 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1180 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1181 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1182 
1183 	if (!avail_spare) {
1184 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1185 		    "only hot spares can be removed"));
1186 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1187 	}
1188 
1189 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1190 
1191 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1192 		return (0);
1193 
1194 	return (zpool_standard_error(hdl, errno, msg));
1195 }
1196 
1197 /*
1198  * Clear the errors for the pool, or the particular device if specified.
1199  */
1200 int
1201 zpool_clear(zpool_handle_t *zhp, const char *path)
1202 {
1203 	zfs_cmd_t zc = { 0 };
1204 	char msg[1024];
1205 	nvlist_t *tgt;
1206 	boolean_t avail_spare;
1207 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1208 
1209 	if (path)
1210 		(void) snprintf(msg, sizeof (msg),
1211 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1212 		    path);
1213 	else
1214 		(void) snprintf(msg, sizeof (msg),
1215 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1216 		    zhp->zpool_name);
1217 
1218 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1219 	if (path) {
1220 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1221 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1222 
1223 		if (avail_spare)
1224 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1225 
1226 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1227 		    &zc.zc_guid) == 0);
1228 	}
1229 
1230 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1231 		return (0);
1232 
1233 	return (zpool_standard_error(hdl, errno, msg));
1234 }
1235 
1236 /*
1237  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1238  * hierarchy.
1239  */
1240 int
1241 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1242     void *data)
1243 {
1244 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1245 	char (*paths)[MAXPATHLEN];
1246 	size_t size = 4;
1247 	int curr, fd, base, ret = 0;
1248 	DIR *dirp;
1249 	struct dirent *dp;
1250 	struct stat st;
1251 
1252 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1253 		return (errno == ENOENT ? 0 : -1);
1254 
1255 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1256 		int err = errno;
1257 		(void) close(base);
1258 		return (err == ENOENT ? 0 : -1);
1259 	}
1260 
1261 	/*
1262 	 * Oddly this wasn't a directory -- ignore that failure since we
1263 	 * know there are no links lower in the (non-existant) hierarchy.
1264 	 */
1265 	if (!S_ISDIR(st.st_mode)) {
1266 		(void) close(base);
1267 		return (0);
1268 	}
1269 
1270 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1271 		(void) close(base);
1272 		return (-1);
1273 	}
1274 
1275 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1276 	curr = 0;
1277 
1278 	while (curr >= 0) {
1279 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1280 			goto err;
1281 
1282 		if (S_ISDIR(st.st_mode)) {
1283 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1284 				goto err;
1285 
1286 			if ((dirp = fdopendir(fd)) == NULL) {
1287 				(void) close(fd);
1288 				goto err;
1289 			}
1290 
1291 			while ((dp = readdir(dirp)) != NULL) {
1292 				if (dp->d_name[0] == '.')
1293 					continue;
1294 
1295 				if (curr + 1 == size) {
1296 					paths = zfs_realloc(hdl, paths,
1297 					    size * sizeof (paths[0]),
1298 					    size * 2 * sizeof (paths[0]));
1299 					if (paths == NULL) {
1300 						(void) closedir(dirp);
1301 						(void) close(fd);
1302 						goto err;
1303 					}
1304 
1305 					size *= 2;
1306 				}
1307 
1308 				(void) strlcpy(paths[curr + 1], paths[curr],
1309 				    sizeof (paths[curr + 1]));
1310 				(void) strlcat(paths[curr], "/",
1311 				    sizeof (paths[curr]));
1312 				(void) strlcat(paths[curr], dp->d_name,
1313 				    sizeof (paths[curr]));
1314 				curr++;
1315 			}
1316 
1317 			(void) closedir(dirp);
1318 
1319 		} else {
1320 			if ((ret = cb(paths[curr], data)) != 0)
1321 				break;
1322 		}
1323 
1324 		curr--;
1325 	}
1326 
1327 	free(paths);
1328 	(void) close(base);
1329 
1330 	return (ret);
1331 
1332 err:
1333 	free(paths);
1334 	(void) close(base);
1335 	return (-1);
1336 }
1337 
1338 typedef struct zvol_cb {
1339 	zpool_handle_t *zcb_pool;
1340 	boolean_t zcb_create;
1341 } zvol_cb_t;
1342 
1343 /*ARGSUSED*/
1344 static int
1345 do_zvol_create(zfs_handle_t *zhp, void *data)
1346 {
1347 	int ret;
1348 
1349 	if (ZFS_IS_VOLUME(zhp))
1350 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
1351 
1352 	ret = zfs_iter_children(zhp, do_zvol_create, NULL);
1353 
1354 	zfs_close(zhp);
1355 
1356 	return (ret);
1357 }
1358 
1359 /*
1360  * Iterate over all zvols in the pool and make any necessary minor nodes.
1361  */
1362 int
1363 zpool_create_zvol_links(zpool_handle_t *zhp)
1364 {
1365 	zfs_handle_t *zfp;
1366 	int ret;
1367 
1368 	/*
1369 	 * If the pool is unavailable, just return success.
1370 	 */
1371 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
1372 	    zhp->zpool_name)) == NULL)
1373 		return (0);
1374 
1375 	ret = zfs_iter_children(zfp, do_zvol_create, NULL);
1376 
1377 	zfs_close(zfp);
1378 	return (ret);
1379 }
1380 
1381 static int
1382 do_zvol_remove(const char *dataset, void *data)
1383 {
1384 	zpool_handle_t *zhp = data;
1385 
1386 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
1387 }
1388 
1389 /*
1390  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
1391  * by examining the /dev links so that a corrupted pool doesn't impede this
1392  * operation.
1393  */
1394 int
1395 zpool_remove_zvol_links(zpool_handle_t *zhp)
1396 {
1397 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
1398 }
1399 
1400 /*
1401  * Convert from a devid string to a path.
1402  */
1403 static char *
1404 devid_to_path(char *devid_str)
1405 {
1406 	ddi_devid_t devid;
1407 	char *minor;
1408 	char *path;
1409 	devid_nmlist_t *list = NULL;
1410 	int ret;
1411 
1412 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
1413 		return (NULL);
1414 
1415 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
1416 
1417 	devid_str_free(minor);
1418 	devid_free(devid);
1419 
1420 	if (ret != 0)
1421 		return (NULL);
1422 
1423 	if ((path = strdup(list[0].devname)) == NULL)
1424 		return (NULL);
1425 
1426 	devid_free_nmlist(list);
1427 
1428 	return (path);
1429 }
1430 
1431 /*
1432  * Convert from a path to a devid string.
1433  */
1434 static char *
1435 path_to_devid(const char *path)
1436 {
1437 	int fd;
1438 	ddi_devid_t devid;
1439 	char *minor, *ret;
1440 
1441 	if ((fd = open(path, O_RDONLY)) < 0)
1442 		return (NULL);
1443 
1444 	minor = NULL;
1445 	ret = NULL;
1446 	if (devid_get(fd, &devid) == 0) {
1447 		if (devid_get_minor_name(fd, &minor) == 0)
1448 			ret = devid_str_encode(devid, minor);
1449 		if (minor != NULL)
1450 			devid_str_free(minor);
1451 		devid_free(devid);
1452 	}
1453 	(void) close(fd);
1454 
1455 	return (ret);
1456 }
1457 
1458 /*
1459  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
1460  * ignore any failure here, since a common case is for an unprivileged user to
1461  * type 'zpool status', and we'll display the correct information anyway.
1462  */
1463 static void
1464 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
1465 {
1466 	zfs_cmd_t zc = { 0 };
1467 
1468 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1469 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
1470 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1471 	    &zc.zc_guid) == 0);
1472 
1473 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
1474 }
1475 
1476 /*
1477  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
1478  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
1479  * We also check if this is a whole disk, in which case we strip off the
1480  * trailing 's0' slice name.
1481  *
1482  * This routine is also responsible for identifying when disks have been
1483  * reconfigured in a new location.  The kernel will have opened the device by
1484  * devid, but the path will still refer to the old location.  To catch this, we
1485  * first do a path -> devid translation (which is fast for the common case).  If
1486  * the devid matches, we're done.  If not, we do a reverse devid -> path
1487  * translation and issue the appropriate ioctl() to update the path of the vdev.
1488  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
1489  * of these checks.
1490  */
1491 char *
1492 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
1493 {
1494 	char *path, *devid;
1495 	uint64_t value;
1496 	char buf[64];
1497 
1498 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
1499 	    &value) == 0) {
1500 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1501 		    &value) == 0);
1502 		(void) snprintf(buf, sizeof (buf), "%llu",
1503 		    (u_longlong_t)value);
1504 		path = buf;
1505 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1506 
1507 		if (zhp != NULL &&
1508 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
1509 			/*
1510 			 * Determine if the current path is correct.
1511 			 */
1512 			char *newdevid = path_to_devid(path);
1513 
1514 			if (newdevid == NULL ||
1515 			    strcmp(devid, newdevid) != 0) {
1516 				char *newpath;
1517 
1518 				if ((newpath = devid_to_path(devid)) != NULL) {
1519 					/*
1520 					 * Update the path appropriately.
1521 					 */
1522 					set_path(zhp, nv, newpath);
1523 					if (nvlist_add_string(nv,
1524 					    ZPOOL_CONFIG_PATH, newpath) == 0)
1525 						verify(nvlist_lookup_string(nv,
1526 						    ZPOOL_CONFIG_PATH,
1527 						    &path) == 0);
1528 					free(newpath);
1529 				}
1530 			}
1531 
1532 			if (newdevid)
1533 				devid_str_free(newdevid);
1534 		}
1535 
1536 		if (strncmp(path, "/dev/dsk/", 9) == 0)
1537 			path += 9;
1538 
1539 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1540 		    &value) == 0 && value) {
1541 			char *tmp = zfs_strdup(hdl, path);
1542 			if (tmp == NULL)
1543 				return (NULL);
1544 			tmp[strlen(path) - 2] = '\0';
1545 			return (tmp);
1546 		}
1547 	} else {
1548 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
1549 
1550 		/*
1551 		 * If it's a raidz device, we need to stick in the parity level.
1552 		 */
1553 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
1554 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
1555 			    &value) == 0);
1556 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
1557 			    (u_longlong_t)value);
1558 			path = buf;
1559 		}
1560 	}
1561 
1562 	return (zfs_strdup(hdl, path));
1563 }
1564 
1565 static int
1566 zbookmark_compare(const void *a, const void *b)
1567 {
1568 	return (memcmp(a, b, sizeof (zbookmark_t)));
1569 }
1570 
1571 /*
1572  * Retrieve the persistent error log, uniquify the members, and return to the
1573  * caller.
1574  */
1575 int
1576 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t ***list, size_t *nelem)
1577 {
1578 	zfs_cmd_t zc = { 0 };
1579 	uint64_t count;
1580 	zbookmark_t *zb = NULL;
1581 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1582 	int i, j;
1583 
1584 	if (zhp->zpool_error_log != NULL) {
1585 		*list = zhp->zpool_error_log;
1586 		*nelem = zhp->zpool_error_count;
1587 		return (0);
1588 	}
1589 
1590 	/*
1591 	 * Retrieve the raw error list from the kernel.  If the number of errors
1592 	 * has increased, allocate more space and continue until we get the
1593 	 * entire list.
1594 	 */
1595 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
1596 	    &count) == 0);
1597 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
1598 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
1599 		return (-1);
1600 	zc.zc_nvlist_dst_size = count;
1601 	(void) strcpy(zc.zc_name, zhp->zpool_name);
1602 	for (;;) {
1603 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
1604 		    &zc) != 0) {
1605 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
1606 			if (errno == ENOMEM) {
1607 				if ((zc.zc_nvlist_dst = (uintptr_t)
1608 				    zfs_alloc(zhp->zpool_hdl,
1609 				    zc.zc_nvlist_dst_size)) == (uintptr_t)NULL)
1610 					return (-1);
1611 			} else {
1612 				return (-1);
1613 			}
1614 		} else {
1615 			break;
1616 		}
1617 	}
1618 
1619 	/*
1620 	 * Sort the resulting bookmarks.  This is a little confusing due to the
1621 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
1622 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
1623 	 * _not_ copied as part of the process.  So we point the start of our
1624 	 * array appropriate and decrement the total number of elements.
1625 	 */
1626 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
1627 	    zc.zc_nvlist_dst_size;
1628 	count -= zc.zc_nvlist_dst_size;
1629 	zc.zc_nvlist_dst = 0ULL;
1630 
1631 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
1632 
1633 	/*
1634 	 * Count the number of unique elements
1635 	 */
1636 	j = 0;
1637 	for (i = 0; i < count; i++) {
1638 		if (i > 0 && memcmp(&zb[i - 1], &zb[i],
1639 		    sizeof (zbookmark_t)) == 0)
1640 			continue;
1641 		j++;
1642 	}
1643 
1644 	/*
1645 	 * If the user has only requested the number of items, return it now
1646 	 * without bothering with the extra work.
1647 	 */
1648 	if (list == NULL) {
1649 		*nelem = j;
1650 		free((void *)(uintptr_t)zc.zc_nvlist_dst);
1651 		return (0);
1652 	}
1653 
1654 	zhp->zpool_error_count = j;
1655 
1656 	/*
1657 	 * Allocate an array of nvlists to hold the results
1658 	 */
1659 	if ((zhp->zpool_error_log = zfs_alloc(zhp->zpool_hdl,
1660 	    j * sizeof (nvlist_t *))) == NULL) {
1661 		free((void *)(uintptr_t)zc.zc_nvlist_dst);
1662 		return (-1);
1663 	}
1664 
1665 	/*
1666 	 * Fill in the results with names from the kernel.
1667 	 */
1668 	j = 0;
1669 	for (i = 0; i < count; i++) {
1670 		char buf[64];
1671 		nvlist_t *nv;
1672 
1673 		if (i > 0 && memcmp(&zb[i - 1], &zb[i],
1674 		    sizeof (zbookmark_t)) == 0)
1675 			continue;
1676 
1677 		if (zcmd_alloc_dst_nvlist(hdl, &zc, 0) != 0)
1678 			goto nomem;
1679 
1680 		zc.zc_bookmark = zb[i];
1681 		for (;;) {
1682 			if (ioctl(zhp->zpool_hdl->libzfs_fd,
1683 			    ZFS_IOC_BOOKMARK_NAME, &zc) != 0) {
1684 				if (errno == ENOMEM) {
1685 					if (zcmd_expand_dst_nvlist(hdl, &zc)
1686 					    != 0) {
1687 						zcmd_free_nvlists(&zc);
1688 						goto nomem;
1689 					}
1690 
1691 					continue;
1692 				} else {
1693 					if (nvlist_alloc(&nv, NV_UNIQUE_NAME,
1694 					    0) != 0)
1695 						goto nomem;
1696 
1697 					zhp->zpool_error_log[j] = nv;
1698 					(void) snprintf(buf, sizeof (buf),
1699 					    "%llx", (longlong_t)
1700 					    zb[i].zb_objset);
1701 					if (nvlist_add_string(nv,
1702 					    ZPOOL_ERR_DATASET, buf) != 0)
1703 						goto nomem;
1704 					(void) snprintf(buf, sizeof (buf),
1705 					    "%llx", (longlong_t)
1706 					    zb[i].zb_object);
1707 					if (nvlist_add_string(nv,
1708 					    ZPOOL_ERR_OBJECT, buf) != 0)
1709 						goto nomem;
1710 					(void) snprintf(buf, sizeof (buf),
1711 					    "lvl=%u blkid=%llu",
1712 					    (int)zb[i].zb_level,
1713 					    (long long)zb[i].zb_blkid);
1714 					if (nvlist_add_string(nv,
1715 					    ZPOOL_ERR_RANGE, buf) != 0)
1716 						goto nomem;
1717 				}
1718 			} else {
1719 				if (zcmd_read_dst_nvlist(hdl, &zc,
1720 				    &zhp->zpool_error_log[j]) != 0) {
1721 					zcmd_free_nvlists(&zc);
1722 					goto nomem;
1723 				}
1724 			}
1725 
1726 			break;
1727 		}
1728 
1729 		zcmd_free_nvlists(&zc);
1730 
1731 		j++;
1732 	}
1733 
1734 	*list = zhp->zpool_error_log;
1735 	*nelem = zhp->zpool_error_count;
1736 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
1737 
1738 	return (0);
1739 
1740 nomem:
1741 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
1742 	for (i = 0; i < zhp->zpool_error_count; i++)
1743 		nvlist_free(zhp->zpool_error_log[i]);
1744 	free(zhp->zpool_error_log);
1745 	zhp->zpool_error_log = NULL;
1746 	return (no_memory(zhp->zpool_hdl));
1747 }
1748 
1749 /*
1750  * Upgrade a ZFS pool to the latest on-disk version.
1751  */
1752 int
1753 zpool_upgrade(zpool_handle_t *zhp)
1754 {
1755 	zfs_cmd_t zc = { 0 };
1756 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1757 
1758 	(void) strcpy(zc.zc_name, zhp->zpool_name);
1759 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
1760 		return (zpool_standard_error_fmt(hdl, errno,
1761 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
1762 		    zhp->zpool_name));
1763 
1764 	return (0);
1765 }
1766 
1767 /*
1768  * Log command history.
1769  *
1770  * 'pool' is B_TRUE if we are logging a command for 'zpool'; B_FALSE
1771  * otherwise ('zfs').  'pool_create' is B_TRUE if we are logging the creation
1772  * of the pool; B_FALSE otherwise.  'path' is the pathanme containing the
1773  * poolname.  'argc' and 'argv' are used to construct the command string.
1774  */
1775 void
1776 zpool_log_history(libzfs_handle_t *hdl, int argc, char **argv, const char *path,
1777     boolean_t pool, boolean_t pool_create)
1778 {
1779 	char cmd_buf[HIS_MAX_RECORD_LEN];
1780 	char *dspath;
1781 	zfs_cmd_t zc = { 0 };
1782 	int i;
1783 
1784 	/* construct the command string */
1785 	(void) strcpy(cmd_buf, pool ? "zpool" : "zfs");
1786 	for (i = 0; i < argc; i++) {
1787 		if (strlen(cmd_buf) + 1 + strlen(argv[i]) > HIS_MAX_RECORD_LEN)
1788 			break;
1789 		(void) strcat(cmd_buf, " ");
1790 		(void) strcat(cmd_buf, argv[i]);
1791 	}
1792 
1793 	/* figure out the poolname */
1794 	dspath = strpbrk(path, "/@");
1795 	if (dspath == NULL) {
1796 		(void) strcpy(zc.zc_name, path);
1797 	} else {
1798 		(void) strncpy(zc.zc_name, path, dspath - path);
1799 		zc.zc_name[dspath-path] = '\0';
1800 	}
1801 
1802 	zc.zc_history = (uint64_t)(uintptr_t)cmd_buf;
1803 	zc.zc_history_len = strlen(cmd_buf);
1804 
1805 	/* overloading zc_history_offset */
1806 	zc.zc_history_offset = pool_create;
1807 
1808 	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_LOG_HISTORY, &zc);
1809 }
1810 
1811 /*
1812  * Perform ioctl to get some command history of a pool.
1813  *
1814  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
1815  * logical offset of the history buffer to start reading from.
1816  *
1817  * Upon return, 'off' is the next logical offset to read from and
1818  * 'len' is the actual amount of bytes read into 'buf'.
1819  */
1820 static int
1821 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
1822 {
1823 	zfs_cmd_t zc = { 0 };
1824 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1825 
1826 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1827 
1828 	zc.zc_history = (uint64_t)(uintptr_t)buf;
1829 	zc.zc_history_len = *len;
1830 	zc.zc_history_offset = *off;
1831 
1832 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
1833 		switch (errno) {
1834 		case EPERM:
1835 			return (zfs_error_fmt(hdl, EZFS_PERM,
1836 			    dgettext(TEXT_DOMAIN,
1837 			    "cannot show history for pool '%s'"),
1838 			    zhp->zpool_name));
1839 		case ENOENT:
1840 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
1841 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
1842 			    "'%s'"), zhp->zpool_name));
1843 		default:
1844 			return (zpool_standard_error_fmt(hdl, errno,
1845 			    dgettext(TEXT_DOMAIN,
1846 			    "cannot get history for '%s'"), zhp->zpool_name));
1847 		}
1848 	}
1849 
1850 	*len = zc.zc_history_len;
1851 	*off = zc.zc_history_offset;
1852 
1853 	return (0);
1854 }
1855 
1856 /*
1857  * Process the buffer of nvlists, unpacking and storing each nvlist record
1858  * into 'records'.  'leftover' is set to the number of bytes that weren't
1859  * processed as there wasn't a complete record.
1860  */
1861 static int
1862 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
1863     nvlist_t ***records, uint_t *numrecords)
1864 {
1865 	uint64_t reclen;
1866 	nvlist_t *nv;
1867 	int i;
1868 
1869 	while (bytes_read > sizeof (reclen)) {
1870 
1871 		/* get length of packed record (stored as little endian) */
1872 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
1873 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
1874 
1875 		if (bytes_read < sizeof (reclen) + reclen)
1876 			break;
1877 
1878 		/* unpack record */
1879 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
1880 			return (ENOMEM);
1881 		bytes_read -= sizeof (reclen) + reclen;
1882 		buf += sizeof (reclen) + reclen;
1883 
1884 		/* add record to nvlist array */
1885 		(*numrecords)++;
1886 		if (ISP2(*numrecords + 1)) {
1887 			*records = realloc(*records,
1888 			    *numrecords * 2 * sizeof (nvlist_t *));
1889 		}
1890 		(*records)[*numrecords - 1] = nv;
1891 	}
1892 
1893 	*leftover = bytes_read;
1894 	return (0);
1895 }
1896 
1897 #define	HIS_BUF_LEN	(128*1024)
1898 
1899 /*
1900  * Retrieve the command history of a pool.
1901  */
1902 int
1903 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
1904 {
1905 	char buf[HIS_BUF_LEN];
1906 	uint64_t off = 0;
1907 	nvlist_t **records = NULL;
1908 	uint_t numrecords = 0;
1909 	int err, i;
1910 
1911 	do {
1912 		uint64_t bytes_read = sizeof (buf);
1913 		uint64_t leftover;
1914 
1915 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
1916 			break;
1917 
1918 		/* if nothing else was read in, we're at EOF, just return */
1919 		if (!bytes_read)
1920 			break;
1921 
1922 		if ((err = zpool_history_unpack(buf, bytes_read,
1923 		    &leftover, &records, &numrecords)) != 0)
1924 			break;
1925 		off -= leftover;
1926 
1927 		/* CONSTCOND */
1928 	} while (1);
1929 
1930 	if (!err) {
1931 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
1932 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
1933 		    records, numrecords) == 0);
1934 	}
1935 	for (i = 0; i < numrecords; i++)
1936 		nvlist_free(records[i]);
1937 	free(records);
1938 
1939 	return (err);
1940 }
1941