xref: /titanic_51/usr/src/lib/libzfs/common/libzfs_pool.c (revision 5363b1129db4ee42d2c9736898eab4670580bec7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <alloca.h>
30 #include <assert.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <dirent.h>
35 #include <fcntl.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/zio.h>
43 #include <strings.h>
44 
45 #include "zfs_namecheck.h"
46 #include "libzfs_impl.h"
47 
48 /*
49  * Validate the given pool name, optionally putting an extended error message in
50  * 'buf'.
51  */
52 static boolean_t
53 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
54 {
55 	namecheck_err_t why;
56 	char what;
57 	int ret;
58 
59 	ret = pool_namecheck(pool, &why, &what);
60 
61 	/*
62 	 * The rules for reserved pool names were extended at a later point.
63 	 * But we need to support users with existing pools that may now be
64 	 * invalid.  So we only check for this expanded set of names during a
65 	 * create (or import), and only in userland.
66 	 */
67 	if (ret == 0 && !isopen &&
68 	    (strncmp(pool, "mirror", 6) == 0 ||
69 	    strncmp(pool, "raidz", 5) == 0 ||
70 	    strncmp(pool, "spare", 5) == 0)) {
71 		zfs_error_aux(hdl,
72 		    dgettext(TEXT_DOMAIN, "name is reserved"));
73 		return (B_FALSE);
74 	}
75 
76 
77 	if (ret != 0) {
78 		if (hdl != NULL) {
79 			switch (why) {
80 			case NAME_ERR_TOOLONG:
81 				zfs_error_aux(hdl,
82 				    dgettext(TEXT_DOMAIN, "name is too long"));
83 				break;
84 
85 			case NAME_ERR_INVALCHAR:
86 				zfs_error_aux(hdl,
87 				    dgettext(TEXT_DOMAIN, "invalid character "
88 				    "'%c' in pool name"), what);
89 				break;
90 
91 			case NAME_ERR_NOLETTER:
92 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
93 				    "name must begin with a letter"));
94 				break;
95 
96 			case NAME_ERR_RESERVED:
97 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
98 				    "name is reserved"));
99 				break;
100 
101 			case NAME_ERR_DISKLIKE:
102 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
103 				    "pool name is reserved"));
104 				break;
105 
106 			case NAME_ERR_LEADING_SLASH:
107 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
108 				    "leading slash in name"));
109 				break;
110 
111 			case NAME_ERR_EMPTY_COMPONENT:
112 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
113 				    "empty component in name"));
114 				break;
115 
116 			case NAME_ERR_TRAILING_SLASH:
117 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
118 				    "trailing slash in name"));
119 				break;
120 
121 			case NAME_ERR_MULTIPLE_AT:
122 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
123 				    "multiple '@' delimiters in name"));
124 				break;
125 
126 			}
127 		}
128 		return (B_FALSE);
129 	}
130 
131 	return (B_TRUE);
132 }
133 
134 /*
135  * Set the pool-wide health based on the vdev state of the root vdev.
136  */
137 int
138 set_pool_health(nvlist_t *config)
139 {
140 	nvlist_t *nvroot;
141 	vdev_stat_t *vs;
142 	uint_t vsc;
143 	char *health;
144 
145 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
146 	    &nvroot) == 0);
147 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
148 	    (uint64_t **)&vs, &vsc) == 0);
149 
150 	switch (vs->vs_state) {
151 
152 	case VDEV_STATE_CLOSED:
153 	case VDEV_STATE_CANT_OPEN:
154 	case VDEV_STATE_OFFLINE:
155 		health = dgettext(TEXT_DOMAIN, "FAULTED");
156 		break;
157 
158 	case VDEV_STATE_DEGRADED:
159 		health = dgettext(TEXT_DOMAIN, "DEGRADED");
160 		break;
161 
162 	case VDEV_STATE_HEALTHY:
163 		health = dgettext(TEXT_DOMAIN, "ONLINE");
164 		break;
165 
166 	default:
167 		abort();
168 	}
169 
170 	return (nvlist_add_string(config, ZPOOL_CONFIG_POOL_HEALTH, health));
171 }
172 
173 /*
174  * Open a handle to the given pool, even if the pool is currently in the FAULTED
175  * state.
176  */
177 zpool_handle_t *
178 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
179 {
180 	zpool_handle_t *zhp;
181 	boolean_t missing;
182 
183 	/*
184 	 * Make sure the pool name is valid.
185 	 */
186 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
187 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
188 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
189 		    pool);
190 		return (NULL);
191 	}
192 
193 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
194 		return (NULL);
195 
196 	zhp->zpool_hdl = hdl;
197 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
198 
199 	if (zpool_refresh_stats(zhp, &missing) != 0) {
200 		zpool_close(zhp);
201 		return (NULL);
202 	}
203 
204 	if (missing) {
205 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
206 		    "no such pool"));
207 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
208 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
209 		    pool);
210 		zpool_close(zhp);
211 		return (NULL);
212 	}
213 
214 	return (zhp);
215 }
216 
217 /*
218  * Like the above, but silent on error.  Used when iterating over pools (because
219  * the configuration cache may be out of date).
220  */
221 int
222 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
223 {
224 	zpool_handle_t *zhp;
225 	boolean_t missing;
226 
227 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
228 		return (-1);
229 
230 	zhp->zpool_hdl = hdl;
231 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
232 
233 	if (zpool_refresh_stats(zhp, &missing) != 0) {
234 		zpool_close(zhp);
235 		return (-1);
236 	}
237 
238 	if (missing) {
239 		zpool_close(zhp);
240 		*ret = NULL;
241 		return (0);
242 	}
243 
244 	*ret = zhp;
245 	return (0);
246 }
247 
248 /*
249  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
250  * state.
251  */
252 zpool_handle_t *
253 zpool_open(libzfs_handle_t *hdl, const char *pool)
254 {
255 	zpool_handle_t *zhp;
256 
257 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
258 		return (NULL);
259 
260 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
261 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
262 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
263 		zpool_close(zhp);
264 		return (NULL);
265 	}
266 
267 	return (zhp);
268 }
269 
270 /*
271  * Close the handle.  Simply frees the memory associated with the handle.
272  */
273 void
274 zpool_close(zpool_handle_t *zhp)
275 {
276 	if (zhp->zpool_config)
277 		nvlist_free(zhp->zpool_config);
278 	if (zhp->zpool_old_config)
279 		nvlist_free(zhp->zpool_old_config);
280 	free(zhp);
281 }
282 
283 /*
284  * Return the name of the pool.
285  */
286 const char *
287 zpool_get_name(zpool_handle_t *zhp)
288 {
289 	return (zhp->zpool_name);
290 }
291 
292 /*
293  * Return the GUID of the pool.
294  */
295 uint64_t
296 zpool_get_guid(zpool_handle_t *zhp)
297 {
298 	uint64_t guid;
299 
300 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
301 	    &guid) == 0);
302 	return (guid);
303 }
304 
305 /*
306  * Return the version of the pool.
307  */
308 uint64_t
309 zpool_get_version(zpool_handle_t *zhp)
310 {
311 	uint64_t version;
312 
313 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
314 	    &version) == 0);
315 
316 	return (version);
317 }
318 
319 /*
320  * Return the amount of space currently consumed by the pool.
321  */
322 uint64_t
323 zpool_get_space_used(zpool_handle_t *zhp)
324 {
325 	nvlist_t *nvroot;
326 	vdev_stat_t *vs;
327 	uint_t vsc;
328 
329 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
330 	    &nvroot) == 0);
331 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
332 	    (uint64_t **)&vs, &vsc) == 0);
333 
334 	return (vs->vs_alloc);
335 }
336 
337 /*
338  * Return the total space in the pool.
339  */
340 uint64_t
341 zpool_get_space_total(zpool_handle_t *zhp)
342 {
343 	nvlist_t *nvroot;
344 	vdev_stat_t *vs;
345 	uint_t vsc;
346 
347 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
348 	    &nvroot) == 0);
349 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
350 	    (uint64_t **)&vs, &vsc) == 0);
351 
352 	return (vs->vs_space);
353 }
354 
355 /*
356  * Return the alternate root for this pool, if any.
357  */
358 int
359 zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
360 {
361 	zfs_cmd_t zc = { 0 };
362 
363 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
364 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
365 	    zc.zc_value[0] == '\0')
366 		return (-1);
367 
368 	(void) strlcpy(buf, zc.zc_value, buflen);
369 
370 	return (0);
371 }
372 
373 /*
374  * Return the state of the pool (ACTIVE or UNAVAILABLE)
375  */
376 int
377 zpool_get_state(zpool_handle_t *zhp)
378 {
379 	return (zhp->zpool_state);
380 }
381 
382 /*
383  * Create the named pool, using the provided vdev list.  It is assumed
384  * that the consumer has already validated the contents of the nvlist, so we
385  * don't have to worry about error semantics.
386  */
387 int
388 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
389     const char *altroot)
390 {
391 	zfs_cmd_t zc = { 0 };
392 	char msg[1024];
393 
394 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
395 	    "cannot create '%s'"), pool);
396 
397 	if (!zpool_name_valid(hdl, B_FALSE, pool))
398 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
399 
400 	if (altroot != NULL && altroot[0] != '/')
401 		return (zfs_error_fmt(hdl, EZFS_BADPATH,
402 		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
403 
404 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
405 		return (-1);
406 
407 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
408 
409 	if (altroot != NULL)
410 		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
411 
412 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
413 		zcmd_free_nvlists(&zc);
414 
415 		switch (errno) {
416 		case EBUSY:
417 			/*
418 			 * This can happen if the user has specified the same
419 			 * device multiple times.  We can't reliably detect this
420 			 * until we try to add it and see we already have a
421 			 * label.
422 			 */
423 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
424 			    "one or more vdevs refer to the same device"));
425 			return (zfs_error(hdl, EZFS_BADDEV, msg));
426 
427 		case EOVERFLOW:
428 			/*
429 			 * This occurs when one of the devices is below
430 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
431 			 * device was the problem device since there's no
432 			 * reliable way to determine device size from userland.
433 			 */
434 			{
435 				char buf[64];
436 
437 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
438 
439 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
440 				    "one or more devices is less than the "
441 				    "minimum size (%s)"), buf);
442 			}
443 			return (zfs_error(hdl, EZFS_BADDEV, msg));
444 
445 		case ENOSPC:
446 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
447 			    "one or more devices is out of space"));
448 			return (zfs_error(hdl, EZFS_BADDEV, msg));
449 
450 		default:
451 			return (zpool_standard_error(hdl, errno, msg));
452 		}
453 	}
454 
455 	zcmd_free_nvlists(&zc);
456 
457 	/*
458 	 * If this is an alternate root pool, then we automatically set the
459 	 * mountpoint of the root dataset to be '/'.
460 	 */
461 	if (altroot != NULL) {
462 		zfs_handle_t *zhp;
463 
464 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
465 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
466 		    "/") == 0);
467 
468 		zfs_close(zhp);
469 	}
470 
471 	return (0);
472 }
473 
474 /*
475  * Destroy the given pool.  It is up to the caller to ensure that there are no
476  * datasets left in the pool.
477  */
478 int
479 zpool_destroy(zpool_handle_t *zhp)
480 {
481 	zfs_cmd_t zc = { 0 };
482 	zfs_handle_t *zfp = NULL;
483 	libzfs_handle_t *hdl = zhp->zpool_hdl;
484 	char msg[1024];
485 
486 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
487 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
488 	    ZFS_TYPE_FILESYSTEM)) == NULL)
489 		return (-1);
490 
491 	if (zpool_remove_zvol_links(zhp) != 0)
492 		return (-1);
493 
494 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
495 
496 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
497 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
498 		    "cannot destroy '%s'"), zhp->zpool_name);
499 
500 		if (errno == EROFS) {
501 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
502 			    "one or more devices is read only"));
503 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
504 		} else {
505 			(void) zpool_standard_error(hdl, errno, msg);
506 		}
507 
508 		if (zfp)
509 			zfs_close(zfp);
510 		return (-1);
511 	}
512 
513 	if (zfp) {
514 		remove_mountpoint(zfp);
515 		zfs_close(zfp);
516 	}
517 
518 	return (0);
519 }
520 
521 /*
522  * Add the given vdevs to the pool.  The caller must have already performed the
523  * necessary verification to ensure that the vdev specification is well-formed.
524  */
525 int
526 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
527 {
528 	zfs_cmd_t zc = { 0 };
529 	int ret;
530 	libzfs_handle_t *hdl = zhp->zpool_hdl;
531 	char msg[1024];
532 	nvlist_t **spares;
533 	uint_t nspares;
534 
535 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
536 	    "cannot add to '%s'"), zhp->zpool_name);
537 
538 	if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
539 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
540 	    &spares, &nspares) == 0) {
541 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
542 		    "upgraded to add hot spares"));
543 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
544 	}
545 
546 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
547 		return (-1);
548 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
549 
550 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
551 		switch (errno) {
552 		case EBUSY:
553 			/*
554 			 * This can happen if the user has specified the same
555 			 * device multiple times.  We can't reliably detect this
556 			 * until we try to add it and see we already have a
557 			 * label.
558 			 */
559 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
560 			    "one or more vdevs refer to the same device"));
561 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
562 			break;
563 
564 		case EOVERFLOW:
565 			/*
566 			 * This occurrs when one of the devices is below
567 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
568 			 * device was the problem device since there's no
569 			 * reliable way to determine device size from userland.
570 			 */
571 			{
572 				char buf[64];
573 
574 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
575 
576 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
577 				    "device is less than the minimum "
578 				    "size (%s)"), buf);
579 			}
580 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
581 			break;
582 
583 		case ENOTSUP:
584 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
585 			    "pool must be upgraded to add raidz2 vdevs"));
586 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
587 			break;
588 
589 		default:
590 			(void) zpool_standard_error(hdl, errno, msg);
591 		}
592 
593 		ret = -1;
594 	} else {
595 		ret = 0;
596 	}
597 
598 	zcmd_free_nvlists(&zc);
599 
600 	return (ret);
601 }
602 
603 /*
604  * Exports the pool from the system.  The caller must ensure that there are no
605  * mounted datasets in the pool.
606  */
607 int
608 zpool_export(zpool_handle_t *zhp)
609 {
610 	zfs_cmd_t zc = { 0 };
611 
612 	if (zpool_remove_zvol_links(zhp) != 0)
613 		return (-1);
614 
615 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
616 
617 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0)
618 		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
619 		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
620 		    zhp->zpool_name));
621 
622 	return (0);
623 }
624 
625 /*
626  * Import the given pool using the known configuration.  The configuration
627  * should have come from zpool_find_import().  The 'newname' and 'altroot'
628  * parameters control whether the pool is imported with a different name or with
629  * an alternate root, respectively.
630  */
631 int
632 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
633     const char *altroot)
634 {
635 	zfs_cmd_t zc = { 0 };
636 	char *thename;
637 	char *origname;
638 	int ret;
639 
640 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
641 	    &origname) == 0);
642 
643 	if (newname != NULL) {
644 		if (!zpool_name_valid(hdl, B_FALSE, newname))
645 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
646 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
647 			    newname));
648 		thename = (char *)newname;
649 	} else {
650 		thename = origname;
651 	}
652 
653 	if (altroot != NULL && altroot[0] != '/')
654 		return (zfs_error_fmt(hdl, EZFS_BADPATH,
655 		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
656 		    altroot));
657 
658 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
659 
660 	if (altroot != NULL)
661 		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
662 	else
663 		zc.zc_value[0] = '\0';
664 
665 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
666 	    &zc.zc_guid) == 0);
667 
668 	if (zcmd_write_src_nvlist(hdl, &zc, config, NULL) != 0)
669 		return (-1);
670 
671 	ret = 0;
672 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
673 		char desc[1024];
674 		if (newname == NULL)
675 			(void) snprintf(desc, sizeof (desc),
676 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
677 			    thename);
678 		else
679 			(void) snprintf(desc, sizeof (desc),
680 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
681 			    origname, thename);
682 
683 		switch (errno) {
684 		case ENOTSUP:
685 			/*
686 			 * Unsupported version.
687 			 */
688 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
689 			break;
690 
691 		case EINVAL:
692 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
693 			break;
694 
695 		default:
696 			(void) zpool_standard_error(hdl, errno, desc);
697 		}
698 
699 		ret = -1;
700 	} else {
701 		zpool_handle_t *zhp;
702 		/*
703 		 * This should never fail, but play it safe anyway.
704 		 */
705 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
706 			ret = -1;
707 		} else if (zhp != NULL) {
708 			ret = zpool_create_zvol_links(zhp);
709 			zpool_close(zhp);
710 		}
711 	}
712 
713 	zcmd_free_nvlists(&zc);
714 	return (ret);
715 }
716 
717 /*
718  * Scrub the pool.
719  */
720 int
721 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
722 {
723 	zfs_cmd_t zc = { 0 };
724 	char msg[1024];
725 	libzfs_handle_t *hdl = zhp->zpool_hdl;
726 
727 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
728 	zc.zc_cookie = type;
729 
730 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
731 		return (0);
732 
733 	(void) snprintf(msg, sizeof (msg),
734 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
735 
736 	if (errno == EBUSY)
737 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
738 	else
739 		return (zpool_standard_error(hdl, errno, msg));
740 }
741 
742 /*
743  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
744  * spare; but FALSE if its an INUSE spare.
745  */
746 static nvlist_t *
747 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
748     boolean_t *avail_spare)
749 {
750 	uint_t c, children;
751 	nvlist_t **child;
752 	uint64_t theguid, present;
753 	char *path;
754 	uint64_t wholedisk = 0;
755 	nvlist_t *ret;
756 
757 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
758 
759 	if (search == NULL &&
760 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
761 		/*
762 		 * If the device has never been present since import, the only
763 		 * reliable way to match the vdev is by GUID.
764 		 */
765 		if (theguid == guid)
766 			return (nv);
767 	} else if (search != NULL &&
768 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
769 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
770 		    &wholedisk);
771 		if (wholedisk) {
772 			/*
773 			 * For whole disks, the internal path has 's0', but the
774 			 * path passed in by the user doesn't.
775 			 */
776 			if (strlen(search) == strlen(path) - 2 &&
777 			    strncmp(search, path, strlen(search)) == 0)
778 				return (nv);
779 		} else if (strcmp(search, path) == 0) {
780 			return (nv);
781 		}
782 	}
783 
784 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
785 	    &child, &children) != 0)
786 		return (NULL);
787 
788 	for (c = 0; c < children; c++)
789 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
790 		    avail_spare)) != NULL)
791 			return (ret);
792 
793 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
794 	    &child, &children) == 0) {
795 		for (c = 0; c < children; c++) {
796 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
797 			    avail_spare)) != NULL) {
798 				*avail_spare = B_TRUE;
799 				return (ret);
800 			}
801 		}
802 	}
803 
804 	return (NULL);
805 }
806 
807 nvlist_t *
808 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare)
809 {
810 	char buf[MAXPATHLEN];
811 	const char *search;
812 	char *end;
813 	nvlist_t *nvroot;
814 	uint64_t guid;
815 
816 	guid = strtoull(path, &end, 10);
817 	if (guid != 0 && *end == '\0') {
818 		search = NULL;
819 	} else if (path[0] != '/') {
820 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
821 		search = buf;
822 	} else {
823 		search = path;
824 	}
825 
826 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
827 	    &nvroot) == 0);
828 
829 	*avail_spare = B_FALSE;
830 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare));
831 }
832 
833 /*
834  * Returns TRUE if the given guid corresponds to a spare (INUSE or not).
835  */
836 static boolean_t
837 is_spare(zpool_handle_t *zhp, uint64_t guid)
838 {
839 	uint64_t spare_guid;
840 	nvlist_t *nvroot;
841 	nvlist_t **spares;
842 	uint_t nspares;
843 	int i;
844 
845 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
846 	    &nvroot) == 0);
847 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
848 	    &spares, &nspares) == 0) {
849 		for (i = 0; i < nspares; i++) {
850 			verify(nvlist_lookup_uint64(spares[i],
851 			    ZPOOL_CONFIG_GUID, &spare_guid) == 0);
852 			if (guid == spare_guid)
853 				return (B_TRUE);
854 		}
855 	}
856 
857 	return (B_FALSE);
858 }
859 
860 /*
861  * Bring the specified vdev online
862  */
863 int
864 zpool_vdev_online(zpool_handle_t *zhp, const char *path)
865 {
866 	zfs_cmd_t zc = { 0 };
867 	char msg[1024];
868 	nvlist_t *tgt;
869 	boolean_t avail_spare;
870 	libzfs_handle_t *hdl = zhp->zpool_hdl;
871 
872 	(void) snprintf(msg, sizeof (msg),
873 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
874 
875 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
876 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
877 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
878 
879 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
880 
881 	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
882 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
883 
884 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0)
885 		return (0);
886 
887 	return (zpool_standard_error(hdl, errno, msg));
888 }
889 
890 /*
891  * Take the specified vdev offline
892  */
893 int
894 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
895 {
896 	zfs_cmd_t zc = { 0 };
897 	char msg[1024];
898 	nvlist_t *tgt;
899 	boolean_t avail_spare;
900 	libzfs_handle_t *hdl = zhp->zpool_hdl;
901 
902 	(void) snprintf(msg, sizeof (msg),
903 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
904 
905 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
906 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
907 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
908 
909 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
910 
911 	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
912 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
913 
914 	zc.zc_cookie = istmp;
915 
916 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
917 		return (0);
918 
919 	switch (errno) {
920 	case EBUSY:
921 
922 		/*
923 		 * There are no other replicas of this device.
924 		 */
925 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
926 
927 	default:
928 		return (zpool_standard_error(hdl, errno, msg));
929 	}
930 }
931 
932 /*
933  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
934  * a hot spare.
935  */
936 static boolean_t
937 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
938 {
939 	nvlist_t **child;
940 	uint_t c, children;
941 	char *type;
942 
943 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
944 	    &children) == 0) {
945 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
946 		    &type) == 0);
947 
948 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
949 		    children == 2 && child[which] == tgt)
950 			return (B_TRUE);
951 
952 		for (c = 0; c < children; c++)
953 			if (is_replacing_spare(child[c], tgt, which))
954 				return (B_TRUE);
955 	}
956 
957 	return (B_FALSE);
958 }
959 
960 /*
961  * Attach new_disk (fully described by nvroot) to old_disk.
962  * If 'replacing' is specified, tne new disk will replace the old one.
963  */
964 int
965 zpool_vdev_attach(zpool_handle_t *zhp,
966     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
967 {
968 	zfs_cmd_t zc = { 0 };
969 	char msg[1024];
970 	int ret;
971 	nvlist_t *tgt;
972 	boolean_t avail_spare;
973 	uint64_t val;
974 	char *path;
975 	nvlist_t **child;
976 	uint_t children;
977 	nvlist_t *config_root;
978 	libzfs_handle_t *hdl = zhp->zpool_hdl;
979 
980 	if (replacing)
981 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
982 		    "cannot replace %s with %s"), old_disk, new_disk);
983 	else
984 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
985 		    "cannot attach %s to %s"), new_disk, old_disk);
986 
987 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
988 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
989 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
990 
991 	if (avail_spare)
992 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
993 
994 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
995 	zc.zc_cookie = replacing;
996 
997 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
998 	    &child, &children) != 0 || children != 1) {
999 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1000 		    "new device must be a single disk"));
1001 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
1002 	}
1003 
1004 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
1005 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
1006 
1007 	/*
1008 	 * If the target is a hot spare that has been swapped in, we can only
1009 	 * replace it with another hot spare.
1010 	 */
1011 	if (replacing &&
1012 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
1013 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1014 	    (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
1015 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
1016 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1017 		    "can only be replaced by another hot spare"));
1018 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1019 	}
1020 
1021 	/*
1022 	 * If we are attempting to replace a spare, it canot be applied to an
1023 	 * already spared device.
1024 	 */
1025 	if (replacing &&
1026 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
1027 	    zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
1028 	    is_replacing_spare(config_root, tgt, 0)) {
1029 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1030 		    "device has already been replaced with a spare"));
1031 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
1032 	}
1033 
1034 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
1035 		return (-1);
1036 
1037 	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
1038 
1039 	zcmd_free_nvlists(&zc);
1040 
1041 	if (ret == 0)
1042 		return (0);
1043 
1044 	switch (errno) {
1045 	case ENOTSUP:
1046 		/*
1047 		 * Can't attach to or replace this type of vdev.
1048 		 */
1049 		if (replacing)
1050 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1051 			    "cannot replace a replacing device"));
1052 		else
1053 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1054 			    "can only attach to mirrors and top-level "
1055 			    "disks"));
1056 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1057 		break;
1058 
1059 	case EINVAL:
1060 		/*
1061 		 * The new device must be a single disk.
1062 		 */
1063 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1064 		    "new device must be a single disk"));
1065 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1066 		break;
1067 
1068 	case EBUSY:
1069 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1070 		    new_disk);
1071 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1072 		break;
1073 
1074 	case EOVERFLOW:
1075 		/*
1076 		 * The new device is too small.
1077 		 */
1078 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1079 		    "device is too small"));
1080 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1081 		break;
1082 
1083 	case EDOM:
1084 		/*
1085 		 * The new device has a different alignment requirement.
1086 		 */
1087 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1088 		    "devices have different sector alignment"));
1089 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1090 		break;
1091 
1092 	case ENAMETOOLONG:
1093 		/*
1094 		 * The resulting top-level vdev spec won't fit in the label.
1095 		 */
1096 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1097 		break;
1098 
1099 	default:
1100 		(void) zpool_standard_error(hdl, errno, msg);
1101 	}
1102 
1103 	return (-1);
1104 }
1105 
1106 /*
1107  * Detach the specified device.
1108  */
1109 int
1110 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1111 {
1112 	zfs_cmd_t zc = { 0 };
1113 	char msg[1024];
1114 	nvlist_t *tgt;
1115 	boolean_t avail_spare;
1116 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1117 
1118 	(void) snprintf(msg, sizeof (msg),
1119 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1120 
1121 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1122 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1123 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1124 
1125 	if (avail_spare)
1126 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1127 
1128 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1129 
1130 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1131 		return (0);
1132 
1133 	switch (errno) {
1134 
1135 	case ENOTSUP:
1136 		/*
1137 		 * Can't detach from this type of vdev.
1138 		 */
1139 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1140 		    "applicable to mirror and replacing vdevs"));
1141 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1142 		break;
1143 
1144 	case EBUSY:
1145 		/*
1146 		 * There are no other replicas of this device.
1147 		 */
1148 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1149 		break;
1150 
1151 	default:
1152 		(void) zpool_standard_error(hdl, errno, msg);
1153 	}
1154 
1155 	return (-1);
1156 }
1157 
1158 /*
1159  * Remove the given device.  Currently, this is supported only for hot spares.
1160  */
1161 int
1162 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1163 {
1164 	zfs_cmd_t zc = { 0 };
1165 	char msg[1024];
1166 	nvlist_t *tgt;
1167 	boolean_t avail_spare;
1168 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1169 
1170 	(void) snprintf(msg, sizeof (msg),
1171 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1172 
1173 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1174 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1175 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1176 
1177 	if (!avail_spare) {
1178 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1179 		    "only inactive hot spares can be removed"));
1180 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1181 	}
1182 
1183 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1184 
1185 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1186 		return (0);
1187 
1188 	return (zpool_standard_error(hdl, errno, msg));
1189 }
1190 
1191 /*
1192  * Clear the errors for the pool, or the particular device if specified.
1193  */
1194 int
1195 zpool_clear(zpool_handle_t *zhp, const char *path)
1196 {
1197 	zfs_cmd_t zc = { 0 };
1198 	char msg[1024];
1199 	nvlist_t *tgt;
1200 	boolean_t avail_spare;
1201 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1202 
1203 	if (path)
1204 		(void) snprintf(msg, sizeof (msg),
1205 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1206 		    path);
1207 	else
1208 		(void) snprintf(msg, sizeof (msg),
1209 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1210 		    zhp->zpool_name);
1211 
1212 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1213 	if (path) {
1214 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1215 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1216 
1217 		if (avail_spare)
1218 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1219 
1220 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1221 		    &zc.zc_guid) == 0);
1222 	}
1223 
1224 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1225 		return (0);
1226 
1227 	return (zpool_standard_error(hdl, errno, msg));
1228 }
1229 
1230 /*
1231  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1232  * hierarchy.
1233  */
1234 int
1235 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1236     void *data)
1237 {
1238 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1239 	char (*paths)[MAXPATHLEN];
1240 	size_t size = 4;
1241 	int curr, fd, base, ret = 0;
1242 	DIR *dirp;
1243 	struct dirent *dp;
1244 	struct stat st;
1245 
1246 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1247 		return (errno == ENOENT ? 0 : -1);
1248 
1249 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1250 		int err = errno;
1251 		(void) close(base);
1252 		return (err == ENOENT ? 0 : -1);
1253 	}
1254 
1255 	/*
1256 	 * Oddly this wasn't a directory -- ignore that failure since we
1257 	 * know there are no links lower in the (non-existant) hierarchy.
1258 	 */
1259 	if (!S_ISDIR(st.st_mode)) {
1260 		(void) close(base);
1261 		return (0);
1262 	}
1263 
1264 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1265 		(void) close(base);
1266 		return (-1);
1267 	}
1268 
1269 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1270 	curr = 0;
1271 
1272 	while (curr >= 0) {
1273 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1274 			goto err;
1275 
1276 		if (S_ISDIR(st.st_mode)) {
1277 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1278 				goto err;
1279 
1280 			if ((dirp = fdopendir(fd)) == NULL) {
1281 				(void) close(fd);
1282 				goto err;
1283 			}
1284 
1285 			while ((dp = readdir(dirp)) != NULL) {
1286 				if (dp->d_name[0] == '.')
1287 					continue;
1288 
1289 				if (curr + 1 == size) {
1290 					paths = zfs_realloc(hdl, paths,
1291 					    size * sizeof (paths[0]),
1292 					    size * 2 * sizeof (paths[0]));
1293 					if (paths == NULL) {
1294 						(void) closedir(dirp);
1295 						(void) close(fd);
1296 						goto err;
1297 					}
1298 
1299 					size *= 2;
1300 				}
1301 
1302 				(void) strlcpy(paths[curr + 1], paths[curr],
1303 				    sizeof (paths[curr + 1]));
1304 				(void) strlcat(paths[curr], "/",
1305 				    sizeof (paths[curr]));
1306 				(void) strlcat(paths[curr], dp->d_name,
1307 				    sizeof (paths[curr]));
1308 				curr++;
1309 			}
1310 
1311 			(void) closedir(dirp);
1312 
1313 		} else {
1314 			if ((ret = cb(paths[curr], data)) != 0)
1315 				break;
1316 		}
1317 
1318 		curr--;
1319 	}
1320 
1321 	free(paths);
1322 	(void) close(base);
1323 
1324 	return (ret);
1325 
1326 err:
1327 	free(paths);
1328 	(void) close(base);
1329 	return (-1);
1330 }
1331 
1332 typedef struct zvol_cb {
1333 	zpool_handle_t *zcb_pool;
1334 	boolean_t zcb_create;
1335 } zvol_cb_t;
1336 
1337 /*ARGSUSED*/
1338 static int
1339 do_zvol_create(zfs_handle_t *zhp, void *data)
1340 {
1341 	int ret;
1342 
1343 	if (ZFS_IS_VOLUME(zhp))
1344 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
1345 
1346 	ret = zfs_iter_children(zhp, do_zvol_create, NULL);
1347 
1348 	zfs_close(zhp);
1349 
1350 	return (ret);
1351 }
1352 
1353 /*
1354  * Iterate over all zvols in the pool and make any necessary minor nodes.
1355  */
1356 int
1357 zpool_create_zvol_links(zpool_handle_t *zhp)
1358 {
1359 	zfs_handle_t *zfp;
1360 	int ret;
1361 
1362 	/*
1363 	 * If the pool is unavailable, just return success.
1364 	 */
1365 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
1366 	    zhp->zpool_name)) == NULL)
1367 		return (0);
1368 
1369 	ret = zfs_iter_children(zfp, do_zvol_create, NULL);
1370 
1371 	zfs_close(zfp);
1372 	return (ret);
1373 }
1374 
1375 static int
1376 do_zvol_remove(const char *dataset, void *data)
1377 {
1378 	zpool_handle_t *zhp = data;
1379 
1380 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
1381 }
1382 
1383 /*
1384  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
1385  * by examining the /dev links so that a corrupted pool doesn't impede this
1386  * operation.
1387  */
1388 int
1389 zpool_remove_zvol_links(zpool_handle_t *zhp)
1390 {
1391 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
1392 }
1393 
1394 /*
1395  * Convert from a devid string to a path.
1396  */
1397 static char *
1398 devid_to_path(char *devid_str)
1399 {
1400 	ddi_devid_t devid;
1401 	char *minor;
1402 	char *path;
1403 	devid_nmlist_t *list = NULL;
1404 	int ret;
1405 
1406 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
1407 		return (NULL);
1408 
1409 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
1410 
1411 	devid_str_free(minor);
1412 	devid_free(devid);
1413 
1414 	if (ret != 0)
1415 		return (NULL);
1416 
1417 	if ((path = strdup(list[0].devname)) == NULL)
1418 		return (NULL);
1419 
1420 	devid_free_nmlist(list);
1421 
1422 	return (path);
1423 }
1424 
1425 /*
1426  * Convert from a path to a devid string.
1427  */
1428 static char *
1429 path_to_devid(const char *path)
1430 {
1431 	int fd;
1432 	ddi_devid_t devid;
1433 	char *minor, *ret;
1434 
1435 	if ((fd = open(path, O_RDONLY)) < 0)
1436 		return (NULL);
1437 
1438 	minor = NULL;
1439 	ret = NULL;
1440 	if (devid_get(fd, &devid) == 0) {
1441 		if (devid_get_minor_name(fd, &minor) == 0)
1442 			ret = devid_str_encode(devid, minor);
1443 		if (minor != NULL)
1444 			devid_str_free(minor);
1445 		devid_free(devid);
1446 	}
1447 	(void) close(fd);
1448 
1449 	return (ret);
1450 }
1451 
1452 /*
1453  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
1454  * ignore any failure here, since a common case is for an unprivileged user to
1455  * type 'zpool status', and we'll display the correct information anyway.
1456  */
1457 static void
1458 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
1459 {
1460 	zfs_cmd_t zc = { 0 };
1461 
1462 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1463 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
1464 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1465 	    &zc.zc_guid) == 0);
1466 
1467 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
1468 }
1469 
1470 /*
1471  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
1472  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
1473  * We also check if this is a whole disk, in which case we strip off the
1474  * trailing 's0' slice name.
1475  *
1476  * This routine is also responsible for identifying when disks have been
1477  * reconfigured in a new location.  The kernel will have opened the device by
1478  * devid, but the path will still refer to the old location.  To catch this, we
1479  * first do a path -> devid translation (which is fast for the common case).  If
1480  * the devid matches, we're done.  If not, we do a reverse devid -> path
1481  * translation and issue the appropriate ioctl() to update the path of the vdev.
1482  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
1483  * of these checks.
1484  */
1485 char *
1486 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
1487 {
1488 	char *path, *devid;
1489 	uint64_t value;
1490 	char buf[64];
1491 
1492 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
1493 	    &value) == 0) {
1494 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1495 		    &value) == 0);
1496 		(void) snprintf(buf, sizeof (buf), "%llu",
1497 		    (u_longlong_t)value);
1498 		path = buf;
1499 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1500 
1501 		if (zhp != NULL &&
1502 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
1503 			/*
1504 			 * Determine if the current path is correct.
1505 			 */
1506 			char *newdevid = path_to_devid(path);
1507 
1508 			if (newdevid == NULL ||
1509 			    strcmp(devid, newdevid) != 0) {
1510 				char *newpath;
1511 
1512 				if ((newpath = devid_to_path(devid)) != NULL) {
1513 					/*
1514 					 * Update the path appropriately.
1515 					 */
1516 					set_path(zhp, nv, newpath);
1517 					if (nvlist_add_string(nv,
1518 					    ZPOOL_CONFIG_PATH, newpath) == 0)
1519 						verify(nvlist_lookup_string(nv,
1520 						    ZPOOL_CONFIG_PATH,
1521 						    &path) == 0);
1522 					free(newpath);
1523 				}
1524 			}
1525 
1526 			if (newdevid)
1527 				devid_str_free(newdevid);
1528 		}
1529 
1530 		if (strncmp(path, "/dev/dsk/", 9) == 0)
1531 			path += 9;
1532 
1533 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1534 		    &value) == 0 && value) {
1535 			char *tmp = zfs_strdup(hdl, path);
1536 			if (tmp == NULL)
1537 				return (NULL);
1538 			tmp[strlen(path) - 2] = '\0';
1539 			return (tmp);
1540 		}
1541 	} else {
1542 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
1543 
1544 		/*
1545 		 * If it's a raidz device, we need to stick in the parity level.
1546 		 */
1547 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
1548 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
1549 			    &value) == 0);
1550 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
1551 			    (u_longlong_t)value);
1552 			path = buf;
1553 		}
1554 	}
1555 
1556 	return (zfs_strdup(hdl, path));
1557 }
1558 
1559 static int
1560 zbookmark_compare(const void *a, const void *b)
1561 {
1562 	return (memcmp(a, b, sizeof (zbookmark_t)));
1563 }
1564 
1565 /*
1566  * Retrieve the persistent error log, uniquify the members, and return to the
1567  * caller.
1568  */
1569 int
1570 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
1571 {
1572 	zfs_cmd_t zc = { 0 };
1573 	uint64_t count;
1574 	zbookmark_t *zb = NULL;
1575 	int i;
1576 
1577 	/*
1578 	 * Retrieve the raw error list from the kernel.  If the number of errors
1579 	 * has increased, allocate more space and continue until we get the
1580 	 * entire list.
1581 	 */
1582 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
1583 	    &count) == 0);
1584 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
1585 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
1586 		return (-1);
1587 	zc.zc_nvlist_dst_size = count;
1588 	(void) strcpy(zc.zc_name, zhp->zpool_name);
1589 	for (;;) {
1590 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
1591 		    &zc) != 0) {
1592 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
1593 			if (errno == ENOMEM) {
1594 				if ((zc.zc_nvlist_dst = (uintptr_t)
1595 				    zfs_alloc(zhp->zpool_hdl,
1596 				    zc.zc_nvlist_dst_size)) == (uintptr_t)NULL)
1597 					return (-1);
1598 			} else {
1599 				return (-1);
1600 			}
1601 		} else {
1602 			break;
1603 		}
1604 	}
1605 
1606 	/*
1607 	 * Sort the resulting bookmarks.  This is a little confusing due to the
1608 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
1609 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
1610 	 * _not_ copied as part of the process.  So we point the start of our
1611 	 * array appropriate and decrement the total number of elements.
1612 	 */
1613 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
1614 	    zc.zc_nvlist_dst_size;
1615 	count -= zc.zc_nvlist_dst_size;
1616 
1617 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
1618 
1619 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
1620 
1621 	/*
1622 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
1623 	 */
1624 	for (i = 0; i < count; i++) {
1625 		nvlist_t *nv;
1626 
1627 		if (i > 0 && memcmp(&zb[i - 1], &zb[i],
1628 		    sizeof (zbookmark_t)) == 0)
1629 			continue;
1630 
1631 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
1632 			goto nomem;
1633 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
1634 		    zb[i].zb_objset) != 0) {
1635 			nvlist_free(nv);
1636 			goto nomem;
1637 		}
1638 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
1639 		    zb[i].zb_object) != 0) {
1640 			nvlist_free(nv);
1641 			goto nomem;
1642 		}
1643 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
1644 			nvlist_free(nv);
1645 			goto nomem;
1646 		}
1647 		nvlist_free(nv);
1648 	}
1649 
1650 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
1651 	return (0);
1652 
1653 nomem:
1654 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
1655 	return (no_memory(zhp->zpool_hdl));
1656 }
1657 
1658 /*
1659  * Upgrade a ZFS pool to the latest on-disk version.
1660  */
1661 int
1662 zpool_upgrade(zpool_handle_t *zhp)
1663 {
1664 	zfs_cmd_t zc = { 0 };
1665 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1666 
1667 	(void) strcpy(zc.zc_name, zhp->zpool_name);
1668 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
1669 		return (zpool_standard_error_fmt(hdl, errno,
1670 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
1671 		    zhp->zpool_name));
1672 
1673 	return (0);
1674 }
1675 
1676 /*
1677  * Log command history.
1678  *
1679  * 'pool' is B_TRUE if we are logging a command for 'zpool'; B_FALSE
1680  * otherwise ('zfs').  'pool_create' is B_TRUE if we are logging the creation
1681  * of the pool; B_FALSE otherwise.  'path' is the pathanme containing the
1682  * poolname.  'argc' and 'argv' are used to construct the command string.
1683  */
1684 void
1685 zpool_log_history(libzfs_handle_t *hdl, int argc, char **argv, const char *path,
1686     boolean_t pool, boolean_t pool_create)
1687 {
1688 	char cmd_buf[HIS_MAX_RECORD_LEN];
1689 	char *dspath;
1690 	zfs_cmd_t zc = { 0 };
1691 	int i;
1692 
1693 	/* construct the command string */
1694 	(void) strcpy(cmd_buf, pool ? "zpool" : "zfs");
1695 	for (i = 0; i < argc; i++) {
1696 		if (strlen(cmd_buf) + 1 + strlen(argv[i]) > HIS_MAX_RECORD_LEN)
1697 			break;
1698 		(void) strcat(cmd_buf, " ");
1699 		(void) strcat(cmd_buf, argv[i]);
1700 	}
1701 
1702 	/* figure out the poolname */
1703 	dspath = strpbrk(path, "/@");
1704 	if (dspath == NULL) {
1705 		(void) strcpy(zc.zc_name, path);
1706 	} else {
1707 		(void) strncpy(zc.zc_name, path, dspath - path);
1708 		zc.zc_name[dspath-path] = '\0';
1709 	}
1710 
1711 	zc.zc_history = (uint64_t)(uintptr_t)cmd_buf;
1712 	zc.zc_history_len = strlen(cmd_buf);
1713 
1714 	/* overloading zc_history_offset */
1715 	zc.zc_history_offset = pool_create;
1716 
1717 	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_LOG_HISTORY, &zc);
1718 }
1719 
1720 /*
1721  * Perform ioctl to get some command history of a pool.
1722  *
1723  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
1724  * logical offset of the history buffer to start reading from.
1725  *
1726  * Upon return, 'off' is the next logical offset to read from and
1727  * 'len' is the actual amount of bytes read into 'buf'.
1728  */
1729 static int
1730 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
1731 {
1732 	zfs_cmd_t zc = { 0 };
1733 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1734 
1735 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1736 
1737 	zc.zc_history = (uint64_t)(uintptr_t)buf;
1738 	zc.zc_history_len = *len;
1739 	zc.zc_history_offset = *off;
1740 
1741 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
1742 		switch (errno) {
1743 		case EPERM:
1744 			return (zfs_error_fmt(hdl, EZFS_PERM,
1745 			    dgettext(TEXT_DOMAIN,
1746 			    "cannot show history for pool '%s'"),
1747 			    zhp->zpool_name));
1748 		case ENOENT:
1749 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
1750 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
1751 			    "'%s'"), zhp->zpool_name));
1752 		default:
1753 			return (zpool_standard_error_fmt(hdl, errno,
1754 			    dgettext(TEXT_DOMAIN,
1755 			    "cannot get history for '%s'"), zhp->zpool_name));
1756 		}
1757 	}
1758 
1759 	*len = zc.zc_history_len;
1760 	*off = zc.zc_history_offset;
1761 
1762 	return (0);
1763 }
1764 
1765 /*
1766  * Process the buffer of nvlists, unpacking and storing each nvlist record
1767  * into 'records'.  'leftover' is set to the number of bytes that weren't
1768  * processed as there wasn't a complete record.
1769  */
1770 static int
1771 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
1772     nvlist_t ***records, uint_t *numrecords)
1773 {
1774 	uint64_t reclen;
1775 	nvlist_t *nv;
1776 	int i;
1777 
1778 	while (bytes_read > sizeof (reclen)) {
1779 
1780 		/* get length of packed record (stored as little endian) */
1781 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
1782 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
1783 
1784 		if (bytes_read < sizeof (reclen) + reclen)
1785 			break;
1786 
1787 		/* unpack record */
1788 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
1789 			return (ENOMEM);
1790 		bytes_read -= sizeof (reclen) + reclen;
1791 		buf += sizeof (reclen) + reclen;
1792 
1793 		/* add record to nvlist array */
1794 		(*numrecords)++;
1795 		if (ISP2(*numrecords + 1)) {
1796 			*records = realloc(*records,
1797 			    *numrecords * 2 * sizeof (nvlist_t *));
1798 		}
1799 		(*records)[*numrecords - 1] = nv;
1800 	}
1801 
1802 	*leftover = bytes_read;
1803 	return (0);
1804 }
1805 
1806 #define	HIS_BUF_LEN	(128*1024)
1807 
1808 /*
1809  * Retrieve the command history of a pool.
1810  */
1811 int
1812 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
1813 {
1814 	char buf[HIS_BUF_LEN];
1815 	uint64_t off = 0;
1816 	nvlist_t **records = NULL;
1817 	uint_t numrecords = 0;
1818 	int err, i;
1819 
1820 	do {
1821 		uint64_t bytes_read = sizeof (buf);
1822 		uint64_t leftover;
1823 
1824 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
1825 			break;
1826 
1827 		/* if nothing else was read in, we're at EOF, just return */
1828 		if (!bytes_read)
1829 			break;
1830 
1831 		if ((err = zpool_history_unpack(buf, bytes_read,
1832 		    &leftover, &records, &numrecords)) != 0)
1833 			break;
1834 		off -= leftover;
1835 
1836 		/* CONSTCOND */
1837 	} while (1);
1838 
1839 	if (!err) {
1840 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
1841 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
1842 		    records, numrecords) == 0);
1843 	}
1844 	for (i = 0; i < numrecords; i++)
1845 		nvlist_free(records[i]);
1846 	free(records);
1847 
1848 	return (err);
1849 }
1850 
1851 void
1852 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
1853     char *pathname, size_t len)
1854 {
1855 	zfs_cmd_t zc = { 0 };
1856 	boolean_t mounted = B_FALSE;
1857 	char *mntpnt = NULL;
1858 	char dsname[MAXNAMELEN];
1859 
1860 	if (dsobj == 0) {
1861 		/* special case for the MOS */
1862 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
1863 		return;
1864 	}
1865 
1866 	/* get the dataset's name */
1867 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1868 	zc.zc_obj = dsobj;
1869 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
1870 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
1871 		/* just write out a path of two object numbers */
1872 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
1873 		    dsobj, obj);
1874 		return;
1875 	}
1876 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
1877 
1878 	/* find out if the dataset is mounted */
1879 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
1880 
1881 	/* get the corrupted object's path */
1882 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
1883 	zc.zc_obj = obj;
1884 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
1885 	    &zc) == 0) {
1886 		if (mounted) {
1887 			(void) snprintf(pathname, len, "%s%s", mntpnt,
1888 			    zc.zc_value);
1889 		} else {
1890 			(void) snprintf(pathname, len, "%s:%s",
1891 			    dsname, zc.zc_value);
1892 		}
1893 	} else {
1894 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
1895 	}
1896 	free(mntpnt);
1897 }
1898