xref: /titanic_50/usr/src/lib/libzfs/common/libzfs_pool.c (revision c2cb63342c63c60cee771d1af82f377e34aa1217)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <alloca.h>
30 #include <assert.h>
31 #include <ctype.h>
32 #include <errno.h>
33 #include <devid.h>
34 #include <dirent.h>
35 #include <fcntl.h>
36 #include <libintl.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <strings.h>
40 #include <unistd.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/zio.h>
43 #include <strings.h>
44 
45 #include "zfs_namecheck.h"
46 #include "libzfs_impl.h"
47 
48 /*
49  * Validate the given pool name, optionally putting an extended error message in
50  * 'buf'.
51  */
52 static boolean_t
53 zpool_name_valid(libzfs_handle_t *hdl, boolean_t isopen, const char *pool)
54 {
55 	namecheck_err_t why;
56 	char what;
57 	int ret;
58 
59 	ret = pool_namecheck(pool, &why, &what);
60 
61 	/*
62 	 * The rules for reserved pool names were extended at a later point.
63 	 * But we need to support users with existing pools that may now be
64 	 * invalid.  So we only check for this expanded set of names during a
65 	 * create (or import), and only in userland.
66 	 */
67 	if (ret == 0 && !isopen &&
68 	    (strncmp(pool, "mirror", 6) == 0 ||
69 	    strncmp(pool, "raidz", 5) == 0 ||
70 	    strncmp(pool, "spare", 5) == 0)) {
71 		zfs_error_aux(hdl,
72 		    dgettext(TEXT_DOMAIN, "name is reserved"));
73 		return (B_FALSE);
74 	}
75 
76 
77 	if (ret != 0) {
78 		if (hdl != NULL) {
79 			switch (why) {
80 			case NAME_ERR_TOOLONG:
81 				zfs_error_aux(hdl,
82 				    dgettext(TEXT_DOMAIN, "name is too long"));
83 				break;
84 
85 			case NAME_ERR_INVALCHAR:
86 				zfs_error_aux(hdl,
87 				    dgettext(TEXT_DOMAIN, "invalid character "
88 				    "'%c' in pool name"), what);
89 				break;
90 
91 			case NAME_ERR_NOLETTER:
92 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
93 				    "name must begin with a letter"));
94 				break;
95 
96 			case NAME_ERR_RESERVED:
97 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
98 				    "name is reserved"));
99 				break;
100 
101 			case NAME_ERR_DISKLIKE:
102 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
103 				    "pool name is reserved"));
104 				break;
105 
106 			case NAME_ERR_LEADING_SLASH:
107 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
108 				    "leading slash in name"));
109 				break;
110 
111 			case NAME_ERR_EMPTY_COMPONENT:
112 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
113 				    "empty component in name"));
114 				break;
115 
116 			case NAME_ERR_TRAILING_SLASH:
117 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
118 				    "trailing slash in name"));
119 				break;
120 
121 			case NAME_ERR_MULTIPLE_AT:
122 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
123 				    "multiple '@' delimiters in name"));
124 				break;
125 
126 			}
127 		}
128 		return (B_FALSE);
129 	}
130 
131 	return (B_TRUE);
132 }
133 
134 /*
135  * Open a handle to the given pool, even if the pool is currently in the FAULTED
136  * state.
137  */
138 zpool_handle_t *
139 zpool_open_canfail(libzfs_handle_t *hdl, const char *pool)
140 {
141 	zpool_handle_t *zhp;
142 	boolean_t missing;
143 
144 	/*
145 	 * Make sure the pool name is valid.
146 	 */
147 	if (!zpool_name_valid(hdl, B_TRUE, pool)) {
148 		(void) zfs_error_fmt(hdl, EZFS_INVALIDNAME,
149 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
150 		    pool);
151 		return (NULL);
152 	}
153 
154 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
155 		return (NULL);
156 
157 	zhp->zpool_hdl = hdl;
158 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
159 
160 	if (zpool_refresh_stats(zhp, &missing) != 0) {
161 		zpool_close(zhp);
162 		return (NULL);
163 	}
164 
165 	if (missing) {
166 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
167 		    "no such pool"));
168 		(void) zfs_error_fmt(hdl, EZFS_NOENT,
169 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"),
170 		    pool);
171 		zpool_close(zhp);
172 		return (NULL);
173 	}
174 
175 	return (zhp);
176 }
177 
178 /*
179  * Like the above, but silent on error.  Used when iterating over pools (because
180  * the configuration cache may be out of date).
181  */
182 int
183 zpool_open_silent(libzfs_handle_t *hdl, const char *pool, zpool_handle_t **ret)
184 {
185 	zpool_handle_t *zhp;
186 	boolean_t missing;
187 
188 	if ((zhp = zfs_alloc(hdl, sizeof (zpool_handle_t))) == NULL)
189 		return (-1);
190 
191 	zhp->zpool_hdl = hdl;
192 	(void) strlcpy(zhp->zpool_name, pool, sizeof (zhp->zpool_name));
193 
194 	if (zpool_refresh_stats(zhp, &missing) != 0) {
195 		zpool_close(zhp);
196 		return (-1);
197 	}
198 
199 	if (missing) {
200 		zpool_close(zhp);
201 		*ret = NULL;
202 		return (0);
203 	}
204 
205 	*ret = zhp;
206 	return (0);
207 }
208 
209 /*
210  * Similar to zpool_open_canfail(), but refuses to open pools in the faulted
211  * state.
212  */
213 zpool_handle_t *
214 zpool_open(libzfs_handle_t *hdl, const char *pool)
215 {
216 	zpool_handle_t *zhp;
217 
218 	if ((zhp = zpool_open_canfail(hdl, pool)) == NULL)
219 		return (NULL);
220 
221 	if (zhp->zpool_state == POOL_STATE_UNAVAIL) {
222 		(void) zfs_error_fmt(hdl, EZFS_POOLUNAVAIL,
223 		    dgettext(TEXT_DOMAIN, "cannot open '%s'"), zhp->zpool_name);
224 		zpool_close(zhp);
225 		return (NULL);
226 	}
227 
228 	return (zhp);
229 }
230 
231 /*
232  * Close the handle.  Simply frees the memory associated with the handle.
233  */
234 void
235 zpool_close(zpool_handle_t *zhp)
236 {
237 	if (zhp->zpool_config)
238 		nvlist_free(zhp->zpool_config);
239 	if (zhp->zpool_old_config)
240 		nvlist_free(zhp->zpool_old_config);
241 	free(zhp);
242 }
243 
244 /*
245  * Return the name of the pool.
246  */
247 const char *
248 zpool_get_name(zpool_handle_t *zhp)
249 {
250 	return (zhp->zpool_name);
251 }
252 
253 /*
254  * Return the GUID of the pool.
255  */
256 uint64_t
257 zpool_get_guid(zpool_handle_t *zhp)
258 {
259 	uint64_t guid;
260 
261 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_POOL_GUID,
262 	    &guid) == 0);
263 	return (guid);
264 }
265 
266 /*
267  * Return the version of the pool.
268  */
269 uint64_t
270 zpool_get_version(zpool_handle_t *zhp)
271 {
272 	uint64_t version;
273 
274 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_VERSION,
275 	    &version) == 0);
276 
277 	return (version);
278 }
279 
280 /*
281  * Return the amount of space currently consumed by the pool.
282  */
283 uint64_t
284 zpool_get_space_used(zpool_handle_t *zhp)
285 {
286 	nvlist_t *nvroot;
287 	vdev_stat_t *vs;
288 	uint_t vsc;
289 
290 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
291 	    &nvroot) == 0);
292 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
293 	    (uint64_t **)&vs, &vsc) == 0);
294 
295 	return (vs->vs_alloc);
296 }
297 
298 /*
299  * Return the total space in the pool.
300  */
301 uint64_t
302 zpool_get_space_total(zpool_handle_t *zhp)
303 {
304 	nvlist_t *nvroot;
305 	vdev_stat_t *vs;
306 	uint_t vsc;
307 
308 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
309 	    &nvroot) == 0);
310 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
311 	    (uint64_t **)&vs, &vsc) == 0);
312 
313 	return (vs->vs_space);
314 }
315 
316 /*
317  * Return the alternate root for this pool, if any.
318  */
319 int
320 zpool_get_root(zpool_handle_t *zhp, char *buf, size_t buflen)
321 {
322 	zfs_cmd_t zc = { 0 };
323 
324 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
325 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0 ||
326 	    zc.zc_value[0] == '\0')
327 		return (-1);
328 
329 	(void) strlcpy(buf, zc.zc_value, buflen);
330 
331 	return (0);
332 }
333 
334 /*
335  * Return the state of the pool (ACTIVE or UNAVAILABLE)
336  */
337 int
338 zpool_get_state(zpool_handle_t *zhp)
339 {
340 	return (zhp->zpool_state);
341 }
342 
343 /*
344  * Create the named pool, using the provided vdev list.  It is assumed
345  * that the consumer has already validated the contents of the nvlist, so we
346  * don't have to worry about error semantics.
347  */
348 int
349 zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
350     const char *altroot)
351 {
352 	zfs_cmd_t zc = { 0 };
353 	char msg[1024];
354 
355 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
356 	    "cannot create '%s'"), pool);
357 
358 	if (!zpool_name_valid(hdl, B_FALSE, pool))
359 		return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
360 
361 	if (altroot != NULL && altroot[0] != '/')
362 		return (zfs_error_fmt(hdl, EZFS_BADPATH,
363 		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"), altroot));
364 
365 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
366 		return (-1);
367 
368 	(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));
369 
370 	if (altroot != NULL)
371 		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
372 
373 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_CREATE, &zc) != 0) {
374 		zcmd_free_nvlists(&zc);
375 
376 		switch (errno) {
377 		case EBUSY:
378 			/*
379 			 * This can happen if the user has specified the same
380 			 * device multiple times.  We can't reliably detect this
381 			 * until we try to add it and see we already have a
382 			 * label.
383 			 */
384 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
385 			    "one or more vdevs refer to the same device"));
386 			return (zfs_error(hdl, EZFS_BADDEV, msg));
387 
388 		case EOVERFLOW:
389 			/*
390 			 * This occurs when one of the devices is below
391 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
392 			 * device was the problem device since there's no
393 			 * reliable way to determine device size from userland.
394 			 */
395 			{
396 				char buf[64];
397 
398 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
399 
400 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
401 				    "one or more devices is less than the "
402 				    "minimum size (%s)"), buf);
403 			}
404 			return (zfs_error(hdl, EZFS_BADDEV, msg));
405 
406 		case ENOSPC:
407 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
408 			    "one or more devices is out of space"));
409 			return (zfs_error(hdl, EZFS_BADDEV, msg));
410 
411 		default:
412 			return (zpool_standard_error(hdl, errno, msg));
413 		}
414 	}
415 
416 	zcmd_free_nvlists(&zc);
417 
418 	/*
419 	 * If this is an alternate root pool, then we automatically set the
420 	 * mountpoint of the root dataset to be '/'.
421 	 */
422 	if (altroot != NULL) {
423 		zfs_handle_t *zhp;
424 
425 		verify((zhp = zfs_open(hdl, pool, ZFS_TYPE_ANY)) != NULL);
426 		verify(zfs_prop_set(zhp, zfs_prop_to_name(ZFS_PROP_MOUNTPOINT),
427 		    "/") == 0);
428 
429 		zfs_close(zhp);
430 	}
431 
432 	return (0);
433 }
434 
435 /*
436  * Destroy the given pool.  It is up to the caller to ensure that there are no
437  * datasets left in the pool.
438  */
439 int
440 zpool_destroy(zpool_handle_t *zhp)
441 {
442 	zfs_cmd_t zc = { 0 };
443 	zfs_handle_t *zfp = NULL;
444 	libzfs_handle_t *hdl = zhp->zpool_hdl;
445 	char msg[1024];
446 
447 	if (zhp->zpool_state == POOL_STATE_ACTIVE &&
448 	    (zfp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
449 	    ZFS_TYPE_FILESYSTEM)) == NULL)
450 		return (-1);
451 
452 	if (zpool_remove_zvol_links(zhp) != 0)
453 		return (-1);
454 
455 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
456 
457 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
458 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
459 		    "cannot destroy '%s'"), zhp->zpool_name);
460 
461 		if (errno == EROFS) {
462 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
463 			    "one or more devices is read only"));
464 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
465 		} else {
466 			(void) zpool_standard_error(hdl, errno, msg);
467 		}
468 
469 		if (zfp)
470 			zfs_close(zfp);
471 		return (-1);
472 	}
473 
474 	if (zfp) {
475 		remove_mountpoint(zfp);
476 		zfs_close(zfp);
477 	}
478 
479 	return (0);
480 }
481 
482 /*
483  * Add the given vdevs to the pool.  The caller must have already performed the
484  * necessary verification to ensure that the vdev specification is well-formed.
485  */
486 int
487 zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
488 {
489 	zfs_cmd_t zc = { 0 };
490 	int ret;
491 	libzfs_handle_t *hdl = zhp->zpool_hdl;
492 	char msg[1024];
493 	nvlist_t **spares;
494 	uint_t nspares;
495 
496 	(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
497 	    "cannot add to '%s'"), zhp->zpool_name);
498 
499 	if (zpool_get_version(zhp) < ZFS_VERSION_SPARES &&
500 	    nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
501 	    &spares, &nspares) == 0) {
502 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
503 		    "upgraded to add hot spares"));
504 		return (zfs_error(hdl, EZFS_BADVERSION, msg));
505 	}
506 
507 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
508 		return (-1);
509 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
510 
511 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ADD, &zc) != 0) {
512 		switch (errno) {
513 		case EBUSY:
514 			/*
515 			 * This can happen if the user has specified the same
516 			 * device multiple times.  We can't reliably detect this
517 			 * until we try to add it and see we already have a
518 			 * label.
519 			 */
520 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
521 			    "one or more vdevs refer to the same device"));
522 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
523 			break;
524 
525 		case EOVERFLOW:
526 			/*
527 			 * This occurrs when one of the devices is below
528 			 * SPA_MINDEVSIZE.  Unfortunately, we can't detect which
529 			 * device was the problem device since there's no
530 			 * reliable way to determine device size from userland.
531 			 */
532 			{
533 				char buf[64];
534 
535 				zfs_nicenum(SPA_MINDEVSIZE, buf, sizeof (buf));
536 
537 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
538 				    "device is less than the minimum "
539 				    "size (%s)"), buf);
540 			}
541 			(void) zfs_error(hdl, EZFS_BADDEV, msg);
542 			break;
543 
544 		case ENOTSUP:
545 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
546 			    "pool must be upgraded to add raidz2 vdevs"));
547 			(void) zfs_error(hdl, EZFS_BADVERSION, msg);
548 			break;
549 
550 		default:
551 			(void) zpool_standard_error(hdl, errno, msg);
552 		}
553 
554 		ret = -1;
555 	} else {
556 		ret = 0;
557 	}
558 
559 	zcmd_free_nvlists(&zc);
560 
561 	return (ret);
562 }
563 
564 /*
565  * Exports the pool from the system.  The caller must ensure that there are no
566  * mounted datasets in the pool.
567  */
568 int
569 zpool_export(zpool_handle_t *zhp)
570 {
571 	zfs_cmd_t zc = { 0 };
572 
573 	if (zpool_remove_zvol_links(zhp) != 0)
574 		return (-1);
575 
576 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
577 
578 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_EXPORT, &zc) != 0)
579 		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
580 		    dgettext(TEXT_DOMAIN, "cannot export '%s'"),
581 		    zhp->zpool_name));
582 
583 	return (0);
584 }
585 
586 /*
587  * Import the given pool using the known configuration.  The configuration
588  * should have come from zpool_find_import().  The 'newname' and 'altroot'
589  * parameters control whether the pool is imported with a different name or with
590  * an alternate root, respectively.
591  */
592 int
593 zpool_import(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
594     const char *altroot)
595 {
596 	zfs_cmd_t zc = { 0 };
597 	char *thename;
598 	char *origname;
599 	int ret;
600 
601 	verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
602 	    &origname) == 0);
603 
604 	if (newname != NULL) {
605 		if (!zpool_name_valid(hdl, B_FALSE, newname))
606 			return (zfs_error_fmt(hdl, EZFS_INVALIDNAME,
607 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
608 			    newname));
609 		thename = (char *)newname;
610 	} else {
611 		thename = origname;
612 	}
613 
614 	if (altroot != NULL && altroot[0] != '/')
615 		return (zfs_error_fmt(hdl, EZFS_BADPATH,
616 		    dgettext(TEXT_DOMAIN, "bad alternate root '%s'"),
617 		    altroot));
618 
619 	(void) strlcpy(zc.zc_name, thename, sizeof (zc.zc_name));
620 
621 	if (altroot != NULL)
622 		(void) strlcpy(zc.zc_value, altroot, sizeof (zc.zc_value));
623 	else
624 		zc.zc_value[0] = '\0';
625 
626 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
627 	    &zc.zc_guid) == 0);
628 
629 	if (zcmd_write_src_nvlist(hdl, &zc, config, NULL) != 0)
630 		return (-1);
631 
632 	ret = 0;
633 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_IMPORT, &zc) != 0) {
634 		char desc[1024];
635 		if (newname == NULL)
636 			(void) snprintf(desc, sizeof (desc),
637 			    dgettext(TEXT_DOMAIN, "cannot import '%s'"),
638 			    thename);
639 		else
640 			(void) snprintf(desc, sizeof (desc),
641 			    dgettext(TEXT_DOMAIN, "cannot import '%s' as '%s'"),
642 			    origname, thename);
643 
644 		switch (errno) {
645 		case ENOTSUP:
646 			/*
647 			 * Unsupported version.
648 			 */
649 			(void) zfs_error(hdl, EZFS_BADVERSION, desc);
650 			break;
651 
652 		case EINVAL:
653 			(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
654 			break;
655 
656 		default:
657 			(void) zpool_standard_error(hdl, errno, desc);
658 		}
659 
660 		ret = -1;
661 	} else {
662 		zpool_handle_t *zhp;
663 		/*
664 		 * This should never fail, but play it safe anyway.
665 		 */
666 		if (zpool_open_silent(hdl, thename, &zhp) != 0) {
667 			ret = -1;
668 		} else if (zhp != NULL) {
669 			ret = zpool_create_zvol_links(zhp);
670 			zpool_close(zhp);
671 		}
672 	}
673 
674 	zcmd_free_nvlists(&zc);
675 	return (ret);
676 }
677 
678 /*
679  * Scrub the pool.
680  */
681 int
682 zpool_scrub(zpool_handle_t *zhp, pool_scrub_type_t type)
683 {
684 	zfs_cmd_t zc = { 0 };
685 	char msg[1024];
686 	libzfs_handle_t *hdl = zhp->zpool_hdl;
687 
688 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
689 	zc.zc_cookie = type;
690 
691 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_POOL_SCRUB, &zc) == 0)
692 		return (0);
693 
694 	(void) snprintf(msg, sizeof (msg),
695 	    dgettext(TEXT_DOMAIN, "cannot scrub %s"), zc.zc_name);
696 
697 	if (errno == EBUSY)
698 		return (zfs_error(hdl, EZFS_RESILVERING, msg));
699 	else
700 		return (zpool_standard_error(hdl, errno, msg));
701 }
702 
703 /*
704  * 'avail_spare' is set to TRUE if the provided guid refers to an AVAIL
705  * spare; but FALSE if its an INUSE spare.
706  */
707 static nvlist_t *
708 vdev_to_nvlist_iter(nvlist_t *nv, const char *search, uint64_t guid,
709     boolean_t *avail_spare)
710 {
711 	uint_t c, children;
712 	nvlist_t **child;
713 	uint64_t theguid, present;
714 	char *path;
715 	uint64_t wholedisk = 0;
716 	nvlist_t *ret;
717 
718 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &theguid) == 0);
719 
720 	if (search == NULL &&
721 	    nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT, &present) == 0) {
722 		/*
723 		 * If the device has never been present since import, the only
724 		 * reliable way to match the vdev is by GUID.
725 		 */
726 		if (theguid == guid)
727 			return (nv);
728 	} else if (search != NULL &&
729 	    nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
730 		(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
731 		    &wholedisk);
732 		if (wholedisk) {
733 			/*
734 			 * For whole disks, the internal path has 's0', but the
735 			 * path passed in by the user doesn't.
736 			 */
737 			if (strlen(search) == strlen(path) - 2 &&
738 			    strncmp(search, path, strlen(search)) == 0)
739 				return (nv);
740 		} else if (strcmp(search, path) == 0) {
741 			return (nv);
742 		}
743 	}
744 
745 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
746 	    &child, &children) != 0)
747 		return (NULL);
748 
749 	for (c = 0; c < children; c++)
750 		if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
751 		    avail_spare)) != NULL)
752 			return (ret);
753 
754 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
755 	    &child, &children) == 0) {
756 		for (c = 0; c < children; c++) {
757 			if ((ret = vdev_to_nvlist_iter(child[c], search, guid,
758 			    avail_spare)) != NULL) {
759 				*avail_spare = B_TRUE;
760 				return (ret);
761 			}
762 		}
763 	}
764 
765 	return (NULL);
766 }
767 
768 nvlist_t *
769 zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare)
770 {
771 	char buf[MAXPATHLEN];
772 	const char *search;
773 	char *end;
774 	nvlist_t *nvroot;
775 	uint64_t guid;
776 
777 	guid = strtoull(path, &end, 10);
778 	if (guid != 0 && *end == '\0') {
779 		search = NULL;
780 	} else if (path[0] != '/') {
781 		(void) snprintf(buf, sizeof (buf), "%s%s", "/dev/dsk/", path);
782 		search = buf;
783 	} else {
784 		search = path;
785 	}
786 
787 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
788 	    &nvroot) == 0);
789 
790 	*avail_spare = B_FALSE;
791 	return (vdev_to_nvlist_iter(nvroot, search, guid, avail_spare));
792 }
793 
794 /*
795  * Returns TRUE if the given guid corresponds to a spare (INUSE or not).
796  */
797 static boolean_t
798 is_spare(zpool_handle_t *zhp, uint64_t guid)
799 {
800 	uint64_t spare_guid;
801 	nvlist_t *nvroot;
802 	nvlist_t **spares;
803 	uint_t nspares;
804 	int i;
805 
806 	verify(nvlist_lookup_nvlist(zhp->zpool_config, ZPOOL_CONFIG_VDEV_TREE,
807 	    &nvroot) == 0);
808 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
809 	    &spares, &nspares) == 0) {
810 		for (i = 0; i < nspares; i++) {
811 			verify(nvlist_lookup_uint64(spares[i],
812 			    ZPOOL_CONFIG_GUID, &spare_guid) == 0);
813 			if (guid == spare_guid)
814 				return (B_TRUE);
815 		}
816 	}
817 
818 	return (B_FALSE);
819 }
820 
821 /*
822  * Bring the specified vdev online
823  */
824 int
825 zpool_vdev_online(zpool_handle_t *zhp, const char *path)
826 {
827 	zfs_cmd_t zc = { 0 };
828 	char msg[1024];
829 	nvlist_t *tgt;
830 	boolean_t avail_spare;
831 	libzfs_handle_t *hdl = zhp->zpool_hdl;
832 
833 	(void) snprintf(msg, sizeof (msg),
834 	    dgettext(TEXT_DOMAIN, "cannot online %s"), path);
835 
836 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
837 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
838 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
839 
840 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
841 
842 	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
843 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
844 
845 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ONLINE, &zc) == 0)
846 		return (0);
847 
848 	return (zpool_standard_error(hdl, errno, msg));
849 }
850 
851 /*
852  * Take the specified vdev offline
853  */
854 int
855 zpool_vdev_offline(zpool_handle_t *zhp, const char *path, int istmp)
856 {
857 	zfs_cmd_t zc = { 0 };
858 	char msg[1024];
859 	nvlist_t *tgt;
860 	boolean_t avail_spare;
861 	libzfs_handle_t *hdl = zhp->zpool_hdl;
862 
863 	(void) snprintf(msg, sizeof (msg),
864 	    dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
865 
866 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
867 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == NULL)
868 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
869 
870 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
871 
872 	if (avail_spare || is_spare(zhp, zc.zc_guid) == B_TRUE)
873 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
874 
875 	zc.zc_cookie = istmp;
876 
877 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_OFFLINE, &zc) == 0)
878 		return (0);
879 
880 	switch (errno) {
881 	case EBUSY:
882 
883 		/*
884 		 * There are no other replicas of this device.
885 		 */
886 		return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
887 
888 	default:
889 		return (zpool_standard_error(hdl, errno, msg));
890 	}
891 }
892 
893 /*
894  * Returns TRUE if the given nvlist is a vdev that was originally swapped in as
895  * a hot spare.
896  */
897 static boolean_t
898 is_replacing_spare(nvlist_t *search, nvlist_t *tgt, int which)
899 {
900 	nvlist_t **child;
901 	uint_t c, children;
902 	char *type;
903 
904 	if (nvlist_lookup_nvlist_array(search, ZPOOL_CONFIG_CHILDREN, &child,
905 	    &children) == 0) {
906 		verify(nvlist_lookup_string(search, ZPOOL_CONFIG_TYPE,
907 		    &type) == 0);
908 
909 		if (strcmp(type, VDEV_TYPE_SPARE) == 0 &&
910 		    children == 2 && child[which] == tgt)
911 			return (B_TRUE);
912 
913 		for (c = 0; c < children; c++)
914 			if (is_replacing_spare(child[c], tgt, which))
915 				return (B_TRUE);
916 	}
917 
918 	return (B_FALSE);
919 }
920 
921 /*
922  * Attach new_disk (fully described by nvroot) to old_disk.
923  * If 'replacing' is specified, tne new disk will replace the old one.
924  */
925 int
926 zpool_vdev_attach(zpool_handle_t *zhp,
927     const char *old_disk, const char *new_disk, nvlist_t *nvroot, int replacing)
928 {
929 	zfs_cmd_t zc = { 0 };
930 	char msg[1024];
931 	int ret;
932 	nvlist_t *tgt;
933 	boolean_t avail_spare;
934 	uint64_t val;
935 	char *path;
936 	nvlist_t **child;
937 	uint_t children;
938 	nvlist_t *config_root;
939 	libzfs_handle_t *hdl = zhp->zpool_hdl;
940 
941 	if (replacing)
942 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
943 		    "cannot replace %s with %s"), old_disk, new_disk);
944 	else
945 		(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
946 		    "cannot attach %s to %s"), new_disk, old_disk);
947 
948 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
949 	if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare)) == 0)
950 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
951 
952 	if (avail_spare)
953 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
954 
955 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
956 	zc.zc_cookie = replacing;
957 
958 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
959 	    &child, &children) != 0 || children != 1) {
960 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
961 		    "new device must be a single disk"));
962 		return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
963 	}
964 
965 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
966 	    ZPOOL_CONFIG_VDEV_TREE, &config_root) == 0);
967 
968 	/*
969 	 * If the target is a hot spare that has been swapped in, we can only
970 	 * replace it with another hot spare.
971 	 */
972 	if (replacing &&
973 	    nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_IS_SPARE, &val) == 0 &&
974 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
975 	    (zpool_find_vdev(zhp, path, &avail_spare) == NULL ||
976 	    !avail_spare) && is_replacing_spare(config_root, tgt, 1)) {
977 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
978 		    "can only be replaced by another hot spare"));
979 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
980 	}
981 
982 	/*
983 	 * If we are attempting to replace a spare, it canot be applied to an
984 	 * already spared device.
985 	 */
986 	if (replacing &&
987 	    nvlist_lookup_string(child[0], ZPOOL_CONFIG_PATH, &path) == 0 &&
988 	    zpool_find_vdev(zhp, path, &avail_spare) != NULL && avail_spare &&
989 	    is_replacing_spare(config_root, tgt, 0)) {
990 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
991 		    "device has already been replaced with a spare"));
992 		return (zfs_error(hdl, EZFS_BADTARGET, msg));
993 	}
994 
995 	if (zcmd_write_src_nvlist(hdl, &zc, nvroot, NULL) != 0)
996 		return (-1);
997 
998 	ret = ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_ATTACH, &zc);
999 
1000 	zcmd_free_nvlists(&zc);
1001 
1002 	if (ret == 0)
1003 		return (0);
1004 
1005 	switch (errno) {
1006 	case ENOTSUP:
1007 		/*
1008 		 * Can't attach to or replace this type of vdev.
1009 		 */
1010 		if (replacing)
1011 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1012 			    "cannot replace a replacing device"));
1013 		else
1014 			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1015 			    "can only attach to mirrors and top-level "
1016 			    "disks"));
1017 		(void) zfs_error(hdl, EZFS_BADTARGET, msg);
1018 		break;
1019 
1020 	case EINVAL:
1021 		/*
1022 		 * The new device must be a single disk.
1023 		 */
1024 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1025 		    "new device must be a single disk"));
1026 		(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
1027 		break;
1028 
1029 	case EBUSY:
1030 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy"),
1031 		    new_disk);
1032 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1033 		break;
1034 
1035 	case EOVERFLOW:
1036 		/*
1037 		 * The new device is too small.
1038 		 */
1039 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1040 		    "device is too small"));
1041 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1042 		break;
1043 
1044 	case EDOM:
1045 		/*
1046 		 * The new device has a different alignment requirement.
1047 		 */
1048 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1049 		    "devices have different sector alignment"));
1050 		(void) zfs_error(hdl, EZFS_BADDEV, msg);
1051 		break;
1052 
1053 	case ENAMETOOLONG:
1054 		/*
1055 		 * The resulting top-level vdev spec won't fit in the label.
1056 		 */
1057 		(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
1058 		break;
1059 
1060 	default:
1061 		(void) zpool_standard_error(hdl, errno, msg);
1062 	}
1063 
1064 	return (-1);
1065 }
1066 
1067 /*
1068  * Detach the specified device.
1069  */
1070 int
1071 zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
1072 {
1073 	zfs_cmd_t zc = { 0 };
1074 	char msg[1024];
1075 	nvlist_t *tgt;
1076 	boolean_t avail_spare;
1077 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1078 
1079 	(void) snprintf(msg, sizeof (msg),
1080 	    dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
1081 
1082 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1083 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1084 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1085 
1086 	if (avail_spare)
1087 		return (zfs_error(hdl, EZFS_ISSPARE, msg));
1088 
1089 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1090 
1091 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_DETACH, &zc) == 0)
1092 		return (0);
1093 
1094 	switch (errno) {
1095 
1096 	case ENOTSUP:
1097 		/*
1098 		 * Can't detach from this type of vdev.
1099 		 */
1100 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
1101 		    "applicable to mirror and replacing vdevs"));
1102 		(void) zfs_error(zhp->zpool_hdl, EZFS_BADTARGET, msg);
1103 		break;
1104 
1105 	case EBUSY:
1106 		/*
1107 		 * There are no other replicas of this device.
1108 		 */
1109 		(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
1110 		break;
1111 
1112 	default:
1113 		(void) zpool_standard_error(hdl, errno, msg);
1114 	}
1115 
1116 	return (-1);
1117 }
1118 
1119 /*
1120  * Remove the given device.  Currently, this is supported only for hot spares.
1121  */
1122 int
1123 zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
1124 {
1125 	zfs_cmd_t zc = { 0 };
1126 	char msg[1024];
1127 	nvlist_t *tgt;
1128 	boolean_t avail_spare;
1129 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1130 
1131 	(void) snprintf(msg, sizeof (msg),
1132 	    dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
1133 
1134 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1135 	if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1136 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1137 
1138 	if (!avail_spare) {
1139 		zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1140 		    "only inactive hot spares can be removed"));
1141 		return (zfs_error(hdl, EZFS_NODEVICE, msg));
1142 	}
1143 
1144 	verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID, &zc.zc_guid) == 0);
1145 
1146 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
1147 		return (0);
1148 
1149 	return (zpool_standard_error(hdl, errno, msg));
1150 }
1151 
1152 /*
1153  * Clear the errors for the pool, or the particular device if specified.
1154  */
1155 int
1156 zpool_clear(zpool_handle_t *zhp, const char *path)
1157 {
1158 	zfs_cmd_t zc = { 0 };
1159 	char msg[1024];
1160 	nvlist_t *tgt;
1161 	boolean_t avail_spare;
1162 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1163 
1164 	if (path)
1165 		(void) snprintf(msg, sizeof (msg),
1166 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1167 		    path);
1168 	else
1169 		(void) snprintf(msg, sizeof (msg),
1170 		    dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
1171 		    zhp->zpool_name);
1172 
1173 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1174 	if (path) {
1175 		if ((tgt = zpool_find_vdev(zhp, path, &avail_spare)) == 0)
1176 			return (zfs_error(hdl, EZFS_NODEVICE, msg));
1177 
1178 		if (avail_spare)
1179 			return (zfs_error(hdl, EZFS_ISSPARE, msg));
1180 
1181 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID,
1182 		    &zc.zc_guid) == 0);
1183 	}
1184 
1185 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_CLEAR, &zc) == 0)
1186 		return (0);
1187 
1188 	return (zpool_standard_error(hdl, errno, msg));
1189 }
1190 
1191 /*
1192  * Iterate over all zvols in a given pool by walking the /dev/zvol/dsk/<pool>
1193  * hierarchy.
1194  */
1195 int
1196 zpool_iter_zvol(zpool_handle_t *zhp, int (*cb)(const char *, void *),
1197     void *data)
1198 {
1199 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1200 	char (*paths)[MAXPATHLEN];
1201 	size_t size = 4;
1202 	int curr, fd, base, ret = 0;
1203 	DIR *dirp;
1204 	struct dirent *dp;
1205 	struct stat st;
1206 
1207 	if ((base = open("/dev/zvol/dsk", O_RDONLY)) < 0)
1208 		return (errno == ENOENT ? 0 : -1);
1209 
1210 	if (fstatat(base, zhp->zpool_name, &st, 0) != 0) {
1211 		int err = errno;
1212 		(void) close(base);
1213 		return (err == ENOENT ? 0 : -1);
1214 	}
1215 
1216 	/*
1217 	 * Oddly this wasn't a directory -- ignore that failure since we
1218 	 * know there are no links lower in the (non-existant) hierarchy.
1219 	 */
1220 	if (!S_ISDIR(st.st_mode)) {
1221 		(void) close(base);
1222 		return (0);
1223 	}
1224 
1225 	if ((paths = zfs_alloc(hdl, size * sizeof (paths[0]))) == NULL) {
1226 		(void) close(base);
1227 		return (-1);
1228 	}
1229 
1230 	(void) strlcpy(paths[0], zhp->zpool_name, sizeof (paths[0]));
1231 	curr = 0;
1232 
1233 	while (curr >= 0) {
1234 		if (fstatat(base, paths[curr], &st, AT_SYMLINK_NOFOLLOW) != 0)
1235 			goto err;
1236 
1237 		if (S_ISDIR(st.st_mode)) {
1238 			if ((fd = openat(base, paths[curr], O_RDONLY)) < 0)
1239 				goto err;
1240 
1241 			if ((dirp = fdopendir(fd)) == NULL) {
1242 				(void) close(fd);
1243 				goto err;
1244 			}
1245 
1246 			while ((dp = readdir(dirp)) != NULL) {
1247 				if (dp->d_name[0] == '.')
1248 					continue;
1249 
1250 				if (curr + 1 == size) {
1251 					paths = zfs_realloc(hdl, paths,
1252 					    size * sizeof (paths[0]),
1253 					    size * 2 * sizeof (paths[0]));
1254 					if (paths == NULL) {
1255 						(void) closedir(dirp);
1256 						(void) close(fd);
1257 						goto err;
1258 					}
1259 
1260 					size *= 2;
1261 				}
1262 
1263 				(void) strlcpy(paths[curr + 1], paths[curr],
1264 				    sizeof (paths[curr + 1]));
1265 				(void) strlcat(paths[curr], "/",
1266 				    sizeof (paths[curr]));
1267 				(void) strlcat(paths[curr], dp->d_name,
1268 				    sizeof (paths[curr]));
1269 				curr++;
1270 			}
1271 
1272 			(void) closedir(dirp);
1273 
1274 		} else {
1275 			if ((ret = cb(paths[curr], data)) != 0)
1276 				break;
1277 		}
1278 
1279 		curr--;
1280 	}
1281 
1282 	free(paths);
1283 	(void) close(base);
1284 
1285 	return (ret);
1286 
1287 err:
1288 	free(paths);
1289 	(void) close(base);
1290 	return (-1);
1291 }
1292 
/*
 * Callback state pairing a pool handle with a boolean flag.
 * NOTE(review): no code in this part of the file references this type, so
 * the intended meaning of each field should be confirmed against its users.
 */
typedef struct zvol_cb {
	zpool_handle_t *zcb_pool;	/* presumably the pool being processed */
	boolean_t zcb_create;		/* presumably create vs. remove; verify */
} zvol_cb_t;
1297 
1298 /*ARGSUSED*/
1299 static int
1300 do_zvol_create(zfs_handle_t *zhp, void *data)
1301 {
1302 	int ret;
1303 
1304 	if (ZFS_IS_VOLUME(zhp))
1305 		(void) zvol_create_link(zhp->zfs_hdl, zhp->zfs_name);
1306 
1307 	ret = zfs_iter_children(zhp, do_zvol_create, NULL);
1308 
1309 	zfs_close(zhp);
1310 
1311 	return (ret);
1312 }
1313 
1314 /*
1315  * Iterate over all zvols in the pool and make any necessary minor nodes.
1316  */
1317 int
1318 zpool_create_zvol_links(zpool_handle_t *zhp)
1319 {
1320 	zfs_handle_t *zfp;
1321 	int ret;
1322 
1323 	/*
1324 	 * If the pool is unavailable, just return success.
1325 	 */
1326 	if ((zfp = make_dataset_handle(zhp->zpool_hdl,
1327 	    zhp->zpool_name)) == NULL)
1328 		return (0);
1329 
1330 	ret = zfs_iter_children(zfp, do_zvol_create, NULL);
1331 
1332 	zfs_close(zfp);
1333 	return (ret);
1334 }
1335 
1336 static int
1337 do_zvol_remove(const char *dataset, void *data)
1338 {
1339 	zpool_handle_t *zhp = data;
1340 
1341 	return (zvol_remove_link(zhp->zpool_hdl, dataset));
1342 }
1343 
1344 /*
1345  * Iterate over all zvols in the pool and remove any minor nodes.  We iterate
1346  * by examining the /dev links so that a corrupted pool doesn't impede this
1347  * operation.
1348  */
1349 int
1350 zpool_remove_zvol_links(zpool_handle_t *zhp)
1351 {
1352 	return (zpool_iter_zvol(zhp, do_zvol_remove, zhp));
1353 }
1354 
1355 /*
1356  * Convert from a devid string to a path.
1357  */
1358 static char *
1359 devid_to_path(char *devid_str)
1360 {
1361 	ddi_devid_t devid;
1362 	char *minor;
1363 	char *path;
1364 	devid_nmlist_t *list = NULL;
1365 	int ret;
1366 
1367 	if (devid_str_decode(devid_str, &devid, &minor) != 0)
1368 		return (NULL);
1369 
1370 	ret = devid_deviceid_to_nmlist("/dev", devid, minor, &list);
1371 
1372 	devid_str_free(minor);
1373 	devid_free(devid);
1374 
1375 	if (ret != 0)
1376 		return (NULL);
1377 
1378 	if ((path = strdup(list[0].devname)) == NULL)
1379 		return (NULL);
1380 
1381 	devid_free_nmlist(list);
1382 
1383 	return (path);
1384 }
1385 
1386 /*
1387  * Convert from a path to a devid string.
1388  */
1389 static char *
1390 path_to_devid(const char *path)
1391 {
1392 	int fd;
1393 	ddi_devid_t devid;
1394 	char *minor, *ret;
1395 
1396 	if ((fd = open(path, O_RDONLY)) < 0)
1397 		return (NULL);
1398 
1399 	minor = NULL;
1400 	ret = NULL;
1401 	if (devid_get(fd, &devid) == 0) {
1402 		if (devid_get_minor_name(fd, &minor) == 0)
1403 			ret = devid_str_encode(devid, minor);
1404 		if (minor != NULL)
1405 			devid_str_free(minor);
1406 		devid_free(devid);
1407 	}
1408 	(void) close(fd);
1409 
1410 	return (ret);
1411 }
1412 
1413 /*
1414  * Issue the necessary ioctl() to update the stored path value for the vdev.  We
1415  * ignore any failure here, since a common case is for an unprivileged user to
1416  * type 'zpool status', and we'll display the correct information anyway.
1417  */
1418 static void
1419 set_path(zpool_handle_t *zhp, nvlist_t *nv, const char *path)
1420 {
1421 	zfs_cmd_t zc = { 0 };
1422 
1423 	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1424 	(void) strncpy(zc.zc_value, path, sizeof (zc.zc_value));
1425 	verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1426 	    &zc.zc_guid) == 0);
1427 
1428 	(void) ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_VDEV_SETPATH, &zc);
1429 }
1430 
1431 /*
1432  * Given a vdev, return the name to display in iostat.  If the vdev has a path,
1433  * we use that, stripping off any leading "/dev/dsk/"; if not, we use the type.
1434  * We also check if this is a whole disk, in which case we strip off the
1435  * trailing 's0' slice name.
1436  *
1437  * This routine is also responsible for identifying when disks have been
1438  * reconfigured in a new location.  The kernel will have opened the device by
1439  * devid, but the path will still refer to the old location.  To catch this, we
1440  * first do a path -> devid translation (which is fast for the common case).  If
1441  * the devid matches, we're done.  If not, we do a reverse devid -> path
1442  * translation and issue the appropriate ioctl() to update the path of the vdev.
1443  * If 'zhp' is NULL, then this is an exported pool, and we don't need to do any
1444  * of these checks.
1445  */
1446 char *
1447 zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv)
1448 {
1449 	char *path, *devid;
1450 	uint64_t value;
1451 	char buf[64];
1452 
1453 	if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
1454 	    &value) == 0) {
1455 		verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
1456 		    &value) == 0);
1457 		(void) snprintf(buf, sizeof (buf), "%llu",
1458 		    (u_longlong_t)value);
1459 		path = buf;
1460 	} else if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0) {
1461 
1462 		if (zhp != NULL &&
1463 		    nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &devid) == 0) {
1464 			/*
1465 			 * Determine if the current path is correct.
1466 			 */
1467 			char *newdevid = path_to_devid(path);
1468 
1469 			if (newdevid == NULL ||
1470 			    strcmp(devid, newdevid) != 0) {
1471 				char *newpath;
1472 
1473 				if ((newpath = devid_to_path(devid)) != NULL) {
1474 					/*
1475 					 * Update the path appropriately.
1476 					 */
1477 					set_path(zhp, nv, newpath);
1478 					if (nvlist_add_string(nv,
1479 					    ZPOOL_CONFIG_PATH, newpath) == 0)
1480 						verify(nvlist_lookup_string(nv,
1481 						    ZPOOL_CONFIG_PATH,
1482 						    &path) == 0);
1483 					free(newpath);
1484 				}
1485 			}
1486 
1487 			if (newdevid)
1488 				devid_str_free(newdevid);
1489 		}
1490 
1491 		if (strncmp(path, "/dev/dsk/", 9) == 0)
1492 			path += 9;
1493 
1494 		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
1495 		    &value) == 0 && value) {
1496 			char *tmp = zfs_strdup(hdl, path);
1497 			if (tmp == NULL)
1498 				return (NULL);
1499 			tmp[strlen(path) - 2] = '\0';
1500 			return (tmp);
1501 		}
1502 	} else {
1503 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &path) == 0);
1504 
1505 		/*
1506 		 * If it's a raidz device, we need to stick in the parity level.
1507 		 */
1508 		if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
1509 			verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
1510 			    &value) == 0);
1511 			(void) snprintf(buf, sizeof (buf), "%s%llu", path,
1512 			    (u_longlong_t)value);
1513 			path = buf;
1514 		}
1515 	}
1516 
1517 	return (zfs_strdup(hdl, path));
1518 }
1519 
1520 static int
1521 zbookmark_compare(const void *a, const void *b)
1522 {
1523 	return (memcmp(a, b, sizeof (zbookmark_t)));
1524 }
1525 
1526 /*
1527  * Retrieve the persistent error log, uniquify the members, and return to the
1528  * caller.
1529  */
1530 int
1531 zpool_get_errlog(zpool_handle_t *zhp, nvlist_t **nverrlistp)
1532 {
1533 	zfs_cmd_t zc = { 0 };
1534 	uint64_t count;
1535 	zbookmark_t *zb = NULL;
1536 	int i;
1537 
1538 	/*
1539 	 * Retrieve the raw error list from the kernel.  If the number of errors
1540 	 * has increased, allocate more space and continue until we get the
1541 	 * entire list.
1542 	 */
1543 	verify(nvlist_lookup_uint64(zhp->zpool_config, ZPOOL_CONFIG_ERRCOUNT,
1544 	    &count) == 0);
1545 	if ((zc.zc_nvlist_dst = (uintptr_t)zfs_alloc(zhp->zpool_hdl,
1546 	    count * sizeof (zbookmark_t))) == (uintptr_t)NULL)
1547 		return (-1);
1548 	zc.zc_nvlist_dst_size = count;
1549 	(void) strcpy(zc.zc_name, zhp->zpool_name);
1550 	for (;;) {
1551 		if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_ERROR_LOG,
1552 		    &zc) != 0) {
1553 			free((void *)(uintptr_t)zc.zc_nvlist_dst);
1554 			if (errno == ENOMEM) {
1555 				count = zc.zc_nvlist_dst_size;
1556 				if ((zc.zc_nvlist_dst = (uintptr_t)
1557 				    zfs_alloc(zhp->zpool_hdl, count *
1558 				    sizeof (zbookmark_t))) == (uintptr_t)NULL)
1559 					return (-1);
1560 			} else {
1561 				return (-1);
1562 			}
1563 		} else {
1564 			break;
1565 		}
1566 	}
1567 
1568 	/*
1569 	 * Sort the resulting bookmarks.  This is a little confusing due to the
1570 	 * implementation of ZFS_IOC_ERROR_LOG.  The bookmarks are copied last
1571 	 * to first, and 'zc_nvlist_dst_size' indicates the number of boomarks
1572 	 * _not_ copied as part of the process.  So we point the start of our
1573 	 * array appropriate and decrement the total number of elements.
1574 	 */
1575 	zb = ((zbookmark_t *)(uintptr_t)zc.zc_nvlist_dst) +
1576 	    zc.zc_nvlist_dst_size;
1577 	count -= zc.zc_nvlist_dst_size;
1578 
1579 	qsort(zb, count, sizeof (zbookmark_t), zbookmark_compare);
1580 
1581 	verify(nvlist_alloc(nverrlistp, 0, KM_SLEEP) == 0);
1582 
1583 	/*
1584 	 * Fill in the nverrlistp with nvlist's of dataset and object numbers.
1585 	 */
1586 	for (i = 0; i < count; i++) {
1587 		nvlist_t *nv;
1588 
1589 		/* ignoring zb_blkid and zb_level for now */
1590 		if (i > 0 && zb[i-1].zb_objset == zb[i].zb_objset &&
1591 		    zb[i-1].zb_object == zb[i].zb_object)
1592 			continue;
1593 
1594 		if (nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) != 0)
1595 			goto nomem;
1596 		if (nvlist_add_uint64(nv, ZPOOL_ERR_DATASET,
1597 		    zb[i].zb_objset) != 0) {
1598 			nvlist_free(nv);
1599 			goto nomem;
1600 		}
1601 		if (nvlist_add_uint64(nv, ZPOOL_ERR_OBJECT,
1602 		    zb[i].zb_object) != 0) {
1603 			nvlist_free(nv);
1604 			goto nomem;
1605 		}
1606 		if (nvlist_add_nvlist(*nverrlistp, "ejk", nv) != 0) {
1607 			nvlist_free(nv);
1608 			goto nomem;
1609 		}
1610 		nvlist_free(nv);
1611 	}
1612 
1613 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
1614 	return (0);
1615 
1616 nomem:
1617 	free((void *)(uintptr_t)zc.zc_nvlist_dst);
1618 	return (no_memory(zhp->zpool_hdl));
1619 }
1620 
1621 /*
1622  * Upgrade a ZFS pool to the latest on-disk version.
1623  */
1624 int
1625 zpool_upgrade(zpool_handle_t *zhp)
1626 {
1627 	zfs_cmd_t zc = { 0 };
1628 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1629 
1630 	(void) strcpy(zc.zc_name, zhp->zpool_name);
1631 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_UPGRADE, &zc) != 0)
1632 		return (zpool_standard_error_fmt(hdl, errno,
1633 		    dgettext(TEXT_DOMAIN, "cannot upgrade '%s'"),
1634 		    zhp->zpool_name));
1635 
1636 	return (0);
1637 }
1638 
1639 /*
1640  * Log command history.
1641  *
1642  * 'pool' is B_TRUE if we are logging a command for 'zpool'; B_FALSE
1643  * otherwise ('zfs').  'pool_create' is B_TRUE if we are logging the creation
1644  * of the pool; B_FALSE otherwise.  'path' is the pathanme containing the
1645  * poolname.  'argc' and 'argv' are used to construct the command string.
1646  */
1647 void
1648 zpool_log_history(libzfs_handle_t *hdl, int argc, char **argv, const char *path,
1649     boolean_t pool, boolean_t pool_create)
1650 {
1651 	char cmd_buf[HIS_MAX_RECORD_LEN];
1652 	char *dspath;
1653 	zfs_cmd_t zc = { 0 };
1654 	int i;
1655 
1656 	/* construct the command string */
1657 	(void) strcpy(cmd_buf, pool ? "zpool" : "zfs");
1658 	for (i = 0; i < argc; i++) {
1659 		if (strlen(cmd_buf) + 1 + strlen(argv[i]) > HIS_MAX_RECORD_LEN)
1660 			break;
1661 		(void) strcat(cmd_buf, " ");
1662 		(void) strcat(cmd_buf, argv[i]);
1663 	}
1664 
1665 	/* figure out the poolname */
1666 	dspath = strpbrk(path, "/@");
1667 	if (dspath == NULL) {
1668 		(void) strcpy(zc.zc_name, path);
1669 	} else {
1670 		(void) strncpy(zc.zc_name, path, dspath - path);
1671 		zc.zc_name[dspath-path] = '\0';
1672 	}
1673 
1674 	zc.zc_history = (uint64_t)(uintptr_t)cmd_buf;
1675 	zc.zc_history_len = strlen(cmd_buf);
1676 
1677 	/* overloading zc_history_offset */
1678 	zc.zc_history_offset = pool_create;
1679 
1680 	(void) ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_LOG_HISTORY, &zc);
1681 }
1682 
1683 /*
1684  * Perform ioctl to get some command history of a pool.
1685  *
1686  * 'buf' is the buffer to fill up to 'len' bytes.  'off' is the
1687  * logical offset of the history buffer to start reading from.
1688  *
1689  * Upon return, 'off' is the next logical offset to read from and
1690  * 'len' is the actual amount of bytes read into 'buf'.
1691  */
1692 static int
1693 get_history(zpool_handle_t *zhp, char *buf, uint64_t *off, uint64_t *len)
1694 {
1695 	zfs_cmd_t zc = { 0 };
1696 	libzfs_handle_t *hdl = zhp->zpool_hdl;
1697 
1698 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1699 
1700 	zc.zc_history = (uint64_t)(uintptr_t)buf;
1701 	zc.zc_history_len = *len;
1702 	zc.zc_history_offset = *off;
1703 
1704 	if (ioctl(hdl->libzfs_fd, ZFS_IOC_POOL_GET_HISTORY, &zc) != 0) {
1705 		switch (errno) {
1706 		case EPERM:
1707 			return (zfs_error_fmt(hdl, EZFS_PERM,
1708 			    dgettext(TEXT_DOMAIN,
1709 			    "cannot show history for pool '%s'"),
1710 			    zhp->zpool_name));
1711 		case ENOENT:
1712 			return (zfs_error_fmt(hdl, EZFS_NOHISTORY,
1713 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
1714 			    "'%s'"), zhp->zpool_name));
1715 		case ENOTSUP:
1716 			return (zfs_error_fmt(hdl, EZFS_BADVERSION,
1717 			    dgettext(TEXT_DOMAIN, "cannot get history for pool "
1718 			    "'%s', pool must be upgraded"), zhp->zpool_name));
1719 		default:
1720 			return (zpool_standard_error_fmt(hdl, errno,
1721 			    dgettext(TEXT_DOMAIN,
1722 			    "cannot get history for '%s'"), zhp->zpool_name));
1723 		}
1724 	}
1725 
1726 	*len = zc.zc_history_len;
1727 	*off = zc.zc_history_offset;
1728 
1729 	return (0);
1730 }
1731 
1732 /*
1733  * Process the buffer of nvlists, unpacking and storing each nvlist record
1734  * into 'records'.  'leftover' is set to the number of bytes that weren't
1735  * processed as there wasn't a complete record.
1736  */
1737 static int
1738 zpool_history_unpack(char *buf, uint64_t bytes_read, uint64_t *leftover,
1739     nvlist_t ***records, uint_t *numrecords)
1740 {
1741 	uint64_t reclen;
1742 	nvlist_t *nv;
1743 	int i;
1744 
1745 	while (bytes_read > sizeof (reclen)) {
1746 
1747 		/* get length of packed record (stored as little endian) */
1748 		for (i = 0, reclen = 0; i < sizeof (reclen); i++)
1749 			reclen += (uint64_t)(((uchar_t *)buf)[i]) << (8*i);
1750 
1751 		if (bytes_read < sizeof (reclen) + reclen)
1752 			break;
1753 
1754 		/* unpack record */
1755 		if (nvlist_unpack(buf + sizeof (reclen), reclen, &nv, 0) != 0)
1756 			return (ENOMEM);
1757 		bytes_read -= sizeof (reclen) + reclen;
1758 		buf += sizeof (reclen) + reclen;
1759 
1760 		/* add record to nvlist array */
1761 		(*numrecords)++;
1762 		if (ISP2(*numrecords + 1)) {
1763 			*records = realloc(*records,
1764 			    *numrecords * 2 * sizeof (nvlist_t *));
1765 		}
1766 		(*records)[*numrecords - 1] = nv;
1767 	}
1768 
1769 	*leftover = bytes_read;
1770 	return (0);
1771 }
1772 
1773 #define	HIS_BUF_LEN	(128*1024)
1774 
1775 /*
1776  * Retrieve the command history of a pool.
1777  */
1778 int
1779 zpool_get_history(zpool_handle_t *zhp, nvlist_t **nvhisp)
1780 {
1781 	char buf[HIS_BUF_LEN];
1782 	uint64_t off = 0;
1783 	nvlist_t **records = NULL;
1784 	uint_t numrecords = 0;
1785 	int err, i;
1786 
1787 	do {
1788 		uint64_t bytes_read = sizeof (buf);
1789 		uint64_t leftover;
1790 
1791 		if ((err = get_history(zhp, buf, &off, &bytes_read)) != 0)
1792 			break;
1793 
1794 		/* if nothing else was read in, we're at EOF, just return */
1795 		if (!bytes_read)
1796 			break;
1797 
1798 		if ((err = zpool_history_unpack(buf, bytes_read,
1799 		    &leftover, &records, &numrecords)) != 0)
1800 			break;
1801 		off -= leftover;
1802 
1803 		/* CONSTCOND */
1804 	} while (1);
1805 
1806 	if (!err) {
1807 		verify(nvlist_alloc(nvhisp, NV_UNIQUE_NAME, 0) == 0);
1808 		verify(nvlist_add_nvlist_array(*nvhisp, ZPOOL_HIST_RECORD,
1809 		    records, numrecords) == 0);
1810 	}
1811 	for (i = 0; i < numrecords; i++)
1812 		nvlist_free(records[i]);
1813 	free(records);
1814 
1815 	return (err);
1816 }
1817 
1818 void
1819 zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj, uint64_t obj,
1820     char *pathname, size_t len)
1821 {
1822 	zfs_cmd_t zc = { 0 };
1823 	boolean_t mounted = B_FALSE;
1824 	char *mntpnt = NULL;
1825 	char dsname[MAXNAMELEN];
1826 
1827 	if (dsobj == 0) {
1828 		/* special case for the MOS */
1829 		(void) snprintf(pathname, len, "<metadata>:<0x%llx>", obj);
1830 		return;
1831 	}
1832 
1833 	/* get the dataset's name */
1834 	(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
1835 	zc.zc_obj = dsobj;
1836 	if (ioctl(zhp->zpool_hdl->libzfs_fd,
1837 	    ZFS_IOC_DSOBJ_TO_DSNAME, &zc) != 0) {
1838 		/* just write out a path of two object numbers */
1839 		(void) snprintf(pathname, len, "<0x%llx>:<0x%llx>",
1840 		    dsobj, obj);
1841 		return;
1842 	}
1843 	(void) strlcpy(dsname, zc.zc_value, sizeof (dsname));
1844 
1845 	/* find out if the dataset is mounted */
1846 	mounted = is_mounted(zhp->zpool_hdl, dsname, &mntpnt);
1847 
1848 	/* get the corrupted object's path */
1849 	(void) strlcpy(zc.zc_name, dsname, sizeof (zc.zc_name));
1850 	zc.zc_obj = obj;
1851 	if (ioctl(zhp->zpool_hdl->libzfs_fd, ZFS_IOC_OBJ_TO_PATH,
1852 	    &zc) == 0) {
1853 		if (mounted) {
1854 			(void) snprintf(pathname, len, "%s%s", mntpnt,
1855 			    zc.zc_value);
1856 		} else {
1857 			(void) snprintf(pathname, len, "%s:%s",
1858 			    dsname, zc.zc_value);
1859 		}
1860 	} else {
1861 		(void) snprintf(pathname, len, "%s:<0x%llx>", dsname, obj);
1862 	}
1863 	free(mntpnt);
1864 }
1865