xref: /illumos-gate/usr/src/boot/libsa/zfs/zfs.c (revision 1fa2a66491e7d8ae0be84e7da4da8e812480c710)
1 /*
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 
29 /*
30  *	Stand-alone file reading package.
31  */
32 
33 #include <stand.h>
34 #include <sys/disk.h>
35 #include <sys/param.h>
36 #include <sys/time.h>
37 #include <sys/queue.h>
38 #include <disk.h>
39 #include <part.h>
40 #include <stddef.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <bootstrap.h>
44 #include <inttypes.h>
45 
46 #include "libzfs.h"
47 
48 #include "zfsimpl.c"
49 
/*
 * Define the range of indexes to be populated with ZFS Boot Environments.
 * NOTE(review): the consumers of these two limits are not in this part of
 * the file; confirm whether the range is inclusive before changing it.
 */
#define		ZFS_BE_FIRST	4
#define		ZFS_BE_LAST	8
53 
/*
 * File system entry points. They are defined further down and published
 * through the zfs_fsops table.
 */
static int	zfs_open(const char *, struct open_file *);
static int	zfs_close(struct open_file *);
static int	zfs_read(struct open_file *, void *, size_t, size_t *);
static off_t	zfs_seek(struct open_file *, off_t, int);
static int	zfs_stat(struct open_file *, struct stat *);
static int	zfs_readdir(struct open_file *, struct dirent *);

/*
 * Device switch entry for ZFS. zfs_open() compares an open file's f_dev
 * against the address of this object and rejects anything else.
 */
struct devsw zfs_dev;
62 
/*
 * File system operations table for ZFS. The initializer is positional,
 * so the order of the entries must follow the member order of
 * struct fs_ops (declared outside this file).
 */
struct fs_ops zfs_fsops = {
	"zfs",		/* file system name */
	zfs_open,
	zfs_close,
	zfs_read,
	null_write,	/* write slot; null_write is not defined in this file */
	zfs_seek,
	zfs_stat,
	zfs_readdir
};
73 
/*
 * In-core open file.
 *
 * One of these is allocated by zfs_open() and hung off f_fsdata of the
 * open file. For regular reads f_seekp is a byte offset; zfs_readdir()
 * reuses it as its cursor into the directory ZAP.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
84 
/*
 * Singly-linked list of boot environment entries, each carrying a name.
 * NOTE(review): the code that fills and walks this list is not in this
 * part of the file; ownership of the name strings should be confirmed
 * there.
 */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head =
    SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;
struct zfs_be_entry {
	const char *name;			/* boot environment name */
	SLIST_ENTRY(zfs_be_entry) entries;	/* list linkage */
} *zfs_be, *zfs_be_tmp;
92 
93 /*
94  * Open a file.
95  */
96 static int
97 zfs_open(const char *upath, struct open_file *f)
98 {
99 	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
100 	struct file *fp;
101 	int rc;
102 
103 	if (f->f_dev != &zfs_dev)
104 		return (EINVAL);
105 
106 	/* allocate file system specific data structure */
107 	fp = calloc(1, sizeof (struct file));
108 	if (fp == NULL)
109 		return (ENOMEM);
110 	f->f_fsdata = fp;
111 
112 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
113 	fp->f_seekp = 0;
114 	if (rc) {
115 		f->f_fsdata = NULL;
116 		free(fp);
117 	}
118 	return (rc);
119 }
120 
121 static int
122 zfs_close(struct open_file *f)
123 {
124 	struct file *fp = (struct file *)f->f_fsdata;
125 
126 	dnode_cache_obj = NULL;
127 	f->f_fsdata = NULL;
128 
129 	free(fp);
130 	return (0);
131 }
132 
133 /*
134  * Copy a portion of a file into kernel memory.
135  * Cross block boundaries when necessary.
136  */
137 static int
138 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid)
139 {
140 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
141 	struct file *fp = (struct file *)f->f_fsdata;
142 	struct stat sb;
143 	size_t n;
144 	int rc;
145 
146 	rc = zfs_stat(f, &sb);
147 	if (rc)
148 		return (rc);
149 	n = size;
150 	if (fp->f_seekp + n > sb.st_size)
151 		n = sb.st_size - fp->f_seekp;
152 
153 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
154 	if (rc)
155 		return (rc);
156 
157 	fp->f_seekp += n;
158 	if (resid)
159 		*resid = size - n;
160 
161 	return (0);
162 }
163 
164 static off_t
165 zfs_seek(struct open_file *f, off_t offset, int where)
166 {
167 	struct file *fp = (struct file *)f->f_fsdata;
168 	struct stat sb;
169 	int error;
170 
171 	switch (where) {
172 	case SEEK_SET:
173 		fp->f_seekp = offset;
174 		break;
175 	case SEEK_CUR:
176 		fp->f_seekp += offset;
177 		break;
178 	case SEEK_END:
179 		error = zfs_stat(f, &sb);
180 		if (error != 0) {
181 			errno = error;
182 			return (-1);
183 		}
184 		fp->f_seekp = sb.st_size - offset;
185 		break;
186 	default:
187 		errno = EINVAL;
188 		return (-1);
189 	}
190 	return (fp->f_seekp);
191 }
192 
193 static int
194 zfs_stat(struct open_file *f, struct stat *sb)
195 {
196 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
197 	struct file *fp = (struct file *)f->f_fsdata;
198 
199 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
200 }
201 
202 static int
203 zfs_readdir(struct open_file *f, struct dirent *d)
204 {
205 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
206 	struct file *fp = (struct file *)f->f_fsdata;
207 	mzap_ent_phys_t mze;
208 	struct stat sb;
209 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
210 	int rc;
211 
212 	rc = zfs_stat(f, &sb);
213 	if (rc)
214 		return (rc);
215 	if (!S_ISDIR(sb.st_mode))
216 		return (ENOTDIR);
217 
218 	/*
219 	 * If this is the first read, get the zap type.
220 	 */
221 	if (fp->f_seekp == 0) {
222 		rc = dnode_read(spa, &fp->f_dnode, 0, &fp->f_zap_type,
223 		    sizeof (fp->f_zap_type));
224 		if (rc)
225 			return (rc);
226 
227 		if (fp->f_zap_type == ZBT_MICRO) {
228 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
229 		} else {
230 			rc = dnode_read(spa, &fp->f_dnode,
231 			    offsetof(zap_phys_t, zap_num_leafs),
232 			    &fp->f_num_leafs, sizeof (fp->f_num_leafs));
233 			if (rc)
234 				return (rc);
235 
236 			fp->f_seekp = bsize;
237 			fp->f_zap_leaf = malloc(bsize);
238 			if (fp->f_zap_leaf == NULL)
239 				return (ENOMEM);
240 			rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp,
241 			    fp->f_zap_leaf, bsize);
242 			if (rc)
243 				return (rc);
244 		}
245 	}
246 
247 	if (fp->f_zap_type == ZBT_MICRO) {
248 	mzap_next:
249 		if (fp->f_seekp >= bsize)
250 			return (ENOENT);
251 
252 		rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, &mze,
253 		    sizeof (mze));
254 		if (rc)
255 			return (rc);
256 		fp->f_seekp += sizeof (mze);
257 
258 		if (!mze.mze_name[0])
259 			goto mzap_next;
260 
261 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
262 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
263 		strcpy(d->d_name, mze.mze_name);
264 		d->d_namlen = strlen(d->d_name);
265 		return (0);
266 	} else {
267 		zap_leaf_t zl;
268 		zap_leaf_chunk_t *zc, *nc;
269 		int chunk;
270 		size_t namelen;
271 		char *p;
272 		uint64_t value;
273 
274 		/*
275 		 * Initialise this so we can use the ZAP size
276 		 * calculating macros.
277 		 */
278 		zl.l_bs = ilog2(bsize);
279 		zl.l_phys = fp->f_zap_leaf;
280 
281 		/*
282 		 * Figure out which chunk we are currently looking at
283 		 * and consider seeking to the next leaf. We use the
284 		 * low bits of f_seekp as a simple chunk index.
285 		 */
286 	fzap_next:
287 		chunk = fp->f_seekp & (bsize - 1);
288 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
289 			fp->f_seekp = (fp->f_seekp & ~(bsize - 1)) + bsize;
290 			chunk = 0;
291 
292 			/*
293 			 * Check for EOF and read the new leaf.
294 			 */
295 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
296 				return (ENOENT);
297 
298 			rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp,
299 			    fp->f_zap_leaf, bsize);
300 			if (rc)
301 				return (rc);
302 		}
303 
304 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
305 		fp->f_seekp++;
306 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
307 			goto fzap_next;
308 
309 		namelen = zc->l_entry.le_name_numints;
310 		if (namelen > sizeof (d->d_name))
311 			namelen = sizeof (d->d_name);
312 
313 		/*
314 		 * Paste the name back together.
315 		 */
316 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
317 		p = d->d_name;
318 		while (namelen > 0) {
319 			int len;
320 			len = namelen;
321 			if (len > ZAP_LEAF_ARRAY_BYTES)
322 				len = ZAP_LEAF_ARRAY_BYTES;
323 			memcpy(p, nc->l_array.la_array, len);
324 			p += len;
325 			namelen -= len;
326 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
327 		}
328 		d->d_name[sizeof (d->d_name) - 1] = 0;
329 
330 		/*
331 		 * Assume the first eight bytes of the value are
332 		 * a uint64_t.
333 		 */
334 		value = fzap_leaf_value(&zl, zc);
335 
336 		d->d_fileno = ZFS_DIRENT_OBJ(value);
337 		d->d_type = ZFS_DIRENT_TYPE(value);
338 		d->d_namlen = strlen(d->d_name);
339 
340 		return (0);
341 	}
342 }
343 
344 static int
345 vdev_read(vdev_t *vdev __unused, void *priv, off_t offset, void *buf,
346     size_t bytes)
347 {
348 	int fd, ret;
349 	size_t res, head, tail, total_size, full_sec_size;
350 	unsigned secsz, do_tail_read;
351 	off_t start_sec;
352 	char *outbuf, *bouncebuf;
353 
354 	fd = (uintptr_t)priv;
355 	outbuf = (char *)buf;
356 	bouncebuf = NULL;
357 
358 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
359 	if (ret != 0)
360 		return (ret);
361 
362 	/* BEGIN CSTYLED */
363 	/*
364 	 * Handling reads of arbitrary offset and size - multi-sector case
365 	 * and single-sector case.
366 	 *
367 	 *                        Multi-sector Case
368 	 *                (do_tail_read = true if tail > 0)
369 	 *
370 	 *   |<----------------------total_size--------------------->|
371 	 *   |                                                       |
372 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
373 	 *   |          |                                 |          |
374 	 *   |          |       |<~full_sec_size~>|       |          |
375 	 *   +------------------+                 +------------------+
376 	 *   |          |0101010|     .  .  .     |0101011|          |
377 	 *   +------------------+                 +------------------+
378 	 *         start_sec                         start_sec + n
379 	 *
380 	 *
381 	 *                      Single-sector Case
382 	 *                    (do_tail_read = false)
383 	 *
384 	 *              |<------total_size = secsz----->|
385 	 *              |                               |
386 	 *              |<-head->|<---bytes--->|<-tail->|
387 	 *              +-------------------------------+
388 	 *              |        |0101010101010|        |
389 	 *              +-------------------------------+
390 	 *                          start_sec
391 	 */
392 	/* END CSTYLED */
393 	start_sec = offset / secsz;
394 	head = offset % secsz;
395 	total_size = roundup2(head + bytes, secsz);
396 	tail = total_size - (head + bytes);
397 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
398 	full_sec_size = total_size;
399 	if (head > 0)
400 		full_sec_size -= secsz;
401 	if (do_tail_read)
402 		full_sec_size -= secsz;
403 
404 	/* Return of partial sector data requires a bounce buffer. */
405 	if ((head > 0) || do_tail_read || bytes < secsz) {
406 		bouncebuf = malloc(secsz);
407 		if (bouncebuf == NULL) {
408 			printf("vdev_read: out of memory\n");
409 			return (ENOMEM);
410 		}
411 	}
412 
413 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
414 		ret = errno;
415 		goto error;
416 	}
417 
418 	/* Partial data return from first sector */
419 	if (head > 0) {
420 		res = read(fd, bouncebuf, secsz);
421 		if (res != secsz) {
422 			ret = EIO;
423 			goto error;
424 		}
425 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
426 		outbuf += min(secsz - head, bytes);
427 	}
428 
429 	/* Full data return from read sectors */
430 	if (full_sec_size > 0) {
431 		if (bytes < full_sec_size) {
432 			res = read(fd, bouncebuf, secsz);
433 			if (res != secsz) {
434 				ret = EIO;
435 				goto error;
436 			}
437 			memcpy(outbuf, bouncebuf, bytes);
438 		} else {
439 			res = read(fd, outbuf, full_sec_size);
440 			if (res != full_sec_size) {
441 				ret = EIO;
442 				goto error;
443 			}
444 			outbuf += full_sec_size;
445 		}
446 	}
447 
448 	/* Partial data return from last sector */
449 	if (do_tail_read) {
450 		res = read(fd, bouncebuf, secsz);
451 		if (res != secsz) {
452 			ret = EIO;
453 			goto error;
454 		}
455 		memcpy(outbuf, bouncebuf, secsz - tail);
456 	}
457 
458 	ret = 0;
459 error:
460 	free(bouncebuf);
461 	return (ret);
462 }
463 
464 static int
465 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
466 {
467 	int fd, ret;
468 	size_t head, tail, total_size, full_sec_size;
469 	unsigned secsz, do_tail_write;
470 	off_t start_sec;
471 	ssize_t res;
472 	char *outbuf, *bouncebuf;
473 
474 	fd = (uintptr_t)vdev->v_priv;
475 	outbuf = (char *)buf;
476 	bouncebuf = NULL;
477 
478 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
479 	if (ret != 0)
480 		return (ret);
481 
482 	start_sec = offset / secsz;
483 	head = offset % secsz;
484 	total_size = roundup2(head + bytes, secsz);
485 	tail = total_size - (head + bytes);
486 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
487 	full_sec_size = total_size;
488 	if (head > 0)
489 		full_sec_size -= secsz;
490 	if (do_tail_write)
491 		full_sec_size -= secsz;
492 
493 	/* Partial sector write requires a bounce buffer. */
494 	if ((head > 0) || do_tail_write || bytes < secsz) {
495 		bouncebuf = malloc(secsz);
496 		if (bouncebuf == NULL) {
497 			printf("vdev_write: out of memory\n");
498 			return (ENOMEM);
499 		}
500 	}
501 
502 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
503 		ret = errno;
504 		goto error;
505 	}
506 
507 	/* Partial data for first sector */
508 	if (head > 0) {
509 		res = read(fd, bouncebuf, secsz);
510 		if ((unsigned)res != secsz) {
511 			ret = EIO;
512 			goto error;
513 		}
514 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
515 		(void) lseek(fd, -secsz, SEEK_CUR);
516 		res = write(fd, bouncebuf, secsz);
517 		if ((unsigned)res != secsz) {
518 			ret = EIO;
519 			goto error;
520 		}
521 		outbuf += min(secsz - head, bytes);
522 	}
523 
524 	/*
525 	 * Full data write to sectors.
526 	 * Note, there is still corner case where we write
527 	 * to sector boundary, but less than sector size, e.g. write 512B
528 	 * to 4k sector.
529 	 */
530 	if (full_sec_size > 0) {
531 		if (bytes < full_sec_size) {
532 			res = read(fd, bouncebuf, secsz);
533 			if ((unsigned)res != secsz) {
534 				ret = EIO;
535 				goto error;
536 			}
537 			memcpy(bouncebuf, outbuf, bytes);
538 			(void) lseek(fd, -secsz, SEEK_CUR);
539 			res = write(fd, bouncebuf, secsz);
540 			if ((unsigned)res != secsz) {
541 				ret = EIO;
542 				goto error;
543 			}
544 		} else {
545 			res = write(fd, outbuf, full_sec_size);
546 			if ((unsigned)res != full_sec_size) {
547 				ret = EIO;
548 				goto error;
549 			}
550 			outbuf += full_sec_size;
551 		}
552 	}
553 
554 	/* Partial data write to last sector */
555 	if (do_tail_write) {
556 		res = read(fd, bouncebuf, secsz);
557 		if ((unsigned)res != secsz) {
558 			ret = EIO;
559 			goto error;
560 		}
561 		memcpy(bouncebuf, outbuf, secsz - tail);
562 		(void) lseek(fd, -secsz, SEEK_CUR);
563 		res = write(fd, bouncebuf, secsz);
564 		if ((unsigned)res != secsz) {
565 			ret = EIO;
566 			goto error;
567 		}
568 	}
569 
570 	ret = 0;
571 error:
572 	free(bouncebuf);
573 	return (ret);
574 }
575 
576 static int
577 zfs_dev_init(void)
578 {
579 	spa_t *spa;
580 	spa_t *next;
581 	spa_t *prev;
582 
583 	zfs_init();
584 	if (archsw.arch_zfs_probe == NULL)
585 		return (ENXIO);
586 	archsw.arch_zfs_probe();
587 
588 	prev = NULL;
589 	spa = STAILQ_FIRST(&zfs_pools);
590 	while (spa != NULL) {
591 		next = STAILQ_NEXT(spa, spa_link);
592 		if (zfs_spa_init(spa)) {
593 			if (prev == NULL)
594 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
595 			else
596 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
597 		} else
598 			prev = spa;
599 		spa = next;
600 	}
601 	return (0);
602 }
603 
/*
 * Context handed through the partition table iteration to
 * zfs_probe_partition() and zfs_diskread().
 */
struct zfs_probe_args {
	int		fd;		/* open descriptor of the device */
	const char	*devname;	/* name the descriptor was opened by */
	uint64_t	*pool_guid;	/* out: pool guid, passed to zfs_probe() */
	unsigned	secsz;		/* sector size used by zfs_diskread() */
};
610 
611 static int
612 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
613 {
614 	struct zfs_probe_args *ppa;
615 
616 	ppa = (struct zfs_probe_args *)arg;
617 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
618 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
619 }
620 
621 static int
622 zfs_probe(int fd, uint64_t *pool_guid)
623 {
624 	spa_t *spa;
625 	int ret;
626 
627 	spa = NULL;
628 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
629 	if (ret == 0 && pool_guid != NULL)
630 		*pool_guid = spa->spa_guid;
631 	return (ret);
632 }
633 
634 static int
635 zfs_probe_partition(void *arg, const char *partname,
636     const struct ptable_entry *part)
637 {
638 	struct zfs_probe_args *ppa, pa;
639 	struct ptable *table;
640 	char devname[32];
641 	int ret = 0;
642 
643 	/* filter out partitions *not* used by zfs */
644 	switch (part->type) {
645 	case PART_EFI:		/* efi system partition */
646 	case PART_RESERVED:	/* efi reserverd */
647 	case PART_VTOC_BOOT:	/* vtoc boot area */
648 	case PART_VTOC_SWAP:
649 		return (ret);
650 	default:
651 		break;
652 	}
653 	ppa = (struct zfs_probe_args *)arg;
654 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
655 	devname[strlen(ppa->devname) - 1] = '\0';
656 	snprintf(devname, sizeof (devname), "%s%s:", devname, partname);
657 	pa.fd = open(devname, O_RDWR);
658 	if (pa.fd == -1)
659 		return (ret);
660 	ret = zfs_probe(pa.fd, ppa->pool_guid);
661 	if (ret == 0)
662 		return (ret);
663 	if (part->type == PART_SOLARIS2) {
664 		pa.devname = devname;
665 		pa.pool_guid = ppa->pool_guid;
666 		pa.secsz = ppa->secsz;
667 		table = ptable_open(&pa, part->end - part->start + 1,
668 		    ppa->secsz, zfs_diskread);
669 		if (table != NULL) {
670 			enum ptable_type pt = ptable_gettype(table);
671 
672 			if (pt == PTABLE_VTOC8 || pt == PTABLE_VTOC)
673 				ptable_iterate(table, &pa, zfs_probe_partition);
674 			ptable_close(table);
675 		}
676 	}
677 	close(pa.fd);
678 	return (0);
679 }
680 
681 /*
682  * Return bootenv nvlist from pool label.
683  */
684 int
685 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
686 {
687 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
688 	nvlist_t *benv = NULL;
689 	vdev_t *vd;
690 	spa_t *spa;
691 
692 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
693 		return (ENOTSUP);
694 
695 	if ((spa = spa_find_by_dev(dev)) == NULL)
696 		return (ENXIO);
697 
698 	if (spa->spa_bootenv == NULL) {
699 		STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children,
700 		    v_childlink) {
701 			benv = vdev_read_bootenv(vd);
702 
703 			if (benv != NULL)
704 				break;
705 		}
706 		spa->spa_bootenv = benv;
707 	} else {
708 		benv = spa->spa_bootenv;
709 	}
710 
711 	if (benv == NULL)
712 		return (ENOENT);
713 
714 	*benvp = benv;
715 	return (0);
716 }
717 
718 /*
719  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
720  */
721 int
722 zfs_set_bootenv(void *vdev, nvlist_t *benv)
723 {
724 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
725 	spa_t *spa;
726 	vdev_t *vd;
727 
728 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
729 		return (ENOTSUP);
730 
731 	if ((spa = spa_find_by_dev(dev)) == NULL)
732 		return (ENXIO);
733 
734 	STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
735 		vdev_write_bootenv(vd, benv);
736 	}
737 
738 	spa->spa_bootenv = benv;
739 	return (0);
740 }
741 
742 /*
743  * Get bootonce value by key. The bootonce <key, value> pair is removed
744  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
745  */
746 int
747 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
748 {
749 	nvlist_t *benv;
750 	char *result = NULL;
751 	int result_size, rv;
752 
753 	if ((rv = zfs_get_bootenv(vdev, &benv)) != 0)
754 		return (rv);
755 
756 	if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL,
757 	    &result, &result_size)) == 0) {
758 		if (result_size == 0) {
759 			/* ignore empty string */
760 			rv = ENOENT;
761 		} else {
762 			size = MIN((size_t)result_size + 1, size);
763 			strlcpy(buf, result, size);
764 		}
765 		(void) nvlist_remove(benv, key, DATA_TYPE_STRING);
766 		(void) zfs_set_bootenv(vdev, benv);
767 	}
768 
769 	return (rv);
770 }
771 
/*
 * nvstore backend.
 *
 * Forward declarations for the functions below that are used before
 * their definition.
 */

static int zfs_nvstore_setter(void *, int, const char *,
    const void *, size_t);
static int zfs_nvstore_setter_str(void *, const char *, const char *,
    const char *);
static int zfs_nvstore_unset_impl(void *, const char *, bool);
static int zfs_nvstore_setenv(void *, void *);
782 
783 /*
784  * nvstore is only present for current rootfs pool.
785  */
786 static int
787 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
788 {
789 	struct zfs_devdesc *dev;
790 	int rv;
791 
792 	archsw.arch_getdev((void **)&dev, NULL, NULL);
793 	if (dev == NULL)
794 		return (ENXIO);
795 
796 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
797 
798 	free(dev);
799 	return (rv);
800 }
801 
802 /*
803  * nvstore is only present for current rootfs pool.
804  */
805 static int
806 zfs_nvstore_unsethook(struct env_var *ev)
807 {
808 	struct zfs_devdesc *dev;
809 	int rv;
810 
811 	archsw.arch_getdev((void **)&dev, NULL, NULL);
812 	if (dev == NULL)
813 		return (ENXIO);
814 
815 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
816 
817 	free(dev);
818 	return (rv);
819 }
820 
821 static int
822 zfs_nvstore_getter(void *vdev, const char *name, void **data)
823 {
824 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
825 	spa_t *spa;
826 	nvlist_t *nv;
827 	char *str, **ptr;
828 	int size;
829 	int rv;
830 
831 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
832 		return (ENOTSUP);
833 
834 	if ((spa = spa_find_by_dev(dev)) == NULL)
835 		return (ENXIO);
836 
837 	if (spa->spa_bootenv == NULL)
838 		return (ENXIO);
839 
840 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
841 	    NULL, &nv, NULL) != 0)
842 		return (ENOENT);
843 
844 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
845 	if (rv == 0) {
846 		ptr = (char **)data;
847 		asprintf(ptr, "%.*s", size, str);
848 		if (*data == NULL)
849 			rv = ENOMEM;
850 	}
851 	nvlist_destroy(nv);
852 	return (rv);
853 }
854 
855 static int
856 zfs_nvstore_setter(void *vdev, int type, const char *name,
857     const void *data, size_t size)
858 {
859 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
860 	spa_t *spa;
861 	nvlist_t *nv;
862 	int rv;
863 	bool env_set = true;
864 
865 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
866 		return (ENOTSUP);
867 
868 	if ((spa = spa_find_by_dev(dev)) == NULL)
869 		return (ENXIO);
870 
871 	if (spa->spa_bootenv == NULL)
872 		return (ENXIO);
873 
874 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
875 	    NULL, &nv, NULL) != 0) {
876 		nv = nvlist_create(NV_UNIQUE_NAME);
877 		if (nv == NULL)
878 			return (ENOMEM);
879 	}
880 
881 	rv = 0;
882 	switch (type) {
883 	case DATA_TYPE_INT8:
884 		if (size != sizeof (int8_t)) {
885 			rv = EINVAL;
886 			break;
887 		}
888 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
889 		break;
890 
891 	case DATA_TYPE_INT16:
892 		if (size != sizeof (int16_t)) {
893 			rv = EINVAL;
894 			break;
895 		}
896 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
897 		break;
898 
899 	case DATA_TYPE_INT32:
900 		if (size != sizeof (int32_t)) {
901 			rv = EINVAL;
902 			break;
903 		}
904 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
905 		break;
906 
907 	case DATA_TYPE_INT64:
908 		if (size != sizeof (int64_t)) {
909 			rv = EINVAL;
910 			break;
911 		}
912 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
913 		break;
914 
915 	case DATA_TYPE_BYTE:
916 		if (size != sizeof (uint8_t)) {
917 			rv = EINVAL;
918 			break;
919 		}
920 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
921 		break;
922 
923 	case DATA_TYPE_UINT8:
924 		if (size != sizeof (uint8_t)) {
925 			rv = EINVAL;
926 			break;
927 		}
928 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
929 		break;
930 	case DATA_TYPE_UINT16:
931 		if (size != sizeof (uint16_t)) {
932 			rv = EINVAL;
933 			break;
934 		}
935 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
936 		break;
937 
938 	case DATA_TYPE_UINT32:
939 		if (size != sizeof (uint32_t)) {
940 			rv = EINVAL;
941 			break;
942 		}
943 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
944 		break;
945 
946 	case DATA_TYPE_UINT64:
947 		if (size != sizeof (uint64_t)) {
948 			rv = EINVAL;
949 			break;
950 		}
951 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
952 		break;
953 
954 	case DATA_TYPE_STRING:
955 		rv = nvlist_add_string(nv, name, data);
956 		break;
957 
958 	case DATA_TYPE_BOOLEAN_VALUE:
959 		if (size != sizeof (boolean_t)) {
960 			rv = EINVAL;
961 			break;
962 		}
963 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
964 		break;
965 
966 	default:
967 		rv = EINVAL;
968 		break;
969 	}
970 
971 	if (rv == 0) {
972 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
973 		if (rv == 0) {
974 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
975 		}
976 		if (rv == 0) {
977 			if (env_set) {
978 				rv = zfs_nvstore_setenv(vdev,
979 				    nvpair_find(nv, name));
980 			} else {
981 				env_discard(env_getenv(name));
982 				rv = 0;
983 			}
984 		}
985 	}
986 
987 	nvlist_destroy(nv);
988 	return (rv);
989 }
990 
/*
 * Parse the whole of data as a signed integer (base auto-detected by
 * strtoll: decimal, 0x hex, leading-0 octal) and store it in *ip.
 *
 * Returns 0 on success. Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoll() sets errno.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *stop;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &stop, 0);
	if (errno == 0 && *data != '\0' && *stop == '\0') {
		*ip = parsed;
		return (0);
	}

	return (EINVAL);
}
1005 
/*
 * Parse the whole of data as an unsigned integer (base auto-detected by
 * strtoull: decimal, 0x hex, leading-0 octal) and store it in *ip.
 *
 * Returns 0 on success. Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, carries a minus sign, or
 * strtoull() sets errno.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *end;
	uint64_t val;

	/*
	 * strtoull() accepts a leading '-' and returns the negated value
	 * wrapped into the unsigned range, so "-1" would silently become
	 * the largest 64-bit value. A '-' anywhere else already makes
	 * the string invalid, so any '-' can be rejected outright.
	 */
	if (strchr(data, '-') != NULL)
		return (EINVAL);

	errno = 0;
	val = strtoull(data, &end, 0);
	if (errno != 0 || *data == '\0' || *end != '\0')
		return (EINVAL);

	*ip = val;
	return (0);
}
1020 
1021 /*
1022  * Translate textual data to data type. If type is not set, and we are
1023  * creating new pair, use DATA_TYPE_STRING.
1024  */
1025 static int
1026 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1027     const char *data)
1028 {
1029 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1030 	spa_t *spa;
1031 	nvlist_t *nv;
1032 	int rv;
1033 	data_type_t dt;
1034 	int64_t val;
1035 	uint64_t uval;
1036 
1037 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1038 		return (ENOTSUP);
1039 
1040 	if ((spa = spa_find_by_dev(dev)) == NULL)
1041 		return (ENXIO);
1042 
1043 	if (spa->spa_bootenv == NULL)
1044 		return (ENXIO);
1045 
1046 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1047 	    NULL, &nv, NULL) != 0) {
1048 		nv = NULL;
1049 	}
1050 
1051 	if (type == NULL) {
1052 		nvp_header_t *nvh;
1053 
1054 		/*
1055 		 * if there is no existing pair, default to string.
1056 		 * Otherwise, use type from existing pair.
1057 		 */
1058 		nvh = nvpair_find(nv, name);
1059 		if (nvh == NULL) {
1060 			dt = DATA_TYPE_STRING;
1061 		} else {
1062 			nv_string_t *nvp_name;
1063 			nv_pair_data_t *nvp_data;
1064 
1065 			nvp_name = (nv_string_t *)(nvh + 1);
1066 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1067 			    NV_ALIGN4(nvp_name->nv_size));
1068 			dt = nvp_data->nv_type;
1069 		}
1070 	} else {
1071 		dt = nvpair_type_from_name(type);
1072 	}
1073 	nvlist_destroy(nv);
1074 
1075 	rv = 0;
1076 	switch (dt) {
1077 	case DATA_TYPE_INT8:
1078 		rv = get_int64(data, &val);
1079 		if (rv == 0) {
1080 			int8_t v = val;
1081 
1082 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1083 		}
1084 		break;
1085 	case DATA_TYPE_INT16:
1086 		rv = get_int64(data, &val);
1087 		if (rv == 0) {
1088 			int16_t v = val;
1089 
1090 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1091 		}
1092 		break;
1093 	case DATA_TYPE_INT32:
1094 		rv = get_int64(data, &val);
1095 		if (rv == 0) {
1096 			int32_t v = val;
1097 
1098 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1099 		}
1100 		break;
1101 	case DATA_TYPE_INT64:
1102 		rv = get_int64(data, &val);
1103 		if (rv == 0) {
1104 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1105 			    sizeof (val));
1106 		}
1107 		break;
1108 
1109 	case DATA_TYPE_BYTE:
1110 		rv = get_uint64(data, &uval);
1111 		if (rv == 0) {
1112 			uint8_t v = uval;
1113 
1114 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1115 		}
1116 		break;
1117 
1118 	case DATA_TYPE_UINT8:
1119 		rv = get_uint64(data, &uval);
1120 		if (rv == 0) {
1121 			uint8_t v = uval;
1122 
1123 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1124 		}
1125 		break;
1126 
1127 	case DATA_TYPE_UINT16:
1128 		rv = get_uint64(data, &uval);
1129 		if (rv == 0) {
1130 			uint16_t v = uval;
1131 
1132 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1133 		}
1134 		break;
1135 
1136 	case DATA_TYPE_UINT32:
1137 		rv = get_uint64(data, &uval);
1138 		if (rv == 0) {
1139 			uint32_t v = uval;
1140 
1141 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1142 		}
1143 		break;
1144 
1145 	case DATA_TYPE_UINT64:
1146 		rv = get_uint64(data, &uval);
1147 		if (rv == 0) {
1148 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1149 			    sizeof (uval));
1150 		}
1151 		break;
1152 
1153 	case DATA_TYPE_STRING:
1154 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1155 		break;
1156 
1157 	case DATA_TYPE_BOOLEAN_VALUE:
1158 		rv = get_int64(data, &val);
1159 		if (rv == 0) {
1160 			boolean_t v = val;
1161 
1162 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1163 		}
1164 		break;
1165 
1166 	default:
1167 		rv = EINVAL;
1168 	}
1169 	return (rv);
1170 }
1171 
1172 static int
1173 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1174 {
1175 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1176 	spa_t *spa;
1177 	nvlist_t *nv;
1178 	int rv;
1179 
1180 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1181 		return (ENOTSUP);
1182 
1183 	if ((spa = spa_find_by_dev(dev)) == NULL)
1184 		return (ENXIO);
1185 
1186 	if (spa->spa_bootenv == NULL)
1187 		return (ENXIO);
1188 
1189 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1190 	    NULL, &nv, NULL) != 0)
1191 		return (ENOENT);
1192 
1193 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1194 	if (rv == 0) {
1195 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1196 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1197 			    DATA_TYPE_NVLIST);
1198 		} else {
1199 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1200 			    OS_NVSTORE, nv);
1201 		}
1202 		if (rv == 0)
1203 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1204 	}
1205 
1206 	if (unset_env)
1207 		env_discard(env_getenv(name));
1208 	return (rv);
1209 }
1210 
/*
 * Remove name from the nvstore and also discard the environment
 * variable of the same name. Returns the zfs_nvstore_unset_impl()
 * result.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	int rv;

	rv = zfs_nvstore_unset_impl(vdev, name, true);
	return (rv);
}
1216 
1217 static int
1218 zfs_nvstore_print(void *vdev __unused, void *ptr)
1219 {
1220 
1221 	nvpair_print(ptr, 0);
1222 	return (0);
1223 }
1224 
1225 /*
1226  * Create environment variable from nvpair.
1227  * set hook will update nvstore with new value, unset hook will remove
1228  * variable from nvstore.
1229  */
1230 static int
1231 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1232 {
1233 	nvp_header_t *nvh = ptr;
1234 	nv_string_t *nvp_name, *nvp_value;
1235 	nv_pair_data_t *nvp_data;
1236 	char *name, *value;
1237 	int rv = 0;
1238 
1239 	if (nvh == NULL)
1240 		return (ENOENT);
1241 
1242 	nvp_name = (nv_string_t *)(nvh + 1);
1243 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1244 	    NV_ALIGN4(nvp_name->nv_size));
1245 
1246 	if ((name = nvstring_get(nvp_name)) == NULL)
1247 		return (ENOMEM);
1248 
1249 	value = NULL;
1250 	switch (nvp_data->nv_type) {
1251 	case DATA_TYPE_BYTE:
1252 	case DATA_TYPE_UINT8:
1253 		(void) asprintf(&value, "%uc",
1254 		    *(unsigned *)&nvp_data->nv_data[0]);
1255 		if (value == NULL)
1256 			rv = ENOMEM;
1257 		break;
1258 
1259 	case DATA_TYPE_INT8:
1260 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1261 		if (value == NULL)
1262 			rv = ENOMEM;
1263 		break;
1264 
1265 	case DATA_TYPE_INT16:
1266 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1267 		if (value == NULL)
1268 			rv = ENOMEM;
1269 		break;
1270 
1271 	case DATA_TYPE_UINT16:
1272 		(void) asprintf(&value, "%hu",
1273 		    *(unsigned short *)&nvp_data->nv_data[0]);
1274 		if (value == NULL)
1275 			rv = ENOMEM;
1276 		break;
1277 
1278 	case DATA_TYPE_BOOLEAN_VALUE:
1279 	case DATA_TYPE_INT32:
1280 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1281 		if (value == NULL)
1282 			rv = ENOMEM;
1283 		break;
1284 
1285 	case DATA_TYPE_UINT32:
1286 		(void) asprintf(&value, "%u",
1287 		    *(unsigned *)&nvp_data->nv_data[0]);
1288 		if (value == NULL)
1289 			rv = ENOMEM;
1290 		break;
1291 
1292 	case DATA_TYPE_INT64:
1293 		(void) asprintf(&value, "%jd",
1294 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1295 		if (value == NULL)
1296 			rv = ENOMEM;
1297 		break;
1298 
1299 	case DATA_TYPE_UINT64:
1300 		(void) asprintf(&value, "%ju",
1301 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1302 		if (value == NULL)
1303 			rv = ENOMEM;
1304 		break;
1305 
1306 	case DATA_TYPE_STRING:
1307 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1308 		if ((value = nvstring_get(nvp_value)) == NULL) {
1309 			rv = ENOMEM;
1310 			break;
1311 		}
1312 		break;
1313 
1314 	default:
1315 		rv = EINVAL;
1316 		break;
1317 	}
1318 
1319 	if (value != NULL) {
1320 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1321 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1322 		free(value);
1323 	}
1324 	free(name);
1325 	return (rv);
1326 }
1327 
1328 static int
1329 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1330 {
1331 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1332 	spa_t *spa;
1333 	nvlist_t *nv;
1334 	nvp_header_t *nvh;
1335 	int rv;
1336 
1337 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1338 		return (ENOTSUP);
1339 
1340 	if ((spa = spa_find_by_dev(dev)) == NULL)
1341 		return (ENXIO);
1342 
1343 	if (spa->spa_bootenv == NULL)
1344 		return (ENXIO);
1345 
1346 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1347 	    NULL, &nv, NULL) != 0)
1348 		return (ENOENT);
1349 
1350 	rv = 0;
1351 	nvh = NULL;
1352 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1353 		rv = cb(vdev, nvh);
1354 		if (rv != 0)
1355 			break;
1356 	}
1357 	return (rv);
1358 }
1359 
1360 nvs_callbacks_t nvstore_zfs_cb = {
1361 	.nvs_getter = zfs_nvstore_getter,
1362 	.nvs_setter = zfs_nvstore_setter,
1363 	.nvs_setter_str = zfs_nvstore_setter_str,
1364 	.nvs_unset = zfs_nvstore_unset,
1365 	.nvs_print = zfs_nvstore_print,
1366 	.nvs_iterate = zfs_nvstore_iterate
1367 };
1368 
1369 int
1370 zfs_attach_nvstore(void *vdev)
1371 {
1372 	struct zfs_devdesc *dev = vdev;
1373 	spa_t *spa;
1374 	uint64_t version;
1375 	int rv;
1376 
1377 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1378 		return (ENOTSUP);
1379 
1380 	if ((spa = spa_find_by_dev(dev)) == NULL)
1381 		return (ENXIO);
1382 
1383 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1384 	    NULL, &version, NULL);
1385 
1386 	if (rv != 0 || version != VB_NVLIST) {
1387 		return (ENXIO);
1388 	}
1389 
1390 	dev = malloc(sizeof (*dev));
1391 	if (dev == NULL)
1392 		return (ENOMEM);
1393 	memcpy(dev, vdev, sizeof (*dev));
1394 
1395 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1396 	if (rv != 0)
1397 		free(dev);
1398 	else
1399 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1400 	return (rv);
1401 }
1402 
1403 int
1404 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
1405 {
1406 	struct disk_devdesc *dev;
1407 	struct ptable *table;
1408 	struct zfs_probe_args pa;
1409 	uint64_t mediasz;
1410 	int ret;
1411 
1412 	if (pool_guid)
1413 		*pool_guid = 0;
1414 	pa.fd = open(devname, O_RDWR);
1415 	if (pa.fd == -1)
1416 		return (ENXIO);
1417 	/*
1418 	 * We will not probe the whole disk, we can not boot from such
1419 	 * disks and some systems will misreport the disk sizes and will
1420 	 * hang while accessing the disk.
1421 	 */
1422 	if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) {
1423 		int partition = dev->d_partition;
1424 		int slice = dev->d_slice;
1425 
1426 		free(dev);
1427 		if (partition != D_PARTNONE && slice != D_SLICENONE) {
1428 			ret = zfs_probe(pa.fd, pool_guid);
1429 			if (ret == 0)
1430 				return (0);
1431 		}
1432 	}
1433 
1434 	/* Probe each partition */
1435 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1436 	if (ret == 0)
1437 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1438 	if (ret == 0) {
1439 		pa.devname = devname;
1440 		pa.pool_guid = pool_guid;
1441 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1442 		    zfs_diskread);
1443 		if (table != NULL) {
1444 			ptable_iterate(table, &pa, zfs_probe_partition);
1445 			ptable_close(table);
1446 		}
1447 	}
1448 	close(pa.fd);
1449 	if (pool_guid && *pool_guid == 0)
1450 		ret = ENXIO;
1451 	return (ret);
1452 }
1453 
1454 /*
1455  * Print information about ZFS pools
1456  */
1457 static int
1458 zfs_dev_print(int verbose)
1459 {
1460 	spa_t *spa;
1461 	char line[80];
1462 	int ret = 0;
1463 
1464 	if (STAILQ_EMPTY(&zfs_pools))
1465 		return (0);
1466 
1467 	printf("%s devices:", zfs_dev.dv_name);
1468 	if ((ret = pager_output("\n")) != 0)
1469 		return (ret);
1470 
1471 	if (verbose) {
1472 		return (spa_all_status());
1473 	}
1474 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1475 		snprintf(line, sizeof (line), "    zfs:%s\n", spa->spa_name);
1476 		ret = pager_output(line);
1477 		if (ret != 0)
1478 			break;
1479 	}
1480 	return (ret);
1481 }
1482 
1483 /*
1484  * Attempt to open the pool described by (dev) for use by (f).
1485  */
1486 static int
1487 zfs_dev_open(struct open_file *f, ...)
1488 {
1489 	va_list		args;
1490 	struct zfs_devdesc	*dev;
1491 	struct zfsmount	*mount;
1492 	spa_t		*spa;
1493 	int		rv;
1494 
1495 	va_start(args, f);
1496 	dev = va_arg(args, struct zfs_devdesc *);
1497 	va_end(args);
1498 
1499 	if ((spa = spa_find_by_dev(dev)) == NULL)
1500 		return (ENXIO);
1501 
1502 	mount = malloc(sizeof (*mount));
1503 	if (mount == NULL)
1504 		rv = ENOMEM;
1505 	else
1506 		rv = zfs_mount(spa, dev->root_guid, mount);
1507 	if (rv != 0) {
1508 		free(mount);
1509 		return (rv);
1510 	}
1511 	if (mount->objset.os_type != DMU_OST_ZFS) {
1512 		printf("Unexpected object set type %ju\n",
1513 		    (uintmax_t)mount->objset.os_type);
1514 		free(mount);
1515 		return (EIO);
1516 	}
1517 	f->f_devdata = mount;
1518 	free(dev);
1519 	return (0);
1520 }
1521 
1522 static int
1523 zfs_dev_close(struct open_file *f)
1524 {
1525 
1526 	free(f->f_devdata);
1527 	f->f_devdata = NULL;
1528 	return (0);
1529 }
1530 
1531 static int
1532 zfs_dev_strategy(void *devdata __unused, int rw __unused,
1533     daddr_t dblk __unused, size_t size __unused,
1534     char *buf __unused, size_t *rsize __unused)
1535 {
1536 
1537 	return (ENOSYS);
1538 }
1539 
1540 struct devsw zfs_dev = {
1541 	.dv_name = "zfs",
1542 	.dv_type = DEVT_ZFS,
1543 	.dv_init = zfs_dev_init,
1544 	.dv_strategy = zfs_dev_strategy,
1545 	.dv_open = zfs_dev_open,
1546 	.dv_close = zfs_dev_close,
1547 	.dv_ioctl = noioctl,
1548 	.dv_print = zfs_dev_print,
1549 	.dv_cleanup = NULL
1550 };
1551 
1552 int
1553 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
1554 {
1555 	static char	rootname[ZFS_MAXNAMELEN];
1556 	static char	poolname[ZFS_MAXNAMELEN];
1557 	spa_t		*spa;
1558 	const char	*end;
1559 	const char	*np;
1560 	const char	*sep;
1561 	int		rv;
1562 
1563 	np = devspec;
1564 	if (*np != ':')
1565 		return (EINVAL);
1566 	np++;
1567 	end = strrchr(np, ':');
1568 	if (end == NULL)
1569 		return (EINVAL);
1570 	sep = strchr(np, '/');
1571 	if (sep == NULL || sep >= end)
1572 		sep = end;
1573 	memcpy(poolname, np, sep - np);
1574 	poolname[sep - np] = '\0';
1575 	if (sep < end) {
1576 		sep++;
1577 		memcpy(rootname, sep, end - sep);
1578 		rootname[end - sep] = '\0';
1579 	}
1580 	else
1581 		rootname[0] = '\0';
1582 
1583 	spa = spa_find_by_name(poolname);
1584 	if (!spa)
1585 		return (ENXIO);
1586 	dev->pool_guid = spa->spa_guid;
1587 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1588 	if (rv != 0)
1589 		return (rv);
1590 	if (path != NULL)
1591 		*path = (*end == '\0') ? end : end + 1;
1592 	dev->dd.d_dev = &zfs_dev;
1593 	return (0);
1594 }
1595 
1596 char *
1597 zfs_bootfs(void *zdev)
1598 {
1599 	static char		rootname[ZFS_MAXNAMELEN];
1600 	static char		buf[2 * ZFS_MAXNAMELEN];
1601 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)zdev;
1602 	uint64_t		objnum;
1603 	spa_t			*spa;
1604 	int			n;
1605 
1606 	buf[0] = '\0';
1607 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1608 		return (buf);
1609 
1610 	spa = spa_find_by_guid(dev->pool_guid);
1611 	if (spa == NULL) {
1612 		printf("ZFS: can't find pool by guid\n");
1613 		return (buf);
1614 	}
1615 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1616 		printf("ZFS: can't find filesystem by guid\n");
1617 		return (buf);
1618 	}
1619 	if (zfs_lookup_dataset(spa, rootname, &objnum)) {
1620 		printf("ZFS: can't find filesystem by name\n");
1621 		return (buf);
1622 	}
1623 
1624 	/* Set the environment. */
1625 	snprintf(buf, sizeof (buf), "%" PRIu64, dev->pool_guid);
1626 	setenv("zfs-bootpool", buf, 1);
1627 	snprintf(buf, sizeof (buf), "%" PRIu64, spa->spa_boot_vdev->v_guid);
1628 	setenv("zfs-bootvdev", buf, 1);
1629 	snprintf(buf, sizeof (buf), "%s/%" PRIu64, spa->spa_name, objnum);
1630 	setenv("zfs-bootfs", buf, 1);
1631 	if (spa->spa_boot_vdev->v_phys_path != NULL)
1632 		setenv("bootpath", spa->spa_boot_vdev->v_phys_path, 1);
1633 	if (spa->spa_boot_vdev->v_devid != NULL)
1634 		setenv("diskdevid", spa->spa_boot_vdev->v_devid, 1);
1635 
1636 	/*
1637 	 * Build the command line string. Once our kernel will read
1638 	 * the environment and we can stop caring about old kernels,
1639 	 * we can remove this part.
1640 	 */
1641 	snprintf(buf, sizeof (buf), "zfs-bootfs=%s/%" PRIu64, spa->spa_name,
1642 	    objnum);
1643 	n = strlen(buf);
1644 	if (spa->spa_boot_vdev->v_phys_path != NULL) {
1645 		snprintf(buf+n, sizeof (buf) - n, ",bootpath=\"%s\"",
1646 		    spa->spa_boot_vdev->v_phys_path);
1647 		n = strlen(buf);
1648 	}
1649 	if (spa->spa_boot_vdev->v_devid != NULL) {
1650 		snprintf(buf+n, sizeof (buf) - n, ",diskdevid=\"%s\"",
1651 		    spa->spa_boot_vdev->v_devid);
1652 	}
1653 	return (buf);
1654 }
1655 
1656 char *
1657 zfs_fmtdev(void *vdev)
1658 {
1659 	static char		rootname[ZFS_MAXNAMELEN];
1660 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1661 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1662 	spa_t			*spa;
1663 
1664 	buf[0] = '\0';
1665 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1666 		return (buf);
1667 
1668 	/* Do we have any pools? */
1669 	spa = STAILQ_FIRST(&zfs_pools);
1670 	if (spa == NULL)
1671 		return (buf);
1672 
1673 	if (dev->pool_guid == 0)
1674 		dev->pool_guid = spa->spa_guid;
1675 	else
1676 		spa = spa_find_by_guid(dev->pool_guid);
1677 
1678 	if (spa == NULL) {
1679 		printf("ZFS: can't find pool by guid\n");
1680 		return (buf);
1681 	}
1682 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1683 		printf("ZFS: can't find root filesystem\n");
1684 		return (buf);
1685 	}
1686 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1687 		printf("ZFS: can't find filesystem by guid\n");
1688 		return (buf);
1689 	}
1690 
1691 	if (rootname[0] == '\0')
1692 		snprintf(buf, sizeof (buf), "%s:%s:", dev->dd.d_dev->dv_name,
1693 		    spa->spa_name);
1694 	else
1695 		snprintf(buf, sizeof (buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1696 		    spa->spa_name, rootname);
1697 	return (buf);
1698 }
1699 
1700 int
1701 zfs_list(const char *name)
1702 {
1703 	static char	poolname[ZFS_MAXNAMELEN];
1704 	uint64_t	objid;
1705 	spa_t		*spa;
1706 	const char	*dsname;
1707 	int		len;
1708 	int		rv;
1709 
1710 	len = strlen(name);
1711 	dsname = strchr(name, '/');
1712 	if (dsname != NULL) {
1713 		len = dsname - name;
1714 		dsname++;
1715 	} else
1716 		dsname = "";
1717 	memcpy(poolname, name, len);
1718 	poolname[len] = '\0';
1719 
1720 	spa = spa_find_by_name(poolname);
1721 	if (!spa)
1722 		return (ENXIO);
1723 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1724 	if (rv != 0)
1725 		return (rv);
1726 
1727 	return (zfs_list_dataset(spa, objid));
1728 }
1729