xref: /freebsd/stand/libsa/zfs/zfs.c (revision 766145637dd5f1316c2ac4a20956e1b17bf8df42)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  *	$FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  *	Stand-alone file reading package.
34  */
35 
36 #include <stand.h>
37 #include <sys/disk.h>
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/queue.h>
41 #include <part.h>
42 #include <stddef.h>
43 #include <stdarg.h>
44 #include <string.h>
45 #include <bootstrap.h>
46 
47 #include "libzfs.h"
48 
49 #include "zfsimpl.c"
50 
51 /* Define the range of indexes to be populated with ZFS Boot Environments */
52 #define		ZFS_BE_FIRST	4
53 #define		ZFS_BE_LAST	8
54 
55 static int	zfs_open(const char *path, struct open_file *f);
56 static int	zfs_close(struct open_file *f);
57 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
58 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
59 static int	zfs_stat(struct open_file *f, struct stat *sb);
60 static int	zfs_readdir(struct open_file *f, struct dirent *d);
61 static int	zfs_mount(const char *dev, const char *path, void **data);
62 static int	zfs_unmount(const char *dev, void *data);
63 
64 static void	zfs_bootenv_initial(const char *envname, spa_t *spa,
65 		    const char *name, const char *dsname, int checkpoint);
66 static void	zfs_checkpoints_initial(spa_t *spa, const char *name,
67 		    const char *dsname);
68 
69 static int	zfs_parsedev(struct devdesc **idev, const char *devspec,
70 		    const char **path);
71 
72 struct devsw zfs_dev;
73 
74 struct fs_ops zfs_fsops = {
75 	.fs_name = "zfs",
76 	.fo_open = zfs_open,
77 	.fo_close = zfs_close,
78 	.fo_read = zfs_read,
79 	.fo_write = null_write,
80 	.fo_seek = zfs_seek,
81 	.fo_stat = zfs_stat,
82 	.fo_readdir = zfs_readdir,
83 	.fo_mount = zfs_mount,
84 	.fo_unmount = zfs_unmount
85 };
86 
87 /*
88  * In-core open file.
89  */
90 struct file {
91 	off_t		f_seekp;	/* seek pointer */
92 	dnode_phys_t	f_dnode;
93 	uint64_t	f_zap_type;	/* zap type for readdir */
94 	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
95 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
96 };
97 
98 static int	zfs_env_index;
99 static int	zfs_env_count;
100 
101 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
102 struct zfs_be_list *zfs_be_headp;
103 struct zfs_be_entry {
104 	char *name;
105 	SLIST_ENTRY(zfs_be_entry) entries;
106 } *zfs_be, *zfs_be_tmp;
107 
108 /*
109  * Open a file.
110  */
111 static int
112 zfs_open(const char *upath, struct open_file *f)
113 {
114 	struct devdesc *dev = f->f_devdata;
115 	struct zfsmount *mount = dev->d_opendata;
116 	struct file *fp;
117 	int rc;
118 
119 	if (f->f_dev != &zfs_dev)
120 		return (EINVAL);
121 
122 	/* allocate file system specific data structure */
123 	fp = calloc(1, sizeof(struct file));
124 	if (fp == NULL)
125 		return (ENOMEM);
126 	f->f_fsdata = fp;
127 
128 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
129 	fp->f_seekp = 0;
130 	if (rc) {
131 		f->f_fsdata = NULL;
132 		free(fp);
133 	}
134 	return (rc);
135 }
136 
137 static int
138 zfs_close(struct open_file *f)
139 {
140 	struct file *fp = (struct file *)f->f_fsdata;
141 
142 	dnode_cache_obj = NULL;
143 	f->f_fsdata = NULL;
144 
145 	free(fp);
146 	return (0);
147 }
148 
149 /*
150  * Copy a portion of a file into kernel memory.
151  * Cross block boundaries when necessary.
152  */
153 static int
154 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
155 {
156 	struct devdesc *dev = f->f_devdata;
157 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
158 	struct file *fp = (struct file *)f->f_fsdata;
159 	struct stat sb;
160 	size_t n;
161 	int rc;
162 
163 	rc = zfs_stat(f, &sb);
164 	if (rc)
165 		return (rc);
166 	n = size;
167 	if (fp->f_seekp + n > sb.st_size)
168 		n = sb.st_size - fp->f_seekp;
169 
170 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
171 	if (rc)
172 		return (rc);
173 
174 	if (0) {
175 	    int i;
176 	    for (i = 0; i < n; i++)
177 		putchar(((char*) start)[i]);
178 	}
179 	fp->f_seekp += n;
180 	if (resid)
181 		*resid = size - n;
182 
183 	return (0);
184 }
185 
186 static off_t
187 zfs_seek(struct open_file *f, off_t offset, int where)
188 {
189 	struct file *fp = (struct file *)f->f_fsdata;
190 
191 	switch (where) {
192 	case SEEK_SET:
193 		fp->f_seekp = offset;
194 		break;
195 	case SEEK_CUR:
196 		fp->f_seekp += offset;
197 		break;
198 	case SEEK_END:
199 	    {
200 		struct stat sb;
201 		int error;
202 
203 		error = zfs_stat(f, &sb);
204 		if (error != 0) {
205 			errno = error;
206 			return (-1);
207 		}
208 		fp->f_seekp = sb.st_size - offset;
209 		break;
210 	    }
211 	default:
212 		errno = EINVAL;
213 		return (-1);
214 	}
215 	return (fp->f_seekp);
216 }
217 
218 static int
219 zfs_stat(struct open_file *f, struct stat *sb)
220 {
221 	struct devdesc *dev = f->f_devdata;
222 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
223 	struct file *fp = (struct file *)f->f_fsdata;
224 
225 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
226 }
227 
228 static int
229 zfs_readdir(struct open_file *f, struct dirent *d)
230 {
231 	struct devdesc *dev = f->f_devdata;
232 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
233 	struct file *fp = (struct file *)f->f_fsdata;
234 	mzap_ent_phys_t mze;
235 	struct stat sb;
236 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
237 	int rc;
238 
239 	rc = zfs_stat(f, &sb);
240 	if (rc)
241 		return (rc);
242 	if (!S_ISDIR(sb.st_mode))
243 		return (ENOTDIR);
244 
245 	/*
246 	 * If this is the first read, get the zap type.
247 	 */
248 	if (fp->f_seekp == 0) {
249 		rc = dnode_read(spa, &fp->f_dnode,
250 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
251 		if (rc)
252 			return (rc);
253 
254 		if (fp->f_zap_type == ZBT_MICRO) {
255 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
256 		} else {
257 			rc = dnode_read(spa, &fp->f_dnode,
258 					offsetof(zap_phys_t, zap_num_leafs),
259 					&fp->f_num_leafs,
260 					sizeof(fp->f_num_leafs));
261 			if (rc)
262 				return (rc);
263 
264 			fp->f_seekp = bsize;
265 			fp->f_zap_leaf = malloc(bsize);
266 			if (fp->f_zap_leaf == NULL)
267 				return (ENOMEM);
268 			rc = dnode_read(spa, &fp->f_dnode,
269 					fp->f_seekp,
270 					fp->f_zap_leaf,
271 					bsize);
272 			if (rc)
273 				return (rc);
274 		}
275 	}
276 
277 	if (fp->f_zap_type == ZBT_MICRO) {
278 	mzap_next:
279 		if (fp->f_seekp >= bsize)
280 			return (ENOENT);
281 
282 		rc = dnode_read(spa, &fp->f_dnode,
283 				fp->f_seekp, &mze, sizeof(mze));
284 		if (rc)
285 			return (rc);
286 		fp->f_seekp += sizeof(mze);
287 
288 		if (!mze.mze_name[0])
289 			goto mzap_next;
290 
291 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
292 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
293 		strcpy(d->d_name, mze.mze_name);
294 		d->d_namlen = strlen(d->d_name);
295 		return (0);
296 	} else {
297 		zap_leaf_t zl;
298 		zap_leaf_chunk_t *zc, *nc;
299 		int chunk;
300 		size_t namelen;
301 		char *p;
302 		uint64_t value;
303 
304 		/*
305 		 * Initialise this so we can use the ZAP size
306 		 * calculating macros.
307 		 */
308 		zl.l_bs = ilog2(bsize);
309 		zl.l_phys = fp->f_zap_leaf;
310 
311 		/*
312 		 * Figure out which chunk we are currently looking at
313 		 * and consider seeking to the next leaf. We use the
314 		 * low bits of f_seekp as a simple chunk index.
315 		 */
316 	fzap_next:
317 		chunk = fp->f_seekp & (bsize - 1);
318 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
319 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
320 			chunk = 0;
321 
322 			/*
323 			 * Check for EOF and read the new leaf.
324 			 */
325 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
326 				return (ENOENT);
327 
328 			rc = dnode_read(spa, &fp->f_dnode,
329 					fp->f_seekp,
330 					fp->f_zap_leaf,
331 					bsize);
332 			if (rc)
333 				return (rc);
334 		}
335 
336 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
337 		fp->f_seekp++;
338 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
339 			goto fzap_next;
340 
341 		namelen = zc->l_entry.le_name_numints;
342 		if (namelen > sizeof(d->d_name))
343 			namelen = sizeof(d->d_name);
344 
345 		/*
346 		 * Paste the name back together.
347 		 */
348 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
349 		p = d->d_name;
350 		while (namelen > 0) {
351 			int len;
352 			len = namelen;
353 			if (len > ZAP_LEAF_ARRAY_BYTES)
354 				len = ZAP_LEAF_ARRAY_BYTES;
355 			memcpy(p, nc->l_array.la_array, len);
356 			p += len;
357 			namelen -= len;
358 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
359 		}
360 		d->d_name[sizeof(d->d_name) - 1] = 0;
361 
362 		/*
363 		 * Assume the first eight bytes of the value are
364 		 * a uint64_t.
365 		 */
366 		value = fzap_leaf_value(&zl, zc);
367 
368 		d->d_fileno = ZFS_DIRENT_OBJ(value);
369 		d->d_type = ZFS_DIRENT_TYPE(value);
370 		d->d_namlen = strlen(d->d_name);
371 
372 		return (0);
373 	}
374 }
375 
376 /*
377  * if path is NULL, create mount structure, but do not add it to list.
378  */
379 static int
380 zfs_mount(const char *dev, const char *path, void **data)
381 {
382 	struct zfs_devdesc *zfsdev;
383 	spa_t *spa;
384 	struct zfsmount *mnt;
385 	int rv;
386 
387 	errno = 0;
388 	rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL);
389 	if (rv != 0) {
390 		return (rv);
391 	}
392 
393 	spa = spa_find_by_dev(zfsdev);
394 	if (spa == NULL)
395 		return (ENXIO);
396 
397 	mnt = calloc(1, sizeof(*mnt));
398 	if (mnt != NULL && path != NULL)
399 		mnt->path = strdup(path);
400 	rv = errno;
401 
402 	if (mnt != NULL)
403 		rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
404 	free(zfsdev);
405 
406 	if (rv == 0 && mnt != NULL && mnt->objset.os_type != DMU_OST_ZFS) {
407 		printf("Unexpected object set type %ju\n",
408 		    (uintmax_t)mnt->objset.os_type);
409 		rv = EIO;
410 	}
411 
412 	if (rv != 0) {
413 		if (mnt != NULL)
414 			free(mnt->path);
415 		free(mnt);
416 		return (rv);
417 	}
418 
419 	if (mnt != NULL) {
420 		*data = mnt;
421 		if (path != NULL)
422 			STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
423 	}
424 
425 	return (rv);
426 }
427 
428 static int
429 zfs_unmount(const char *dev, void *data)
430 {
431 	struct zfsmount *mnt = data;
432 
433 	STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
434 	free(mnt->path);
435 	free(mnt);
436 	return (0);
437 }
438 
439 static int
440 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
441 {
442 	int fd, ret;
443 	size_t res, head, tail, total_size, full_sec_size;
444 	unsigned secsz, do_tail_read;
445 	off_t start_sec;
446 	char *outbuf, *bouncebuf;
447 
448 	fd = (uintptr_t) priv;
449 	outbuf = (char *) buf;
450 	bouncebuf = NULL;
451 
452 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
453 	if (ret != 0)
454 		return (ret);
455 
456 	/*
457 	 * Handling reads of arbitrary offset and size - multi-sector case
458 	 * and single-sector case.
459 	 *
460 	 *                        Multi-sector Case
461 	 *                (do_tail_read = true if tail > 0)
462 	 *
463 	 *   |<----------------------total_size--------------------->|
464 	 *   |                                                       |
465 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
466 	 *   |          |                                 |          |
467 	 *   |          |       |<~full_sec_size~>|       |          |
468 	 *   +------------------+                 +------------------+
469 	 *   |          |0101010|     .  .  .     |0101011|          |
470 	 *   +------------------+                 +------------------+
471 	 *         start_sec                         start_sec + n
472 	 *
473 	 *
474 	 *                      Single-sector Case
475 	 *                    (do_tail_read = false)
476 	 *
477 	 *              |<------total_size = secsz----->|
478 	 *              |                               |
479 	 *              |<-head->|<---bytes--->|<-tail->|
480 	 *              +-------------------------------+
481 	 *              |        |0101010101010|        |
482 	 *              +-------------------------------+
483 	 *                          start_sec
484 	 */
485 	start_sec = offset / secsz;
486 	head = offset % secsz;
487 	total_size = roundup2(head + bytes, secsz);
488 	tail = total_size - (head + bytes);
489 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
490 	full_sec_size = total_size;
491 	if (head > 0)
492 		full_sec_size -= secsz;
493 	if (do_tail_read)
494 		full_sec_size -= secsz;
495 
496 	/* Return of partial sector data requires a bounce buffer. */
497 	if ((head > 0) || do_tail_read || bytes < secsz) {
498 		bouncebuf = malloc(secsz);
499 		if (bouncebuf == NULL) {
500 			printf("vdev_read: out of memory\n");
501 			return (ENOMEM);
502 		}
503 	}
504 
505 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
506 		ret = errno;
507 		goto error;
508 	}
509 
510 	/* Partial data return from first sector */
511 	if (head > 0) {
512 		res = read(fd, bouncebuf, secsz);
513 		if (res != secsz) {
514 			ret = EIO;
515 			goto error;
516 		}
517 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
518 		outbuf += min(secsz - head, bytes);
519 	}
520 
521 	/*
522 	 * Full data return from read sectors.
523 	 * Note, there is still corner case where we read
524 	 * from sector boundary, but less than sector size, e.g. reading 512B
525 	 * from 4k sector.
526 	 */
527 	if (full_sec_size > 0) {
528 		if (bytes < full_sec_size) {
529 			res = read(fd, bouncebuf, secsz);
530 			if (res != secsz) {
531 				ret = EIO;
532 				goto error;
533 			}
534 			memcpy(outbuf, bouncebuf, bytes);
535 		} else {
536 			res = read(fd, outbuf, full_sec_size);
537 			if (res != full_sec_size) {
538 				ret = EIO;
539 				goto error;
540 			}
541 			outbuf += full_sec_size;
542 		}
543 	}
544 
545 	/* Partial data return from last sector */
546 	if (do_tail_read) {
547 		res = read(fd, bouncebuf, secsz);
548 		if (res != secsz) {
549 			ret = EIO;
550 			goto error;
551 		}
552 		memcpy(outbuf, bouncebuf, secsz - tail);
553 	}
554 
555 	ret = 0;
556 error:
557 	free(bouncebuf);
558 	return (ret);
559 }
560 
561 static int
562 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
563 {
564 	int fd, ret;
565 	size_t head, tail, total_size, full_sec_size;
566 	unsigned secsz, do_tail_write;
567 	off_t start_sec;
568 	ssize_t res;
569 	char *outbuf, *bouncebuf;
570 
571 	fd = (uintptr_t)vdev->v_priv;
572 	outbuf = (char *)buf;
573 	bouncebuf = NULL;
574 
575 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
576 	if (ret != 0)
577 		return (ret);
578 
579 	start_sec = offset / secsz;
580 	head = offset % secsz;
581 	total_size = roundup2(head + bytes, secsz);
582 	tail = total_size - (head + bytes);
583 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
584 	full_sec_size = total_size;
585 	if (head > 0)
586 		full_sec_size -= secsz;
587 	if (do_tail_write)
588 		full_sec_size -= secsz;
589 
590 	/* Partial sector write requires a bounce buffer. */
591 	if ((head > 0) || do_tail_write || bytes < secsz) {
592 		bouncebuf = malloc(secsz);
593 		if (bouncebuf == NULL) {
594 			printf("vdev_write: out of memory\n");
595 			return (ENOMEM);
596 		}
597 	}
598 
599 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
600 		ret = errno;
601 		goto error;
602 	}
603 
604 	/* Partial data for first sector */
605 	if (head > 0) {
606 		res = read(fd, bouncebuf, secsz);
607 		if ((unsigned)res != secsz) {
608 			ret = EIO;
609 			goto error;
610 		}
611 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
612 		(void) lseek(fd, -secsz, SEEK_CUR);
613 		res = write(fd, bouncebuf, secsz);
614 		if ((unsigned)res != secsz) {
615 			ret = EIO;
616 			goto error;
617 		}
618 		outbuf += min(secsz - head, bytes);
619 	}
620 
621 	/*
622 	 * Full data write to sectors.
623 	 * Note, there is still corner case where we write
624 	 * to sector boundary, but less than sector size, e.g. write 512B
625 	 * to 4k sector.
626 	 */
627 	if (full_sec_size > 0) {
628 		if (bytes < full_sec_size) {
629 			res = read(fd, bouncebuf, secsz);
630 			if ((unsigned)res != secsz) {
631 				ret = EIO;
632 				goto error;
633 			}
634 			memcpy(bouncebuf, outbuf, bytes);
635 			(void) lseek(fd, -secsz, SEEK_CUR);
636 			res = write(fd, bouncebuf, secsz);
637 			if ((unsigned)res != secsz) {
638 				ret = EIO;
639 				goto error;
640 			}
641 		} else {
642 			res = write(fd, outbuf, full_sec_size);
643 			if ((unsigned)res != full_sec_size) {
644 				ret = EIO;
645 				goto error;
646 			}
647 			outbuf += full_sec_size;
648 		}
649 	}
650 
651 	/* Partial data write to last sector */
652 	if (do_tail_write) {
653 		res = read(fd, bouncebuf, secsz);
654 		if ((unsigned)res != secsz) {
655 			ret = EIO;
656 			goto error;
657 		}
658 		memcpy(bouncebuf, outbuf, secsz - tail);
659 		(void) lseek(fd, -secsz, SEEK_CUR);
660 		res = write(fd, bouncebuf, secsz);
661 		if ((unsigned)res != secsz) {
662 			ret = EIO;
663 			goto error;
664 		}
665 	}
666 
667 	ret = 0;
668 error:
669 	free(bouncebuf);
670 	return (ret);
671 }
672 
673 static int
674 zfs_dev_init(void)
675 {
676 	spa_t *spa;
677 	spa_t *next;
678 	spa_t *prev;
679 
680 	zfs_init();
681 	if (archsw.arch_zfs_probe == NULL)
682 		return (ENXIO);
683 	archsw.arch_zfs_probe();
684 
685 	prev = NULL;
686 	spa = STAILQ_FIRST(&zfs_pools);
687 	while (spa != NULL) {
688 		next = STAILQ_NEXT(spa, spa_link);
689 		if (zfs_spa_init(spa)) {
690 			if (prev == NULL)
691 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
692 			else
693 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
694 		} else
695 			prev = spa;
696 		spa = next;
697 	}
698 	return (0);
699 }
700 
701 struct zfs_probe_args {
702 	int		fd;
703 	const char	*devname;
704 	uint64_t	*pool_guid;
705 	u_int		secsz;
706 };
707 
708 static int
709 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
710 {
711 	struct zfs_probe_args *ppa;
712 
713 	ppa = (struct zfs_probe_args *)arg;
714 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
715 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
716 }
717 
718 static int
719 zfs_probe(int fd, uint64_t *pool_guid)
720 {
721 	spa_t *spa;
722 	int ret;
723 
724 	spa = NULL;
725 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
726 	if (ret == 0 && pool_guid != NULL)
727 		if (*pool_guid == 0)
728 			*pool_guid = spa->spa_guid;
729 	return (ret);
730 }
731 
732 static int
733 zfs_probe_partition(void *arg, const char *partname,
734     const struct ptable_entry *part)
735 {
736 	struct zfs_probe_args *ppa, pa;
737 	struct ptable *table;
738 	char devname[32];
739 	int ret;
740 
741 	/* Probe only freebsd-zfs and freebsd partitions */
742 	if (part->type != PART_FREEBSD &&
743 	    part->type != PART_FREEBSD_ZFS)
744 		return (0);
745 
746 	ppa = (struct zfs_probe_args *)arg;
747 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
748 	devname[strlen(ppa->devname) - 1] = '\0';
749 	snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
750 	pa.fd = open(devname, O_RDWR);
751 	if (pa.fd == -1)
752 		return (0);
753 	ret = zfs_probe(pa.fd, ppa->pool_guid);
754 	if (ret == 0)
755 		return (0);
756 	/* Do we have BSD label here? */
757 	if (part->type == PART_FREEBSD) {
758 		pa.devname = devname;
759 		pa.pool_guid = ppa->pool_guid;
760 		pa.secsz = ppa->secsz;
761 		table = ptable_open(&pa, part->end - part->start + 1,
762 		    ppa->secsz, zfs_diskread);
763 		if (table != NULL) {
764 			ptable_iterate(table, &pa, zfs_probe_partition);
765 			ptable_close(table);
766 		}
767 	}
768 	close(pa.fd);
769 	return (0);
770 }
771 
772 /*
773  * Return bootenv nvlist from pool label.
774  */
775 int
776 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
777 {
778 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
779 	nvlist_t *benv = NULL;
780 	vdev_t *vd;
781 	spa_t *spa;
782 
783 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
784 		return (ENOTSUP);
785 
786 	if ((spa = spa_find_by_dev(dev)) == NULL)
787 		return (ENXIO);
788 
789 	if (spa->spa_bootenv == NULL) {
790 		STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children,
791 		    v_childlink) {
792 			benv = vdev_read_bootenv(vd);
793 
794 			if (benv != NULL)
795 				break;
796 		}
797 		spa->spa_bootenv = benv;
798 	} else {
799 		benv = spa->spa_bootenv;
800 	}
801 
802 	if (benv == NULL)
803 		return (ENOENT);
804 
805 	*benvp = benv;
806 	return (0);
807 }
808 
809 /*
810  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
811  */
812 int
813 zfs_set_bootenv(void *vdev, nvlist_t *benv)
814 {
815 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
816 	spa_t *spa;
817 	vdev_t *vd;
818 
819 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
820 		return (ENOTSUP);
821 
822 	if ((spa = spa_find_by_dev(dev)) == NULL)
823 		return (ENXIO);
824 
825 	STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
826 		vdev_write_bootenv(vd, benv);
827 	}
828 
829 	spa->spa_bootenv = benv;
830 	return (0);
831 }
832 
833 /*
834  * Get bootonce value by key. The bootonce <key, value> pair is removed
835  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
836  */
837 int
838 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
839 {
840 	nvlist_t *benv;
841 	char *result = NULL;
842 	int result_size, rv;
843 
844 	if ((rv = zfs_get_bootenv(vdev, &benv)) != 0)
845 		return (rv);
846 
847 	if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL,
848 	    &result, &result_size)) == 0) {
849 		if (result_size == 0) {
850 			/* ignore empty string */
851 			rv = ENOENT;
852 		} else {
853 			size = MIN((size_t)result_size + 1, size);
854 			strlcpy(buf, result, size);
855 		}
856 		(void) nvlist_remove(benv, key, DATA_TYPE_STRING);
857 		(void) zfs_set_bootenv(vdev, benv);
858 	}
859 
860 	return (rv);
861 }
862 
/*
 * nvstore backend.
 *
 * Forward declarations for the routines that the hooks and setters below
 * call before their definitions.
 */
static int	zfs_nvstore_setter(void *, int, const char *,
		    const void *, size_t);
static int	zfs_nvstore_setter_str(void *, const char *, const char *,
		    const char *);
static int	zfs_nvstore_unset_impl(void *, const char *, bool);
static int	zfs_nvstore_setenv(void *, void *);
873 
874 /*
875  * nvstore is only present for current rootfs pool.
876  */
877 static int
878 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
879 {
880 	struct zfs_devdesc *dev;
881 	int rv;
882 
883 	archsw.arch_getdev((void **)&dev, NULL, NULL);
884 	if (dev == NULL)
885 		return (ENXIO);
886 
887 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
888 
889 	free(dev);
890 	return (rv);
891 }
892 
893 /*
894  * nvstore is only present for current rootfs pool.
895  */
896 static int
897 zfs_nvstore_unsethook(struct env_var *ev)
898 {
899 	struct zfs_devdesc *dev;
900 	int rv;
901 
902 	archsw.arch_getdev((void **)&dev, NULL, NULL);
903 	if (dev == NULL)
904 		return (ENXIO);
905 
906 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
907 
908 	free(dev);
909 	return (rv);
910 }
911 
912 static int
913 zfs_nvstore_getter(void *vdev, const char *name, void **data)
914 {
915 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
916 	spa_t *spa;
917 	nvlist_t *nv;
918 	char *str, **ptr;
919 	int size;
920 	int rv;
921 
922 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
923 		return (ENOTSUP);
924 
925 	if ((spa = spa_find_by_dev(dev)) == NULL)
926 		return (ENXIO);
927 
928 	if (spa->spa_bootenv == NULL)
929 		return (ENXIO);
930 
931 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
932 	    NULL, &nv, NULL) != 0)
933 		return (ENOENT);
934 
935 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
936 	if (rv == 0) {
937 		ptr = (char **)data;
938 		asprintf(ptr, "%.*s", size, str);
939 		if (*data == NULL)
940 			rv = ENOMEM;
941 	}
942 	nvlist_destroy(nv);
943 	return (rv);
944 }
945 
946 static int
947 zfs_nvstore_setter(void *vdev, int type, const char *name,
948     const void *data, size_t size)
949 {
950 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
951 	spa_t *spa;
952 	nvlist_t *nv;
953 	int rv;
954 	bool env_set = true;
955 
956 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
957 		return (ENOTSUP);
958 
959 	if ((spa = spa_find_by_dev(dev)) == NULL)
960 		return (ENXIO);
961 
962 	if (spa->spa_bootenv == NULL)
963 		return (ENXIO);
964 
965 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
966 	    NULL, &nv, NULL) != 0) {
967 		nv = nvlist_create(NV_UNIQUE_NAME);
968 		if (nv == NULL)
969 			return (ENOMEM);
970 	}
971 
972 	rv = 0;
973 	switch (type) {
974         case DATA_TYPE_INT8:
975 		if (size != sizeof (int8_t)) {
976 			rv = EINVAL;
977 			break;
978 		}
979 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
980 		break;
981 
982         case DATA_TYPE_INT16:
983 		if (size != sizeof (int16_t)) {
984 			rv = EINVAL;
985 			break;
986 		}
987 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
988 		break;
989 
990         case DATA_TYPE_INT32:
991 		if (size != sizeof (int32_t)) {
992 			rv = EINVAL;
993 			break;
994 		}
995 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
996 		break;
997 
998         case DATA_TYPE_INT64:
999 		if (size != sizeof (int64_t)) {
1000 			rv = EINVAL;
1001 			break;
1002 		}
1003 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
1004 		break;
1005 
1006         case DATA_TYPE_BYTE:
1007 		if (size != sizeof (uint8_t)) {
1008 			rv = EINVAL;
1009 			break;
1010 		}
1011 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
1012 		break;
1013 
1014         case DATA_TYPE_UINT8:
1015 		if (size != sizeof (uint8_t)) {
1016 			rv = EINVAL;
1017 			break;
1018 		}
1019 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
1020 		break;
1021 
1022         case DATA_TYPE_UINT16:
1023 		if (size != sizeof (uint16_t)) {
1024 			rv = EINVAL;
1025 			break;
1026 		}
1027 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
1028 		break;
1029 
1030         case DATA_TYPE_UINT32:
1031 		if (size != sizeof (uint32_t)) {
1032 			rv = EINVAL;
1033 			break;
1034 		}
1035 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1036 		break;
1037 
1038         case DATA_TYPE_UINT64:
1039 		if (size != sizeof (uint64_t)) {
1040 			rv = EINVAL;
1041 			break;
1042 		}
1043 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1044 		break;
1045 
1046         case DATA_TYPE_STRING:
1047 		rv = nvlist_add_string(nv, name, data);
1048 		break;
1049 
1050 	case DATA_TYPE_BOOLEAN_VALUE:
1051 		if (size != sizeof (boolean_t)) {
1052 			rv = EINVAL;
1053 			break;
1054 		}
1055 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1056 		break;
1057 
1058 	default:
1059 		rv = EINVAL;
1060 		break;
1061 	}
1062 
1063 	if (rv == 0) {
1064 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1065 		if (rv == 0) {
1066 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1067 		}
1068 		if (rv == 0) {
1069 			if (env_set) {
1070 				rv = zfs_nvstore_setenv(vdev,
1071 				    nvpair_find(nv, name));
1072 			} else {
1073 				env_discard(env_getenv(name));
1074 				rv = 0;
1075 			}
1076 		}
1077 	}
1078 
1079 	nvlist_destroy(nv);
1080 	return (rv);
1081 }
1082 
/*
 * Parse data as a signed 64-bit integer with strtoll() in base 0 (so the
 * usual 0x/0 prefixes are honoured).  The whole string must be consumed
 * and must not be empty.
 *
 * Returns 0 and stores the value in *ip, or EINVAL (leaving *ip
 * untouched) on an empty string, trailing characters or a strtoll()
 * error.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *stop;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &stop, 0);
	if (errno == 0 && data[0] != '\0' && stop[0] == '\0') {
		*ip = parsed;
		return (0);
	}
	return (EINVAL);
}
1097 
/*
 * Parse data as an unsigned 64-bit integer with strtoull() in base 0.
 * The whole string must be consumed and must not be empty.
 *
 * Returns 0 and stores the value in *ip, or EINVAL (leaving *ip
 * untouched) on an empty string, trailing characters or a strtoull()
 * error.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *stop;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &stop, 0);
	if (errno == 0 && data[0] != '\0' && stop[0] == '\0') {
		*ip = parsed;
		return (0);
	}
	return (EINVAL);
}
1112 
1113 /*
1114  * Translate textual data to data type. If type is not set, and we are
1115  * creating new pair, use DATA_TYPE_STRING.
1116  */
1117 static int
1118 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1119     const char *data)
1120 {
1121 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1122 	spa_t *spa;
1123 	nvlist_t *nv;
1124 	int rv;
1125 	data_type_t dt;
1126 	int64_t val;
1127 	uint64_t uval;
1128 
1129 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1130 		return (ENOTSUP);
1131 
1132 	if ((spa = spa_find_by_dev(dev)) == NULL)
1133 		return (ENXIO);
1134 
1135 	if (spa->spa_bootenv == NULL)
1136 		return (ENXIO);
1137 
1138 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1139 	    NULL, &nv, NULL) != 0) {
1140 		nv = NULL;
1141 	}
1142 
1143 	if (type == NULL) {
1144 		nvp_header_t *nvh;
1145 
1146 		/*
1147 		 * if there is no existing pair, default to string.
1148 		 * Otherwise, use type from existing pair.
1149 		 */
1150 		nvh = nvpair_find(nv, name);
1151 		if (nvh == NULL) {
1152 			dt = DATA_TYPE_STRING;
1153 		} else {
1154 			nv_string_t *nvp_name;
1155 			nv_pair_data_t *nvp_data;
1156 
1157 			nvp_name = (nv_string_t *)(nvh + 1);
1158 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1159 			    NV_ALIGN4(nvp_name->nv_size));
1160 			dt = nvp_data->nv_type;
1161 		}
1162 	} else {
1163 		dt = nvpair_type_from_name(type);
1164 	}
1165 	nvlist_destroy(nv);
1166 
1167 	rv = 0;
1168 	switch (dt) {
1169         case DATA_TYPE_INT8:
1170 		rv = get_int64(data, &val);
1171 		if (rv == 0) {
1172 			int8_t v = val;
1173 
1174 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1175 		}
1176 		break;
1177         case DATA_TYPE_INT16:
1178 		rv = get_int64(data, &val);
1179 		if (rv == 0) {
1180 			int16_t v = val;
1181 
1182 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1183 		}
1184 		break;
1185         case DATA_TYPE_INT32:
1186 		rv = get_int64(data, &val);
1187 		if (rv == 0) {
1188 			int32_t v = val;
1189 
1190 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1191 		}
1192 		break;
1193         case DATA_TYPE_INT64:
1194 		rv = get_int64(data, &val);
1195 		if (rv == 0) {
1196 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1197 			    sizeof (val));
1198 		}
1199 		break;
1200 
1201         case DATA_TYPE_BYTE:
1202 		rv = get_uint64(data, &uval);
1203 		if (rv == 0) {
1204 			uint8_t v = uval;
1205 
1206 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1207 		}
1208 		break;
1209 
1210         case DATA_TYPE_UINT8:
1211 		rv = get_uint64(data, &uval);
1212 		if (rv == 0) {
1213 			uint8_t v = uval;
1214 
1215 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1216 		}
1217 		break;
1218 
1219         case DATA_TYPE_UINT16:
1220 		rv = get_uint64(data, &uval);
1221 		if (rv == 0) {
1222 			uint16_t v = uval;
1223 
1224 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1225 		}
1226 		break;
1227 
1228         case DATA_TYPE_UINT32:
1229 		rv = get_uint64(data, &uval);
1230 		if (rv == 0) {
1231 			uint32_t v = uval;
1232 
1233 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1234 		}
1235 		break;
1236 
1237         case DATA_TYPE_UINT64:
1238 		rv = get_uint64(data, &uval);
1239 		if (rv == 0) {
1240 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1241 			    sizeof (uval));
1242 		}
1243 		break;
1244 
1245         case DATA_TYPE_STRING:
1246 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1247 		break;
1248 
1249 	case DATA_TYPE_BOOLEAN_VALUE:
1250 		rv = get_int64(data, &val);
1251 		if (rv == 0) {
1252 			boolean_t v = val;
1253 
1254 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1255 		}
1256 
1257 	default:
1258 		rv = EINVAL;
1259 	}
1260 	return (rv);
1261 }
1262 
1263 static int
1264 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1265 {
1266 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1267 	spa_t *spa;
1268 	nvlist_t *nv;
1269 	int rv;
1270 
1271 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1272 		return (ENOTSUP);
1273 
1274 	if ((spa = spa_find_by_dev(dev)) == NULL)
1275 		return (ENXIO);
1276 
1277 	if (spa->spa_bootenv == NULL)
1278 		return (ENXIO);
1279 
1280 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1281 	    NULL, &nv, NULL) != 0)
1282 		return (ENOENT);
1283 
1284 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1285 	if (rv == 0) {
1286 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1287 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1288 			    DATA_TYPE_NVLIST);
1289 		} else {
1290 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1291 			    OS_NVSTORE, nv);
1292 		}
1293 		if (rv == 0)
1294 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1295 	}
1296 
1297 	if (unset_env)
1298 		env_discard(env_getenv(name));
1299 	return (rv);
1300 }
1301 
/*
 * nvstore "unset" callback (see nvstore_zfs_cb): remove 'name' from the
 * pool's nvstore and ask the implementation to discard the matching
 * loader environment variable too.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	const bool discard_env = true;

	return (zfs_nvstore_unset_impl(vdev, name, discard_env));
}
1307 
1308 static int
1309 zfs_nvstore_print(void *vdev __unused, void *ptr)
1310 {
1311 
1312 	nvpair_print(ptr, 0);
1313 	return (0);
1314 }
1315 
1316 /*
1317  * Create environment variable from nvpair.
1318  * set hook will update nvstore with new value, unset hook will remove
1319  * variable from nvstore.
1320  */
1321 static int
1322 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1323 {
1324 	nvp_header_t *nvh = ptr;
1325 	nv_string_t *nvp_name, *nvp_value;
1326 	nv_pair_data_t *nvp_data;
1327 	char *name, *value;
1328 	int rv = 0;
1329 
1330 	if (nvh == NULL)
1331 		return (ENOENT);
1332 
1333 	nvp_name = (nv_string_t *)(nvh + 1);
1334 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1335 	    NV_ALIGN4(nvp_name->nv_size));
1336 
1337 	if ((name = nvstring_get(nvp_name)) == NULL)
1338 		return (ENOMEM);
1339 
1340 	value = NULL;
1341 	switch (nvp_data->nv_type) {
1342 	case DATA_TYPE_BYTE:
1343 	case DATA_TYPE_UINT8:
1344 		(void) asprintf(&value, "%uc",
1345 		    *(unsigned *)&nvp_data->nv_data[0]);
1346 		if (value == NULL)
1347 			rv = ENOMEM;
1348 		break;
1349 
1350 	case DATA_TYPE_INT8:
1351 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1352 		if (value == NULL)
1353 			rv = ENOMEM;
1354 		break;
1355 
1356 	case DATA_TYPE_INT16:
1357 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1358 		if (value == NULL)
1359 			rv = ENOMEM;
1360 		break;
1361 
1362 	case DATA_TYPE_UINT16:
1363 		(void) asprintf(&value, "%hu",
1364 		    *(unsigned short *)&nvp_data->nv_data[0]);
1365 		if (value == NULL)
1366 			rv = ENOMEM;
1367 		break;
1368 
1369 	case DATA_TYPE_BOOLEAN_VALUE:
1370 	case DATA_TYPE_INT32:
1371 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1372 		if (value == NULL)
1373 			rv = ENOMEM;
1374 		break;
1375 
1376 	case DATA_TYPE_UINT32:
1377 		(void) asprintf(&value, "%u",
1378 		    *(unsigned *)&nvp_data->nv_data[0]);
1379 		if (value == NULL)
1380 			rv = ENOMEM;
1381 		break;
1382 
1383 	case DATA_TYPE_INT64:
1384 		(void) asprintf(&value, "%jd",
1385 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1386 		if (value == NULL)
1387 			rv = ENOMEM;
1388 		break;
1389 
1390 	case DATA_TYPE_UINT64:
1391 		(void) asprintf(&value, "%ju",
1392 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1393 		if (value == NULL)
1394 			rv = ENOMEM;
1395 		break;
1396 
1397 	case DATA_TYPE_STRING:
1398 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1399 		if ((value = nvstring_get(nvp_value)) == NULL) {
1400 			rv = ENOMEM;
1401 			break;
1402 		}
1403 		break;
1404 
1405 	default:
1406 		rv = EINVAL;
1407 		break;
1408 	}
1409 
1410 	if (value != NULL) {
1411 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1412 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1413 		free(value);
1414 	}
1415 	free(name);
1416 	return (rv);
1417 }
1418 
1419 static int
1420 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1421 {
1422 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1423 	spa_t *spa;
1424 	nvlist_t *nv;
1425 	nvp_header_t *nvh;
1426 	int rv;
1427 
1428 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1429 		return (ENOTSUP);
1430 
1431 	if ((spa = spa_find_by_dev(dev)) == NULL)
1432 		return (ENXIO);
1433 
1434 	if (spa->spa_bootenv == NULL)
1435 		return (ENXIO);
1436 
1437 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1438 	    NULL, &nv, NULL) != 0)
1439 		return (ENOENT);
1440 
1441 	rv = 0;
1442 	nvh = NULL;
1443 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1444 		rv = cb(vdev, nvh);
1445 		if (rv != 0)
1446 			break;
1447 	}
1448 	return (rv);
1449 }
1450 
1451 nvs_callbacks_t nvstore_zfs_cb = {
1452 	.nvs_getter = zfs_nvstore_getter,
1453 	.nvs_setter = zfs_nvstore_setter,
1454 	.nvs_setter_str = zfs_nvstore_setter_str,
1455 	.nvs_unset = zfs_nvstore_unset,
1456 	.nvs_print = zfs_nvstore_print,
1457 	.nvs_iterate = zfs_nvstore_iterate
1458 };
1459 
1460 int
1461 zfs_attach_nvstore(void *vdev)
1462 {
1463 	struct zfs_devdesc *dev = vdev;
1464 	spa_t *spa;
1465 	uint64_t version;
1466 	int rv;
1467 
1468 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1469 		return (ENOTSUP);
1470 
1471 	if ((spa = spa_find_by_dev(dev)) == NULL)
1472 		return (ENXIO);
1473 
1474 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1475 	    NULL, &version, NULL);
1476 
1477 	if (rv != 0 || version != VB_NVLIST) {
1478 		return (ENXIO);
1479 	}
1480 
1481 	dev = malloc(sizeof (*dev));
1482 	if (dev == NULL)
1483 		return (ENOMEM);
1484 	memcpy(dev, vdev, sizeof (*dev));
1485 
1486 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1487 	if (rv != 0)
1488 		free(dev);
1489 	else
1490 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1491 	return (rv);
1492 }
1493 
1494 int
1495 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
1496 {
1497 	struct ptable *table;
1498 	struct zfs_probe_args pa;
1499 	uint64_t mediasz;
1500 	int ret;
1501 
1502 	if (pool_guid)
1503 		*pool_guid = 0;
1504 	pa.fd = open(devname, O_RDWR);
1505 	if (pa.fd == -1)
1506 		return (ENXIO);
1507 	/* Probe the whole disk */
1508 	ret = zfs_probe(pa.fd, pool_guid);
1509 	if (ret == 0)
1510 		return (0);
1511 
1512 	/* Probe each partition */
1513 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1514 	if (ret == 0)
1515 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1516 	if (ret == 0) {
1517 		pa.devname = devname;
1518 		pa.pool_guid = pool_guid;
1519 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1520 		    zfs_diskread);
1521 		if (table != NULL) {
1522 			ptable_iterate(table, &pa, zfs_probe_partition);
1523 			ptable_close(table);
1524 		}
1525 	}
1526 	close(pa.fd);
1527 	if (pool_guid && *pool_guid == 0)
1528 		ret = ENXIO;
1529 	return (ret);
1530 }
1531 
1532 /*
1533  * Print information about ZFS pools
1534  */
1535 static int
1536 zfs_dev_print(int verbose)
1537 {
1538 	spa_t *spa;
1539 	char line[80];
1540 	int ret = 0;
1541 
1542 	if (STAILQ_EMPTY(&zfs_pools))
1543 		return (0);
1544 
1545 	printf("%s devices:", zfs_dev.dv_name);
1546 	if ((ret = pager_output("\n")) != 0)
1547 		return (ret);
1548 
1549 	if (verbose) {
1550 		return (spa_all_status());
1551 	}
1552 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1553 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
1554 		ret = pager_output(line);
1555 		if (ret != 0)
1556 			break;
1557 	}
1558 	return (ret);
1559 }
1560 
1561 /*
1562  * Attempt to open the pool described by (dev) for use by (f).
1563  */
1564 static int
1565 zfs_dev_open(struct open_file *f, ...)
1566 {
1567 	va_list		args;
1568 	struct zfs_devdesc	*dev;
1569 	struct zfsmount	*mount;
1570 	spa_t		*spa;
1571 	int		rv;
1572 
1573 	va_start(args, f);
1574 	dev = va_arg(args, struct zfs_devdesc *);
1575 	va_end(args);
1576 
1577 	if ((spa = spa_find_by_dev(dev)) == NULL)
1578 		return (ENXIO);
1579 
1580 	STAILQ_FOREACH(mount, &zfsmount, next) {
1581 		if (spa->spa_guid == mount->spa->spa_guid)
1582 			break;
1583 	}
1584 
1585 	rv = 0;
1586 	/* This device is not set as currdev, mount us private copy. */
1587 	if (mount == NULL)
1588 		rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount);
1589 
1590 	if (rv == 0) {
1591 		dev->dd.d_opendata = mount;
1592 	}
1593 	return (rv);
1594 }
1595 
1596 static int
1597 zfs_dev_close(struct open_file *f)
1598 {
1599 	struct devdesc *dev;
1600 	struct zfsmount	*mnt, *mount;
1601 
1602 	dev = f->f_devdata;
1603 	mnt = dev->d_opendata;
1604 
1605 	STAILQ_FOREACH(mount, &zfsmount, next) {
1606 		if (mnt->spa->spa_guid == mount->spa->spa_guid)
1607 			break;
1608 	}
1609 
1610 	/* XXX */
1611 	return (0);
1612 }
1613 
1614 static int
1615 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1616 {
1617 
1618 	return (ENOSYS);
1619 }
1620 
1621 struct devsw zfs_dev = {
1622 	.dv_name = "zfs",
1623 	.dv_type = DEVT_ZFS,
1624 	.dv_init = zfs_dev_init,
1625 	.dv_strategy = zfs_dev_strategy,
1626 	.dv_open = zfs_dev_open,
1627 	.dv_close = zfs_dev_close,
1628 	.dv_ioctl = noioctl,
1629 	.dv_print = zfs_dev_print,
1630 	.dv_cleanup = nullsys,
1631 	.dv_fmtdev = zfs_fmtdev,
1632 	.dv_parsedev = zfs_parsedev,
1633 };
1634 
1635 static int
1636 zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path)
1637 {
1638 	static char	rootname[ZFS_MAXNAMELEN];
1639 	static char	poolname[ZFS_MAXNAMELEN];
1640 	spa_t		*spa;
1641 	const char	*end;
1642 	const char	*np;
1643 	const char	*sep;
1644 	int		rv;
1645 	struct zfs_devdesc *dev;
1646 
1647 	np = devspec + 3;			/* Skip the leading 'zfs' */
1648 	if (*np != ':')
1649 		return (EINVAL);
1650 	np++;
1651 	end = strrchr(np, ':');
1652 	if (end == NULL)
1653 		return (EINVAL);
1654 	sep = strchr(np, '/');
1655 	if (sep == NULL || sep >= end)
1656 		sep = end;
1657 	memcpy(poolname, np, sep - np);
1658 	poolname[sep - np] = '\0';
1659 	if (sep < end) {
1660 		sep++;
1661 		memcpy(rootname, sep, end - sep);
1662 		rootname[end - sep] = '\0';
1663 	}
1664 	else
1665 		rootname[0] = '\0';
1666 
1667 	spa = spa_find_by_name(poolname);
1668 	if (!spa)
1669 		return (ENXIO);
1670 	dev = malloc(sizeof(*dev));
1671 	if (dev == NULL)
1672 		return (ENOMEM);
1673 	dev->pool_guid = spa->spa_guid;
1674 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1675 	if (rv != 0) {
1676 		free(dev);
1677 		return (rv);
1678 	}
1679 	if (path != NULL)
1680 		*path = (*end == '\0') ? end : end + 1;
1681 	dev->dd.d_dev = &zfs_dev;
1682 	*idev = &dev->dd;
1683 	return (0);
1684 }
1685 
1686 char *
1687 zfs_fmtdev(struct devdesc *vdev)
1688 {
1689 	static char		rootname[ZFS_MAXNAMELEN];
1690 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1691 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1692 	spa_t			*spa;
1693 
1694 	buf[0] = '\0';
1695 	if (vdev->d_dev->dv_type != DEVT_ZFS)
1696 		return (buf);
1697 
1698 	/* Do we have any pools? */
1699 	spa = STAILQ_FIRST(&zfs_pools);
1700 	if (spa == NULL)
1701 		return (buf);
1702 
1703 	if (dev->pool_guid == 0)
1704 		dev->pool_guid = spa->spa_guid;
1705 	else
1706 		spa = spa_find_by_guid(dev->pool_guid);
1707 
1708 	if (spa == NULL) {
1709 		printf("ZFS: can't find pool by guid\n");
1710 		return (buf);
1711 	}
1712 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1713 		printf("ZFS: can't find root filesystem\n");
1714 		return (buf);
1715 	}
1716 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1717 		printf("ZFS: can't find filesystem by guid\n");
1718 		return (buf);
1719 	}
1720 
1721 	if (rootname[0] == '\0')
1722 		snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1723 		    spa->spa_name);
1724 	else
1725 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1726 		    spa->spa_name, rootname);
1727 	return (buf);
1728 }
1729 
/*
 * Split "pool[/dataset]" into its pool and dataset parts.
 *
 * The text before the first '/' (or the whole string when there is none)
 * is copied, NUL-terminated, into poolname, which holds 'size' bytes. If
 * dsnamep is not NULL, *dsnamep is set to the text after the first '/',
 * or to "" when there is no '/'; it points into 'name' or at a string
 * literal, not at a copy.
 *
 * Returns 0 on success and EINVAL if the pool name does not fit.
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *rest;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		rest = "";
	} else {
		poollen = slash - name;
		rest = slash + 1;
	}

	/* Need room for the pool name plus its terminating NUL. */
	if (poollen + 1 > size)
		return (EINVAL);

	strlcpy(poolname, name, poollen + 1);

	if (dsnamep != NULL)
		*dsnamep = rest;

	return (0);
}
1758 
1759 int
1760 zfs_list(const char *name)
1761 {
1762 	static char	poolname[ZFS_MAXNAMELEN];
1763 	uint64_t	objid;
1764 	spa_t		*spa;
1765 	const char	*dsname;
1766 	int		rv;
1767 
1768 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1769 		return (EINVAL);
1770 
1771 	spa = spa_find_by_name(poolname);
1772 	if (!spa)
1773 		return (ENXIO);
1774 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1775 	if (rv != 0)
1776 		return (rv);
1777 
1778 	return (zfs_list_dataset(spa, objid));
1779 }
1780 
1781 void
1782 init_zfs_boot_options(const char *currdev_in)
1783 {
1784 	char poolname[ZFS_MAXNAMELEN];
1785 	char *beroot, *currdev;
1786 	spa_t *spa;
1787 	int currdev_len;
1788 	const char *dsname;
1789 
1790 	currdev = NULL;
1791 	currdev_len = strlen(currdev_in);
1792 	if (currdev_len == 0)
1793 		return;
1794 	if (strncmp(currdev_in, "zfs:", 4) != 0)
1795 		return;
1796 	currdev = strdup(currdev_in);
1797 	if (currdev == NULL)
1798 		return;
1799 	/* Remove the trailing : */
1800 	currdev[currdev_len - 1] = '\0';
1801 
1802 	setenv("zfs_be_active", currdev, 1);
1803 	setenv("zfs_be_currpage", "1", 1);
1804 	/* Remove the last element (current bootenv) */
1805 	beroot = strrchr(currdev, '/');
1806 	if (beroot != NULL)
1807 		beroot[0] = '\0';
1808 	beroot = strchr(currdev, ':') + 1;
1809 	setenv("zfs_be_root", beroot, 1);
1810 
1811 	if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1812 		return;
1813 
1814 	spa = spa_find_by_name(poolname);
1815 	if (spa == NULL)
1816 		return;
1817 
1818 	zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1819 	zfs_checkpoints_initial(spa, beroot, dsname);
1820 
1821 	free(currdev);
1822 }
1823 
1824 static void
1825 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1826 {
1827 	char envname[32];
1828 
1829 	if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1830 		snprintf(envname, sizeof(envname), "zpool_checkpoint");
1831 		setenv(envname, name, 1);
1832 
1833 		spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1834 		spa->spa_mos = &spa->spa_mos_checkpoint;
1835 
1836 		zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1837 
1838 		spa->spa_uberblock = &spa->spa_uberblock_master;
1839 		spa->spa_mos = &spa->spa_mos_master;
1840 	}
1841 }
1842 
1843 static void
1844 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1845    const char *dsname, int checkpoint)
1846 {
1847 	char		envname[32], envval[256];
1848 	uint64_t	objid;
1849 	int		bootenvs_idx, rv;
1850 
1851 	SLIST_INIT(&zfs_be_head);
1852 	zfs_env_count = 0;
1853 
1854 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1855 	if (rv != 0)
1856 		return;
1857 
1858 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1859 	bootenvs_idx = 0;
1860 	/* Populate the initial environment variables */
1861 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1862 		/* Enumerate all bootenvs for general usage */
1863 		snprintf(envname, sizeof(envname), "%s[%d]",
1864 		    envprefix, bootenvs_idx);
1865 		snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1866 		    checkpoint ? "!" : "", rootname, zfs_be->name);
1867 		rv = setenv(envname, envval, 1);
1868 		if (rv != 0)
1869 			break;
1870 		bootenvs_idx++;
1871 	}
1872 	snprintf(envname, sizeof(envname), "%s_count", envprefix);
1873 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1874 	setenv(envname, envval, 1);
1875 
1876 	/* Clean up the SLIST of ZFS BEs */
1877 	while (!SLIST_EMPTY(&zfs_be_head)) {
1878 		zfs_be = SLIST_FIRST(&zfs_be_head);
1879 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1880 		free(zfs_be->name);
1881 		free(zfs_be);
1882 	}
1883 }
1884 
1885 int
1886 zfs_bootenv(const char *name)
1887 {
1888 	char		poolname[ZFS_MAXNAMELEN], *root;
1889 	const char	*dsname;
1890 	char		becount[4];
1891 	uint64_t	objid;
1892 	spa_t		*spa;
1893 	int		rv, pages, perpage, currpage;
1894 
1895 	if (name == NULL)
1896 		return (EINVAL);
1897 	if ((root = getenv("zfs_be_root")) == NULL)
1898 		return (EINVAL);
1899 
1900 	if (strcmp(name, root) != 0) {
1901 		if (setenv("zfs_be_root", name, 1) != 0)
1902 			return (ENOMEM);
1903 	}
1904 
1905 	SLIST_INIT(&zfs_be_head);
1906 	zfs_env_count = 0;
1907 
1908 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1909 		return (EINVAL);
1910 
1911 	spa = spa_find_by_name(poolname);
1912 	if (!spa)
1913 		return (ENXIO);
1914 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1915 	if (rv != 0)
1916 		return (rv);
1917 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1918 
1919 	/* Calculate and store the number of pages of BEs */
1920 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1921 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1922 	snprintf(becount, 4, "%d", pages);
1923 	if (setenv("zfs_be_pages", becount, 1) != 0)
1924 		return (ENOMEM);
1925 
1926 	/* Roll over the page counter if it has exceeded the maximum */
1927 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1928 	if (currpage > pages) {
1929 		if (setenv("zfs_be_currpage", "1", 1) != 0)
1930 			return (ENOMEM);
1931 	}
1932 
1933 	/* Populate the menu environment variables */
1934 	zfs_set_env();
1935 
1936 	/* Clean up the SLIST of ZFS BEs */
1937 	while (!SLIST_EMPTY(&zfs_be_head)) {
1938 		zfs_be = SLIST_FIRST(&zfs_be_head);
1939 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1940 		free(zfs_be->name);
1941 		free(zfs_be);
1942 	}
1943 
1944 	return (rv);
1945 }
1946 
1947 int
1948 zfs_belist_add(const char *name, uint64_t value __unused)
1949 {
1950 
1951 	/* Skip special datasets that start with a $ character */
1952 	if (strncmp(name, "$", 1) == 0) {
1953 		return (0);
1954 	}
1955 	/* Add the boot environment to the head of the SLIST */
1956 	zfs_be = malloc(sizeof(struct zfs_be_entry));
1957 	if (zfs_be == NULL) {
1958 		return (ENOMEM);
1959 	}
1960 	zfs_be->name = strdup(name);
1961 	if (zfs_be->name == NULL) {
1962 		free(zfs_be);
1963 		return (ENOMEM);
1964 	}
1965 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1966 	zfs_env_count++;
1967 
1968 	return (0);
1969 }
1970 
1971 int
1972 zfs_set_env(void)
1973 {
1974 	char envname[32], envval[256];
1975 	char *beroot, *pagenum;
1976 	int rv, page, ctr;
1977 
1978 	beroot = getenv("zfs_be_root");
1979 	if (beroot == NULL) {
1980 		return (1);
1981 	}
1982 
1983 	pagenum = getenv("zfs_be_currpage");
1984 	if (pagenum != NULL) {
1985 		page = strtol(pagenum, NULL, 10);
1986 	} else {
1987 		page = 1;
1988 	}
1989 
1990 	ctr = 1;
1991 	rv = 0;
1992 	zfs_env_index = ZFS_BE_FIRST;
1993 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1994 		/* Skip to the requested page number */
1995 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1996 			ctr++;
1997 			continue;
1998 		}
1999 
2000 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2001 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
2002 		rv = setenv(envname, envval, 1);
2003 		if (rv != 0) {
2004 			break;
2005 		}
2006 
2007 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2008 		rv = setenv(envname, envval, 1);
2009 		if (rv != 0){
2010 			break;
2011 		}
2012 
2013 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2014 		rv = setenv(envname, "set_bootenv", 1);
2015 		if (rv != 0){
2016 			break;
2017 		}
2018 
2019 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2020 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
2021 		rv = setenv(envname, envval, 1);
2022 		if (rv != 0){
2023 			break;
2024 		}
2025 
2026 		zfs_env_index++;
2027 		if (zfs_env_index > ZFS_BE_LAST) {
2028 			break;
2029 		}
2030 
2031 	}
2032 
2033 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2034 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2035 		(void)unsetenv(envname);
2036 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2037 		(void)unsetenv(envname);
2038 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2039 		(void)unsetenv(envname);
2040 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2041 		(void)unsetenv(envname);
2042 	}
2043 
2044 	return (rv);
2045 }
2046