xref: /freebsd/stand/libsa/zfs/zfs.c (revision 71625ec9ad2a9bc8c09784fbd23b759830e0ee5f)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 /*
31  *	Stand-alone file reading package.
32  */
33 
34 #include <stand.h>
35 #include <sys/disk.h>
36 #include <sys/param.h>
37 #include <sys/time.h>
38 #include <sys/queue.h>
39 #include <part.h>
40 #include <stddef.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <bootstrap.h>
44 
45 #include "libzfs.h"
46 
47 #include "zfsimpl.c"
48 
49 /* Define the range of indexes (first and last) to be populated with ZFS Boot Environments */
50 #define		ZFS_BE_FIRST	4	/* first index of the range */
51 #define		ZFS_BE_LAST	8	/* last index of the range */
52 
/* File system entry points; they are wired into zfs_fsops below. */
53 static int	zfs_open(const char *path, struct open_file *f);
54 static int	zfs_close(struct open_file *f);
55 static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
56 static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
57 static int	zfs_stat(struct open_file *f, struct stat *sb);
58 static int	zfs_readdir(struct open_file *f, struct dirent *d);
59 static int	zfs_mount(const char *dev, const char *path, void **data);
60 static int	zfs_unmount(const char *dev, void *data);
61 
/* NOTE(review): defined outside the visible part of the file. */
62 static void	zfs_bootenv_initial(const char *envname, spa_t *spa,
63 		    const char *name, const char *dsname, int checkpoint);
64 static void	zfs_checkpoints_initial(spa_t *spa, const char *name,
65 		    const char *dsname);
66 
/* Device specification parser; used by zfs_mount() to obtain a devdesc. */
67 static int	zfs_parsedev(struct devdesc **idev, const char *devspec,
68 		    const char **path);
69 
/* The ZFS device switch entry; zfs_open() only accepts files on this device. */
70 struct devsw zfs_dev;
71 
/*
 * File system operations table for ZFS.  Writing is routed to null_write;
 * every other operation is implemented in this file.
 */
72 struct fs_ops zfs_fsops = {
73 	.fs_name = "zfs",
74 	.fo_open = zfs_open,
75 	.fo_close = zfs_close,
76 	.fo_read = zfs_read,
77 	.fo_write = null_write,
78 	.fo_seek = zfs_seek,
79 	.fo_stat = zfs_stat,
80 	.fo_readdir = zfs_readdir,
81 	.fo_mount = zfs_mount,
82 	.fo_unmount = zfs_unmount
83 };
84 
85 /*
86  * In-core open file.  Allocated zero-filled by zfs_open() and hung off
 * open_file.f_fsdata; released by zfs_close().
87  */
88 struct file {
89 	off_t		f_seekp;	/* seek pointer */
90 	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() at open */
91 	uint64_t	f_zap_type;	/* zap type for readdir */
92 	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
93 	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
94 };
95 
/*
 * NOTE(review): neither counter is referenced in the visible part of the
 * file; presumably boot environment paging state -- confirm against users.
 */
96 static int	zfs_env_index;
97 static int	zfs_env_count;
98 
/* Singly-linked list of zfs_be_entry records, with its statically initialised head. */
99 SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
100 struct zfs_be_list *zfs_be_headp;
/* One list element: a name string plus the SLIST linkage. */
101 struct zfs_be_entry {
102 	char *name;
103 	SLIST_ENTRY(zfs_be_entry) entries;
104 } *zfs_be, *zfs_be_tmp;
105 
106 /*
107  * Open a file.
108  */
109 static int
110 zfs_open(const char *upath, struct open_file *f)
111 {
112 	struct devdesc *dev = f->f_devdata;
113 	struct zfsmount *mount = dev->d_opendata;
114 	struct file *fp;
115 	int rc;
116 
117 	if (f->f_dev != &zfs_dev)
118 		return (EINVAL);
119 
120 	/* allocate file system specific data structure */
121 	fp = calloc(1, sizeof(struct file));
122 	if (fp == NULL)
123 		return (ENOMEM);
124 	f->f_fsdata = fp;
125 
126 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
127 	fp->f_seekp = 0;
128 	if (rc) {
129 		f->f_fsdata = NULL;
130 		free(fp);
131 	}
132 	return (rc);
133 }
134 
135 static int
136 zfs_close(struct open_file *f)
137 {
138 	struct file *fp = (struct file *)f->f_fsdata;
139 
140 	dnode_cache_obj = NULL;
141 	f->f_fsdata = NULL;
142 
143 	free(fp);
144 	return (0);
145 }
146 
147 /*
148  * Copy a portion of a file into kernel memory.
149  * Cross block boundaries when necessary.
150  */
151 static int
152 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
153 {
154 	struct devdesc *dev = f->f_devdata;
155 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
156 	struct file *fp = (struct file *)f->f_fsdata;
157 	struct stat sb;
158 	size_t n;
159 	int rc;
160 
161 	rc = zfs_stat(f, &sb);
162 	if (rc)
163 		return (rc);
164 	n = size;
165 	if (fp->f_seekp + n > sb.st_size)
166 		n = sb.st_size - fp->f_seekp;
167 
168 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
169 	if (rc)
170 		return (rc);
171 
172 	if (0) {
173 	    int i;
174 	    for (i = 0; i < n; i++)
175 		putchar(((char*) start)[i]);
176 	}
177 	fp->f_seekp += n;
178 	if (resid)
179 		*resid = size - n;
180 
181 	return (0);
182 }
183 
184 static off_t
185 zfs_seek(struct open_file *f, off_t offset, int where)
186 {
187 	struct file *fp = (struct file *)f->f_fsdata;
188 
189 	switch (where) {
190 	case SEEK_SET:
191 		fp->f_seekp = offset;
192 		break;
193 	case SEEK_CUR:
194 		fp->f_seekp += offset;
195 		break;
196 	case SEEK_END:
197 	    {
198 		struct stat sb;
199 		int error;
200 
201 		error = zfs_stat(f, &sb);
202 		if (error != 0) {
203 			errno = error;
204 			return (-1);
205 		}
206 		fp->f_seekp = sb.st_size - offset;
207 		break;
208 	    }
209 	default:
210 		errno = EINVAL;
211 		return (-1);
212 	}
213 	return (fp->f_seekp);
214 }
215 
216 static int
217 zfs_stat(struct open_file *f, struct stat *sb)
218 {
219 	struct devdesc *dev = f->f_devdata;
220 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
221 	struct file *fp = (struct file *)f->f_fsdata;
222 
223 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
224 }
225 
226 static int
227 zfs_readdir(struct open_file *f, struct dirent *d)
228 {
229 	struct devdesc *dev = f->f_devdata;
230 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
231 	struct file *fp = (struct file *)f->f_fsdata;
232 	mzap_ent_phys_t mze;
233 	struct stat sb;
234 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
235 	int rc;
236 
237 	rc = zfs_stat(f, &sb);
238 	if (rc)
239 		return (rc);
240 	if (!S_ISDIR(sb.st_mode))
241 		return (ENOTDIR);
242 
243 	/*
244 	 * If this is the first read, get the zap type.
245 	 */
246 	if (fp->f_seekp == 0) {
247 		rc = dnode_read(spa, &fp->f_dnode,
248 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
249 		if (rc)
250 			return (rc);
251 
252 		if (fp->f_zap_type == ZBT_MICRO) {
253 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
254 		} else {
255 			rc = dnode_read(spa, &fp->f_dnode,
256 					offsetof(zap_phys_t, zap_num_leafs),
257 					&fp->f_num_leafs,
258 					sizeof(fp->f_num_leafs));
259 			if (rc)
260 				return (rc);
261 
262 			fp->f_seekp = bsize;
263 			fp->f_zap_leaf = malloc(bsize);
264 			if (fp->f_zap_leaf == NULL)
265 				return (ENOMEM);
266 			rc = dnode_read(spa, &fp->f_dnode,
267 					fp->f_seekp,
268 					fp->f_zap_leaf,
269 					bsize);
270 			if (rc)
271 				return (rc);
272 		}
273 	}
274 
275 	if (fp->f_zap_type == ZBT_MICRO) {
276 	mzap_next:
277 		if (fp->f_seekp >= bsize)
278 			return (ENOENT);
279 
280 		rc = dnode_read(spa, &fp->f_dnode,
281 				fp->f_seekp, &mze, sizeof(mze));
282 		if (rc)
283 			return (rc);
284 		fp->f_seekp += sizeof(mze);
285 
286 		if (!mze.mze_name[0])
287 			goto mzap_next;
288 
289 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
290 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
291 		strcpy(d->d_name, mze.mze_name);
292 		d->d_namlen = strlen(d->d_name);
293 		return (0);
294 	} else {
295 		zap_leaf_t zl;
296 		zap_leaf_chunk_t *zc, *nc;
297 		int chunk;
298 		size_t namelen;
299 		char *p;
300 		uint64_t value;
301 
302 		/*
303 		 * Initialise this so we can use the ZAP size
304 		 * calculating macros.
305 		 */
306 		zl.l_bs = ilog2(bsize);
307 		zl.l_phys = fp->f_zap_leaf;
308 
309 		/*
310 		 * Figure out which chunk we are currently looking at
311 		 * and consider seeking to the next leaf. We use the
312 		 * low bits of f_seekp as a simple chunk index.
313 		 */
314 	fzap_next:
315 		chunk = fp->f_seekp & (bsize - 1);
316 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
317 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
318 			chunk = 0;
319 
320 			/*
321 			 * Check for EOF and read the new leaf.
322 			 */
323 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
324 				return (ENOENT);
325 
326 			rc = dnode_read(spa, &fp->f_dnode,
327 					fp->f_seekp,
328 					fp->f_zap_leaf,
329 					bsize);
330 			if (rc)
331 				return (rc);
332 		}
333 
334 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
335 		fp->f_seekp++;
336 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
337 			goto fzap_next;
338 
339 		namelen = zc->l_entry.le_name_numints;
340 		if (namelen > sizeof(d->d_name))
341 			namelen = sizeof(d->d_name);
342 
343 		/*
344 		 * Paste the name back together.
345 		 */
346 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
347 		p = d->d_name;
348 		while (namelen > 0) {
349 			int len;
350 			len = namelen;
351 			if (len > ZAP_LEAF_ARRAY_BYTES)
352 				len = ZAP_LEAF_ARRAY_BYTES;
353 			memcpy(p, nc->l_array.la_array, len);
354 			p += len;
355 			namelen -= len;
356 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
357 		}
358 		d->d_name[sizeof(d->d_name) - 1] = 0;
359 
360 		/*
361 		 * Assume the first eight bytes of the value are
362 		 * a uint64_t.
363 		 */
364 		value = fzap_leaf_value(&zl, zc);
365 
366 		d->d_fileno = ZFS_DIRENT_OBJ(value);
367 		d->d_type = ZFS_DIRENT_TYPE(value);
368 		d->d_namlen = strlen(d->d_name);
369 
370 		return (0);
371 	}
372 }
373 
374 static spa_t *
375 spa_find_by_dev(struct zfs_devdesc *dev)
376 {
377 
378 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
379 		return (NULL);
380 
381 	if (dev->pool_guid == 0)
382 		return (STAILQ_FIRST(&zfs_pools));
383 
384 	return (spa_find_by_guid(dev->pool_guid));
385 }
386 
387 /*
388  * if path is NULL, create mount structure, but do not add it to list.
389  */
390 static int
391 zfs_mount(const char *dev, const char *path, void **data)
392 {
393 	struct zfs_devdesc *zfsdev = NULL;
394 	spa_t *spa;
395 	struct zfsmount *mnt = NULL;
396 	int rv;
397 
398 	errno = 0;
399 	rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL);
400 	if (rv != 0) {
401 		return (rv);
402 	}
403 
404 	spa = spa_find_by_dev(zfsdev);
405 	if (spa == NULL) {
406 		rv = ENXIO;
407 		goto err;
408 	}
409 
410 	mnt = calloc(1, sizeof(*mnt));
411 	if (mnt == NULL) {
412 		rv = ENOMEM;
413 		goto err;
414 	}
415 
416 	if (mnt->path != NULL) {
417 		mnt->path = strdup(path);
418 		if (mnt->path == NULL) {
419 			rv = ENOMEM;
420 			goto err;
421 		}
422 	}
423 
424 	rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
425 
426 	if (rv == 0 && mnt->objset.os_type != DMU_OST_ZFS) {
427 		printf("Unexpected object set type %ju\n",
428 		    (uintmax_t)mnt->objset.os_type);
429 		rv = EIO;
430 	}
431 err:
432 	if (rv != 0) {
433 		if (mnt != NULL)
434 			free(mnt->path);
435 		free(mnt);
436 		free(zfsdev);
437 		return (rv);
438 	}
439 
440 	*data = mnt;
441 	if (path != NULL)
442 		STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
443 
444 	free(zfsdev);
445 
446 	return (rv);
447 }
448 
449 static int
450 zfs_unmount(const char *dev, void *data)
451 {
452 	struct zfsmount *mnt = data;
453 
454 	STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
455 	free(mnt->path);
456 	free(mnt);
457 	return (0);
458 }
459 
460 static int
461 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
462 {
463 	int fd, ret;
464 	size_t res, head, tail, total_size, full_sec_size;
465 	unsigned secsz, do_tail_read;
466 	off_t start_sec;
467 	char *outbuf, *bouncebuf;
468 
469 	fd = (uintptr_t) priv;
470 	outbuf = (char *) buf;
471 	bouncebuf = NULL;
472 
473 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
474 	if (ret != 0)
475 		return (ret);
476 
477 	/*
478 	 * Handling reads of arbitrary offset and size - multi-sector case
479 	 * and single-sector case.
480 	 *
481 	 *                        Multi-sector Case
482 	 *                (do_tail_read = true if tail > 0)
483 	 *
484 	 *   |<----------------------total_size--------------------->|
485 	 *   |                                                       |
486 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
487 	 *   |          |                                 |          |
488 	 *   |          |       |<~full_sec_size~>|       |          |
489 	 *   +------------------+                 +------------------+
490 	 *   |          |0101010|     .  .  .     |0101011|          |
491 	 *   +------------------+                 +------------------+
492 	 *         start_sec                         start_sec + n
493 	 *
494 	 *
495 	 *                      Single-sector Case
496 	 *                    (do_tail_read = false)
497 	 *
498 	 *              |<------total_size = secsz----->|
499 	 *              |                               |
500 	 *              |<-head->|<---bytes--->|<-tail->|
501 	 *              +-------------------------------+
502 	 *              |        |0101010101010|        |
503 	 *              +-------------------------------+
504 	 *                          start_sec
505 	 */
506 	start_sec = offset / secsz;
507 	head = offset % secsz;
508 	total_size = roundup2(head + bytes, secsz);
509 	tail = total_size - (head + bytes);
510 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
511 	full_sec_size = total_size;
512 	if (head > 0)
513 		full_sec_size -= secsz;
514 	if (do_tail_read)
515 		full_sec_size -= secsz;
516 
517 	/* Return of partial sector data requires a bounce buffer. */
518 	if ((head > 0) || do_tail_read || bytes < secsz) {
519 		bouncebuf = malloc(secsz);
520 		if (bouncebuf == NULL) {
521 			printf("vdev_read: out of memory\n");
522 			return (ENOMEM);
523 		}
524 	}
525 
526 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
527 		ret = errno;
528 		goto error;
529 	}
530 
531 	/* Partial data return from first sector */
532 	if (head > 0) {
533 		res = read(fd, bouncebuf, secsz);
534 		if (res != secsz) {
535 			ret = EIO;
536 			goto error;
537 		}
538 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
539 		outbuf += min(secsz - head, bytes);
540 	}
541 
542 	/*
543 	 * Full data return from read sectors.
544 	 * Note, there is still corner case where we read
545 	 * from sector boundary, but less than sector size, e.g. reading 512B
546 	 * from 4k sector.
547 	 */
548 	if (full_sec_size > 0) {
549 		if (bytes < full_sec_size) {
550 			res = read(fd, bouncebuf, secsz);
551 			if (res != secsz) {
552 				ret = EIO;
553 				goto error;
554 			}
555 			memcpy(outbuf, bouncebuf, bytes);
556 		} else {
557 			res = read(fd, outbuf, full_sec_size);
558 			if (res != full_sec_size) {
559 				ret = EIO;
560 				goto error;
561 			}
562 			outbuf += full_sec_size;
563 		}
564 	}
565 
566 	/* Partial data return from last sector */
567 	if (do_tail_read) {
568 		res = read(fd, bouncebuf, secsz);
569 		if (res != secsz) {
570 			ret = EIO;
571 			goto error;
572 		}
573 		memcpy(outbuf, bouncebuf, secsz - tail);
574 	}
575 
576 	ret = 0;
577 error:
578 	free(bouncebuf);
579 	return (ret);
580 }
581 
582 static int
583 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
584 {
585 	int fd, ret;
586 	size_t head, tail, total_size, full_sec_size;
587 	unsigned secsz, do_tail_write;
588 	off_t start_sec;
589 	ssize_t res;
590 	char *outbuf, *bouncebuf;
591 
592 	fd = (uintptr_t)vdev->v_priv;
593 	outbuf = (char *)buf;
594 	bouncebuf = NULL;
595 
596 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
597 	if (ret != 0)
598 		return (ret);
599 
600 	start_sec = offset / secsz;
601 	head = offset % secsz;
602 	total_size = roundup2(head + bytes, secsz);
603 	tail = total_size - (head + bytes);
604 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
605 	full_sec_size = total_size;
606 	if (head > 0)
607 		full_sec_size -= secsz;
608 	if (do_tail_write)
609 		full_sec_size -= secsz;
610 
611 	/* Partial sector write requires a bounce buffer. */
612 	if ((head > 0) || do_tail_write || bytes < secsz) {
613 		bouncebuf = malloc(secsz);
614 		if (bouncebuf == NULL) {
615 			printf("vdev_write: out of memory\n");
616 			return (ENOMEM);
617 		}
618 	}
619 
620 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
621 		ret = errno;
622 		goto error;
623 	}
624 
625 	/* Partial data for first sector */
626 	if (head > 0) {
627 		res = read(fd, bouncebuf, secsz);
628 		if ((unsigned)res != secsz) {
629 			ret = EIO;
630 			goto error;
631 		}
632 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
633 		(void) lseek(fd, -secsz, SEEK_CUR);
634 		res = write(fd, bouncebuf, secsz);
635 		if ((unsigned)res != secsz) {
636 			ret = EIO;
637 			goto error;
638 		}
639 		outbuf += min(secsz - head, bytes);
640 	}
641 
642 	/*
643 	 * Full data write to sectors.
644 	 * Note, there is still corner case where we write
645 	 * to sector boundary, but less than sector size, e.g. write 512B
646 	 * to 4k sector.
647 	 */
648 	if (full_sec_size > 0) {
649 		if (bytes < full_sec_size) {
650 			res = read(fd, bouncebuf, secsz);
651 			if ((unsigned)res != secsz) {
652 				ret = EIO;
653 				goto error;
654 			}
655 			memcpy(bouncebuf, outbuf, bytes);
656 			(void) lseek(fd, -secsz, SEEK_CUR);
657 			res = write(fd, bouncebuf, secsz);
658 			if ((unsigned)res != secsz) {
659 				ret = EIO;
660 				goto error;
661 			}
662 		} else {
663 			res = write(fd, outbuf, full_sec_size);
664 			if ((unsigned)res != full_sec_size) {
665 				ret = EIO;
666 				goto error;
667 			}
668 			outbuf += full_sec_size;
669 		}
670 	}
671 
672 	/* Partial data write to last sector */
673 	if (do_tail_write) {
674 		res = read(fd, bouncebuf, secsz);
675 		if ((unsigned)res != secsz) {
676 			ret = EIO;
677 			goto error;
678 		}
679 		memcpy(bouncebuf, outbuf, secsz - tail);
680 		(void) lseek(fd, -secsz, SEEK_CUR);
681 		res = write(fd, bouncebuf, secsz);
682 		if ((unsigned)res != secsz) {
683 			ret = EIO;
684 			goto error;
685 		}
686 	}
687 
688 	ret = 0;
689 error:
690 	free(bouncebuf);
691 	return (ret);
692 }
693 
694 static int
695 zfs_dev_init(void)
696 {
697 	spa_t *spa;
698 	spa_t *next;
699 	spa_t *prev;
700 
701 	zfs_init();
702 	if (archsw.arch_zfs_probe == NULL)
703 		return (ENXIO);
704 	archsw.arch_zfs_probe();
705 
706 	prev = NULL;
707 	spa = STAILQ_FIRST(&zfs_pools);
708 	while (spa != NULL) {
709 		next = STAILQ_NEXT(spa, spa_link);
710 		if (zfs_spa_init(spa)) {
711 			if (prev == NULL)
712 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
713 			else
714 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
715 		} else
716 			prev = spa;
717 		spa = next;
718 	}
719 	return (0);
720 }
721 
/* Context passed through the partition table iteration callbacks. */
722 struct zfs_probe_args {
723 	int		fd;		/* open descriptor of the device being probed */
724 	const char	*devname;	/* name of that device */
725 	uint64_t	*pool_guid;	/* handed to zfs_probe(); filled in when it points at 0 */
726 	u_int		secsz;		/* sector size used to scale block counts/offsets */
727 };
728 
729 static int
730 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
731 {
732 	struct zfs_probe_args *ppa;
733 
734 	ppa = (struct zfs_probe_args *)arg;
735 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
736 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
737 }
738 
739 static int
740 zfs_probe(int fd, uint64_t *pool_guid)
741 {
742 	spa_t *spa;
743 	int ret;
744 
745 	spa = NULL;
746 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
747 	if (ret == 0 && pool_guid != NULL)
748 		if (*pool_guid == 0)
749 			*pool_guid = spa->spa_guid;
750 	return (ret);
751 }
752 
753 static int
754 zfs_probe_partition(void *arg, const char *partname,
755     const struct ptable_entry *part)
756 {
757 	struct zfs_probe_args *ppa, pa;
758 	struct ptable *table;
759 	char devname[32];
760 	int ret;
761 
762 	/* Probe only freebsd-zfs and freebsd partitions */
763 	if (part->type != PART_FREEBSD &&
764 	    part->type != PART_FREEBSD_ZFS)
765 		return (0);
766 
767 	ppa = (struct zfs_probe_args *)arg;
768 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
769 	devname[strlen(ppa->devname) - 1] = '\0';
770 	snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
771 	pa.fd = open(devname, O_RDWR);
772 	if (pa.fd == -1)
773 		return (0);
774 	ret = zfs_probe(pa.fd, ppa->pool_guid);
775 	if (ret == 0)
776 		return (0);
777 	/* Do we have BSD label here? */
778 	if (part->type == PART_FREEBSD) {
779 		pa.devname = devname;
780 		pa.pool_guid = ppa->pool_guid;
781 		pa.secsz = ppa->secsz;
782 		table = ptable_open(&pa, part->end - part->start + 1,
783 		    ppa->secsz, zfs_diskread);
784 		if (table != NULL) {
785 			ptable_iterate(table, &pa, zfs_probe_partition);
786 			ptable_close(table);
787 		}
788 	}
789 	close(pa.fd);
790 	return (0);
791 }
792 
793 /*
794  * Return bootenv nvlist from pool label.
795  */
796 int
797 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
798 {
799 	spa_t *spa;
800 
801 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
802 		return (ENXIO);
803 
804 	return (zfs_get_bootenv_spa(spa, benvp));
805 }
806 
807 /*
808  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
809  */
810 int
811 zfs_set_bootenv(void *vdev, nvlist_t *benv)
812 {
813 	spa_t *spa;
814 
815 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
816 		return (ENXIO);
817 
818 	return (zfs_set_bootenv_spa(spa, benv));
819 }
820 
821 /*
822  * Get bootonce value by key. The bootonce <key, value> pair is removed
823  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
824  */
825 int
826 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
827 {
828 	spa_t *spa;
829 
830 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
831 		return (ENXIO);
832 
833 	return (zfs_get_bootonce_spa(spa, key, buf, size));
834 }
835 
836 /*
837  * nvstore backend.  Forward declarations for helpers that are used
 * before their definitions further down in this file.
838  */
839 
840 static int zfs_nvstore_setter(void *, int, const char *,
841     const void *, size_t);
842 static int zfs_nvstore_setter_str(void *, const char *, const char *,
843     const char *);
844 static int zfs_nvstore_unset_impl(void *, const char *, bool);
845 static int zfs_nvstore_setenv(void *, void *);
846 
847 /*
848  * nvstore is only present for current rootfs pool.
849  */
850 static int
851 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
852 {
853 	struct zfs_devdesc *dev;
854 	int rv;
855 
856 	archsw.arch_getdev((void **)&dev, NULL, NULL);
857 	if (dev == NULL)
858 		return (ENXIO);
859 
860 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
861 
862 	free(dev);
863 	return (rv);
864 }
865 
866 /*
867  * nvstore is only present for current rootfs pool.
868  */
869 static int
870 zfs_nvstore_unsethook(struct env_var *ev)
871 {
872 	struct zfs_devdesc *dev;
873 	int rv;
874 
875 	archsw.arch_getdev((void **)&dev, NULL, NULL);
876 	if (dev == NULL)
877 		return (ENXIO);
878 
879 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
880 
881 	free(dev);
882 	return (rv);
883 }
884 
885 static int
886 zfs_nvstore_getter(void *vdev, const char *name, void **data)
887 {
888 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
889 	spa_t *spa;
890 	nvlist_t *nv;
891 	char *str, **ptr;
892 	int size;
893 	int rv;
894 
895 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
896 		return (ENOTSUP);
897 
898 	if ((spa = spa_find_by_dev(dev)) == NULL)
899 		return (ENXIO);
900 
901 	if (spa->spa_bootenv == NULL)
902 		return (ENXIO);
903 
904 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
905 	    NULL, &nv, NULL) != 0)
906 		return (ENOENT);
907 
908 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
909 	if (rv == 0) {
910 		ptr = (char **)data;
911 		asprintf(ptr, "%.*s", size, str);
912 		if (*data == NULL)
913 			rv = ENOMEM;
914 	}
915 	nvlist_destroy(nv);
916 	return (rv);
917 }
918 
919 static int
920 zfs_nvstore_setter(void *vdev, int type, const char *name,
921     const void *data, size_t size)
922 {
923 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
924 	spa_t *spa;
925 	nvlist_t *nv;
926 	int rv;
927 	bool env_set = true;
928 
929 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
930 		return (ENOTSUP);
931 
932 	if ((spa = spa_find_by_dev(dev)) == NULL)
933 		return (ENXIO);
934 
935 	if (spa->spa_bootenv == NULL)
936 		return (ENXIO);
937 
938 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
939 	    NULL, &nv, NULL) != 0) {
940 		nv = nvlist_create(NV_UNIQUE_NAME);
941 		if (nv == NULL)
942 			return (ENOMEM);
943 	}
944 
945 	rv = 0;
946 	switch (type) {
947         case DATA_TYPE_INT8:
948 		if (size != sizeof (int8_t)) {
949 			rv = EINVAL;
950 			break;
951 		}
952 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
953 		break;
954 
955         case DATA_TYPE_INT16:
956 		if (size != sizeof (int16_t)) {
957 			rv = EINVAL;
958 			break;
959 		}
960 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
961 		break;
962 
963         case DATA_TYPE_INT32:
964 		if (size != sizeof (int32_t)) {
965 			rv = EINVAL;
966 			break;
967 		}
968 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
969 		break;
970 
971         case DATA_TYPE_INT64:
972 		if (size != sizeof (int64_t)) {
973 			rv = EINVAL;
974 			break;
975 		}
976 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
977 		break;
978 
979         case DATA_TYPE_BYTE:
980 		if (size != sizeof (uint8_t)) {
981 			rv = EINVAL;
982 			break;
983 		}
984 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
985 		break;
986 
987         case DATA_TYPE_UINT8:
988 		if (size != sizeof (uint8_t)) {
989 			rv = EINVAL;
990 			break;
991 		}
992 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
993 		break;
994 
995         case DATA_TYPE_UINT16:
996 		if (size != sizeof (uint16_t)) {
997 			rv = EINVAL;
998 			break;
999 		}
1000 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
1001 		break;
1002 
1003         case DATA_TYPE_UINT32:
1004 		if (size != sizeof (uint32_t)) {
1005 			rv = EINVAL;
1006 			break;
1007 		}
1008 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1009 		break;
1010 
1011         case DATA_TYPE_UINT64:
1012 		if (size != sizeof (uint64_t)) {
1013 			rv = EINVAL;
1014 			break;
1015 		}
1016 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1017 		break;
1018 
1019         case DATA_TYPE_STRING:
1020 		rv = nvlist_add_string(nv, name, data);
1021 		break;
1022 
1023 	case DATA_TYPE_BOOLEAN_VALUE:
1024 		if (size != sizeof (boolean_t)) {
1025 			rv = EINVAL;
1026 			break;
1027 		}
1028 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1029 		break;
1030 
1031 	default:
1032 		rv = EINVAL;
1033 		break;
1034 	}
1035 
1036 	if (rv == 0) {
1037 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1038 		if (rv == 0) {
1039 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1040 		}
1041 		if (rv == 0) {
1042 			if (env_set) {
1043 				rv = zfs_nvstore_setenv(vdev,
1044 				    nvpair_find(nv, name));
1045 			} else {
1046 				env_discard(env_getenv(name));
1047 				rv = 0;
1048 			}
1049 		}
1050 	}
1051 
1052 	nvlist_destroy(nv);
1053 	return (rv);
1054 }
1055 
/*
 * Parse the whole of `data` as a signed integer (base auto-detected by
 * strtoll with base 0) and store it in *ip.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoll() sets errno.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *tail;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &tail, 0);
	if (errno == 0 && *data != '\0' && *tail == '\0') {
		*ip = parsed;
		return (0);
	}
	return (EINVAL);
}
1070 
/*
 * Parse the whole of `data` as an unsigned integer (base auto-detected by
 * strtoull with base 0) and store it in *ip.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoull() sets errno.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *tail;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &tail, 0);
	if (errno == 0 && *data != '\0' && *tail == '\0') {
		*ip = parsed;
		return (0);
	}
	return (EINVAL);
}
1085 
1086 /*
1087  * Translate textual data to data type. If type is not set, and we are
1088  * creating new pair, use DATA_TYPE_STRING.
1089  */
1090 static int
1091 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1092     const char *data)
1093 {
1094 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1095 	spa_t *spa;
1096 	nvlist_t *nv;
1097 	int rv;
1098 	data_type_t dt;
1099 	int64_t val;
1100 	uint64_t uval;
1101 
1102 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1103 		return (ENOTSUP);
1104 
1105 	if ((spa = spa_find_by_dev(dev)) == NULL)
1106 		return (ENXIO);
1107 
1108 	if (spa->spa_bootenv == NULL)
1109 		return (ENXIO);
1110 
1111 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1112 	    NULL, &nv, NULL) != 0) {
1113 		nv = NULL;
1114 	}
1115 
1116 	if (type == NULL) {
1117 		nvp_header_t *nvh;
1118 
1119 		/*
1120 		 * if there is no existing pair, default to string.
1121 		 * Otherwise, use type from existing pair.
1122 		 */
1123 		nvh = nvpair_find(nv, name);
1124 		if (nvh == NULL) {
1125 			dt = DATA_TYPE_STRING;
1126 		} else {
1127 			nv_string_t *nvp_name;
1128 			nv_pair_data_t *nvp_data;
1129 
1130 			nvp_name = (nv_string_t *)(nvh + 1);
1131 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1132 			    NV_ALIGN4(nvp_name->nv_size));
1133 			dt = nvp_data->nv_type;
1134 		}
1135 	} else {
1136 		dt = nvpair_type_from_name(type);
1137 	}
1138 	nvlist_destroy(nv);
1139 
1140 	rv = 0;
1141 	switch (dt) {
1142         case DATA_TYPE_INT8:
1143 		rv = get_int64(data, &val);
1144 		if (rv == 0) {
1145 			int8_t v = val;
1146 
1147 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1148 		}
1149 		break;
1150         case DATA_TYPE_INT16:
1151 		rv = get_int64(data, &val);
1152 		if (rv == 0) {
1153 			int16_t v = val;
1154 
1155 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1156 		}
1157 		break;
1158         case DATA_TYPE_INT32:
1159 		rv = get_int64(data, &val);
1160 		if (rv == 0) {
1161 			int32_t v = val;
1162 
1163 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1164 		}
1165 		break;
1166         case DATA_TYPE_INT64:
1167 		rv = get_int64(data, &val);
1168 		if (rv == 0) {
1169 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1170 			    sizeof (val));
1171 		}
1172 		break;
1173 
1174         case DATA_TYPE_BYTE:
1175 		rv = get_uint64(data, &uval);
1176 		if (rv == 0) {
1177 			uint8_t v = uval;
1178 
1179 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1180 		}
1181 		break;
1182 
1183         case DATA_TYPE_UINT8:
1184 		rv = get_uint64(data, &uval);
1185 		if (rv == 0) {
1186 			uint8_t v = uval;
1187 
1188 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1189 		}
1190 		break;
1191 
1192         case DATA_TYPE_UINT16:
1193 		rv = get_uint64(data, &uval);
1194 		if (rv == 0) {
1195 			uint16_t v = uval;
1196 
1197 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1198 		}
1199 		break;
1200 
1201         case DATA_TYPE_UINT32:
1202 		rv = get_uint64(data, &uval);
1203 		if (rv == 0) {
1204 			uint32_t v = uval;
1205 
1206 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1207 		}
1208 		break;
1209 
1210         case DATA_TYPE_UINT64:
1211 		rv = get_uint64(data, &uval);
1212 		if (rv == 0) {
1213 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1214 			    sizeof (uval));
1215 		}
1216 		break;
1217 
1218         case DATA_TYPE_STRING:
1219 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1220 		break;
1221 
1222 	case DATA_TYPE_BOOLEAN_VALUE:
1223 		rv = get_int64(data, &val);
1224 		if (rv == 0) {
1225 			boolean_t v = val;
1226 
1227 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1228 		}
1229 
1230 	default:
1231 		rv = EINVAL;
1232 	}
1233 	return (rv);
1234 }
1235 
1236 static int
1237 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1238 {
1239 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1240 	spa_t *spa;
1241 	nvlist_t *nv;
1242 	int rv;
1243 
1244 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1245 		return (ENOTSUP);
1246 
1247 	if ((spa = spa_find_by_dev(dev)) == NULL)
1248 		return (ENXIO);
1249 
1250 	if (spa->spa_bootenv == NULL)
1251 		return (ENXIO);
1252 
1253 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1254 	    NULL, &nv, NULL) != 0)
1255 		return (ENOENT);
1256 
1257 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1258 	if (rv == 0) {
1259 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1260 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1261 			    DATA_TYPE_NVLIST);
1262 		} else {
1263 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1264 			    OS_NVSTORE, nv);
1265 		}
1266 		if (rv == 0)
1267 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1268 	}
1269 
1270 	if (unset_env)
1271 		env_discard(env_getenv(name));
1272 	return (rv);
1273 }
1274 
/*
 * Unset (name) in the nvstore of (vdev) and discard the environment
 * variable of the same name as well.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	int error;

	error = zfs_nvstore_unset_impl(vdev, name, true);
	return (error);
}
1280 
1281 static int
1282 zfs_nvstore_print(void *vdev __unused, void *ptr)
1283 {
1284 
1285 	nvpair_print(ptr, 0);
1286 	return (0);
1287 }
1288 
1289 /*
1290  * Create environment variable from nvpair.
1291  * set hook will update nvstore with new value, unset hook will remove
1292  * variable from nvstore.
1293  */
1294 static int
1295 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1296 {
1297 	nvp_header_t *nvh = ptr;
1298 	nv_string_t *nvp_name, *nvp_value;
1299 	nv_pair_data_t *nvp_data;
1300 	char *name, *value;
1301 	int rv = 0;
1302 
1303 	if (nvh == NULL)
1304 		return (ENOENT);
1305 
1306 	nvp_name = (nv_string_t *)(nvh + 1);
1307 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1308 	    NV_ALIGN4(nvp_name->nv_size));
1309 
1310 	if ((name = nvstring_get(nvp_name)) == NULL)
1311 		return (ENOMEM);
1312 
1313 	value = NULL;
1314 	switch (nvp_data->nv_type) {
1315 	case DATA_TYPE_BYTE:
1316 	case DATA_TYPE_UINT8:
1317 		(void) asprintf(&value, "%uc",
1318 		    *(unsigned *)&nvp_data->nv_data[0]);
1319 		if (value == NULL)
1320 			rv = ENOMEM;
1321 		break;
1322 
1323 	case DATA_TYPE_INT8:
1324 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1325 		if (value == NULL)
1326 			rv = ENOMEM;
1327 		break;
1328 
1329 	case DATA_TYPE_INT16:
1330 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1331 		if (value == NULL)
1332 			rv = ENOMEM;
1333 		break;
1334 
1335 	case DATA_TYPE_UINT16:
1336 		(void) asprintf(&value, "%hu",
1337 		    *(unsigned short *)&nvp_data->nv_data[0]);
1338 		if (value == NULL)
1339 			rv = ENOMEM;
1340 		break;
1341 
1342 	case DATA_TYPE_BOOLEAN_VALUE:
1343 	case DATA_TYPE_INT32:
1344 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1345 		if (value == NULL)
1346 			rv = ENOMEM;
1347 		break;
1348 
1349 	case DATA_TYPE_UINT32:
1350 		(void) asprintf(&value, "%u",
1351 		    *(unsigned *)&nvp_data->nv_data[0]);
1352 		if (value == NULL)
1353 			rv = ENOMEM;
1354 		break;
1355 
1356 	case DATA_TYPE_INT64:
1357 		(void) asprintf(&value, "%jd",
1358 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1359 		if (value == NULL)
1360 			rv = ENOMEM;
1361 		break;
1362 
1363 	case DATA_TYPE_UINT64:
1364 		(void) asprintf(&value, "%ju",
1365 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1366 		if (value == NULL)
1367 			rv = ENOMEM;
1368 		break;
1369 
1370 	case DATA_TYPE_STRING:
1371 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1372 		if ((value = nvstring_get(nvp_value)) == NULL) {
1373 			rv = ENOMEM;
1374 			break;
1375 		}
1376 		break;
1377 
1378 	default:
1379 		rv = EINVAL;
1380 		break;
1381 	}
1382 
1383 	if (value != NULL) {
1384 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1385 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1386 		free(value);
1387 	}
1388 	free(name);
1389 	return (rv);
1390 }
1391 
1392 static int
1393 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1394 {
1395 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1396 	spa_t *spa;
1397 	nvlist_t *nv;
1398 	nvp_header_t *nvh;
1399 	int rv;
1400 
1401 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1402 		return (ENOTSUP);
1403 
1404 	if ((spa = spa_find_by_dev(dev)) == NULL)
1405 		return (ENXIO);
1406 
1407 	if (spa->spa_bootenv == NULL)
1408 		return (ENXIO);
1409 
1410 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1411 	    NULL, &nv, NULL) != 0)
1412 		return (ENOENT);
1413 
1414 	rv = 0;
1415 	nvh = NULL;
1416 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1417 		rv = cb(vdev, nvh);
1418 		if (rv != 0)
1419 			break;
1420 	}
1421 	return (rv);
1422 }
1423 
1424 nvs_callbacks_t nvstore_zfs_cb = {
1425 	.nvs_getter = zfs_nvstore_getter,
1426 	.nvs_setter = zfs_nvstore_setter,
1427 	.nvs_setter_str = zfs_nvstore_setter_str,
1428 	.nvs_unset = zfs_nvstore_unset,
1429 	.nvs_print = zfs_nvstore_print,
1430 	.nvs_iterate = zfs_nvstore_iterate
1431 };
1432 
1433 int
1434 zfs_attach_nvstore(void *vdev)
1435 {
1436 	struct zfs_devdesc *dev = vdev;
1437 	spa_t *spa;
1438 	uint64_t version;
1439 	int rv;
1440 
1441 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1442 		return (ENOTSUP);
1443 
1444 	if ((spa = spa_find_by_dev(dev)) == NULL)
1445 		return (ENXIO);
1446 
1447 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1448 	    NULL, &version, NULL);
1449 
1450 	if (rv != 0 || version != VB_NVLIST) {
1451 		return (ENXIO);
1452 	}
1453 
1454 	dev = malloc(sizeof (*dev));
1455 	if (dev == NULL)
1456 		return (ENOMEM);
1457 	memcpy(dev, vdev, sizeof (*dev));
1458 
1459 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1460 	if (rv != 0)
1461 		free(dev);
1462 	else
1463 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1464 	return (rv);
1465 }
1466 
1467 int
1468 zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool parts_too)
1469 {
1470 	struct ptable *table;
1471 	struct zfs_probe_args pa;
1472 	uint64_t mediasz;
1473 	int ret;
1474 
1475 	if (pool_guid)
1476 		*pool_guid = 0;
1477 	pa.fd = open(devname, O_RDWR);
1478 	if (pa.fd == -1)
1479 		return (ENXIO);
1480 	/* Probe the whole disk */
1481 	ret = zfs_probe(pa.fd, pool_guid);
1482 	if (ret == 0)
1483 		return (0);
1484 	if (!parts_too)
1485 		return (ENXIO);
1486 
1487 	/* Probe each partition */
1488 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1489 	if (ret == 0)
1490 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1491 	if (ret == 0) {
1492 		pa.devname = devname;
1493 		pa.pool_guid = pool_guid;
1494 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1495 		    zfs_diskread);
1496 		if (table != NULL) {
1497 			ptable_iterate(table, &pa, zfs_probe_partition);
1498 			ptable_close(table);
1499 		}
1500 	}
1501 	close(pa.fd);
1502 	if (pool_guid && *pool_guid == 0)
1503 		ret = ENXIO;
1504 	return (ret);
1505 }
1506 
1507 /*
1508  * Print information about ZFS pools
1509  */
1510 static int
1511 zfs_dev_print(int verbose)
1512 {
1513 	spa_t *spa;
1514 	char line[80];
1515 	int ret = 0;
1516 
1517 	if (STAILQ_EMPTY(&zfs_pools))
1518 		return (0);
1519 
1520 	printf("%s devices:", zfs_dev.dv_name);
1521 	if ((ret = pager_output("\n")) != 0)
1522 		return (ret);
1523 
1524 	if (verbose) {
1525 		return (spa_all_status());
1526 	}
1527 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1528 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
1529 		ret = pager_output(line);
1530 		if (ret != 0)
1531 			break;
1532 	}
1533 	return (ret);
1534 }
1535 
1536 /*
1537  * Attempt to open the pool described by (dev) for use by (f).
1538  */
1539 static int
1540 zfs_dev_open(struct open_file *f, ...)
1541 {
1542 	va_list		args;
1543 	struct zfs_devdesc	*dev;
1544 	struct zfsmount	*mount;
1545 	spa_t		*spa;
1546 	int		rv;
1547 
1548 	va_start(args, f);
1549 	dev = va_arg(args, struct zfs_devdesc *);
1550 	va_end(args);
1551 
1552 	if ((spa = spa_find_by_dev(dev)) == NULL)
1553 		return (ENXIO);
1554 
1555 	STAILQ_FOREACH(mount, &zfsmount, next) {
1556 		if (spa->spa_guid == mount->spa->spa_guid)
1557 			break;
1558 	}
1559 
1560 	rv = 0;
1561 	/* This device is not set as currdev, mount us private copy. */
1562 	if (mount == NULL)
1563 		rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount);
1564 
1565 	if (rv == 0) {
1566 		dev->dd.d_opendata = mount;
1567 	}
1568 	return (rv);
1569 }
1570 
1571 static int
1572 zfs_dev_close(struct open_file *f)
1573 {
1574 	struct devdesc *dev;
1575 	struct zfsmount	*mnt, *mount;
1576 
1577 	dev = f->f_devdata;
1578 	mnt = dev->d_opendata;
1579 
1580 	STAILQ_FOREACH(mount, &zfsmount, next) {
1581 		if (mnt->spa->spa_guid == mount->spa->spa_guid)
1582 			break;
1583 	}
1584 
1585 	/* XXX */
1586 	return (0);
1587 }
1588 
1589 static int
1590 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1591 {
1592 
1593 	return (ENOSYS);
1594 }
1595 
1596 struct devsw zfs_dev = {
1597 	.dv_name = "zfs",
1598 	.dv_type = DEVT_ZFS,
1599 	.dv_init = zfs_dev_init,
1600 	.dv_strategy = zfs_dev_strategy,
1601 	.dv_open = zfs_dev_open,
1602 	.dv_close = zfs_dev_close,
1603 	.dv_ioctl = noioctl,
1604 	.dv_print = zfs_dev_print,
1605 	.dv_cleanup = nullsys,
1606 	.dv_fmtdev = zfs_fmtdev,
1607 	.dv_parsedev = zfs_parsedev,
1608 };
1609 
1610 static int
1611 zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path)
1612 {
1613 	static char	rootname[ZFS_MAXNAMELEN];
1614 	static char	poolname[ZFS_MAXNAMELEN];
1615 	spa_t		*spa;
1616 	const char	*end;
1617 	const char	*np;
1618 	const char	*sep;
1619 	int		rv;
1620 	struct zfs_devdesc *dev;
1621 
1622 	np = devspec + 3;			/* Skip the leading 'zfs' */
1623 	if (*np != ':')
1624 		return (EINVAL);
1625 	np++;
1626 	end = strrchr(np, ':');
1627 	if (end == NULL)
1628 		return (EINVAL);
1629 	sep = strchr(np, '/');
1630 	if (sep == NULL || sep >= end)
1631 		sep = end;
1632 	memcpy(poolname, np, sep - np);
1633 	poolname[sep - np] = '\0';
1634 	if (sep < end) {
1635 		sep++;
1636 		memcpy(rootname, sep, end - sep);
1637 		rootname[end - sep] = '\0';
1638 	}
1639 	else
1640 		rootname[0] = '\0';
1641 
1642 	spa = spa_find_by_name(poolname);
1643 	if (!spa)
1644 		return (ENXIO);
1645 	dev = malloc(sizeof(*dev));
1646 	if (dev == NULL)
1647 		return (ENOMEM);
1648 	dev->pool_guid = spa->spa_guid;
1649 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1650 	if (rv != 0) {
1651 		free(dev);
1652 		return (rv);
1653 	}
1654 	if (path != NULL)
1655 		*path = (*end == '\0') ? end : end + 1;
1656 	dev->dd.d_dev = &zfs_dev;
1657 	*idev = &dev->dd;
1658 	return (0);
1659 }
1660 
1661 char *
1662 zfs_fmtdev(struct devdesc *vdev)
1663 {
1664 	static char		rootname[ZFS_MAXNAMELEN];
1665 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1666 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1667 	spa_t			*spa;
1668 
1669 	buf[0] = '\0';
1670 	if (vdev->d_dev->dv_type != DEVT_ZFS)
1671 		return (buf);
1672 
1673 	/* Do we have any pools? */
1674 	spa = STAILQ_FIRST(&zfs_pools);
1675 	if (spa == NULL)
1676 		return (buf);
1677 
1678 	if (dev->pool_guid == 0)
1679 		dev->pool_guid = spa->spa_guid;
1680 	else
1681 		spa = spa_find_by_guid(dev->pool_guid);
1682 
1683 	if (spa == NULL) {
1684 		printf("ZFS: can't find pool by guid\n");
1685 		return (buf);
1686 	}
1687 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1688 		printf("ZFS: can't find root filesystem\n");
1689 		return (buf);
1690 	}
1691 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1692 		printf("ZFS: can't find filesystem by guid\n");
1693 		return (buf);
1694 	}
1695 
1696 	if (rootname[0] == '\0')
1697 		snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1698 		    spa->spa_name);
1699 	else
1700 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1701 		    spa->spa_name, rootname);
1702 	return (buf);
1703 }
1704 
/*
 * Split (name), of the form "pool[/dataset]", at its first '/'.
 *
 * The pool part is copied, NUL terminated, into (poolname), which holds
 * (size) bytes. If (dsnamep) is not NULL it is pointed at the text after
 * the '/', or at an empty string when there is no '/'.
 *
 * Returns 0 on success, or EINVAL if the pool part does not fit.
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *rest;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		rest = "";
	} else {
		poollen = slash - name;
		rest = slash + 1;
	}

	/* Room is needed for the pool name plus its terminating NUL. */
	if (poollen >= size)
		return (EINVAL);

	memcpy(poolname, name, poollen);
	poolname[poollen] = '\0';

	if (dsnamep != NULL)
		*dsnamep = rest;

	return (0);
}
1733 
1734 int
1735 zfs_list(const char *name)
1736 {
1737 	static char	poolname[ZFS_MAXNAMELEN];
1738 	uint64_t	objid;
1739 	spa_t		*spa;
1740 	const char	*dsname;
1741 	int		rv;
1742 
1743 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1744 		return (EINVAL);
1745 
1746 	spa = spa_find_by_name(poolname);
1747 	if (!spa)
1748 		return (ENXIO);
1749 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1750 	if (rv != 0)
1751 		return (rv);
1752 
1753 	return (zfs_list_dataset(spa, objid));
1754 }
1755 
1756 void
1757 init_zfs_boot_options(const char *currdev_in)
1758 {
1759 	char poolname[ZFS_MAXNAMELEN];
1760 	char *beroot, *currdev;
1761 	spa_t *spa;
1762 	int currdev_len;
1763 	const char *dsname;
1764 
1765 	currdev = NULL;
1766 	currdev_len = strlen(currdev_in);
1767 	if (currdev_len == 0)
1768 		return;
1769 	if (strncmp(currdev_in, "zfs:", 4) != 0)
1770 		return;
1771 	currdev = strdup(currdev_in);
1772 	if (currdev == NULL)
1773 		return;
1774 	/* Remove the trailing : */
1775 	currdev[currdev_len - 1] = '\0';
1776 
1777 	setenv("zfs_be_active", currdev, 1);
1778 	setenv("zfs_be_currpage", "1", 1);
1779 	/* Remove the last element (current bootenv) */
1780 	beroot = strrchr(currdev, '/');
1781 	if (beroot != NULL)
1782 		beroot[0] = '\0';
1783 	beroot = strchr(currdev, ':') + 1;
1784 	setenv("zfs_be_root", beroot, 1);
1785 
1786 	if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1787 		return;
1788 
1789 	spa = spa_find_by_name(poolname);
1790 	if (spa == NULL)
1791 		return;
1792 
1793 	zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1794 	zfs_checkpoints_initial(spa, beroot, dsname);
1795 
1796 	free(currdev);
1797 }
1798 
1799 static void
1800 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1801 {
1802 	char envname[32];
1803 
1804 	if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1805 		snprintf(envname, sizeof(envname), "zpool_checkpoint");
1806 		setenv(envname, name, 1);
1807 
1808 		spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1809 		spa->spa_mos = &spa->spa_mos_checkpoint;
1810 
1811 		zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1812 
1813 		spa->spa_uberblock = &spa->spa_uberblock_master;
1814 		spa->spa_mos = &spa->spa_mos_master;
1815 	}
1816 }
1817 
1818 static void
1819 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1820    const char *dsname, int checkpoint)
1821 {
1822 	char		envname[32], envval[256];
1823 	uint64_t	objid;
1824 	int		bootenvs_idx, rv;
1825 
1826 	SLIST_INIT(&zfs_be_head);
1827 	zfs_env_count = 0;
1828 
1829 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1830 	if (rv != 0)
1831 		return;
1832 
1833 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1834 	bootenvs_idx = 0;
1835 	/* Populate the initial environment variables */
1836 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1837 		/* Enumerate all bootenvs for general usage */
1838 		snprintf(envname, sizeof(envname), "%s[%d]",
1839 		    envprefix, bootenvs_idx);
1840 		snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1841 		    checkpoint ? "!" : "", rootname, zfs_be->name);
1842 		rv = setenv(envname, envval, 1);
1843 		if (rv != 0)
1844 			break;
1845 		bootenvs_idx++;
1846 	}
1847 	snprintf(envname, sizeof(envname), "%s_count", envprefix);
1848 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1849 	setenv(envname, envval, 1);
1850 
1851 	/* Clean up the SLIST of ZFS BEs */
1852 	while (!SLIST_EMPTY(&zfs_be_head)) {
1853 		zfs_be = SLIST_FIRST(&zfs_be_head);
1854 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1855 		free(zfs_be->name);
1856 		free(zfs_be);
1857 	}
1858 }
1859 
1860 int
1861 zfs_bootenv(const char *name)
1862 {
1863 	char		poolname[ZFS_MAXNAMELEN], *root;
1864 	const char	*dsname;
1865 	char		becount[4];
1866 	uint64_t	objid;
1867 	spa_t		*spa;
1868 	int		rv, pages, perpage, currpage;
1869 
1870 	if (name == NULL)
1871 		return (EINVAL);
1872 	if ((root = getenv("zfs_be_root")) == NULL)
1873 		return (EINVAL);
1874 
1875 	if (strcmp(name, root) != 0) {
1876 		if (setenv("zfs_be_root", name, 1) != 0)
1877 			return (ENOMEM);
1878 	}
1879 
1880 	SLIST_INIT(&zfs_be_head);
1881 	zfs_env_count = 0;
1882 
1883 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1884 		return (EINVAL);
1885 
1886 	spa = spa_find_by_name(poolname);
1887 	if (!spa)
1888 		return (ENXIO);
1889 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1890 	if (rv != 0)
1891 		return (rv);
1892 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1893 
1894 	/* Calculate and store the number of pages of BEs */
1895 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1896 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1897 	snprintf(becount, 4, "%d", pages);
1898 	if (setenv("zfs_be_pages", becount, 1) != 0)
1899 		return (ENOMEM);
1900 
1901 	/* Roll over the page counter if it has exceeded the maximum */
1902 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1903 	if (currpage > pages) {
1904 		if (setenv("zfs_be_currpage", "1", 1) != 0)
1905 			return (ENOMEM);
1906 	}
1907 
1908 	/* Populate the menu environment variables */
1909 	zfs_set_env();
1910 
1911 	/* Clean up the SLIST of ZFS BEs */
1912 	while (!SLIST_EMPTY(&zfs_be_head)) {
1913 		zfs_be = SLIST_FIRST(&zfs_be_head);
1914 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1915 		free(zfs_be->name);
1916 		free(zfs_be);
1917 	}
1918 
1919 	return (rv);
1920 }
1921 
1922 int
1923 zfs_belist_add(const char *name, uint64_t value __unused)
1924 {
1925 
1926 	/* Skip special datasets that start with a $ character */
1927 	if (strncmp(name, "$", 1) == 0) {
1928 		return (0);
1929 	}
1930 	/* Add the boot environment to the head of the SLIST */
1931 	zfs_be = malloc(sizeof(struct zfs_be_entry));
1932 	if (zfs_be == NULL) {
1933 		return (ENOMEM);
1934 	}
1935 	zfs_be->name = strdup(name);
1936 	if (zfs_be->name == NULL) {
1937 		free(zfs_be);
1938 		return (ENOMEM);
1939 	}
1940 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1941 	zfs_env_count++;
1942 
1943 	return (0);
1944 }
1945 
1946 int
1947 zfs_set_env(void)
1948 {
1949 	char envname[32], envval[256];
1950 	char *beroot, *pagenum;
1951 	int rv, page, ctr;
1952 
1953 	beroot = getenv("zfs_be_root");
1954 	if (beroot == NULL) {
1955 		return (1);
1956 	}
1957 
1958 	pagenum = getenv("zfs_be_currpage");
1959 	if (pagenum != NULL) {
1960 		page = strtol(pagenum, NULL, 10);
1961 	} else {
1962 		page = 1;
1963 	}
1964 
1965 	ctr = 1;
1966 	rv = 0;
1967 	zfs_env_index = ZFS_BE_FIRST;
1968 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1969 		/* Skip to the requested page number */
1970 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1971 			ctr++;
1972 			continue;
1973 		}
1974 
1975 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1976 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1977 		rv = setenv(envname, envval, 1);
1978 		if (rv != 0) {
1979 			break;
1980 		}
1981 
1982 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1983 		rv = setenv(envname, envval, 1);
1984 		if (rv != 0){
1985 			break;
1986 		}
1987 
1988 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1989 		rv = setenv(envname, "set_bootenv", 1);
1990 		if (rv != 0){
1991 			break;
1992 		}
1993 
1994 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1995 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1996 		rv = setenv(envname, envval, 1);
1997 		if (rv != 0){
1998 			break;
1999 		}
2000 
2001 		zfs_env_index++;
2002 		if (zfs_env_index > ZFS_BE_LAST) {
2003 			break;
2004 		}
2005 
2006 	}
2007 
2008 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2009 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2010 		(void)unsetenv(envname);
2011 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2012 		(void)unsetenv(envname);
2013 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2014 		(void)unsetenv(envname);
2015 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2016 		(void)unsetenv(envname);
2017 	}
2018 
2019 	return (rv);
2020 }
2021