xref: /freebsd/stand/libsa/zfs/zfs.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  *	Stand-alone file reading package.
29  */
30 
31 #include <stand.h>
32 #include <sys/disk.h>
33 #include <sys/param.h>
34 #include <sys/time.h>
35 #include <sys/queue.h>
36 #include <part.h>
37 #include <stddef.h>
38 #include <stdarg.h>
39 #include <string.h>
40 #include <bootstrap.h>
41 
42 #include "libzfs.h"
43 
44 #include "zfsimpl.c"
45 
/* Define the range of indexes to be populated with ZFS Boot Environments */
#define		ZFS_BE_FIRST	4
#define		ZFS_BE_LAST	8

/* File system entry points; these are the handlers installed in zfs_fsops. */
static int	zfs_open(const char *path, struct open_file *f);
static int	zfs_close(struct open_file *f);
static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
static int	zfs_stat(struct open_file *f, struct stat *sb);
static int	zfs_readdir(struct open_file *f, struct dirent *d);
static int	zfs_mount(const char *dev, const char *path, void **data);
static int	zfs_unmount(const char *dev, void *data);

/* Boot environment and checkpoint helpers. */
static void	zfs_bootenv_initial(const char *envname, spa_t *spa,
		    const char *name, const char *dsname, int checkpoint);
static void	zfs_checkpoints_initial(spa_t *spa, const char *name,
		    const char *dsname);

/* Device specifier parser; zfs_mount() uses it to resolve its dev argument. */
static int	zfs_parsedev(struct devdesc **idev, const char *devspec,
		    const char **path);

/*
 * Tentative definition of the ZFS device switch; zfs_open() compares
 * f->f_dev against its address to reject files on other devices.
 */
struct devsw zfs_dev;
68 
/*
 * File system operations table for ZFS.  Every operation is backed by a
 * handler in this file except writing, which is routed to null_write.
 */
struct fs_ops zfs_fsops = {
	.fs_name = "zfs",
	.fo_open = zfs_open,
	.fo_close = zfs_close,
	.fo_read = zfs_read,
	.fo_write = null_write,
	.fo_seek = zfs_seek,
	.fo_stat = zfs_stat,
	.fo_readdir = zfs_readdir,
	.fo_mount = zfs_mount,
	.fo_unmount = zfs_unmount
};
81 
/*
 * In-core open file.
 *
 * Allocated zero-filled by zfs_open() and attached to open_file.f_fsdata.
 * For directories zfs_readdir() reuses f_seekp as its iteration cursor and
 * fills in the three ZAP fields on the first call.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
92 
/*
 * Boot environment bookkeeping.
 * NOTE(review): the code that uses these objects is outside the part of the
 * file visible here; the names suggest a cursor/count pair and a singly
 * linked list of boot environment names -- confirm against the users.
 */
static int	zfs_env_index;
static int	zfs_env_count;

SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;
struct zfs_be_entry {
	char *name;
	SLIST_ENTRY(zfs_be_entry) entries;
} *zfs_be, *zfs_be_tmp;
102 
103 /*
104  * Open a file.
105  */
106 static int
107 zfs_open(const char *upath, struct open_file *f)
108 {
109 	struct devdesc *dev = f->f_devdata;
110 	struct zfsmount *mount = dev->d_opendata;
111 	struct file *fp;
112 	int rc;
113 
114 	if (f->f_dev != &zfs_dev)
115 		return (EINVAL);
116 
117 	/* allocate file system specific data structure */
118 	fp = calloc(1, sizeof(struct file));
119 	if (fp == NULL)
120 		return (ENOMEM);
121 	f->f_fsdata = fp;
122 
123 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
124 	fp->f_seekp = 0;
125 	if (rc) {
126 		f->f_fsdata = NULL;
127 		free(fp);
128 	}
129 	return (rc);
130 }
131 
132 static int
133 zfs_close(struct open_file *f)
134 {
135 	struct file *fp = (struct file *)f->f_fsdata;
136 
137 	dnode_cache_obj = NULL;
138 	f->f_fsdata = NULL;
139 
140 	free(fp);
141 	return (0);
142 }
143 
144 /*
145  * Copy a portion of a file into kernel memory.
146  * Cross block boundaries when necessary.
147  */
148 static int
149 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
150 {
151 	struct devdesc *dev = f->f_devdata;
152 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
153 	struct file *fp = (struct file *)f->f_fsdata;
154 	struct stat sb;
155 	size_t n;
156 	int rc;
157 
158 	rc = zfs_stat(f, &sb);
159 	if (rc)
160 		return (rc);
161 	n = size;
162 	if (fp->f_seekp + n > sb.st_size)
163 		n = sb.st_size - fp->f_seekp;
164 
165 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
166 	if (rc)
167 		return (rc);
168 
169 	if (0) {
170 	    int i;
171 	    for (i = 0; i < n; i++)
172 		putchar(((char*) start)[i]);
173 	}
174 	fp->f_seekp += n;
175 	if (resid)
176 		*resid = size - n;
177 
178 	return (0);
179 }
180 
181 static off_t
182 zfs_seek(struct open_file *f, off_t offset, int where)
183 {
184 	struct file *fp = (struct file *)f->f_fsdata;
185 
186 	switch (where) {
187 	case SEEK_SET:
188 		fp->f_seekp = offset;
189 		break;
190 	case SEEK_CUR:
191 		fp->f_seekp += offset;
192 		break;
193 	case SEEK_END:
194 	    {
195 		struct stat sb;
196 		int error;
197 
198 		error = zfs_stat(f, &sb);
199 		if (error != 0) {
200 			errno = error;
201 			return (-1);
202 		}
203 		fp->f_seekp = sb.st_size - offset;
204 		break;
205 	    }
206 	default:
207 		errno = EINVAL;
208 		return (-1);
209 	}
210 	return (fp->f_seekp);
211 }
212 
213 static int
214 zfs_stat(struct open_file *f, struct stat *sb)
215 {
216 	struct devdesc *dev = f->f_devdata;
217 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
218 	struct file *fp = (struct file *)f->f_fsdata;
219 
220 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
221 }
222 
/*
 * Return the next directory entry of an open directory in *d.
 *
 * The file's seek pointer doubles as the iteration cursor.  A cursor of 0
 * means "first call": the ZAP type is read from the start of the object and
 * the cursor is moved to the first entry.  Micro ZAP objects are walked one
 * mzap_ent_phys_t at a time; for any other type the leaf blocks are walked
 * chunk by chunk using the buffer in fp->f_zap_leaf.
 *
 * Returns 0 with *d filled in, ENOENT when there are no more entries,
 * ENOTDIR if the file is not a directory, ENOMEM if the leaf buffer cannot
 * be allocated, or an error from zfs_stat()/dnode_read().
 */
static int
zfs_readdir(struct open_file *f, struct dirent *d)
{
	struct devdesc *dev = f->f_devdata;
	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
	struct file *fp = (struct file *)f->f_fsdata;
	mzap_ent_phys_t mze;
	struct stat sb;
	/* Data block size of the directory object, in bytes. */
	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
	int rc;

	rc = zfs_stat(f, &sb);
	if (rc)
		return (rc);
	if (!S_ISDIR(sb.st_mode))
		return (ENOTDIR);

	/*
	 * If this is the first read, get the zap type.
	 */
	if (fp->f_seekp == 0) {
		rc = dnode_read(spa, &fp->f_dnode,
				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
		if (rc)
			return (rc);

		if (fp->f_zap_type == ZBT_MICRO) {
			/* Entries start at the mz_chunk array. */
			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
		} else {
			rc = dnode_read(spa, &fp->f_dnode,
					offsetof(zap_phys_t, zap_num_leafs),
					&fp->f_num_leafs,
					sizeof(fp->f_num_leafs));
			if (rc)
				return (rc);

			/*
			 * Start at the block following the first one and
			 * load it into a freshly allocated leaf buffer.
			 * NOTE(review): a buffer left over from an earlier
			 * pass is overwritten here without being freed.
			 */
			fp->f_seekp = bsize;
			fp->f_zap_leaf = malloc(bsize);
			if (fp->f_zap_leaf == NULL)
				return (ENOMEM);
			rc = dnode_read(spa, &fp->f_dnode,
					fp->f_seekp,
					fp->f_zap_leaf,
					bsize);
			if (rc)
				return (rc);
		}
	}

	if (fp->f_zap_type == ZBT_MICRO) {
	mzap_next:
		/* The scan stops at the end of the first data block. */
		if (fp->f_seekp >= bsize)
			return (ENOENT);

		rc = dnode_read(spa, &fp->f_dnode,
				fp->f_seekp, &mze, sizeof(mze));
		if (rc)
			return (rc);
		fp->f_seekp += sizeof(mze);

		/* Slots with an empty name are skipped. */
		if (!mze.mze_name[0])
			goto mzap_next;

		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
		strcpy(d->d_name, mze.mze_name);
		d->d_namlen = strlen(d->d_name);
		return (0);
	} else {
		zap_leaf_t zl;
		zap_leaf_chunk_t *zc, *nc;
		int chunk;
		size_t namelen;
		char *p;
		uint64_t value;

		/*
		 * Initialise this so we can use the ZAP size
		 * calculating macros.
		 */
		zl.l_bs = ilog2(bsize);
		zl.l_phys = fp->f_zap_leaf;

		/*
		 * Figure out which chunk we are currently looking at
		 * and consider seeking to the next leaf. We use the
		 * low bits of f_seekp as a simple chunk index.
		 */
	fzap_next:
		chunk = fp->f_seekp & (bsize - 1);
		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
			/* All chunks of this leaf seen; move to the next block. */
			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
			chunk = 0;

			/*
			 * Check for EOF and read the new leaf.
			 */
			if (fp->f_seekp >= bsize * fp->f_num_leafs)
				return (ENOENT);

			rc = dnode_read(spa, &fp->f_dnode,
					fp->f_seekp,
					fp->f_zap_leaf,
					bsize);
			if (rc)
				return (rc);
		}

		/* Advance the cursor before deciding whether to use this chunk. */
		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
		fp->f_seekp++;
		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
			goto fzap_next;

		/* Clamp the name length to what fits in d_name. */
		namelen = zc->l_entry.le_name_numints;
		if (namelen > sizeof(d->d_name))
			namelen = sizeof(d->d_name);

		/*
		 * Paste the name back together.
		 */
		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
		p = d->d_name;
		while (namelen > 0) {
			int len;
			len = namelen;
			if (len > ZAP_LEAF_ARRAY_BYTES)
				len = ZAP_LEAF_ARRAY_BYTES;
			memcpy(p, nc->l_array.la_array, len);
			p += len;
			namelen -= len;
			/* Follow the chain to the chunk holding the next piece. */
			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
		}
		/* Guarantee termination even when the name was clamped. */
		d->d_name[sizeof(d->d_name) - 1] = 0;

		/*
		 * Assume the first eight bytes of the value are
		 * a uint64_t.
		 */
		value = fzap_leaf_value(&zl, zc);

		d->d_fileno = ZFS_DIRENT_OBJ(value);
		d->d_type = ZFS_DIRENT_TYPE(value);
		d->d_namlen = strlen(d->d_name);

		return (0);
	}
}
370 
371 static spa_t *
372 spa_find_by_dev(struct zfs_devdesc *dev)
373 {
374 
375 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
376 		return (NULL);
377 
378 	if (dev->pool_guid == 0)
379 		return (STAILQ_FIRST(&zfs_pools));
380 
381 	return (spa_find_by_guid(dev->pool_guid));
382 }
383 
384 /*
385  * if path is NULL, create mount structure, but do not add it to list.
386  */
387 static int
388 zfs_mount(const char *dev, const char *path, void **data)
389 {
390 	struct zfs_devdesc *zfsdev = NULL;
391 	spa_t *spa;
392 	struct zfsmount *mnt = NULL;
393 	int rv;
394 
395 	errno = 0;
396 	rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL);
397 	if (rv != 0) {
398 		return (rv);
399 	}
400 
401 	spa = spa_find_by_dev(zfsdev);
402 	if (spa == NULL) {
403 		rv = ENXIO;
404 		goto err;
405 	}
406 
407 	mnt = calloc(1, sizeof(*mnt));
408 	if (mnt == NULL) {
409 		rv = ENOMEM;
410 		goto err;
411 	}
412 
413 	if (mnt->path != NULL) {
414 		mnt->path = strdup(path);
415 		if (mnt->path == NULL) {
416 			rv = ENOMEM;
417 			goto err;
418 		}
419 	}
420 
421 	rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
422 
423 	if (rv == 0 && mnt->objset.os_type != DMU_OST_ZFS) {
424 		printf("Unexpected object set type %ju\n",
425 		    (uintmax_t)mnt->objset.os_type);
426 		rv = EIO;
427 	}
428 err:
429 	if (rv != 0) {
430 		if (mnt != NULL)
431 			free(mnt->path);
432 		free(mnt);
433 		free(zfsdev);
434 		return (rv);
435 	}
436 
437 	*data = mnt;
438 	if (path != NULL)
439 		STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
440 
441 	free(zfsdev);
442 
443 	return (rv);
444 }
445 
446 static int
447 zfs_unmount(const char *dev, void *data)
448 {
449 	struct zfsmount *mnt = data;
450 
451 	STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
452 	free(mnt->path);
453 	free(mnt);
454 	return (0);
455 }
456 
457 static int
458 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
459 {
460 	int fd, ret;
461 	size_t res, head, tail, total_size, full_sec_size;
462 	unsigned secsz, do_tail_read;
463 	off_t start_sec;
464 	char *outbuf, *bouncebuf;
465 
466 	fd = (uintptr_t) priv;
467 	outbuf = (char *) buf;
468 	bouncebuf = NULL;
469 
470 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
471 	if (ret != 0)
472 		return (ret);
473 
474 	/*
475 	 * Handling reads of arbitrary offset and size - multi-sector case
476 	 * and single-sector case.
477 	 *
478 	 *                        Multi-sector Case
479 	 *                (do_tail_read = true if tail > 0)
480 	 *
481 	 *   |<----------------------total_size--------------------->|
482 	 *   |                                                       |
483 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
484 	 *   |          |                                 |          |
485 	 *   |          |       |<~full_sec_size~>|       |          |
486 	 *   +------------------+                 +------------------+
487 	 *   |          |0101010|     .  .  .     |0101011|          |
488 	 *   +------------------+                 +------------------+
489 	 *         start_sec                         start_sec + n
490 	 *
491 	 *
492 	 *                      Single-sector Case
493 	 *                    (do_tail_read = false)
494 	 *
495 	 *              |<------total_size = secsz----->|
496 	 *              |                               |
497 	 *              |<-head->|<---bytes--->|<-tail->|
498 	 *              +-------------------------------+
499 	 *              |        |0101010101010|        |
500 	 *              +-------------------------------+
501 	 *                          start_sec
502 	 */
503 	start_sec = offset / secsz;
504 	head = offset % secsz;
505 	total_size = roundup2(head + bytes, secsz);
506 	tail = total_size - (head + bytes);
507 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
508 	full_sec_size = total_size;
509 	if (head > 0)
510 		full_sec_size -= secsz;
511 	if (do_tail_read)
512 		full_sec_size -= secsz;
513 
514 	/* Return of partial sector data requires a bounce buffer. */
515 	if ((head > 0) || do_tail_read || bytes < secsz) {
516 		bouncebuf = malloc(secsz);
517 		if (bouncebuf == NULL) {
518 			printf("vdev_read: out of memory\n");
519 			return (ENOMEM);
520 		}
521 	}
522 
523 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
524 		ret = errno;
525 		goto error;
526 	}
527 
528 	/* Partial data return from first sector */
529 	if (head > 0) {
530 		res = read(fd, bouncebuf, secsz);
531 		if (res != secsz) {
532 			ret = EIO;
533 			goto error;
534 		}
535 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
536 		outbuf += min(secsz - head, bytes);
537 	}
538 
539 	/*
540 	 * Full data return from read sectors.
541 	 * Note, there is still corner case where we read
542 	 * from sector boundary, but less than sector size, e.g. reading 512B
543 	 * from 4k sector.
544 	 */
545 	if (full_sec_size > 0) {
546 		if (bytes < full_sec_size) {
547 			res = read(fd, bouncebuf, secsz);
548 			if (res != secsz) {
549 				ret = EIO;
550 				goto error;
551 			}
552 			memcpy(outbuf, bouncebuf, bytes);
553 		} else {
554 			res = read(fd, outbuf, full_sec_size);
555 			if (res != full_sec_size) {
556 				ret = EIO;
557 				goto error;
558 			}
559 			outbuf += full_sec_size;
560 		}
561 	}
562 
563 	/* Partial data return from last sector */
564 	if (do_tail_read) {
565 		res = read(fd, bouncebuf, secsz);
566 		if (res != secsz) {
567 			ret = EIO;
568 			goto error;
569 		}
570 		memcpy(outbuf, bouncebuf, secsz - tail);
571 	}
572 
573 	ret = 0;
574 error:
575 	free(bouncebuf);
576 	return (ret);
577 }
578 
579 static int
580 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
581 {
582 	int fd, ret;
583 	size_t head, tail, total_size, full_sec_size;
584 	unsigned secsz, do_tail_write;
585 	off_t start_sec;
586 	ssize_t res;
587 	char *outbuf, *bouncebuf;
588 
589 	fd = (uintptr_t)vdev->v_priv;
590 	outbuf = (char *)buf;
591 	bouncebuf = NULL;
592 
593 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
594 	if (ret != 0)
595 		return (ret);
596 
597 	start_sec = offset / secsz;
598 	head = offset % secsz;
599 	total_size = roundup2(head + bytes, secsz);
600 	tail = total_size - (head + bytes);
601 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
602 	full_sec_size = total_size;
603 	if (head > 0)
604 		full_sec_size -= secsz;
605 	if (do_tail_write)
606 		full_sec_size -= secsz;
607 
608 	/* Partial sector write requires a bounce buffer. */
609 	if ((head > 0) || do_tail_write || bytes < secsz) {
610 		bouncebuf = malloc(secsz);
611 		if (bouncebuf == NULL) {
612 			printf("vdev_write: out of memory\n");
613 			return (ENOMEM);
614 		}
615 	}
616 
617 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
618 		ret = errno;
619 		goto error;
620 	}
621 
622 	/* Partial data for first sector */
623 	if (head > 0) {
624 		res = read(fd, bouncebuf, secsz);
625 		if ((unsigned)res != secsz) {
626 			ret = EIO;
627 			goto error;
628 		}
629 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
630 		(void) lseek(fd, -secsz, SEEK_CUR);
631 		res = write(fd, bouncebuf, secsz);
632 		if ((unsigned)res != secsz) {
633 			ret = EIO;
634 			goto error;
635 		}
636 		outbuf += min(secsz - head, bytes);
637 	}
638 
639 	/*
640 	 * Full data write to sectors.
641 	 * Note, there is still corner case where we write
642 	 * to sector boundary, but less than sector size, e.g. write 512B
643 	 * to 4k sector.
644 	 */
645 	if (full_sec_size > 0) {
646 		if (bytes < full_sec_size) {
647 			res = read(fd, bouncebuf, secsz);
648 			if ((unsigned)res != secsz) {
649 				ret = EIO;
650 				goto error;
651 			}
652 			memcpy(bouncebuf, outbuf, bytes);
653 			(void) lseek(fd, -secsz, SEEK_CUR);
654 			res = write(fd, bouncebuf, secsz);
655 			if ((unsigned)res != secsz) {
656 				ret = EIO;
657 				goto error;
658 			}
659 		} else {
660 			res = write(fd, outbuf, full_sec_size);
661 			if ((unsigned)res != full_sec_size) {
662 				ret = EIO;
663 				goto error;
664 			}
665 			outbuf += full_sec_size;
666 		}
667 	}
668 
669 	/* Partial data write to last sector */
670 	if (do_tail_write) {
671 		res = read(fd, bouncebuf, secsz);
672 		if ((unsigned)res != secsz) {
673 			ret = EIO;
674 			goto error;
675 		}
676 		memcpy(bouncebuf, outbuf, secsz - tail);
677 		(void) lseek(fd, -secsz, SEEK_CUR);
678 		res = write(fd, bouncebuf, secsz);
679 		if ((unsigned)res != secsz) {
680 			ret = EIO;
681 			goto error;
682 		}
683 	}
684 
685 	ret = 0;
686 error:
687 	free(bouncebuf);
688 	return (ret);
689 }
690 
691 static int
692 zfs_dev_init(void)
693 {
694 	spa_t *spa;
695 	spa_t *next;
696 	spa_t *prev;
697 
698 	zfs_init();
699 	if (archsw.arch_zfs_probe == NULL)
700 		return (ENXIO);
701 	archsw.arch_zfs_probe();
702 
703 	prev = NULL;
704 	spa = STAILQ_FIRST(&zfs_pools);
705 	while (spa != NULL) {
706 		next = STAILQ_NEXT(spa, spa_link);
707 		if (zfs_spa_init(spa)) {
708 			if (prev == NULL)
709 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
710 			else
711 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
712 		} else
713 			prev = spa;
714 		spa = next;
715 	}
716 	return (0);
717 }
718 
/*
 * Context handed through ptable_iterate() to zfs_probe_partition() and
 * through ptable_open() to zfs_diskread().
 */
struct zfs_probe_args {
	int		fd;		/* open descriptor to read from */
	const char	*devname;	/* device name; its last character is dropped when partition names are appended */
	uint64_t	*pool_guid;	/* passed on to zfs_probe(), may be NULL */
	u_int		secsz;		/* sector size in bytes */
};
725 
726 static int
727 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
728 {
729 	struct zfs_probe_args *ppa;
730 
731 	ppa = (struct zfs_probe_args *)arg;
732 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
733 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
734 }
735 
736 static int
737 zfs_probe(int fd, uint64_t *pool_guid)
738 {
739 	spa_t *spa;
740 	int ret;
741 
742 	spa = NULL;
743 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
744 	if (ret == 0 && pool_guid != NULL)
745 		if (*pool_guid == 0)
746 			*pool_guid = spa->spa_guid;
747 	return (ret);
748 }
749 
750 static int
751 zfs_probe_partition(void *arg, const char *partname,
752     const struct ptable_entry *part)
753 {
754 	struct zfs_probe_args *ppa, pa;
755 	struct ptable *table;
756 	char devname[32];
757 	int ret;
758 
759 	/* Probe only freebsd-zfs and freebsd partitions */
760 	if (part->type != PART_FREEBSD &&
761 	    part->type != PART_FREEBSD_ZFS)
762 		return (0);
763 
764 	ppa = (struct zfs_probe_args *)arg;
765 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
766 	devname[strlen(ppa->devname) - 1] = '\0';
767 	snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
768 	pa.fd = open(devname, O_RDWR);
769 	if (pa.fd == -1)
770 		return (0);
771 	ret = zfs_probe(pa.fd, ppa->pool_guid);
772 	if (ret == 0)
773 		return (0);
774 	/* Do we have BSD label here? */
775 	if (part->type == PART_FREEBSD) {
776 		pa.devname = devname;
777 		pa.pool_guid = ppa->pool_guid;
778 		pa.secsz = ppa->secsz;
779 		table = ptable_open(&pa, part->end - part->start + 1,
780 		    ppa->secsz, zfs_diskread);
781 		if (table != NULL) {
782 			ptable_iterate(table, &pa, zfs_probe_partition);
783 			ptable_close(table);
784 		}
785 	}
786 	close(pa.fd);
787 	return (0);
788 }
789 
790 /*
791  * Return bootenv nvlist from pool label.
792  */
793 int
794 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
795 {
796 	spa_t *spa;
797 
798 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
799 		return (ENXIO);
800 
801 	return (zfs_get_bootenv_spa(spa, benvp));
802 }
803 
804 /*
805  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
806  */
807 int
808 zfs_set_bootenv(void *vdev, nvlist_t *benv)
809 {
810 	spa_t *spa;
811 
812 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
813 		return (ENXIO);
814 
815 	return (zfs_set_bootenv_spa(spa, benv));
816 }
817 
818 /*
819  * Get bootonce value by key. The bootonce <key, value> pair is removed
820  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
821  */
822 int
823 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
824 {
825 	spa_t *spa;
826 
827 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
828 		return (ENXIO);
829 
830 	return (zfs_get_bootonce_spa(spa, key, buf, size));
831 }
832 
/*
 * nvstore backend.
 *
 * The values live in the OS_NVSTORE nvlist nested inside the pool's bootenv
 * nvlist (spa->spa_bootenv).  The prototypes below are needed because the
 * environment hooks call the setters before their definitions.
 */

/* Typed setter: (device, data type, name, value pointer, value size). */
static int zfs_nvstore_setter(void *, int, const char *,
    const void *, size_t);
/* Textual setter: (device, type name or NULL, name, value as text). */
static int zfs_nvstore_setter_str(void *, const char *, const char *,
    const char *);
/* Removal: (device, name, whether to discard the environment variable too). */
static int zfs_nvstore_unset_impl(void *, const char *, bool);
/* Create an environment variable from an nvpair: (device, nvpair). */
static int zfs_nvstore_setenv(void *, void *);
843 
844 /*
845  * nvstore is only present for current rootfs pool.
846  */
847 static int
848 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
849 {
850 	struct zfs_devdesc *dev;
851 	int rv;
852 
853 	archsw.arch_getdev((void **)&dev, NULL, NULL);
854 	if (dev == NULL)
855 		return (ENXIO);
856 
857 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
858 
859 	free(dev);
860 	return (rv);
861 }
862 
863 /*
864  * nvstore is only present for current rootfs pool.
865  */
866 static int
867 zfs_nvstore_unsethook(struct env_var *ev)
868 {
869 	struct zfs_devdesc *dev;
870 	int rv;
871 
872 	archsw.arch_getdev((void **)&dev, NULL, NULL);
873 	if (dev == NULL)
874 		return (ENXIO);
875 
876 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
877 
878 	free(dev);
879 	return (rv);
880 }
881 
882 static int
883 zfs_nvstore_getter(void *vdev, const char *name, void **data)
884 {
885 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
886 	spa_t *spa;
887 	nvlist_t *nv;
888 	char *str, **ptr;
889 	int size;
890 	int rv;
891 
892 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
893 		return (ENOTSUP);
894 
895 	if ((spa = spa_find_by_dev(dev)) == NULL)
896 		return (ENXIO);
897 
898 	if (spa->spa_bootenv == NULL)
899 		return (ENXIO);
900 
901 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
902 	    NULL, &nv, NULL) != 0)
903 		return (ENOENT);
904 
905 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
906 	if (rv == 0) {
907 		ptr = (char **)data;
908 		asprintf(ptr, "%.*s", size, str);
909 		if (*data == NULL)
910 			rv = ENOMEM;
911 	}
912 	nvlist_destroy(nv);
913 	return (rv);
914 }
915 
916 static int
917 zfs_nvstore_setter(void *vdev, int type, const char *name,
918     const void *data, size_t size)
919 {
920 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
921 	spa_t *spa;
922 	nvlist_t *nv;
923 	int rv;
924 	bool env_set = true;
925 
926 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
927 		return (ENOTSUP);
928 
929 	if ((spa = spa_find_by_dev(dev)) == NULL)
930 		return (ENXIO);
931 
932 	if (spa->spa_bootenv == NULL)
933 		return (ENXIO);
934 
935 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
936 	    NULL, &nv, NULL) != 0) {
937 		nv = nvlist_create(NV_UNIQUE_NAME);
938 		if (nv == NULL)
939 			return (ENOMEM);
940 	}
941 
942 	rv = 0;
943 	switch (type) {
944         case DATA_TYPE_INT8:
945 		if (size != sizeof (int8_t)) {
946 			rv = EINVAL;
947 			break;
948 		}
949 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
950 		break;
951 
952         case DATA_TYPE_INT16:
953 		if (size != sizeof (int16_t)) {
954 			rv = EINVAL;
955 			break;
956 		}
957 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
958 		break;
959 
960         case DATA_TYPE_INT32:
961 		if (size != sizeof (int32_t)) {
962 			rv = EINVAL;
963 			break;
964 		}
965 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
966 		break;
967 
968         case DATA_TYPE_INT64:
969 		if (size != sizeof (int64_t)) {
970 			rv = EINVAL;
971 			break;
972 		}
973 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
974 		break;
975 
976         case DATA_TYPE_BYTE:
977 		if (size != sizeof (uint8_t)) {
978 			rv = EINVAL;
979 			break;
980 		}
981 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
982 		break;
983 
984         case DATA_TYPE_UINT8:
985 		if (size != sizeof (uint8_t)) {
986 			rv = EINVAL;
987 			break;
988 		}
989 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
990 		break;
991 
992         case DATA_TYPE_UINT16:
993 		if (size != sizeof (uint16_t)) {
994 			rv = EINVAL;
995 			break;
996 		}
997 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
998 		break;
999 
1000         case DATA_TYPE_UINT32:
1001 		if (size != sizeof (uint32_t)) {
1002 			rv = EINVAL;
1003 			break;
1004 		}
1005 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1006 		break;
1007 
1008         case DATA_TYPE_UINT64:
1009 		if (size != sizeof (uint64_t)) {
1010 			rv = EINVAL;
1011 			break;
1012 		}
1013 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1014 		break;
1015 
1016         case DATA_TYPE_STRING:
1017 		rv = nvlist_add_string(nv, name, data);
1018 		break;
1019 
1020 	case DATA_TYPE_BOOLEAN_VALUE:
1021 		if (size != sizeof (boolean_t)) {
1022 			rv = EINVAL;
1023 			break;
1024 		}
1025 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1026 		break;
1027 
1028 	default:
1029 		rv = EINVAL;
1030 		break;
1031 	}
1032 
1033 	if (rv == 0) {
1034 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1035 		if (rv == 0) {
1036 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1037 		}
1038 		if (rv == 0) {
1039 			if (env_set) {
1040 				rv = zfs_nvstore_setenv(vdev,
1041 				    nvpair_find(nv, name));
1042 			} else {
1043 				env_discard(env_getenv(name));
1044 				rv = 0;
1045 			}
1046 		}
1047 	}
1048 
1049 	nvlist_destroy(nv);
1050 	return (rv);
1051 }
1052 
/*
 * Parse the whole of 'data' as a signed integer (base auto-detected by
 * strtoll(): decimal, 0x hex or leading-0 octal) and store it in *ip.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoll() reports an error
 * through errno.  errno is reset to 0 before parsing.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *tail;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &tail, 0);

	if (errno != 0)
		return (EINVAL);
	if (data[0] == '\0' || tail[0] != '\0')
		return (EINVAL);

	*ip = parsed;
	return (0);
}
1067 
/*
 * Parse the whole of 'data' as an unsigned integer (base auto-detected by
 * strtoull(): decimal, 0x hex or leading-0 octal) and store it in *ip.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoull() reports an error
 * through errno.  errno is reset to 0 before parsing.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *tail;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &tail, 0);

	if (errno != 0)
		return (EINVAL);
	if (data[0] == '\0' || tail[0] != '\0')
		return (EINVAL);

	*ip = parsed;
	return (0);
}
1082 
1083 /*
1084  * Translate textual data to data type. If type is not set, and we are
1085  * creating new pair, use DATA_TYPE_STRING.
1086  */
1087 static int
1088 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1089     const char *data)
1090 {
1091 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1092 	spa_t *spa;
1093 	nvlist_t *nv;
1094 	int rv;
1095 	data_type_t dt;
1096 	int64_t val;
1097 	uint64_t uval;
1098 
1099 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1100 		return (ENOTSUP);
1101 
1102 	if ((spa = spa_find_by_dev(dev)) == NULL)
1103 		return (ENXIO);
1104 
1105 	if (spa->spa_bootenv == NULL)
1106 		return (ENXIO);
1107 
1108 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1109 	    NULL, &nv, NULL) != 0) {
1110 		nv = NULL;
1111 	}
1112 
1113 	if (type == NULL) {
1114 		nvp_header_t *nvh;
1115 
1116 		/*
1117 		 * if there is no existing pair, default to string.
1118 		 * Otherwise, use type from existing pair.
1119 		 */
1120 		nvh = nvpair_find(nv, name);
1121 		if (nvh == NULL) {
1122 			dt = DATA_TYPE_STRING;
1123 		} else {
1124 			nv_string_t *nvp_name;
1125 			nv_pair_data_t *nvp_data;
1126 
1127 			nvp_name = (nv_string_t *)(nvh + 1);
1128 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1129 			    NV_ALIGN4(nvp_name->nv_size));
1130 			dt = nvp_data->nv_type;
1131 		}
1132 	} else {
1133 		dt = nvpair_type_from_name(type);
1134 	}
1135 	nvlist_destroy(nv);
1136 
1137 	rv = 0;
1138 	switch (dt) {
1139         case DATA_TYPE_INT8:
1140 		rv = get_int64(data, &val);
1141 		if (rv == 0) {
1142 			int8_t v = val;
1143 
1144 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1145 		}
1146 		break;
1147         case DATA_TYPE_INT16:
1148 		rv = get_int64(data, &val);
1149 		if (rv == 0) {
1150 			int16_t v = val;
1151 
1152 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1153 		}
1154 		break;
1155         case DATA_TYPE_INT32:
1156 		rv = get_int64(data, &val);
1157 		if (rv == 0) {
1158 			int32_t v = val;
1159 
1160 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1161 		}
1162 		break;
1163         case DATA_TYPE_INT64:
1164 		rv = get_int64(data, &val);
1165 		if (rv == 0) {
1166 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1167 			    sizeof (val));
1168 		}
1169 		break;
1170 
1171         case DATA_TYPE_BYTE:
1172 		rv = get_uint64(data, &uval);
1173 		if (rv == 0) {
1174 			uint8_t v = uval;
1175 
1176 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1177 		}
1178 		break;
1179 
1180         case DATA_TYPE_UINT8:
1181 		rv = get_uint64(data, &uval);
1182 		if (rv == 0) {
1183 			uint8_t v = uval;
1184 
1185 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1186 		}
1187 		break;
1188 
1189         case DATA_TYPE_UINT16:
1190 		rv = get_uint64(data, &uval);
1191 		if (rv == 0) {
1192 			uint16_t v = uval;
1193 
1194 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1195 		}
1196 		break;
1197 
1198         case DATA_TYPE_UINT32:
1199 		rv = get_uint64(data, &uval);
1200 		if (rv == 0) {
1201 			uint32_t v = uval;
1202 
1203 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1204 		}
1205 		break;
1206 
1207         case DATA_TYPE_UINT64:
1208 		rv = get_uint64(data, &uval);
1209 		if (rv == 0) {
1210 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1211 			    sizeof (uval));
1212 		}
1213 		break;
1214 
1215         case DATA_TYPE_STRING:
1216 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1217 		break;
1218 
1219 	case DATA_TYPE_BOOLEAN_VALUE:
1220 		rv = get_int64(data, &val);
1221 		if (rv == 0) {
1222 			boolean_t v = val;
1223 
1224 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1225 		}
1226 
1227 	default:
1228 		rv = EINVAL;
1229 	}
1230 	return (rv);
1231 }
1232 
1233 static int
1234 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1235 {
1236 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1237 	spa_t *spa;
1238 	nvlist_t *nv;
1239 	int rv;
1240 
1241 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1242 		return (ENOTSUP);
1243 
1244 	if ((spa = spa_find_by_dev(dev)) == NULL)
1245 		return (ENXIO);
1246 
1247 	if (spa->spa_bootenv == NULL)
1248 		return (ENXIO);
1249 
1250 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1251 	    NULL, &nv, NULL) != 0)
1252 		return (ENOENT);
1253 
1254 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1255 	if (rv == 0) {
1256 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1257 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1258 			    DATA_TYPE_NVLIST);
1259 		} else {
1260 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1261 			    OS_NVSTORE, nv);
1262 		}
1263 		if (rv == 0)
1264 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1265 	}
1266 
1267 	if (unset_env) {
1268 		struct env_var *ev = env_getenv(name);
1269 
1270 		if (ev != NULL)
1271 			env_discard(ev);
1272 	}
1273 	return (rv);
1274 }
1275 
/*
 * nvstore "unset" callback: remove "name" from the nvstore and also discard
 * the matching loader environment variable.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	const bool discard_env = true;

	return (zfs_nvstore_unset_impl(vdev, name, discard_env));
}
1281 
1282 static int
1283 zfs_nvstore_print(void *vdev __unused, void *ptr)
1284 {
1285 
1286 	nvpair_print(ptr, 0);
1287 	return (0);
1288 }
1289 
1290 /*
1291  * Create environment variable from nvpair.
1292  * set hook will update nvstore with new value, unset hook will remove
1293  * variable from nvstore.
1294  */
1295 static int
1296 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1297 {
1298 	nvp_header_t *nvh = ptr;
1299 	nv_string_t *nvp_name, *nvp_value;
1300 	nv_pair_data_t *nvp_data;
1301 	char *name, *value;
1302 	int rv = 0;
1303 
1304 	if (nvh == NULL)
1305 		return (ENOENT);
1306 
1307 	nvp_name = (nv_string_t *)(nvh + 1);
1308 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1309 	    NV_ALIGN4(nvp_name->nv_size));
1310 
1311 	if ((name = nvstring_get(nvp_name)) == NULL)
1312 		return (ENOMEM);
1313 
1314 	value = NULL;
1315 	switch (nvp_data->nv_type) {
1316 	case DATA_TYPE_BYTE:
1317 	case DATA_TYPE_UINT8:
1318 		(void) asprintf(&value, "%uc",
1319 		    *(unsigned *)&nvp_data->nv_data[0]);
1320 		if (value == NULL)
1321 			rv = ENOMEM;
1322 		break;
1323 
1324 	case DATA_TYPE_INT8:
1325 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1326 		if (value == NULL)
1327 			rv = ENOMEM;
1328 		break;
1329 
1330 	case DATA_TYPE_INT16:
1331 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1332 		if (value == NULL)
1333 			rv = ENOMEM;
1334 		break;
1335 
1336 	case DATA_TYPE_UINT16:
1337 		(void) asprintf(&value, "%hu",
1338 		    *(unsigned short *)&nvp_data->nv_data[0]);
1339 		if (value == NULL)
1340 			rv = ENOMEM;
1341 		break;
1342 
1343 	case DATA_TYPE_BOOLEAN_VALUE:
1344 	case DATA_TYPE_INT32:
1345 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1346 		if (value == NULL)
1347 			rv = ENOMEM;
1348 		break;
1349 
1350 	case DATA_TYPE_UINT32:
1351 		(void) asprintf(&value, "%u",
1352 		    *(unsigned *)&nvp_data->nv_data[0]);
1353 		if (value == NULL)
1354 			rv = ENOMEM;
1355 		break;
1356 
1357 	case DATA_TYPE_INT64:
1358 		(void) asprintf(&value, "%jd",
1359 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1360 		if (value == NULL)
1361 			rv = ENOMEM;
1362 		break;
1363 
1364 	case DATA_TYPE_UINT64:
1365 		(void) asprintf(&value, "%ju",
1366 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1367 		if (value == NULL)
1368 			rv = ENOMEM;
1369 		break;
1370 
1371 	case DATA_TYPE_STRING:
1372 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1373 		if ((value = nvstring_get(nvp_value)) == NULL) {
1374 			rv = ENOMEM;
1375 			break;
1376 		}
1377 		break;
1378 
1379 	default:
1380 		rv = EINVAL;
1381 		break;
1382 	}
1383 
1384 	if (value != NULL) {
1385 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1386 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1387 		free(value);
1388 	}
1389 	free(name);
1390 	return (rv);
1391 }
1392 
1393 static int
1394 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1395 {
1396 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1397 	spa_t *spa;
1398 	nvlist_t *nv;
1399 	nvp_header_t *nvh;
1400 	int rv;
1401 
1402 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1403 		return (ENOTSUP);
1404 
1405 	if ((spa = spa_find_by_dev(dev)) == NULL)
1406 		return (ENXIO);
1407 
1408 	if (spa->spa_bootenv == NULL)
1409 		return (ENXIO);
1410 
1411 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1412 	    NULL, &nv, NULL) != 0)
1413 		return (ENOENT);
1414 
1415 	rv = 0;
1416 	nvh = NULL;
1417 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1418 		rv = cb(vdev, nvh);
1419 		if (rv != 0)
1420 			break;
1421 	}
1422 	return (rv);
1423 }
1424 
1425 nvs_callbacks_t nvstore_zfs_cb = {
1426 	.nvs_getter = zfs_nvstore_getter,
1427 	.nvs_setter = zfs_nvstore_setter,
1428 	.nvs_setter_str = zfs_nvstore_setter_str,
1429 	.nvs_unset = zfs_nvstore_unset,
1430 	.nvs_print = zfs_nvstore_print,
1431 	.nvs_iterate = zfs_nvstore_iterate
1432 };
1433 
1434 int
1435 zfs_attach_nvstore(void *vdev)
1436 {
1437 	struct zfs_devdesc *dev = vdev;
1438 	spa_t *spa;
1439 	uint64_t version;
1440 	int rv;
1441 
1442 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1443 		return (ENOTSUP);
1444 
1445 	if ((spa = spa_find_by_dev(dev)) == NULL)
1446 		return (ENXIO);
1447 
1448 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1449 	    NULL, &version, NULL);
1450 
1451 	if (rv != 0 || version != VB_NVLIST) {
1452 		return (ENXIO);
1453 	}
1454 
1455 	dev = malloc(sizeof (*dev));
1456 	if (dev == NULL)
1457 		return (ENOMEM);
1458 	memcpy(dev, vdev, sizeof (*dev));
1459 
1460 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1461 	if (rv != 0)
1462 		free(dev);
1463 	else
1464 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1465 	return (rv);
1466 }
1467 
1468 int
1469 zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool parts_too)
1470 {
1471 	struct ptable *table;
1472 	struct zfs_probe_args pa;
1473 	uint64_t mediasz;
1474 	int ret;
1475 
1476 	if (pool_guid)
1477 		*pool_guid = 0;
1478 	pa.fd = open(devname, O_RDWR);
1479 	if (pa.fd == -1)
1480 		return (ENXIO);
1481 	/* Probe the whole disk */
1482 	ret = zfs_probe(pa.fd, pool_guid);
1483 	if (ret == 0)
1484 		return (0);
1485 	if (!parts_too)
1486 		return (ENXIO);
1487 
1488 	/* Probe each partition */
1489 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1490 	if (ret == 0)
1491 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1492 	if (ret == 0) {
1493 		pa.devname = devname;
1494 		pa.pool_guid = pool_guid;
1495 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1496 		    zfs_diskread);
1497 		if (table != NULL) {
1498 			ptable_iterate(table, &pa, zfs_probe_partition);
1499 			ptable_close(table);
1500 		}
1501 	}
1502 	close(pa.fd);
1503 	if (pool_guid && *pool_guid == 0)
1504 		ret = ENXIO;
1505 	return (ret);
1506 }
1507 
1508 /*
1509  * Print information about ZFS pools
1510  */
1511 static int
1512 zfs_dev_print(int verbose)
1513 {
1514 	spa_t *spa;
1515 	char line[80];
1516 	int ret = 0;
1517 
1518 	if (STAILQ_EMPTY(&zfs_pools))
1519 		return (0);
1520 
1521 	printf("%s devices:", zfs_dev.dv_name);
1522 	if ((ret = pager_output("\n")) != 0)
1523 		return (ret);
1524 
1525 	if (verbose) {
1526 		return (spa_all_status());
1527 	}
1528 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1529 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
1530 		ret = pager_output(line);
1531 		if (ret != 0)
1532 			break;
1533 	}
1534 	return (ret);
1535 }
1536 
1537 /*
1538  * Attempt to open the pool described by (dev) for use by (f).
1539  */
1540 static int
1541 zfs_dev_open(struct open_file *f, ...)
1542 {
1543 	va_list		args;
1544 	struct zfs_devdesc	*dev;
1545 	struct zfsmount	*mount;
1546 	spa_t		*spa;
1547 	int		rv;
1548 
1549 	va_start(args, f);
1550 	dev = va_arg(args, struct zfs_devdesc *);
1551 	va_end(args);
1552 
1553 	if ((spa = spa_find_by_dev(dev)) == NULL)
1554 		return (ENXIO);
1555 
1556 	STAILQ_FOREACH(mount, &zfsmount, next) {
1557 		if (spa->spa_guid == mount->spa->spa_guid)
1558 			break;
1559 	}
1560 
1561 	rv = 0;
1562 	/* This device is not set as currdev, mount us private copy. */
1563 	if (mount == NULL)
1564 		rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount);
1565 
1566 	if (rv == 0) {
1567 		dev->dd.d_opendata = mount;
1568 	}
1569 	return (rv);
1570 }
1571 
1572 static int
1573 zfs_dev_close(struct open_file *f)
1574 {
1575 	struct devdesc *dev;
1576 	struct zfsmount	*mnt, *mount;
1577 
1578 	dev = f->f_devdata;
1579 	mnt = dev->d_opendata;
1580 
1581 	STAILQ_FOREACH(mount, &zfsmount, next) {
1582 		if (mnt->spa->spa_guid == mount->spa->spa_guid)
1583 			break;
1584 	}
1585 
1586 	/* XXX */
1587 	return (0);
1588 }
1589 
1590 static int
1591 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1592 {
1593 
1594 	return (ENOSYS);
1595 }
1596 
1597 struct devsw zfs_dev = {
1598 	.dv_name = "zfs",
1599 	.dv_type = DEVT_ZFS,
1600 	.dv_init = zfs_dev_init,
1601 	.dv_strategy = zfs_dev_strategy,
1602 	.dv_open = zfs_dev_open,
1603 	.dv_close = zfs_dev_close,
1604 	.dv_ioctl = noioctl,
1605 	.dv_print = zfs_dev_print,
1606 	.dv_cleanup = nullsys,
1607 	.dv_fmtdev = zfs_fmtdev,
1608 	.dv_parsedev = zfs_parsedev,
1609 };
1610 
1611 static int
1612 zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path)
1613 {
1614 	static char	rootname[ZFS_MAXNAMELEN];
1615 	static char	poolname[ZFS_MAXNAMELEN];
1616 	spa_t		*spa;
1617 	const char	*end;
1618 	const char	*np;
1619 	const char	*sep;
1620 	int		rv;
1621 	struct zfs_devdesc *dev;
1622 
1623 	np = devspec + 3;			/* Skip the leading 'zfs' */
1624 	if (*np != ':')
1625 		return (EINVAL);
1626 	np++;
1627 	end = strrchr(np, ':');
1628 	if (end == NULL)
1629 		return (EINVAL);
1630 	sep = strchr(np, '/');
1631 	if (sep == NULL || sep >= end)
1632 		sep = end;
1633 	memcpy(poolname, np, sep - np);
1634 	poolname[sep - np] = '\0';
1635 	if (sep < end) {
1636 		sep++;
1637 		memcpy(rootname, sep, end - sep);
1638 		rootname[end - sep] = '\0';
1639 	}
1640 	else
1641 		rootname[0] = '\0';
1642 
1643 	spa = spa_find_by_name(poolname);
1644 	if (!spa)
1645 		return (ENXIO);
1646 	dev = malloc(sizeof(*dev));
1647 	if (dev == NULL)
1648 		return (ENOMEM);
1649 	dev->pool_guid = spa->spa_guid;
1650 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1651 	if (rv != 0) {
1652 		free(dev);
1653 		return (rv);
1654 	}
1655 	if (path != NULL)
1656 		*path = (*end == '\0') ? end : end + 1;
1657 	dev->dd.d_dev = &zfs_dev;
1658 	*idev = &dev->dd;
1659 	return (0);
1660 }
1661 
1662 char *
1663 zfs_fmtdev(struct devdesc *vdev)
1664 {
1665 	static char		rootname[ZFS_MAXNAMELEN];
1666 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1667 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1668 	spa_t			*spa;
1669 
1670 	buf[0] = '\0';
1671 	if (vdev->d_dev->dv_type != DEVT_ZFS)
1672 		return (buf);
1673 
1674 	/* Do we have any pools? */
1675 	spa = STAILQ_FIRST(&zfs_pools);
1676 	if (spa == NULL)
1677 		return (buf);
1678 
1679 	if (dev->pool_guid == 0)
1680 		dev->pool_guid = spa->spa_guid;
1681 	else
1682 		spa = spa_find_by_guid(dev->pool_guid);
1683 
1684 	if (spa == NULL) {
1685 		printf("ZFS: can't find pool by guid\n");
1686 		return (buf);
1687 	}
1688 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1689 		printf("ZFS: can't find root filesystem\n");
1690 		return (buf);
1691 	}
1692 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1693 		printf("ZFS: can't find filesystem by guid\n");
1694 		return (buf);
1695 	}
1696 
1697 	if (rootname[0] == '\0')
1698 		snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1699 		    spa->spa_name);
1700 	else
1701 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1702 		    spa->spa_name, rootname);
1703 	return (buf);
1704 }
1705 
/*
 * Split "<pool>[/<dataset>]" into its pool and dataset parts.
 *
 * The pool part is copied, NUL-terminated, into poolname (a buffer of
 * "size" bytes).  When dsnamep is not NULL, *dsnamep is set to the text
 * after the first '/', or to an empty string when there is none; it points
 * into "name" or at a string literal and must not be freed.
 *
 * Returns 0 on success and EINVAL when the pool part does not fit.
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *rest;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		rest = "";
	} else {
		poollen = slash - name;
		rest = slash + 1;
	}

	/* Need room for the pool name plus its terminator. */
	if (poollen + 1 > size)
		return (EINVAL);

	memcpy(poolname, name, poollen);
	poolname[poollen] = '\0';

	if (dsnamep != NULL)
		*dsnamep = rest;

	return (0);
}
1734 
1735 int
1736 zfs_list(const char *name)
1737 {
1738 	static char	poolname[ZFS_MAXNAMELEN];
1739 	uint64_t	objid;
1740 	spa_t		*spa;
1741 	const char	*dsname;
1742 	int		rv;
1743 
1744 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1745 		return (EINVAL);
1746 
1747 	spa = spa_find_by_name(poolname);
1748 	if (!spa)
1749 		return (ENXIO);
1750 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1751 	if (rv != 0)
1752 		return (rv);
1753 
1754 	return (zfs_list_dataset(spa, objid));
1755 }
1756 
1757 void
1758 init_zfs_boot_options(const char *currdev_in)
1759 {
1760 	char poolname[ZFS_MAXNAMELEN];
1761 	char *beroot, *currdev;
1762 	spa_t *spa;
1763 	int currdev_len;
1764 	const char *dsname;
1765 
1766 	currdev = NULL;
1767 	currdev_len = strlen(currdev_in);
1768 	if (currdev_len == 0)
1769 		return;
1770 	if (strncmp(currdev_in, "zfs:", 4) != 0)
1771 		return;
1772 	currdev = strdup(currdev_in);
1773 	if (currdev == NULL)
1774 		return;
1775 	/* Remove the trailing : */
1776 	currdev[currdev_len - 1] = '\0';
1777 
1778 	setenv("zfs_be_active", currdev, 1);
1779 	setenv("zfs_be_currpage", "1", 1);
1780 	/* Remove the last element (current bootenv) */
1781 	beroot = strrchr(currdev, '/');
1782 	if (beroot != NULL)
1783 		beroot[0] = '\0';
1784 	beroot = strchr(currdev, ':') + 1;
1785 	setenv("zfs_be_root", beroot, 1);
1786 
1787 	if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1788 		return;
1789 
1790 	spa = spa_find_by_name(poolname);
1791 	if (spa == NULL)
1792 		return;
1793 
1794 	zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1795 	zfs_checkpoints_initial(spa, beroot, dsname);
1796 
1797 	free(currdev);
1798 }
1799 
1800 static void
1801 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1802 {
1803 	char envname[32];
1804 
1805 	if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1806 		snprintf(envname, sizeof(envname), "zpool_checkpoint");
1807 		setenv(envname, name, 1);
1808 
1809 		spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1810 		spa->spa_mos = &spa->spa_mos_checkpoint;
1811 
1812 		zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1813 
1814 		spa->spa_uberblock = &spa->spa_uberblock_master;
1815 		spa->spa_mos = &spa->spa_mos_master;
1816 	}
1817 }
1818 
1819 static void
1820 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1821    const char *dsname, int checkpoint)
1822 {
1823 	char		envname[32], envval[256];
1824 	uint64_t	objid;
1825 	int		bootenvs_idx, rv;
1826 
1827 	SLIST_INIT(&zfs_be_head);
1828 	zfs_env_count = 0;
1829 
1830 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1831 	if (rv != 0)
1832 		return;
1833 
1834 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1835 	bootenvs_idx = 0;
1836 	/* Populate the initial environment variables */
1837 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1838 		/* Enumerate all bootenvs for general usage */
1839 		snprintf(envname, sizeof(envname), "%s[%d]",
1840 		    envprefix, bootenvs_idx);
1841 		snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1842 		    checkpoint ? "!" : "", rootname, zfs_be->name);
1843 		rv = setenv(envname, envval, 1);
1844 		if (rv != 0)
1845 			break;
1846 		bootenvs_idx++;
1847 	}
1848 	snprintf(envname, sizeof(envname), "%s_count", envprefix);
1849 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1850 	setenv(envname, envval, 1);
1851 
1852 	/* Clean up the SLIST of ZFS BEs */
1853 	while (!SLIST_EMPTY(&zfs_be_head)) {
1854 		zfs_be = SLIST_FIRST(&zfs_be_head);
1855 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1856 		free(zfs_be->name);
1857 		free(zfs_be);
1858 	}
1859 }
1860 
1861 int
1862 zfs_bootenv(const char *name)
1863 {
1864 	char		poolname[ZFS_MAXNAMELEN], *root;
1865 	const char	*dsname;
1866 	char		becount[4];
1867 	uint64_t	objid;
1868 	spa_t		*spa;
1869 	int		rv, pages, perpage, currpage;
1870 
1871 	if (name == NULL)
1872 		return (EINVAL);
1873 	if ((root = getenv("zfs_be_root")) == NULL)
1874 		return (EINVAL);
1875 
1876 	if (strcmp(name, root) != 0) {
1877 		if (setenv("zfs_be_root", name, 1) != 0)
1878 			return (ENOMEM);
1879 	}
1880 
1881 	SLIST_INIT(&zfs_be_head);
1882 	zfs_env_count = 0;
1883 
1884 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1885 		return (EINVAL);
1886 
1887 	spa = spa_find_by_name(poolname);
1888 	if (!spa)
1889 		return (ENXIO);
1890 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1891 	if (rv != 0)
1892 		return (rv);
1893 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1894 
1895 	/* Calculate and store the number of pages of BEs */
1896 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1897 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1898 	snprintf(becount, 4, "%d", pages);
1899 	if (setenv("zfs_be_pages", becount, 1) != 0)
1900 		return (ENOMEM);
1901 
1902 	/* Roll over the page counter if it has exceeded the maximum */
1903 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1904 	if (currpage > pages) {
1905 		if (setenv("zfs_be_currpage", "1", 1) != 0)
1906 			return (ENOMEM);
1907 	}
1908 
1909 	/* Populate the menu environment variables */
1910 	zfs_set_env();
1911 
1912 	/* Clean up the SLIST of ZFS BEs */
1913 	while (!SLIST_EMPTY(&zfs_be_head)) {
1914 		zfs_be = SLIST_FIRST(&zfs_be_head);
1915 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1916 		free(zfs_be->name);
1917 		free(zfs_be);
1918 	}
1919 
1920 	return (rv);
1921 }
1922 
1923 int
1924 zfs_belist_add(const char *name, uint64_t value __unused)
1925 {
1926 
1927 	/* Skip special datasets that start with a $ character */
1928 	if (strncmp(name, "$", 1) == 0) {
1929 		return (0);
1930 	}
1931 	/* Add the boot environment to the head of the SLIST */
1932 	zfs_be = malloc(sizeof(struct zfs_be_entry));
1933 	if (zfs_be == NULL) {
1934 		return (ENOMEM);
1935 	}
1936 	zfs_be->name = strdup(name);
1937 	if (zfs_be->name == NULL) {
1938 		free(zfs_be);
1939 		return (ENOMEM);
1940 	}
1941 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1942 	zfs_env_count++;
1943 
1944 	return (0);
1945 }
1946 
1947 int
1948 zfs_set_env(void)
1949 {
1950 	char envname[32], envval[256];
1951 	char *beroot, *pagenum;
1952 	int rv, page, ctr;
1953 
1954 	beroot = getenv("zfs_be_root");
1955 	if (beroot == NULL) {
1956 		return (1);
1957 	}
1958 
1959 	pagenum = getenv("zfs_be_currpage");
1960 	if (pagenum != NULL) {
1961 		page = strtol(pagenum, NULL, 10);
1962 	} else {
1963 		page = 1;
1964 	}
1965 
1966 	ctr = 1;
1967 	rv = 0;
1968 	zfs_env_index = ZFS_BE_FIRST;
1969 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1970 		/* Skip to the requested page number */
1971 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1972 			ctr++;
1973 			continue;
1974 		}
1975 
1976 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1977 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1978 		rv = setenv(envname, envval, 1);
1979 		if (rv != 0) {
1980 			break;
1981 		}
1982 
1983 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1984 		rv = setenv(envname, envval, 1);
1985 		if (rv != 0){
1986 			break;
1987 		}
1988 
1989 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1990 		rv = setenv(envname, "set_bootenv", 1);
1991 		if (rv != 0){
1992 			break;
1993 		}
1994 
1995 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1996 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1997 		rv = setenv(envname, envval, 1);
1998 		if (rv != 0){
1999 			break;
2000 		}
2001 
2002 		zfs_env_index++;
2003 		if (zfs_env_index > ZFS_BE_LAST) {
2004 			break;
2005 		}
2006 
2007 	}
2008 
2009 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2010 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2011 		(void)unsetenv(envname);
2012 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2013 		(void)unsetenv(envname);
2014 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2015 		(void)unsetenv(envname);
2016 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2017 		(void)unsetenv(envname);
2018 	}
2019 
2020 	return (rv);
2021 }
2022