xref: /freebsd/stand/libsa/zfs/zfs.c (revision 258a0d760aa8b42899a000e30f610f900a402556)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  *	$FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  *	Stand-alone file reading package.
34  */
35 
36 #include <stand.h>
37 #include <sys/disk.h>
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/queue.h>
41 #include <part.h>
42 #include <stddef.h>
43 #include <stdarg.h>
44 #include <string.h>
45 #include <bootstrap.h>
46 
47 #include "libzfs.h"
48 
49 #include "zfsimpl.c"
50 
51 /* Define the range of indexes to be populated with ZFS Boot Environments */
52 #define		ZFS_BE_FIRST	4
53 #define		ZFS_BE_LAST	8
54 
/* File-system entry points; wired into zfs_fsops below. */
static int	zfs_open(const char *path, struct open_file *f);
static int	zfs_close(struct open_file *f);
static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
static int	zfs_stat(struct open_file *f, struct stat *sb);
static int	zfs_readdir(struct open_file *f, struct dirent *d);
static int	zfs_mount(const char *dev, const char *path, void **data);
static int	zfs_unmount(const char *dev, void *data);

/* Boot environment helpers (defined outside the visible part of the file). */
static void	zfs_bootenv_initial(const char *envname, spa_t *spa,
		    const char *name, const char *dsname, int checkpoint);
static void	zfs_checkpoints_initial(spa_t *spa, const char *name,
		    const char *dsname);

/* Device specification parser; used by zfs_mount(). */
static int	zfs_parsedev(struct devdesc **idev, const char *devspec,
		    const char **path);

/* ZFS device switch entry; zfs_open() only accepts files opened on it. */
struct devsw zfs_dev;
73 
/*
 * File system operations table for ZFS.  All operations are implemented
 * in this file except writing, which is routed to null_write.
 */
struct fs_ops zfs_fsops = {
	.fs_name = "zfs",
	.fo_open = zfs_open,
	.fo_close = zfs_close,
	.fo_read = zfs_read,
	.fo_write = null_write,
	.fo_seek = zfs_seek,
	.fo_stat = zfs_stat,
	.fo_readdir = zfs_readdir,
	.fo_mount = zfs_mount,
	.fo_unmount = zfs_unmount
};
86 
/*
 * In-core open file.
 *
 * One instance is allocated (zeroed) by zfs_open() and stored in
 * open_file.f_fsdata.  The f_zap_* members are only filled in by
 * zfs_readdir() on its first call for a directory.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode found by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
97 
/*
 * Boot environment bookkeeping.
 * NOTE(review): the users of these variables are outside the visible part
 * of the file; presumably they track paging through the boot environment
 * list (see ZFS_BE_FIRST/ZFS_BE_LAST) -- confirm against the callers.
 */
static int	zfs_env_index;
static int	zfs_env_count;

/* Singly-linked list of boot environment entries, each carrying a name. */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;
struct zfs_be_entry {
	char *name;
	SLIST_ENTRY(zfs_be_entry) entries;
} *zfs_be, *zfs_be_tmp;
107 
108 /*
109  * Open a file.
110  */
111 static int
112 zfs_open(const char *upath, struct open_file *f)
113 {
114 	struct devdesc *dev = f->f_devdata;
115 	struct zfsmount *mount = dev->d_opendata;
116 	struct file *fp;
117 	int rc;
118 
119 	if (f->f_dev != &zfs_dev)
120 		return (EINVAL);
121 
122 	/* allocate file system specific data structure */
123 	fp = calloc(1, sizeof(struct file));
124 	if (fp == NULL)
125 		return (ENOMEM);
126 	f->f_fsdata = fp;
127 
128 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
129 	fp->f_seekp = 0;
130 	if (rc) {
131 		f->f_fsdata = NULL;
132 		free(fp);
133 	}
134 	return (rc);
135 }
136 
137 static int
138 zfs_close(struct open_file *f)
139 {
140 	struct file *fp = (struct file *)f->f_fsdata;
141 
142 	dnode_cache_obj = NULL;
143 	f->f_fsdata = NULL;
144 
145 	free(fp);
146 	return (0);
147 }
148 
149 /*
150  * Copy a portion of a file into kernel memory.
151  * Cross block boundaries when necessary.
152  */
153 static int
154 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
155 {
156 	struct devdesc *dev = f->f_devdata;
157 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
158 	struct file *fp = (struct file *)f->f_fsdata;
159 	struct stat sb;
160 	size_t n;
161 	int rc;
162 
163 	rc = zfs_stat(f, &sb);
164 	if (rc)
165 		return (rc);
166 	n = size;
167 	if (fp->f_seekp + n > sb.st_size)
168 		n = sb.st_size - fp->f_seekp;
169 
170 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
171 	if (rc)
172 		return (rc);
173 
174 	if (0) {
175 	    int i;
176 	    for (i = 0; i < n; i++)
177 		putchar(((char*) start)[i]);
178 	}
179 	fp->f_seekp += n;
180 	if (resid)
181 		*resid = size - n;
182 
183 	return (0);
184 }
185 
186 static off_t
187 zfs_seek(struct open_file *f, off_t offset, int where)
188 {
189 	struct file *fp = (struct file *)f->f_fsdata;
190 
191 	switch (where) {
192 	case SEEK_SET:
193 		fp->f_seekp = offset;
194 		break;
195 	case SEEK_CUR:
196 		fp->f_seekp += offset;
197 		break;
198 	case SEEK_END:
199 	    {
200 		struct stat sb;
201 		int error;
202 
203 		error = zfs_stat(f, &sb);
204 		if (error != 0) {
205 			errno = error;
206 			return (-1);
207 		}
208 		fp->f_seekp = sb.st_size - offset;
209 		break;
210 	    }
211 	default:
212 		errno = EINVAL;
213 		return (-1);
214 	}
215 	return (fp->f_seekp);
216 }
217 
218 static int
219 zfs_stat(struct open_file *f, struct stat *sb)
220 {
221 	struct devdesc *dev = f->f_devdata;
222 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
223 	struct file *fp = (struct file *)f->f_fsdata;
224 
225 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
226 }
227 
228 static int
229 zfs_readdir(struct open_file *f, struct dirent *d)
230 {
231 	struct devdesc *dev = f->f_devdata;
232 	const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa;
233 	struct file *fp = (struct file *)f->f_fsdata;
234 	mzap_ent_phys_t mze;
235 	struct stat sb;
236 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
237 	int rc;
238 
239 	rc = zfs_stat(f, &sb);
240 	if (rc)
241 		return (rc);
242 	if (!S_ISDIR(sb.st_mode))
243 		return (ENOTDIR);
244 
245 	/*
246 	 * If this is the first read, get the zap type.
247 	 */
248 	if (fp->f_seekp == 0) {
249 		rc = dnode_read(spa, &fp->f_dnode,
250 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
251 		if (rc)
252 			return (rc);
253 
254 		if (fp->f_zap_type == ZBT_MICRO) {
255 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
256 		} else {
257 			rc = dnode_read(spa, &fp->f_dnode,
258 					offsetof(zap_phys_t, zap_num_leafs),
259 					&fp->f_num_leafs,
260 					sizeof(fp->f_num_leafs));
261 			if (rc)
262 				return (rc);
263 
264 			fp->f_seekp = bsize;
265 			fp->f_zap_leaf = malloc(bsize);
266 			if (fp->f_zap_leaf == NULL)
267 				return (ENOMEM);
268 			rc = dnode_read(spa, &fp->f_dnode,
269 					fp->f_seekp,
270 					fp->f_zap_leaf,
271 					bsize);
272 			if (rc)
273 				return (rc);
274 		}
275 	}
276 
277 	if (fp->f_zap_type == ZBT_MICRO) {
278 	mzap_next:
279 		if (fp->f_seekp >= bsize)
280 			return (ENOENT);
281 
282 		rc = dnode_read(spa, &fp->f_dnode,
283 				fp->f_seekp, &mze, sizeof(mze));
284 		if (rc)
285 			return (rc);
286 		fp->f_seekp += sizeof(mze);
287 
288 		if (!mze.mze_name[0])
289 			goto mzap_next;
290 
291 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
292 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
293 		strcpy(d->d_name, mze.mze_name);
294 		d->d_namlen = strlen(d->d_name);
295 		return (0);
296 	} else {
297 		zap_leaf_t zl;
298 		zap_leaf_chunk_t *zc, *nc;
299 		int chunk;
300 		size_t namelen;
301 		char *p;
302 		uint64_t value;
303 
304 		/*
305 		 * Initialise this so we can use the ZAP size
306 		 * calculating macros.
307 		 */
308 		zl.l_bs = ilog2(bsize);
309 		zl.l_phys = fp->f_zap_leaf;
310 
311 		/*
312 		 * Figure out which chunk we are currently looking at
313 		 * and consider seeking to the next leaf. We use the
314 		 * low bits of f_seekp as a simple chunk index.
315 		 */
316 	fzap_next:
317 		chunk = fp->f_seekp & (bsize - 1);
318 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
319 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
320 			chunk = 0;
321 
322 			/*
323 			 * Check for EOF and read the new leaf.
324 			 */
325 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
326 				return (ENOENT);
327 
328 			rc = dnode_read(spa, &fp->f_dnode,
329 					fp->f_seekp,
330 					fp->f_zap_leaf,
331 					bsize);
332 			if (rc)
333 				return (rc);
334 		}
335 
336 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
337 		fp->f_seekp++;
338 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
339 			goto fzap_next;
340 
341 		namelen = zc->l_entry.le_name_numints;
342 		if (namelen > sizeof(d->d_name))
343 			namelen = sizeof(d->d_name);
344 
345 		/*
346 		 * Paste the name back together.
347 		 */
348 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
349 		p = d->d_name;
350 		while (namelen > 0) {
351 			int len;
352 			len = namelen;
353 			if (len > ZAP_LEAF_ARRAY_BYTES)
354 				len = ZAP_LEAF_ARRAY_BYTES;
355 			memcpy(p, nc->l_array.la_array, len);
356 			p += len;
357 			namelen -= len;
358 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
359 		}
360 		d->d_name[sizeof(d->d_name) - 1] = 0;
361 
362 		/*
363 		 * Assume the first eight bytes of the value are
364 		 * a uint64_t.
365 		 */
366 		value = fzap_leaf_value(&zl, zc);
367 
368 		d->d_fileno = ZFS_DIRENT_OBJ(value);
369 		d->d_type = ZFS_DIRENT_TYPE(value);
370 		d->d_namlen = strlen(d->d_name);
371 
372 		return (0);
373 	}
374 }
375 
376 static spa_t *
377 spa_find_by_dev(struct zfs_devdesc *dev)
378 {
379 
380 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
381 		return (NULL);
382 
383 	if (dev->pool_guid == 0)
384 		return (STAILQ_FIRST(&zfs_pools));
385 
386 	return (spa_find_by_guid(dev->pool_guid));
387 }
388 
389 /*
390  * if path is NULL, create mount structure, but do not add it to list.
391  */
392 static int
393 zfs_mount(const char *dev, const char *path, void **data)
394 {
395 	struct zfs_devdesc *zfsdev = NULL;
396 	spa_t *spa;
397 	struct zfsmount *mnt = NULL;
398 	int rv;
399 
400 	errno = 0;
401 	rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL);
402 	if (rv != 0) {
403 		return (rv);
404 	}
405 
406 	spa = spa_find_by_dev(zfsdev);
407 	if (spa == NULL) {
408 		rv = ENXIO;
409 		goto err;
410 	}
411 
412 	mnt = calloc(1, sizeof(*mnt));
413 	if (mnt == NULL) {
414 		rv = ENOMEM;
415 		goto err;
416 	}
417 
418 	if (mnt->path != NULL) {
419 		mnt->path = strdup(path);
420 		if (mnt->path == NULL) {
421 			rv = ENOMEM;
422 			goto err;
423 		}
424 	}
425 
426 	rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt);
427 
428 	if (rv == 0 && mnt->objset.os_type != DMU_OST_ZFS) {
429 		printf("Unexpected object set type %ju\n",
430 		    (uintmax_t)mnt->objset.os_type);
431 		rv = EIO;
432 	}
433 err:
434 	if (rv != 0) {
435 		if (mnt != NULL)
436 			free(mnt->path);
437 		free(mnt);
438 		free(zfsdev);
439 		return (rv);
440 	}
441 
442 	*data = mnt;
443 	if (path != NULL)
444 		STAILQ_INSERT_TAIL(&zfsmount, mnt, next);
445 
446 	free(zfsdev);
447 
448 	return (rv);
449 }
450 
451 static int
452 zfs_unmount(const char *dev, void *data)
453 {
454 	struct zfsmount *mnt = data;
455 
456 	STAILQ_REMOVE(&zfsmount, mnt, zfsmount, next);
457 	free(mnt->path);
458 	free(mnt);
459 	return (0);
460 }
461 
462 static int
463 vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
464 {
465 	int fd, ret;
466 	size_t res, head, tail, total_size, full_sec_size;
467 	unsigned secsz, do_tail_read;
468 	off_t start_sec;
469 	char *outbuf, *bouncebuf;
470 
471 	fd = (uintptr_t) priv;
472 	outbuf = (char *) buf;
473 	bouncebuf = NULL;
474 
475 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
476 	if (ret != 0)
477 		return (ret);
478 
479 	/*
480 	 * Handling reads of arbitrary offset and size - multi-sector case
481 	 * and single-sector case.
482 	 *
483 	 *                        Multi-sector Case
484 	 *                (do_tail_read = true if tail > 0)
485 	 *
486 	 *   |<----------------------total_size--------------------->|
487 	 *   |                                                       |
488 	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
489 	 *   |          |                                 |          |
490 	 *   |          |       |<~full_sec_size~>|       |          |
491 	 *   +------------------+                 +------------------+
492 	 *   |          |0101010|     .  .  .     |0101011|          |
493 	 *   +------------------+                 +------------------+
494 	 *         start_sec                         start_sec + n
495 	 *
496 	 *
497 	 *                      Single-sector Case
498 	 *                    (do_tail_read = false)
499 	 *
500 	 *              |<------total_size = secsz----->|
501 	 *              |                               |
502 	 *              |<-head->|<---bytes--->|<-tail->|
503 	 *              +-------------------------------+
504 	 *              |        |0101010101010|        |
505 	 *              +-------------------------------+
506 	 *                          start_sec
507 	 */
508 	start_sec = offset / secsz;
509 	head = offset % secsz;
510 	total_size = roundup2(head + bytes, secsz);
511 	tail = total_size - (head + bytes);
512 	do_tail_read = ((tail > 0) && (head + bytes > secsz));
513 	full_sec_size = total_size;
514 	if (head > 0)
515 		full_sec_size -= secsz;
516 	if (do_tail_read)
517 		full_sec_size -= secsz;
518 
519 	/* Return of partial sector data requires a bounce buffer. */
520 	if ((head > 0) || do_tail_read || bytes < secsz) {
521 		bouncebuf = malloc(secsz);
522 		if (bouncebuf == NULL) {
523 			printf("vdev_read: out of memory\n");
524 			return (ENOMEM);
525 		}
526 	}
527 
528 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
529 		ret = errno;
530 		goto error;
531 	}
532 
533 	/* Partial data return from first sector */
534 	if (head > 0) {
535 		res = read(fd, bouncebuf, secsz);
536 		if (res != secsz) {
537 			ret = EIO;
538 			goto error;
539 		}
540 		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
541 		outbuf += min(secsz - head, bytes);
542 	}
543 
544 	/*
545 	 * Full data return from read sectors.
546 	 * Note, there is still corner case where we read
547 	 * from sector boundary, but less than sector size, e.g. reading 512B
548 	 * from 4k sector.
549 	 */
550 	if (full_sec_size > 0) {
551 		if (bytes < full_sec_size) {
552 			res = read(fd, bouncebuf, secsz);
553 			if (res != secsz) {
554 				ret = EIO;
555 				goto error;
556 			}
557 			memcpy(outbuf, bouncebuf, bytes);
558 		} else {
559 			res = read(fd, outbuf, full_sec_size);
560 			if (res != full_sec_size) {
561 				ret = EIO;
562 				goto error;
563 			}
564 			outbuf += full_sec_size;
565 		}
566 	}
567 
568 	/* Partial data return from last sector */
569 	if (do_tail_read) {
570 		res = read(fd, bouncebuf, secsz);
571 		if (res != secsz) {
572 			ret = EIO;
573 			goto error;
574 		}
575 		memcpy(outbuf, bouncebuf, secsz - tail);
576 	}
577 
578 	ret = 0;
579 error:
580 	free(bouncebuf);
581 	return (ret);
582 }
583 
584 static int
585 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
586 {
587 	int fd, ret;
588 	size_t head, tail, total_size, full_sec_size;
589 	unsigned secsz, do_tail_write;
590 	off_t start_sec;
591 	ssize_t res;
592 	char *outbuf, *bouncebuf;
593 
594 	fd = (uintptr_t)vdev->v_priv;
595 	outbuf = (char *)buf;
596 	bouncebuf = NULL;
597 
598 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
599 	if (ret != 0)
600 		return (ret);
601 
602 	start_sec = offset / secsz;
603 	head = offset % secsz;
604 	total_size = roundup2(head + bytes, secsz);
605 	tail = total_size - (head + bytes);
606 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
607 	full_sec_size = total_size;
608 	if (head > 0)
609 		full_sec_size -= secsz;
610 	if (do_tail_write)
611 		full_sec_size -= secsz;
612 
613 	/* Partial sector write requires a bounce buffer. */
614 	if ((head > 0) || do_tail_write || bytes < secsz) {
615 		bouncebuf = malloc(secsz);
616 		if (bouncebuf == NULL) {
617 			printf("vdev_write: out of memory\n");
618 			return (ENOMEM);
619 		}
620 	}
621 
622 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
623 		ret = errno;
624 		goto error;
625 	}
626 
627 	/* Partial data for first sector */
628 	if (head > 0) {
629 		res = read(fd, bouncebuf, secsz);
630 		if ((unsigned)res != secsz) {
631 			ret = EIO;
632 			goto error;
633 		}
634 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
635 		(void) lseek(fd, -secsz, SEEK_CUR);
636 		res = write(fd, bouncebuf, secsz);
637 		if ((unsigned)res != secsz) {
638 			ret = EIO;
639 			goto error;
640 		}
641 		outbuf += min(secsz - head, bytes);
642 	}
643 
644 	/*
645 	 * Full data write to sectors.
646 	 * Note, there is still corner case where we write
647 	 * to sector boundary, but less than sector size, e.g. write 512B
648 	 * to 4k sector.
649 	 */
650 	if (full_sec_size > 0) {
651 		if (bytes < full_sec_size) {
652 			res = read(fd, bouncebuf, secsz);
653 			if ((unsigned)res != secsz) {
654 				ret = EIO;
655 				goto error;
656 			}
657 			memcpy(bouncebuf, outbuf, bytes);
658 			(void) lseek(fd, -secsz, SEEK_CUR);
659 			res = write(fd, bouncebuf, secsz);
660 			if ((unsigned)res != secsz) {
661 				ret = EIO;
662 				goto error;
663 			}
664 		} else {
665 			res = write(fd, outbuf, full_sec_size);
666 			if ((unsigned)res != full_sec_size) {
667 				ret = EIO;
668 				goto error;
669 			}
670 			outbuf += full_sec_size;
671 		}
672 	}
673 
674 	/* Partial data write to last sector */
675 	if (do_tail_write) {
676 		res = read(fd, bouncebuf, secsz);
677 		if ((unsigned)res != secsz) {
678 			ret = EIO;
679 			goto error;
680 		}
681 		memcpy(bouncebuf, outbuf, secsz - tail);
682 		(void) lseek(fd, -secsz, SEEK_CUR);
683 		res = write(fd, bouncebuf, secsz);
684 		if ((unsigned)res != secsz) {
685 			ret = EIO;
686 			goto error;
687 		}
688 	}
689 
690 	ret = 0;
691 error:
692 	free(bouncebuf);
693 	return (ret);
694 }
695 
696 static int
697 zfs_dev_init(void)
698 {
699 	spa_t *spa;
700 	spa_t *next;
701 	spa_t *prev;
702 
703 	zfs_init();
704 	if (archsw.arch_zfs_probe == NULL)
705 		return (ENXIO);
706 	archsw.arch_zfs_probe();
707 
708 	prev = NULL;
709 	spa = STAILQ_FIRST(&zfs_pools);
710 	while (spa != NULL) {
711 		next = STAILQ_NEXT(spa, spa_link);
712 		if (zfs_spa_init(spa)) {
713 			if (prev == NULL)
714 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
715 			else
716 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
717 		} else
718 			prev = spa;
719 		spa = next;
720 	}
721 	return (0);
722 }
723 
/*
 * Argument bundle handed to the partition probing callbacks
 * (zfs_diskread() and zfs_probe_partition()).
 */
struct zfs_probe_args {
	int		fd;		/* open descriptor of the device */
	const char	*devname;	/* name the device was opened with */
	uint64_t	*pool_guid;	/* in/out: see zfs_probe() */
	u_int		secsz;		/* sector size in bytes */
};
730 
731 static int
732 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
733 {
734 	struct zfs_probe_args *ppa;
735 
736 	ppa = (struct zfs_probe_args *)arg;
737 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
738 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
739 }
740 
741 static int
742 zfs_probe(int fd, uint64_t *pool_guid)
743 {
744 	spa_t *spa;
745 	int ret;
746 
747 	spa = NULL;
748 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
749 	if (ret == 0 && pool_guid != NULL)
750 		if (*pool_guid == 0)
751 			*pool_guid = spa->spa_guid;
752 	return (ret);
753 }
754 
755 static int
756 zfs_probe_partition(void *arg, const char *partname,
757     const struct ptable_entry *part)
758 {
759 	struct zfs_probe_args *ppa, pa;
760 	struct ptable *table;
761 	char devname[32];
762 	int ret;
763 
764 	/* Probe only freebsd-zfs and freebsd partitions */
765 	if (part->type != PART_FREEBSD &&
766 	    part->type != PART_FREEBSD_ZFS)
767 		return (0);
768 
769 	ppa = (struct zfs_probe_args *)arg;
770 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
771 	devname[strlen(ppa->devname) - 1] = '\0';
772 	snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
773 	pa.fd = open(devname, O_RDWR);
774 	if (pa.fd == -1)
775 		return (0);
776 	ret = zfs_probe(pa.fd, ppa->pool_guid);
777 	if (ret == 0)
778 		return (0);
779 	/* Do we have BSD label here? */
780 	if (part->type == PART_FREEBSD) {
781 		pa.devname = devname;
782 		pa.pool_guid = ppa->pool_guid;
783 		pa.secsz = ppa->secsz;
784 		table = ptable_open(&pa, part->end - part->start + 1,
785 		    ppa->secsz, zfs_diskread);
786 		if (table != NULL) {
787 			ptable_iterate(table, &pa, zfs_probe_partition);
788 			ptable_close(table);
789 		}
790 	}
791 	close(pa.fd);
792 	return (0);
793 }
794 
795 /*
796  * Return bootenv nvlist from pool label.
797  */
798 int
799 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
800 {
801 	spa_t *spa;
802 
803 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
804 		return (ENXIO);
805 
806 	return (zfs_get_bootenv_spa(spa, benvp));
807 }
808 
809 /*
810  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
811  */
812 int
813 zfs_set_bootenv(void *vdev, nvlist_t *benv)
814 {
815 	spa_t *spa;
816 
817 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
818 		return (ENXIO);
819 
820 	return (zfs_set_bootenv_spa(spa, benv));
821 }
822 
823 /*
824  * Get bootonce value by key. The bootonce <key, value> pair is removed
825  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
826  */
827 int
828 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
829 {
830 	spa_t *spa;
831 
832 	if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL)
833 		return (ENXIO);
834 
835 	return (zfs_get_bootonce_spa(spa, key, buf, size));
836 }
837 
838 /*
839  * nvstore backend.
840  */
841 
/* Forward declarations: these are used before their definitions below. */
static int zfs_nvstore_setter(void *, int, const char *,
    const void *, size_t);
static int zfs_nvstore_setter_str(void *, const char *, const char *,
    const char *);
static int zfs_nvstore_unset_impl(void *, const char *, bool);
static int zfs_nvstore_setenv(void *, void *);
848 
849 /*
850  * nvstore is only present for current rootfs pool.
851  */
852 static int
853 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
854 {
855 	struct zfs_devdesc *dev;
856 	int rv;
857 
858 	archsw.arch_getdev((void **)&dev, NULL, NULL);
859 	if (dev == NULL)
860 		return (ENXIO);
861 
862 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
863 
864 	free(dev);
865 	return (rv);
866 }
867 
868 /*
869  * nvstore is only present for current rootfs pool.
870  */
871 static int
872 zfs_nvstore_unsethook(struct env_var *ev)
873 {
874 	struct zfs_devdesc *dev;
875 	int rv;
876 
877 	archsw.arch_getdev((void **)&dev, NULL, NULL);
878 	if (dev == NULL)
879 		return (ENXIO);
880 
881 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
882 
883 	free(dev);
884 	return (rv);
885 }
886 
887 static int
888 zfs_nvstore_getter(void *vdev, const char *name, void **data)
889 {
890 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
891 	spa_t *spa;
892 	nvlist_t *nv;
893 	char *str, **ptr;
894 	int size;
895 	int rv;
896 
897 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
898 		return (ENOTSUP);
899 
900 	if ((spa = spa_find_by_dev(dev)) == NULL)
901 		return (ENXIO);
902 
903 	if (spa->spa_bootenv == NULL)
904 		return (ENXIO);
905 
906 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
907 	    NULL, &nv, NULL) != 0)
908 		return (ENOENT);
909 
910 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
911 	if (rv == 0) {
912 		ptr = (char **)data;
913 		asprintf(ptr, "%.*s", size, str);
914 		if (*data == NULL)
915 			rv = ENOMEM;
916 	}
917 	nvlist_destroy(nv);
918 	return (rv);
919 }
920 
921 static int
922 zfs_nvstore_setter(void *vdev, int type, const char *name,
923     const void *data, size_t size)
924 {
925 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
926 	spa_t *spa;
927 	nvlist_t *nv;
928 	int rv;
929 	bool env_set = true;
930 
931 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
932 		return (ENOTSUP);
933 
934 	if ((spa = spa_find_by_dev(dev)) == NULL)
935 		return (ENXIO);
936 
937 	if (spa->spa_bootenv == NULL)
938 		return (ENXIO);
939 
940 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
941 	    NULL, &nv, NULL) != 0) {
942 		nv = nvlist_create(NV_UNIQUE_NAME);
943 		if (nv == NULL)
944 			return (ENOMEM);
945 	}
946 
947 	rv = 0;
948 	switch (type) {
949         case DATA_TYPE_INT8:
950 		if (size != sizeof (int8_t)) {
951 			rv = EINVAL;
952 			break;
953 		}
954 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
955 		break;
956 
957         case DATA_TYPE_INT16:
958 		if (size != sizeof (int16_t)) {
959 			rv = EINVAL;
960 			break;
961 		}
962 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
963 		break;
964 
965         case DATA_TYPE_INT32:
966 		if (size != sizeof (int32_t)) {
967 			rv = EINVAL;
968 			break;
969 		}
970 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
971 		break;
972 
973         case DATA_TYPE_INT64:
974 		if (size != sizeof (int64_t)) {
975 			rv = EINVAL;
976 			break;
977 		}
978 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
979 		break;
980 
981         case DATA_TYPE_BYTE:
982 		if (size != sizeof (uint8_t)) {
983 			rv = EINVAL;
984 			break;
985 		}
986 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
987 		break;
988 
989         case DATA_TYPE_UINT8:
990 		if (size != sizeof (uint8_t)) {
991 			rv = EINVAL;
992 			break;
993 		}
994 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
995 		break;
996 
997         case DATA_TYPE_UINT16:
998 		if (size != sizeof (uint16_t)) {
999 			rv = EINVAL;
1000 			break;
1001 		}
1002 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
1003 		break;
1004 
1005         case DATA_TYPE_UINT32:
1006 		if (size != sizeof (uint32_t)) {
1007 			rv = EINVAL;
1008 			break;
1009 		}
1010 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
1011 		break;
1012 
1013         case DATA_TYPE_UINT64:
1014 		if (size != sizeof (uint64_t)) {
1015 			rv = EINVAL;
1016 			break;
1017 		}
1018 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
1019 		break;
1020 
1021         case DATA_TYPE_STRING:
1022 		rv = nvlist_add_string(nv, name, data);
1023 		break;
1024 
1025 	case DATA_TYPE_BOOLEAN_VALUE:
1026 		if (size != sizeof (boolean_t)) {
1027 			rv = EINVAL;
1028 			break;
1029 		}
1030 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
1031 		break;
1032 
1033 	default:
1034 		rv = EINVAL;
1035 		break;
1036 	}
1037 
1038 	if (rv == 0) {
1039 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
1040 		if (rv == 0) {
1041 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1042 		}
1043 		if (rv == 0) {
1044 			if (env_set) {
1045 				rv = zfs_nvstore_setenv(vdev,
1046 				    nvpair_find(nv, name));
1047 			} else {
1048 				env_discard(env_getenv(name));
1049 				rv = 0;
1050 			}
1051 		}
1052 	}
1053 
1054 	nvlist_destroy(nv);
1055 	return (rv);
1056 }
1057 
/*
 * Parse the whole string data as a signed integer (base auto-detected by
 * strtoll: decimal, 0x hex, leading-0 octal) and store it in *ip.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoll() reports an
 * error such as overflow.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *stop;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &stop, 0);

	if (errno != 0)
		return (EINVAL);
	if (*data == '\0' || *stop != '\0')
		return (EINVAL);

	*ip = parsed;
	return (0);
}
1072 
/*
 * Parse the whole string data as an unsigned integer (base auto-detected
 * by strtoull: decimal, 0x hex, leading-0 octal) and store it in *ip.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *ip untouched, when the
 * string is empty, has trailing characters, or strtoull() reports an
 * error such as overflow.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *stop;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &stop, 0);

	if (errno != 0)
		return (EINVAL);
	if (*data == '\0' || *stop != '\0')
		return (EINVAL);

	*ip = parsed;
	return (0);
}
1087 
1088 /*
1089  * Translate textual data to data type. If type is not set, and we are
1090  * creating new pair, use DATA_TYPE_STRING.
1091  */
1092 static int
1093 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1094     const char *data)
1095 {
1096 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1097 	spa_t *spa;
1098 	nvlist_t *nv;
1099 	int rv;
1100 	data_type_t dt;
1101 	int64_t val;
1102 	uint64_t uval;
1103 
1104 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1105 		return (ENOTSUP);
1106 
1107 	if ((spa = spa_find_by_dev(dev)) == NULL)
1108 		return (ENXIO);
1109 
1110 	if (spa->spa_bootenv == NULL)
1111 		return (ENXIO);
1112 
1113 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1114 	    NULL, &nv, NULL) != 0) {
1115 		nv = NULL;
1116 	}
1117 
1118 	if (type == NULL) {
1119 		nvp_header_t *nvh;
1120 
1121 		/*
1122 		 * if there is no existing pair, default to string.
1123 		 * Otherwise, use type from existing pair.
1124 		 */
1125 		nvh = nvpair_find(nv, name);
1126 		if (nvh == NULL) {
1127 			dt = DATA_TYPE_STRING;
1128 		} else {
1129 			nv_string_t *nvp_name;
1130 			nv_pair_data_t *nvp_data;
1131 
1132 			nvp_name = (nv_string_t *)(nvh + 1);
1133 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1134 			    NV_ALIGN4(nvp_name->nv_size));
1135 			dt = nvp_data->nv_type;
1136 		}
1137 	} else {
1138 		dt = nvpair_type_from_name(type);
1139 	}
1140 	nvlist_destroy(nv);
1141 
1142 	rv = 0;
1143 	switch (dt) {
1144         case DATA_TYPE_INT8:
1145 		rv = get_int64(data, &val);
1146 		if (rv == 0) {
1147 			int8_t v = val;
1148 
1149 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1150 		}
1151 		break;
1152         case DATA_TYPE_INT16:
1153 		rv = get_int64(data, &val);
1154 		if (rv == 0) {
1155 			int16_t v = val;
1156 
1157 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1158 		}
1159 		break;
1160         case DATA_TYPE_INT32:
1161 		rv = get_int64(data, &val);
1162 		if (rv == 0) {
1163 			int32_t v = val;
1164 
1165 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1166 		}
1167 		break;
1168         case DATA_TYPE_INT64:
1169 		rv = get_int64(data, &val);
1170 		if (rv == 0) {
1171 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1172 			    sizeof (val));
1173 		}
1174 		break;
1175 
1176         case DATA_TYPE_BYTE:
1177 		rv = get_uint64(data, &uval);
1178 		if (rv == 0) {
1179 			uint8_t v = uval;
1180 
1181 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1182 		}
1183 		break;
1184 
1185         case DATA_TYPE_UINT8:
1186 		rv = get_uint64(data, &uval);
1187 		if (rv == 0) {
1188 			uint8_t v = uval;
1189 
1190 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1191 		}
1192 		break;
1193 
1194         case DATA_TYPE_UINT16:
1195 		rv = get_uint64(data, &uval);
1196 		if (rv == 0) {
1197 			uint16_t v = uval;
1198 
1199 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1200 		}
1201 		break;
1202 
1203         case DATA_TYPE_UINT32:
1204 		rv = get_uint64(data, &uval);
1205 		if (rv == 0) {
1206 			uint32_t v = uval;
1207 
1208 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1209 		}
1210 		break;
1211 
1212         case DATA_TYPE_UINT64:
1213 		rv = get_uint64(data, &uval);
1214 		if (rv == 0) {
1215 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1216 			    sizeof (uval));
1217 		}
1218 		break;
1219 
1220         case DATA_TYPE_STRING:
1221 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1222 		break;
1223 
1224 	case DATA_TYPE_BOOLEAN_VALUE:
1225 		rv = get_int64(data, &val);
1226 		if (rv == 0) {
1227 			boolean_t v = val;
1228 
1229 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1230 		}
1231 
1232 	default:
1233 		rv = EINVAL;
1234 	}
1235 	return (rv);
1236 }
1237 
1238 static int
1239 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1240 {
1241 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1242 	spa_t *spa;
1243 	nvlist_t *nv;
1244 	int rv;
1245 
1246 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1247 		return (ENOTSUP);
1248 
1249 	if ((spa = spa_find_by_dev(dev)) == NULL)
1250 		return (ENXIO);
1251 
1252 	if (spa->spa_bootenv == NULL)
1253 		return (ENXIO);
1254 
1255 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1256 	    NULL, &nv, NULL) != 0)
1257 		return (ENOENT);
1258 
1259 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1260 	if (rv == 0) {
1261 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1262 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1263 			    DATA_TYPE_NVLIST);
1264 		} else {
1265 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1266 			    OS_NVSTORE, nv);
1267 		}
1268 		if (rv == 0)
1269 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1270 	}
1271 
1272 	if (unset_env)
1273 		env_discard(env_getenv(name));
1274 	return (rv);
1275 }
1276 
/*
 * nvstore "unset" callback: remove name from the store and discard the
 * environment variable of the same name too (unset_env == true).
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	int rv;

	rv = zfs_nvstore_unset_impl(vdev, name, true);
	return (rv);
}
1282 
1283 static int
1284 zfs_nvstore_print(void *vdev __unused, void *ptr)
1285 {
1286 
1287 	nvpair_print(ptr, 0);
1288 	return (0);
1289 }
1290 
1291 /*
1292  * Create environment variable from nvpair.
1293  * set hook will update nvstore with new value, unset hook will remove
1294  * variable from nvstore.
1295  */
1296 static int
1297 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1298 {
1299 	nvp_header_t *nvh = ptr;
1300 	nv_string_t *nvp_name, *nvp_value;
1301 	nv_pair_data_t *nvp_data;
1302 	char *name, *value;
1303 	int rv = 0;
1304 
1305 	if (nvh == NULL)
1306 		return (ENOENT);
1307 
1308 	nvp_name = (nv_string_t *)(nvh + 1);
1309 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1310 	    NV_ALIGN4(nvp_name->nv_size));
1311 
1312 	if ((name = nvstring_get(nvp_name)) == NULL)
1313 		return (ENOMEM);
1314 
1315 	value = NULL;
1316 	switch (nvp_data->nv_type) {
1317 	case DATA_TYPE_BYTE:
1318 	case DATA_TYPE_UINT8:
1319 		(void) asprintf(&value, "%uc",
1320 		    *(unsigned *)&nvp_data->nv_data[0]);
1321 		if (value == NULL)
1322 			rv = ENOMEM;
1323 		break;
1324 
1325 	case DATA_TYPE_INT8:
1326 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1327 		if (value == NULL)
1328 			rv = ENOMEM;
1329 		break;
1330 
1331 	case DATA_TYPE_INT16:
1332 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1333 		if (value == NULL)
1334 			rv = ENOMEM;
1335 		break;
1336 
1337 	case DATA_TYPE_UINT16:
1338 		(void) asprintf(&value, "%hu",
1339 		    *(unsigned short *)&nvp_data->nv_data[0]);
1340 		if (value == NULL)
1341 			rv = ENOMEM;
1342 		break;
1343 
1344 	case DATA_TYPE_BOOLEAN_VALUE:
1345 	case DATA_TYPE_INT32:
1346 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1347 		if (value == NULL)
1348 			rv = ENOMEM;
1349 		break;
1350 
1351 	case DATA_TYPE_UINT32:
1352 		(void) asprintf(&value, "%u",
1353 		    *(unsigned *)&nvp_data->nv_data[0]);
1354 		if (value == NULL)
1355 			rv = ENOMEM;
1356 		break;
1357 
1358 	case DATA_TYPE_INT64:
1359 		(void) asprintf(&value, "%jd",
1360 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1361 		if (value == NULL)
1362 			rv = ENOMEM;
1363 		break;
1364 
1365 	case DATA_TYPE_UINT64:
1366 		(void) asprintf(&value, "%ju",
1367 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1368 		if (value == NULL)
1369 			rv = ENOMEM;
1370 		break;
1371 
1372 	case DATA_TYPE_STRING:
1373 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1374 		if ((value = nvstring_get(nvp_value)) == NULL) {
1375 			rv = ENOMEM;
1376 			break;
1377 		}
1378 		break;
1379 
1380 	default:
1381 		rv = EINVAL;
1382 		break;
1383 	}
1384 
1385 	if (value != NULL) {
1386 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1387 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1388 		free(value);
1389 	}
1390 	free(name);
1391 	return (rv);
1392 }
1393 
1394 static int
1395 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1396 {
1397 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1398 	spa_t *spa;
1399 	nvlist_t *nv;
1400 	nvp_header_t *nvh;
1401 	int rv;
1402 
1403 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1404 		return (ENOTSUP);
1405 
1406 	if ((spa = spa_find_by_dev(dev)) == NULL)
1407 		return (ENXIO);
1408 
1409 	if (spa->spa_bootenv == NULL)
1410 		return (ENXIO);
1411 
1412 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1413 	    NULL, &nv, NULL) != 0)
1414 		return (ENOENT);
1415 
1416 	rv = 0;
1417 	nvh = NULL;
1418 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1419 		rv = cb(vdev, nvh);
1420 		if (rv != 0)
1421 			break;
1422 	}
1423 	return (rv);
1424 }
1425 
1426 nvs_callbacks_t nvstore_zfs_cb = {
1427 	.nvs_getter = zfs_nvstore_getter,
1428 	.nvs_setter = zfs_nvstore_setter,
1429 	.nvs_setter_str = zfs_nvstore_setter_str,
1430 	.nvs_unset = zfs_nvstore_unset,
1431 	.nvs_print = zfs_nvstore_print,
1432 	.nvs_iterate = zfs_nvstore_iterate
1433 };
1434 
1435 int
1436 zfs_attach_nvstore(void *vdev)
1437 {
1438 	struct zfs_devdesc *dev = vdev;
1439 	spa_t *spa;
1440 	uint64_t version;
1441 	int rv;
1442 
1443 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1444 		return (ENOTSUP);
1445 
1446 	if ((spa = spa_find_by_dev(dev)) == NULL)
1447 		return (ENXIO);
1448 
1449 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1450 	    NULL, &version, NULL);
1451 
1452 	if (rv != 0 || version != VB_NVLIST) {
1453 		return (ENXIO);
1454 	}
1455 
1456 	dev = malloc(sizeof (*dev));
1457 	if (dev == NULL)
1458 		return (ENOMEM);
1459 	memcpy(dev, vdev, sizeof (*dev));
1460 
1461 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1462 	if (rv != 0)
1463 		free(dev);
1464 	else
1465 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1466 	return (rv);
1467 }
1468 
1469 int
1470 zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool parts_too)
1471 {
1472 	struct ptable *table;
1473 	struct zfs_probe_args pa;
1474 	uint64_t mediasz;
1475 	int ret;
1476 
1477 	if (pool_guid)
1478 		*pool_guid = 0;
1479 	pa.fd = open(devname, O_RDWR);
1480 	if (pa.fd == -1)
1481 		return (ENXIO);
1482 	/* Probe the whole disk */
1483 	ret = zfs_probe(pa.fd, pool_guid);
1484 	if (ret == 0)
1485 		return (0);
1486 	if (!parts_too)
1487 		return (ENXIO);
1488 
1489 	/* Probe each partition */
1490 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1491 	if (ret == 0)
1492 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1493 	if (ret == 0) {
1494 		pa.devname = devname;
1495 		pa.pool_guid = pool_guid;
1496 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1497 		    zfs_diskread);
1498 		if (table != NULL) {
1499 			ptable_iterate(table, &pa, zfs_probe_partition);
1500 			ptable_close(table);
1501 		}
1502 	}
1503 	close(pa.fd);
1504 	if (pool_guid && *pool_guid == 0)
1505 		ret = ENXIO;
1506 	return (ret);
1507 }
1508 
1509 /*
1510  * Print information about ZFS pools
1511  */
1512 static int
1513 zfs_dev_print(int verbose)
1514 {
1515 	spa_t *spa;
1516 	char line[80];
1517 	int ret = 0;
1518 
1519 	if (STAILQ_EMPTY(&zfs_pools))
1520 		return (0);
1521 
1522 	printf("%s devices:", zfs_dev.dv_name);
1523 	if ((ret = pager_output("\n")) != 0)
1524 		return (ret);
1525 
1526 	if (verbose) {
1527 		return (spa_all_status());
1528 	}
1529 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1530 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
1531 		ret = pager_output(line);
1532 		if (ret != 0)
1533 			break;
1534 	}
1535 	return (ret);
1536 }
1537 
1538 /*
1539  * Attempt to open the pool described by (dev) for use by (f).
1540  */
1541 static int
1542 zfs_dev_open(struct open_file *f, ...)
1543 {
1544 	va_list		args;
1545 	struct zfs_devdesc	*dev;
1546 	struct zfsmount	*mount;
1547 	spa_t		*spa;
1548 	int		rv;
1549 
1550 	va_start(args, f);
1551 	dev = va_arg(args, struct zfs_devdesc *);
1552 	va_end(args);
1553 
1554 	if ((spa = spa_find_by_dev(dev)) == NULL)
1555 		return (ENXIO);
1556 
1557 	STAILQ_FOREACH(mount, &zfsmount, next) {
1558 		if (spa->spa_guid == mount->spa->spa_guid)
1559 			break;
1560 	}
1561 
1562 	rv = 0;
1563 	/* This device is not set as currdev, mount us private copy. */
1564 	if (mount == NULL)
1565 		rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount);
1566 
1567 	if (rv == 0) {
1568 		dev->dd.d_opendata = mount;
1569 	}
1570 	return (rv);
1571 }
1572 
1573 static int
1574 zfs_dev_close(struct open_file *f)
1575 {
1576 	struct devdesc *dev;
1577 	struct zfsmount	*mnt, *mount;
1578 
1579 	dev = f->f_devdata;
1580 	mnt = dev->d_opendata;
1581 
1582 	STAILQ_FOREACH(mount, &zfsmount, next) {
1583 		if (mnt->spa->spa_guid == mount->spa->spa_guid)
1584 			break;
1585 	}
1586 
1587 	/* XXX */
1588 	return (0);
1589 }
1590 
1591 static int
1592 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1593 {
1594 
1595 	return (ENOSYS);
1596 }
1597 
1598 struct devsw zfs_dev = {
1599 	.dv_name = "zfs",
1600 	.dv_type = DEVT_ZFS,
1601 	.dv_init = zfs_dev_init,
1602 	.dv_strategy = zfs_dev_strategy,
1603 	.dv_open = zfs_dev_open,
1604 	.dv_close = zfs_dev_close,
1605 	.dv_ioctl = noioctl,
1606 	.dv_print = zfs_dev_print,
1607 	.dv_cleanup = nullsys,
1608 	.dv_fmtdev = zfs_fmtdev,
1609 	.dv_parsedev = zfs_parsedev,
1610 };
1611 
1612 static int
1613 zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path)
1614 {
1615 	static char	rootname[ZFS_MAXNAMELEN];
1616 	static char	poolname[ZFS_MAXNAMELEN];
1617 	spa_t		*spa;
1618 	const char	*end;
1619 	const char	*np;
1620 	const char	*sep;
1621 	int		rv;
1622 	struct zfs_devdesc *dev;
1623 
1624 	np = devspec + 3;			/* Skip the leading 'zfs' */
1625 	if (*np != ':')
1626 		return (EINVAL);
1627 	np++;
1628 	end = strrchr(np, ':');
1629 	if (end == NULL)
1630 		return (EINVAL);
1631 	sep = strchr(np, '/');
1632 	if (sep == NULL || sep >= end)
1633 		sep = end;
1634 	memcpy(poolname, np, sep - np);
1635 	poolname[sep - np] = '\0';
1636 	if (sep < end) {
1637 		sep++;
1638 		memcpy(rootname, sep, end - sep);
1639 		rootname[end - sep] = '\0';
1640 	}
1641 	else
1642 		rootname[0] = '\0';
1643 
1644 	spa = spa_find_by_name(poolname);
1645 	if (!spa)
1646 		return (ENXIO);
1647 	dev = malloc(sizeof(*dev));
1648 	if (dev == NULL)
1649 		return (ENOMEM);
1650 	dev->pool_guid = spa->spa_guid;
1651 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1652 	if (rv != 0) {
1653 		free(dev);
1654 		return (rv);
1655 	}
1656 	if (path != NULL)
1657 		*path = (*end == '\0') ? end : end + 1;
1658 	dev->dd.d_dev = &zfs_dev;
1659 	*idev = &dev->dd;
1660 	return (0);
1661 }
1662 
1663 char *
1664 zfs_fmtdev(struct devdesc *vdev)
1665 {
1666 	static char		rootname[ZFS_MAXNAMELEN];
1667 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1668 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1669 	spa_t			*spa;
1670 
1671 	buf[0] = '\0';
1672 	if (vdev->d_dev->dv_type != DEVT_ZFS)
1673 		return (buf);
1674 
1675 	/* Do we have any pools? */
1676 	spa = STAILQ_FIRST(&zfs_pools);
1677 	if (spa == NULL)
1678 		return (buf);
1679 
1680 	if (dev->pool_guid == 0)
1681 		dev->pool_guid = spa->spa_guid;
1682 	else
1683 		spa = spa_find_by_guid(dev->pool_guid);
1684 
1685 	if (spa == NULL) {
1686 		printf("ZFS: can't find pool by guid\n");
1687 		return (buf);
1688 	}
1689 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1690 		printf("ZFS: can't find root filesystem\n");
1691 		return (buf);
1692 	}
1693 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1694 		printf("ZFS: can't find filesystem by guid\n");
1695 		return (buf);
1696 	}
1697 
1698 	if (rootname[0] == '\0')
1699 		snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1700 		    spa->spa_name);
1701 	else
1702 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1703 		    spa->spa_name, rootname);
1704 	return (buf);
1705 }
1706 
/*
 * Split "pool[/dataset]" at the first '/'.
 *
 * The pool part is copied into poolname (a buffer of size bytes) and, when
 * dsnamep is not NULL, *dsnamep is pointed at the dataset part inside name,
 * or at "" when name contains no '/'.
 *
 * Returns 0 on success and EINVAL when the pool part plus its terminator
 * does not fit into poolname.
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *dsname;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		dsname = "";
	} else {
		poollen = slash - name;
		dsname = slash + 1;
	}

	if (poollen + 1 > size)
		return (EINVAL);

	memcpy(poolname, name, poollen);
	poolname[poollen] = '\0';

	if (dsnamep != NULL)
		*dsnamep = dsname;

	return (0);
}
1735 
1736 int
1737 zfs_list(const char *name)
1738 {
1739 	static char	poolname[ZFS_MAXNAMELEN];
1740 	uint64_t	objid;
1741 	spa_t		*spa;
1742 	const char	*dsname;
1743 	int		rv;
1744 
1745 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1746 		return (EINVAL);
1747 
1748 	spa = spa_find_by_name(poolname);
1749 	if (!spa)
1750 		return (ENXIO);
1751 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1752 	if (rv != 0)
1753 		return (rv);
1754 
1755 	return (zfs_list_dataset(spa, objid));
1756 }
1757 
1758 void
1759 init_zfs_boot_options(const char *currdev_in)
1760 {
1761 	char poolname[ZFS_MAXNAMELEN];
1762 	char *beroot, *currdev;
1763 	spa_t *spa;
1764 	int currdev_len;
1765 	const char *dsname;
1766 
1767 	currdev = NULL;
1768 	currdev_len = strlen(currdev_in);
1769 	if (currdev_len == 0)
1770 		return;
1771 	if (strncmp(currdev_in, "zfs:", 4) != 0)
1772 		return;
1773 	currdev = strdup(currdev_in);
1774 	if (currdev == NULL)
1775 		return;
1776 	/* Remove the trailing : */
1777 	currdev[currdev_len - 1] = '\0';
1778 
1779 	setenv("zfs_be_active", currdev, 1);
1780 	setenv("zfs_be_currpage", "1", 1);
1781 	/* Remove the last element (current bootenv) */
1782 	beroot = strrchr(currdev, '/');
1783 	if (beroot != NULL)
1784 		beroot[0] = '\0';
1785 	beroot = strchr(currdev, ':') + 1;
1786 	setenv("zfs_be_root", beroot, 1);
1787 
1788 	if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1789 		return;
1790 
1791 	spa = spa_find_by_name(poolname);
1792 	if (spa == NULL)
1793 		return;
1794 
1795 	zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1796 	zfs_checkpoints_initial(spa, beroot, dsname);
1797 
1798 	free(currdev);
1799 }
1800 
1801 static void
1802 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1803 {
1804 	char envname[32];
1805 
1806 	if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1807 		snprintf(envname, sizeof(envname), "zpool_checkpoint");
1808 		setenv(envname, name, 1);
1809 
1810 		spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1811 		spa->spa_mos = &spa->spa_mos_checkpoint;
1812 
1813 		zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1814 
1815 		spa->spa_uberblock = &spa->spa_uberblock_master;
1816 		spa->spa_mos = &spa->spa_mos_master;
1817 	}
1818 }
1819 
1820 static void
1821 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1822    const char *dsname, int checkpoint)
1823 {
1824 	char		envname[32], envval[256];
1825 	uint64_t	objid;
1826 	int		bootenvs_idx, rv;
1827 
1828 	SLIST_INIT(&zfs_be_head);
1829 	zfs_env_count = 0;
1830 
1831 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1832 	if (rv != 0)
1833 		return;
1834 
1835 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1836 	bootenvs_idx = 0;
1837 	/* Populate the initial environment variables */
1838 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1839 		/* Enumerate all bootenvs for general usage */
1840 		snprintf(envname, sizeof(envname), "%s[%d]",
1841 		    envprefix, bootenvs_idx);
1842 		snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1843 		    checkpoint ? "!" : "", rootname, zfs_be->name);
1844 		rv = setenv(envname, envval, 1);
1845 		if (rv != 0)
1846 			break;
1847 		bootenvs_idx++;
1848 	}
1849 	snprintf(envname, sizeof(envname), "%s_count", envprefix);
1850 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1851 	setenv(envname, envval, 1);
1852 
1853 	/* Clean up the SLIST of ZFS BEs */
1854 	while (!SLIST_EMPTY(&zfs_be_head)) {
1855 		zfs_be = SLIST_FIRST(&zfs_be_head);
1856 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1857 		free(zfs_be->name);
1858 		free(zfs_be);
1859 	}
1860 }
1861 
1862 int
1863 zfs_bootenv(const char *name)
1864 {
1865 	char		poolname[ZFS_MAXNAMELEN], *root;
1866 	const char	*dsname;
1867 	char		becount[4];
1868 	uint64_t	objid;
1869 	spa_t		*spa;
1870 	int		rv, pages, perpage, currpage;
1871 
1872 	if (name == NULL)
1873 		return (EINVAL);
1874 	if ((root = getenv("zfs_be_root")) == NULL)
1875 		return (EINVAL);
1876 
1877 	if (strcmp(name, root) != 0) {
1878 		if (setenv("zfs_be_root", name, 1) != 0)
1879 			return (ENOMEM);
1880 	}
1881 
1882 	SLIST_INIT(&zfs_be_head);
1883 	zfs_env_count = 0;
1884 
1885 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1886 		return (EINVAL);
1887 
1888 	spa = spa_find_by_name(poolname);
1889 	if (!spa)
1890 		return (ENXIO);
1891 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1892 	if (rv != 0)
1893 		return (rv);
1894 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1895 
1896 	/* Calculate and store the number of pages of BEs */
1897 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1898 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1899 	snprintf(becount, 4, "%d", pages);
1900 	if (setenv("zfs_be_pages", becount, 1) != 0)
1901 		return (ENOMEM);
1902 
1903 	/* Roll over the page counter if it has exceeded the maximum */
1904 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1905 	if (currpage > pages) {
1906 		if (setenv("zfs_be_currpage", "1", 1) != 0)
1907 			return (ENOMEM);
1908 	}
1909 
1910 	/* Populate the menu environment variables */
1911 	zfs_set_env();
1912 
1913 	/* Clean up the SLIST of ZFS BEs */
1914 	while (!SLIST_EMPTY(&zfs_be_head)) {
1915 		zfs_be = SLIST_FIRST(&zfs_be_head);
1916 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1917 		free(zfs_be->name);
1918 		free(zfs_be);
1919 	}
1920 
1921 	return (rv);
1922 }
1923 
1924 int
1925 zfs_belist_add(const char *name, uint64_t value __unused)
1926 {
1927 
1928 	/* Skip special datasets that start with a $ character */
1929 	if (strncmp(name, "$", 1) == 0) {
1930 		return (0);
1931 	}
1932 	/* Add the boot environment to the head of the SLIST */
1933 	zfs_be = malloc(sizeof(struct zfs_be_entry));
1934 	if (zfs_be == NULL) {
1935 		return (ENOMEM);
1936 	}
1937 	zfs_be->name = strdup(name);
1938 	if (zfs_be->name == NULL) {
1939 		free(zfs_be);
1940 		return (ENOMEM);
1941 	}
1942 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1943 	zfs_env_count++;
1944 
1945 	return (0);
1946 }
1947 
1948 int
1949 zfs_set_env(void)
1950 {
1951 	char envname[32], envval[256];
1952 	char *beroot, *pagenum;
1953 	int rv, page, ctr;
1954 
1955 	beroot = getenv("zfs_be_root");
1956 	if (beroot == NULL) {
1957 		return (1);
1958 	}
1959 
1960 	pagenum = getenv("zfs_be_currpage");
1961 	if (pagenum != NULL) {
1962 		page = strtol(pagenum, NULL, 10);
1963 	} else {
1964 		page = 1;
1965 	}
1966 
1967 	ctr = 1;
1968 	rv = 0;
1969 	zfs_env_index = ZFS_BE_FIRST;
1970 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1971 		/* Skip to the requested page number */
1972 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1973 			ctr++;
1974 			continue;
1975 		}
1976 
1977 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1978 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1979 		rv = setenv(envname, envval, 1);
1980 		if (rv != 0) {
1981 			break;
1982 		}
1983 
1984 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1985 		rv = setenv(envname, envval, 1);
1986 		if (rv != 0){
1987 			break;
1988 		}
1989 
1990 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1991 		rv = setenv(envname, "set_bootenv", 1);
1992 		if (rv != 0){
1993 			break;
1994 		}
1995 
1996 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1997 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1998 		rv = setenv(envname, envval, 1);
1999 		if (rv != 0){
2000 			break;
2001 		}
2002 
2003 		zfs_env_index++;
2004 		if (zfs_env_index > ZFS_BE_LAST) {
2005 			break;
2006 		}
2007 
2008 	}
2009 
2010 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
2011 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
2012 		(void)unsetenv(envname);
2013 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
2014 		(void)unsetenv(envname);
2015 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
2016 		(void)unsetenv(envname);
2017 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
2018 		(void)unsetenv(envname);
2019 	}
2020 
2021 	return (rv);
2022 }
2023