xref: /freebsd/stand/libsa/zfs/zfs.c (revision a3d9bf49b57923118c339642594246ef73872ee8)
1 /*-
2  * Copyright (c) 2007 Doug Rabson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  *	$FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /*
33  *	Stand-alone file reading package.
34  */
35 
36 #include <stand.h>
37 #include <sys/disk.h>
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/queue.h>
41 #include <disk.h>
42 #include <part.h>
43 #include <stddef.h>
44 #include <stdarg.h>
45 #include <string.h>
46 #include <bootstrap.h>
47 
48 #include "libzfs.h"
49 
50 #include "zfsimpl.c"
51 
/*
 * Define the range of indexes to be populated with ZFS Boot Environments.
 * ZFS_BE_FIRST and ZFS_BE_LAST are the two ends of that index range.
 */
#define		ZFS_BE_FIRST	4
#define		ZFS_BE_LAST	8
55 
/* File system entry points; these are the handlers installed in zfs_fsops. */
static int	zfs_open(const char *path, struct open_file *f);
static int	zfs_close(struct open_file *f);
static int	zfs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
static off_t	zfs_seek(struct open_file *f, off_t offset, int where);
static int	zfs_stat(struct open_file *f, struct stat *sb);
static int	zfs_readdir(struct open_file *f, struct dirent *d);

/* Forward declarations of the boot environment and checkpoint helpers. */
static void	zfs_bootenv_initial(const char *envname, spa_t *spa,
		    const char *name, const char *dsname, int checkpoint);
static void	zfs_checkpoints_initial(spa_t *spa, const char *name,
		    const char *dsname);
67 
/* ZFS device switch entry; zfs_open() only accepts files whose f_dev is this. */
struct devsw zfs_dev;

/*
 * File system operations table for ZFS.  The positional initializers are
 * the file system name followed by the open, close, read, write, seek,
 * stat and readdir handlers; the write slot is filled with null_write.
 */
struct fs_ops zfs_fsops = {
	"zfs",
	zfs_open,
	zfs_close,
	zfs_read,
	null_write,
	zfs_seek,
	zfs_stat,
	zfs_readdir
};
80 
/*
 * In-core open file.
 *
 * One instance is allocated (zero-filled) by zfs_open() and hung off
 * open_file.f_fsdata.  The zap fields are only meaningful once
 * zfs_readdir() has performed its first read of a directory.
 */
struct file {
	off_t		f_seekp;	/* seek pointer */
	dnode_phys_t	f_dnode;	/* dnode filled in by zfs_lookup() */
	uint64_t	f_zap_type;	/* zap type for readdir */
	uint64_t	f_num_leafs;	/* number of fzap leaf blocks */
	zap_leaf_phys_t	*f_zap_leaf;	/* zap leaf buffer */
};
91 
/*
 * NOTE(review): presumably the current position and total count used while
 * paging through boot environments -- confirm against the users of these.
 */
static int	zfs_env_index;
static int	zfs_env_count;

/*
 * Singly-linked list of boot environment entries.  Each entry carries a
 * name pointer; zfs_be and zfs_be_tmp are file-level cursor variables of
 * the entry type.
 */
SLIST_HEAD(zfs_be_list, zfs_be_entry) zfs_be_head = SLIST_HEAD_INITIALIZER(zfs_be_head);
struct zfs_be_list *zfs_be_headp;
struct zfs_be_entry {
	char *name;
	SLIST_ENTRY(zfs_be_entry) entries;
} *zfs_be, *zfs_be_tmp;
101 
102 /*
103  * Open a file.
104  */
105 static int
106 zfs_open(const char *upath, struct open_file *f)
107 {
108 	struct zfsmount *mount = (struct zfsmount *)f->f_devdata;
109 	struct file *fp;
110 	int rc;
111 
112 	if (f->f_dev != &zfs_dev)
113 		return (EINVAL);
114 
115 	/* allocate file system specific data structure */
116 	fp = calloc(1, sizeof(struct file));
117 	if (fp == NULL)
118 		return (ENOMEM);
119 	f->f_fsdata = fp;
120 
121 	rc = zfs_lookup(mount, upath, &fp->f_dnode);
122 	fp->f_seekp = 0;
123 	if (rc) {
124 		f->f_fsdata = NULL;
125 		free(fp);
126 	}
127 	return (rc);
128 }
129 
130 static int
131 zfs_close(struct open_file *f)
132 {
133 	struct file *fp = (struct file *)f->f_fsdata;
134 
135 	dnode_cache_obj = NULL;
136 	f->f_fsdata = NULL;
137 
138 	free(fp);
139 	return (0);
140 }
141 
142 /*
143  * Copy a portion of a file into kernel memory.
144  * Cross block boundaries when necessary.
145  */
146 static int
147 zfs_read(struct open_file *f, void *start, size_t size, size_t *resid	/* out */)
148 {
149 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
150 	struct file *fp = (struct file *)f->f_fsdata;
151 	struct stat sb;
152 	size_t n;
153 	int rc;
154 
155 	rc = zfs_stat(f, &sb);
156 	if (rc)
157 		return (rc);
158 	n = size;
159 	if (fp->f_seekp + n > sb.st_size)
160 		n = sb.st_size - fp->f_seekp;
161 
162 	rc = dnode_read(spa, &fp->f_dnode, fp->f_seekp, start, n);
163 	if (rc)
164 		return (rc);
165 
166 	if (0) {
167 	    int i;
168 	    for (i = 0; i < n; i++)
169 		putchar(((char*) start)[i]);
170 	}
171 	fp->f_seekp += n;
172 	if (resid)
173 		*resid = size - n;
174 
175 	return (0);
176 }
177 
178 static off_t
179 zfs_seek(struct open_file *f, off_t offset, int where)
180 {
181 	struct file *fp = (struct file *)f->f_fsdata;
182 
183 	switch (where) {
184 	case SEEK_SET:
185 		fp->f_seekp = offset;
186 		break;
187 	case SEEK_CUR:
188 		fp->f_seekp += offset;
189 		break;
190 	case SEEK_END:
191 	    {
192 		struct stat sb;
193 		int error;
194 
195 		error = zfs_stat(f, &sb);
196 		if (error != 0) {
197 			errno = error;
198 			return (-1);
199 		}
200 		fp->f_seekp = sb.st_size - offset;
201 		break;
202 	    }
203 	default:
204 		errno = EINVAL;
205 		return (-1);
206 	}
207 	return (fp->f_seekp);
208 }
209 
210 static int
211 zfs_stat(struct open_file *f, struct stat *sb)
212 {
213 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
214 	struct file *fp = (struct file *)f->f_fsdata;
215 
216 	return (zfs_dnode_stat(spa, &fp->f_dnode, sb));
217 }
218 
219 static int
220 zfs_readdir(struct open_file *f, struct dirent *d)
221 {
222 	const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa;
223 	struct file *fp = (struct file *)f->f_fsdata;
224 	mzap_ent_phys_t mze;
225 	struct stat sb;
226 	size_t bsize = fp->f_dnode.dn_datablkszsec << SPA_MINBLOCKSHIFT;
227 	int rc;
228 
229 	rc = zfs_stat(f, &sb);
230 	if (rc)
231 		return (rc);
232 	if (!S_ISDIR(sb.st_mode))
233 		return (ENOTDIR);
234 
235 	/*
236 	 * If this is the first read, get the zap type.
237 	 */
238 	if (fp->f_seekp == 0) {
239 		rc = dnode_read(spa, &fp->f_dnode,
240 				0, &fp->f_zap_type, sizeof(fp->f_zap_type));
241 		if (rc)
242 			return (rc);
243 
244 		if (fp->f_zap_type == ZBT_MICRO) {
245 			fp->f_seekp = offsetof(mzap_phys_t, mz_chunk);
246 		} else {
247 			rc = dnode_read(spa, &fp->f_dnode,
248 					offsetof(zap_phys_t, zap_num_leafs),
249 					&fp->f_num_leafs,
250 					sizeof(fp->f_num_leafs));
251 			if (rc)
252 				return (rc);
253 
254 			fp->f_seekp = bsize;
255 			fp->f_zap_leaf = malloc(bsize);
256 			if (fp->f_zap_leaf == NULL)
257 				return (ENOMEM);
258 			rc = dnode_read(spa, &fp->f_dnode,
259 					fp->f_seekp,
260 					fp->f_zap_leaf,
261 					bsize);
262 			if (rc)
263 				return (rc);
264 		}
265 	}
266 
267 	if (fp->f_zap_type == ZBT_MICRO) {
268 	mzap_next:
269 		if (fp->f_seekp >= bsize)
270 			return (ENOENT);
271 
272 		rc = dnode_read(spa, &fp->f_dnode,
273 				fp->f_seekp, &mze, sizeof(mze));
274 		if (rc)
275 			return (rc);
276 		fp->f_seekp += sizeof(mze);
277 
278 		if (!mze.mze_name[0])
279 			goto mzap_next;
280 
281 		d->d_fileno = ZFS_DIRENT_OBJ(mze.mze_value);
282 		d->d_type = ZFS_DIRENT_TYPE(mze.mze_value);
283 		strcpy(d->d_name, mze.mze_name);
284 		d->d_namlen = strlen(d->d_name);
285 		return (0);
286 	} else {
287 		zap_leaf_t zl;
288 		zap_leaf_chunk_t *zc, *nc;
289 		int chunk;
290 		size_t namelen;
291 		char *p;
292 		uint64_t value;
293 
294 		/*
295 		 * Initialise this so we can use the ZAP size
296 		 * calculating macros.
297 		 */
298 		zl.l_bs = ilog2(bsize);
299 		zl.l_phys = fp->f_zap_leaf;
300 
301 		/*
302 		 * Figure out which chunk we are currently looking at
303 		 * and consider seeking to the next leaf. We use the
304 		 * low bits of f_seekp as a simple chunk index.
305 		 */
306 	fzap_next:
307 		chunk = fp->f_seekp & (bsize - 1);
308 		if (chunk == ZAP_LEAF_NUMCHUNKS(&zl)) {
309 			fp->f_seekp = rounddown2(fp->f_seekp, bsize) + bsize;
310 			chunk = 0;
311 
312 			/*
313 			 * Check for EOF and read the new leaf.
314 			 */
315 			if (fp->f_seekp >= bsize * fp->f_num_leafs)
316 				return (ENOENT);
317 
318 			rc = dnode_read(spa, &fp->f_dnode,
319 					fp->f_seekp,
320 					fp->f_zap_leaf,
321 					bsize);
322 			if (rc)
323 				return (rc);
324 		}
325 
326 		zc = &ZAP_LEAF_CHUNK(&zl, chunk);
327 		fp->f_seekp++;
328 		if (zc->l_entry.le_type != ZAP_CHUNK_ENTRY)
329 			goto fzap_next;
330 
331 		namelen = zc->l_entry.le_name_numints;
332 		if (namelen > sizeof(d->d_name))
333 			namelen = sizeof(d->d_name);
334 
335 		/*
336 		 * Paste the name back together.
337 		 */
338 		nc = &ZAP_LEAF_CHUNK(&zl, zc->l_entry.le_name_chunk);
339 		p = d->d_name;
340 		while (namelen > 0) {
341 			int len;
342 			len = namelen;
343 			if (len > ZAP_LEAF_ARRAY_BYTES)
344 				len = ZAP_LEAF_ARRAY_BYTES;
345 			memcpy(p, nc->l_array.la_array, len);
346 			p += len;
347 			namelen -= len;
348 			nc = &ZAP_LEAF_CHUNK(&zl, nc->l_array.la_next);
349 		}
350 		d->d_name[sizeof(d->d_name) - 1] = 0;
351 
352 		/*
353 		 * Assume the first eight bytes of the value are
354 		 * a uint64_t.
355 		 */
356 		value = fzap_leaf_value(&zl, zc);
357 
358 		d->d_fileno = ZFS_DIRENT_OBJ(value);
359 		d->d_type = ZFS_DIRENT_TYPE(value);
360 		d->d_namlen = strlen(d->d_name);
361 
362 		return (0);
363 	}
364 }
365 
/*
 * Read bytes bytes at byte offset offset from the device whose file
 * descriptor is carried in priv, storing them in buf.  Neither offset
 * nor bytes has to be a multiple of the device sector size: partial
 * sectors are read into a temporary bounce buffer and only the wanted
 * part is copied out.  The vdev argument is not used by this function.
 *
 * Returns 0 on success, the ioctl() result when the sector size query
 * fails, ENOMEM when the bounce buffer cannot be allocated, the errno
 * from a failed lseek(), or EIO on a short read.
 */
static int
vdev_read(vdev_t *vdev, void *priv, off_t offset, void *buf, size_t bytes)
{
	int fd, ret;
	size_t res, head, tail, total_size, full_sec_size;
	unsigned secsz, do_tail_read;
	off_t start_sec;
	char *outbuf, *bouncebuf;

	/* priv carries the file descriptor as an integer-in-a-pointer. */
	fd = (uintptr_t) priv;
	outbuf = (char *) buf;
	bouncebuf = NULL;

	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
	if (ret != 0)
		return (ret);

	/*
	 * Handling reads of arbitrary offset and size - multi-sector case
	 * and single-sector case.
	 *
	 *                        Multi-sector Case
	 *                (do_tail_read = true if tail > 0)
	 *
	 *   |<----------------------total_size--------------------->|
	 *   |                                                       |
	 *   |<--head-->|<--------------bytes------------>|<--tail-->|
	 *   |          |                                 |          |
	 *   |          |       |<~full_sec_size~>|       |          |
	 *   +------------------+                 +------------------+
	 *   |          |0101010|     .  .  .     |0101011|          |
	 *   +------------------+                 +------------------+
	 *         start_sec                         start_sec + n
	 *
	 *
	 *                      Single-sector Case
	 *                    (do_tail_read = false)
	 *
	 *              |<------total_size = secsz----->|
	 *              |                               |
	 *              |<-head->|<---bytes--->|<-tail->|
	 *              +-------------------------------+
	 *              |        |0101010101010|        |
	 *              +-------------------------------+
	 *                          start_sec
	 */
	start_sec = offset / secsz;
	head = offset % secsz;
	total_size = roundup2(head + bytes, secsz);
	tail = total_size - (head + bytes);
	do_tail_read = ((tail > 0) && (head + bytes > secsz));
	/* What is left after removing the partial first and last sectors. */
	full_sec_size = total_size;
	if (head > 0)
		full_sec_size -= secsz;
	if (do_tail_read)
		full_sec_size -= secsz;

	/* Return of partial sector data requires a bounce buffer. */
	if ((head > 0) || do_tail_read || bytes < secsz) {
		bouncebuf = malloc(secsz);
		if (bouncebuf == NULL) {
			printf("vdev_read: out of memory\n");
			return (ENOMEM);
		}
	}

	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
		ret = errno;
		goto error;
	}

	/* Partial data return from first sector */
	if (head > 0) {
		res = read(fd, bouncebuf, secsz);
		if (res != secsz) {
			ret = EIO;
			goto error;
		}
		memcpy(outbuf, bouncebuf + head, min(secsz - head, bytes));
		outbuf += min(secsz - head, bytes);
	}

	/*
	 * Full data return from read sectors.
	 * Note, there is still corner case where we read
	 * from sector boundary, but less than sector size, e.g. reading 512B
	 * from 4k sector.
	 */
	if (full_sec_size > 0) {
		if (bytes < full_sec_size) {
			/* Sector-aligned but shorter than one sector. */
			res = read(fd, bouncebuf, secsz);
			if (res != secsz) {
				ret = EIO;
				goto error;
			}
			memcpy(outbuf, bouncebuf, bytes);
		} else {
			/* Whole sectors go straight into the caller's buffer. */
			res = read(fd, outbuf, full_sec_size);
			if (res != full_sec_size) {
				ret = EIO;
				goto error;
			}
			outbuf += full_sec_size;
		}
	}

	/* Partial data return from last sector */
	if (do_tail_read) {
		res = read(fd, bouncebuf, secsz);
		if (res != secsz) {
			ret = EIO;
			goto error;
		}
		memcpy(outbuf, bouncebuf, secsz - tail);
	}

	ret = 0;
error:
	/* Shared exit path; bouncebuf may be NULL, which free() accepts. */
	free(bouncebuf);
	return (ret);
}
487 
488 static int
489 vdev_write(vdev_t *vdev, off_t offset, void *buf, size_t bytes)
490 {
491 	int fd, ret;
492 	size_t head, tail, total_size, full_sec_size;
493 	unsigned secsz, do_tail_write;
494 	off_t start_sec;
495 	ssize_t res;
496 	char *outbuf, *bouncebuf;
497 
498 	fd = (uintptr_t)vdev->v_priv;
499 	outbuf = (char *)buf;
500 	bouncebuf = NULL;
501 
502 	ret = ioctl(fd, DIOCGSECTORSIZE, &secsz);
503 	if (ret != 0)
504 		return (ret);
505 
506 	start_sec = offset / secsz;
507 	head = offset % secsz;
508 	total_size = roundup2(head + bytes, secsz);
509 	tail = total_size - (head + bytes);
510 	do_tail_write = ((tail > 0) && (head + bytes > secsz));
511 	full_sec_size = total_size;
512 	if (head > 0)
513 		full_sec_size -= secsz;
514 	if (do_tail_write)
515 		full_sec_size -= secsz;
516 
517 	/* Partial sector write requires a bounce buffer. */
518 	if ((head > 0) || do_tail_write || bytes < secsz) {
519 		bouncebuf = malloc(secsz);
520 		if (bouncebuf == NULL) {
521 			printf("vdev_write: out of memory\n");
522 			return (ENOMEM);
523 		}
524 	}
525 
526 	if (lseek(fd, start_sec * secsz, SEEK_SET) == -1) {
527 		ret = errno;
528 		goto error;
529 	}
530 
531 	/* Partial data for first sector */
532 	if (head > 0) {
533 		res = read(fd, bouncebuf, secsz);
534 		if ((unsigned)res != secsz) {
535 			ret = EIO;
536 			goto error;
537 		}
538 		memcpy(bouncebuf + head, outbuf, min(secsz - head, bytes));
539 		(void) lseek(fd, -secsz, SEEK_CUR);
540 		res = write(fd, bouncebuf, secsz);
541 		if ((unsigned)res != secsz) {
542 			ret = EIO;
543 			goto error;
544 		}
545 		outbuf += min(secsz - head, bytes);
546 	}
547 
548 	/*
549 	 * Full data write to sectors.
550 	 * Note, there is still corner case where we write
551 	 * to sector boundary, but less than sector size, e.g. write 512B
552 	 * to 4k sector.
553 	 */
554 	if (full_sec_size > 0) {
555 		if (bytes < full_sec_size) {
556 			res = read(fd, bouncebuf, secsz);
557 			if ((unsigned)res != secsz) {
558 				ret = EIO;
559 				goto error;
560 			}
561 			memcpy(bouncebuf, outbuf, bytes);
562 			(void) lseek(fd, -secsz, SEEK_CUR);
563 			res = write(fd, bouncebuf, secsz);
564 			if ((unsigned)res != secsz) {
565 				ret = EIO;
566 				goto error;
567 			}
568 		} else {
569 			res = write(fd, outbuf, full_sec_size);
570 			if ((unsigned)res != full_sec_size) {
571 				ret = EIO;
572 				goto error;
573 			}
574 			outbuf += full_sec_size;
575 		}
576 	}
577 
578 	/* Partial data write to last sector */
579 	if (do_tail_write) {
580 		res = read(fd, bouncebuf, secsz);
581 		if ((unsigned)res != secsz) {
582 			ret = EIO;
583 			goto error;
584 		}
585 		memcpy(bouncebuf, outbuf, secsz - tail);
586 		(void) lseek(fd, -secsz, SEEK_CUR);
587 		res = write(fd, bouncebuf, secsz);
588 		if ((unsigned)res != secsz) {
589 			ret = EIO;
590 			goto error;
591 		}
592 	}
593 
594 	ret = 0;
595 error:
596 	free(bouncebuf);
597 	return (ret);
598 }
599 
600 static int
601 zfs_dev_init(void)
602 {
603 	spa_t *spa;
604 	spa_t *next;
605 	spa_t *prev;
606 
607 	zfs_init();
608 	if (archsw.arch_zfs_probe == NULL)
609 		return (ENXIO);
610 	archsw.arch_zfs_probe();
611 
612 	prev = NULL;
613 	spa = STAILQ_FIRST(&zfs_pools);
614 	while (spa != NULL) {
615 		next = STAILQ_NEXT(spa, spa_link);
616 		if (zfs_spa_init(spa)) {
617 			if (prev == NULL)
618 				STAILQ_REMOVE_HEAD(&zfs_pools, spa_link);
619 			else
620 				STAILQ_REMOVE_AFTER(&zfs_pools, prev, spa_link);
621 		} else
622 			prev = spa;
623 		spa = next;
624 	}
625 	return (0);
626 }
627 
/*
 * Argument bundle handed to the partition table callbacks
 * (zfs_diskread() and zfs_probe_partition()).
 */
struct zfs_probe_args {
	int		fd;		/* open descriptor of the device being probed */
	const char	*devname;	/* device name; its last character is dropped
					   before a partition name is appended */
	uint64_t	*pool_guid;	/* optional out: pool GUID, see zfs_probe() */
	u_int		secsz;		/* sector size used to scale block counts */
};
634 
635 static int
636 zfs_diskread(void *arg, void *buf, size_t blocks, uint64_t offset)
637 {
638 	struct zfs_probe_args *ppa;
639 
640 	ppa = (struct zfs_probe_args *)arg;
641 	return (vdev_read(NULL, (void *)(uintptr_t)ppa->fd,
642 	    offset * ppa->secsz, buf, blocks * ppa->secsz));
643 }
644 
645 static int
646 zfs_probe(int fd, uint64_t *pool_guid)
647 {
648 	spa_t *spa;
649 	int ret;
650 
651 	spa = NULL;
652 	ret = vdev_probe(vdev_read, vdev_write, (void *)(uintptr_t)fd, &spa);
653 	if (ret == 0 && pool_guid != NULL)
654 		if (*pool_guid == 0)
655 			*pool_guid = spa->spa_guid;
656 	return (ret);
657 }
658 
659 static int
660 zfs_probe_partition(void *arg, const char *partname,
661     const struct ptable_entry *part)
662 {
663 	struct zfs_probe_args *ppa, pa;
664 	struct ptable *table;
665 	char devname[32];
666 	int ret;
667 
668 	/* Probe only freebsd-zfs and freebsd partitions */
669 	if (part->type != PART_FREEBSD &&
670 	    part->type != PART_FREEBSD_ZFS)
671 		return (0);
672 
673 	ppa = (struct zfs_probe_args *)arg;
674 	strncpy(devname, ppa->devname, strlen(ppa->devname) - 1);
675 	devname[strlen(ppa->devname) - 1] = '\0';
676 	snprintf(devname, sizeof(devname), "%s%s:", devname, partname);
677 	pa.fd = open(devname, O_RDWR);
678 	if (pa.fd == -1)
679 		return (0);
680 	ret = zfs_probe(pa.fd, ppa->pool_guid);
681 	if (ret == 0)
682 		return (0);
683 	/* Do we have BSD label here? */
684 	if (part->type == PART_FREEBSD) {
685 		pa.devname = devname;
686 		pa.pool_guid = ppa->pool_guid;
687 		pa.secsz = ppa->secsz;
688 		table = ptable_open(&pa, part->end - part->start + 1,
689 		    ppa->secsz, zfs_diskread);
690 		if (table != NULL) {
691 			ptable_iterate(table, &pa, zfs_probe_partition);
692 			ptable_close(table);
693 		}
694 	}
695 	close(pa.fd);
696 	return (0);
697 }
698 
699 /*
700  * Return bootenv nvlist from pool label.
701  */
702 int
703 zfs_get_bootenv(void *vdev, nvlist_t **benvp)
704 {
705 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
706 	nvlist_t *benv = NULL;
707 	vdev_t *vd;
708 	spa_t *spa;
709 
710 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
711 		return (ENOTSUP);
712 
713 	if ((spa = spa_find_by_dev(dev)) == NULL)
714 		return (ENXIO);
715 
716 	if (spa->spa_bootenv == NULL) {
717 		STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children,
718 		    v_childlink) {
719 			benv = vdev_read_bootenv(vd);
720 
721 			if (benv != NULL)
722 				break;
723 		}
724 		spa->spa_bootenv = benv;
725 	} else {
726 		benv = spa->spa_bootenv;
727 	}
728 
729 	if (benv == NULL)
730 		return (ENOENT);
731 
732 	*benvp = benv;
733 	return (0);
734 }
735 
736 /*
737  * Store nvlist to pool label bootenv area. Also updates cached pointer in spa.
738  */
739 int
740 zfs_set_bootenv(void *vdev, nvlist_t *benv)
741 {
742 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
743 	spa_t *spa;
744 	vdev_t *vd;
745 
746 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
747 		return (ENOTSUP);
748 
749 	if ((spa = spa_find_by_dev(dev)) == NULL)
750 		return (ENXIO);
751 
752 	STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) {
753 		vdev_write_bootenv(vd, benv);
754 	}
755 
756 	spa->spa_bootenv = benv;
757 	return (0);
758 }
759 
760 /*
761  * Get bootonce value by key. The bootonce <key, value> pair is removed
762  * from the bootenv nvlist and the remaining nvlist is committed back to disk.
763  */
764 int
765 zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size)
766 {
767 	nvlist_t *benv;
768 	char *result = NULL;
769 	int result_size, rv;
770 
771 	if ((rv = zfs_get_bootenv(vdev, &benv)) != 0)
772 		return (rv);
773 
774 	if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL,
775 	    &result, &result_size)) == 0) {
776 		if (result_size == 0) {
777 			/* ignore empty string */
778 			rv = ENOENT;
779 		} else {
780 			size = MIN((size_t)result_size + 1, size);
781 			strlcpy(buf, result, size);
782 		}
783 		(void) nvlist_remove(benv, key, DATA_TYPE_STRING);
784 		(void) zfs_set_bootenv(vdev, benv);
785 	}
786 
787 	return (rv);
788 }
789 
/*
 * nvstore backend.
 *
 * Forward declarations for the functions below that are referenced
 * before they are defined.
 */

static int zfs_nvstore_setter(void *, int, const char *,
    const void *, size_t);
static int zfs_nvstore_setter_str(void *, const char *, const char *,
    const char *);
static int zfs_nvstore_unset_impl(void *, const char *, bool);
static int zfs_nvstore_setenv(void *, void *);
800 
801 /*
802  * nvstore is only present for current rootfs pool.
803  */
804 static int
805 zfs_nvstore_sethook(struct env_var *ev, int flags __unused, const void *value)
806 {
807 	struct zfs_devdesc *dev;
808 	int rv;
809 
810 	archsw.arch_getdev((void **)&dev, NULL, NULL);
811 	if (dev == NULL)
812 		return (ENXIO);
813 
814 	rv = zfs_nvstore_setter_str(dev, NULL, ev->ev_name, value);
815 
816 	free(dev);
817 	return (rv);
818 }
819 
820 /*
821  * nvstore is only present for current rootfs pool.
822  */
823 static int
824 zfs_nvstore_unsethook(struct env_var *ev)
825 {
826 	struct zfs_devdesc *dev;
827 	int rv;
828 
829 	archsw.arch_getdev((void **)&dev, NULL, NULL);
830 	if (dev == NULL)
831 		return (ENXIO);
832 
833 	rv = zfs_nvstore_unset_impl(dev, ev->ev_name, false);
834 
835 	free(dev);
836 	return (rv);
837 }
838 
839 static int
840 zfs_nvstore_getter(void *vdev, const char *name, void **data)
841 {
842 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
843 	spa_t *spa;
844 	nvlist_t *nv;
845 	char *str, **ptr;
846 	int size;
847 	int rv;
848 
849 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
850 		return (ENOTSUP);
851 
852 	if ((spa = spa_find_by_dev(dev)) == NULL)
853 		return (ENXIO);
854 
855 	if (spa->spa_bootenv == NULL)
856 		return (ENXIO);
857 
858 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
859 	    NULL, &nv, NULL) != 0)
860 		return (ENOENT);
861 
862 	rv = nvlist_find(nv, name, DATA_TYPE_STRING, NULL, &str, &size);
863 	if (rv == 0) {
864 		ptr = (char **)data;
865 		asprintf(ptr, "%.*s", size, str);
866 		if (*data == NULL)
867 			rv = ENOMEM;
868 	}
869 	nvlist_destroy(nv);
870 	return (rv);
871 }
872 
873 static int
874 zfs_nvstore_setter(void *vdev, int type, const char *name,
875     const void *data, size_t size)
876 {
877 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
878 	spa_t *spa;
879 	nvlist_t *nv;
880 	int rv;
881 	bool env_set = true;
882 
883 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
884 		return (ENOTSUP);
885 
886 	if ((spa = spa_find_by_dev(dev)) == NULL)
887 		return (ENXIO);
888 
889 	if (spa->spa_bootenv == NULL)
890 		return (ENXIO);
891 
892 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
893 	    NULL, &nv, NULL) != 0) {
894 		nv = nvlist_create(NV_UNIQUE_NAME);
895 		if (nv == NULL)
896 			return (ENOMEM);
897 	}
898 
899 	rv = 0;
900 	switch (type) {
901         case DATA_TYPE_INT8:
902 		if (size != sizeof (int8_t)) {
903 			rv = EINVAL;
904 			break;
905 		}
906 		rv = nvlist_add_int8(nv, name, *(int8_t *)data);
907 		break;
908 
909         case DATA_TYPE_INT16:
910 		if (size != sizeof (int16_t)) {
911 			rv = EINVAL;
912 			break;
913 		}
914 		rv = nvlist_add_int16(nv, name, *(int16_t *)data);
915 		break;
916 
917         case DATA_TYPE_INT32:
918 		if (size != sizeof (int32_t)) {
919 			rv = EINVAL;
920 			break;
921 		}
922 		rv = nvlist_add_int32(nv, name, *(int32_t *)data);
923 		break;
924 
925         case DATA_TYPE_INT64:
926 		if (size != sizeof (int64_t)) {
927 			rv = EINVAL;
928 			break;
929 		}
930 		rv = nvlist_add_int64(nv, name, *(int64_t *)data);
931 		break;
932 
933         case DATA_TYPE_BYTE:
934 		if (size != sizeof (uint8_t)) {
935 			rv = EINVAL;
936 			break;
937 		}
938 		rv = nvlist_add_byte(nv, name, *(int8_t *)data);
939 		break;
940 
941         case DATA_TYPE_UINT8:
942 		if (size != sizeof (uint8_t)) {
943 			rv = EINVAL;
944 			break;
945 		}
946 		rv = nvlist_add_uint8(nv, name, *(int8_t *)data);
947 		break;
948 
949         case DATA_TYPE_UINT16:
950 		if (size != sizeof (uint16_t)) {
951 			rv = EINVAL;
952 			break;
953 		}
954 		rv = nvlist_add_uint16(nv, name, *(uint16_t *)data);
955 		break;
956 
957         case DATA_TYPE_UINT32:
958 		if (size != sizeof (uint32_t)) {
959 			rv = EINVAL;
960 			break;
961 		}
962 		rv = nvlist_add_uint32(nv, name, *(uint32_t *)data);
963 		break;
964 
965         case DATA_TYPE_UINT64:
966 		if (size != sizeof (uint64_t)) {
967 			rv = EINVAL;
968 			break;
969 		}
970 		rv = nvlist_add_uint64(nv, name, *(uint64_t *)data);
971 		break;
972 
973         case DATA_TYPE_STRING:
974 		rv = nvlist_add_string(nv, name, data);
975 		break;
976 
977 	case DATA_TYPE_BOOLEAN_VALUE:
978 		if (size != sizeof (boolean_t)) {
979 			rv = EINVAL;
980 			break;
981 		}
982 		rv = nvlist_add_boolean_value(nv, name, *(boolean_t *)data);
983 		break;
984 
985 	default:
986 		rv = EINVAL;
987 		break;
988 	}
989 
990 	if (rv == 0) {
991 		rv = nvlist_add_nvlist(spa->spa_bootenv, OS_NVSTORE, nv);
992 		if (rv == 0) {
993 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
994 		}
995 		if (rv == 0) {
996 			if (env_set) {
997 				rv = zfs_nvstore_setenv(vdev,
998 				    nvpair_find(nv, name));
999 			} else {
1000 				env_discard(env_getenv(name));
1001 				rv = 0;
1002 			}
1003 		}
1004 	}
1005 
1006 	nvlist_destroy(nv);
1007 	return (rv);
1008 }
1009 
/*
 * Parse data as a signed integer; the base is auto-detected by strtoll()
 * (base argument 0).
 *
 * On success stores the value in *ip and returns 0.  Returns EINVAL, and
 * leaves *ip untouched, when data is empty, has trailing characters, or
 * strtoll() sets errno.  errno is reset to 0 before parsing.
 */
static int
get_int64(const char *data, int64_t *ip)
{
	char *stop;
	int64_t parsed;

	errno = 0;
	parsed = strtoll(data, &stop, 0);
	if (errno == 0 && data[0] != '\0' && stop[0] == '\0') {
		*ip = parsed;
		return (0);
	}
	return (EINVAL);
}
1024 
/*
 * Parse data as an unsigned integer; the base is auto-detected by
 * strtoull() (base argument 0).
 *
 * On success stores the value in *ip and returns 0.  Returns EINVAL, and
 * leaves *ip untouched, when data is empty, has trailing characters, or
 * strtoull() sets errno.  errno is reset to 0 before parsing.
 */
static int
get_uint64(const char *data, uint64_t *ip)
{
	char *stop;
	uint64_t parsed;

	errno = 0;
	parsed = strtoull(data, &stop, 0);
	if (errno == 0 && data[0] != '\0' && stop[0] == '\0') {
		*ip = parsed;
		return (0);
	}
	return (EINVAL);
}
1039 
1040 /*
1041  * Translate textual data to data type. If type is not set, and we are
1042  * creating new pair, use DATA_TYPE_STRING.
1043  */
1044 static int
1045 zfs_nvstore_setter_str(void *vdev, const char *type, const char *name,
1046     const char *data)
1047 {
1048 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1049 	spa_t *spa;
1050 	nvlist_t *nv;
1051 	int rv;
1052 	data_type_t dt;
1053 	int64_t val;
1054 	uint64_t uval;
1055 
1056 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1057 		return (ENOTSUP);
1058 
1059 	if ((spa = spa_find_by_dev(dev)) == NULL)
1060 		return (ENXIO);
1061 
1062 	if (spa->spa_bootenv == NULL)
1063 		return (ENXIO);
1064 
1065 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1066 	    NULL, &nv, NULL) != 0) {
1067 		nv = NULL;
1068 	}
1069 
1070 	if (type == NULL) {
1071 		nvp_header_t *nvh;
1072 
1073 		/*
1074 		 * if there is no existing pair, default to string.
1075 		 * Otherwise, use type from existing pair.
1076 		 */
1077 		nvh = nvpair_find(nv, name);
1078 		if (nvh == NULL) {
1079 			dt = DATA_TYPE_STRING;
1080 		} else {
1081 			nv_string_t *nvp_name;
1082 			nv_pair_data_t *nvp_data;
1083 
1084 			nvp_name = (nv_string_t *)(nvh + 1);
1085 			nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1086 			    NV_ALIGN4(nvp_name->nv_size));
1087 			dt = nvp_data->nv_type;
1088 		}
1089 	} else {
1090 		dt = nvpair_type_from_name(type);
1091 	}
1092 	nvlist_destroy(nv);
1093 
1094 	rv = 0;
1095 	switch (dt) {
1096         case DATA_TYPE_INT8:
1097 		rv = get_int64(data, &val);
1098 		if (rv == 0) {
1099 			int8_t v = val;
1100 
1101 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1102 		}
1103 		break;
1104         case DATA_TYPE_INT16:
1105 		rv = get_int64(data, &val);
1106 		if (rv == 0) {
1107 			int16_t v = val;
1108 
1109 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1110 		}
1111 		break;
1112         case DATA_TYPE_INT32:
1113 		rv = get_int64(data, &val);
1114 		if (rv == 0) {
1115 			int32_t v = val;
1116 
1117 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1118 		}
1119 		break;
1120         case DATA_TYPE_INT64:
1121 		rv = get_int64(data, &val);
1122 		if (rv == 0) {
1123 			rv = zfs_nvstore_setter(vdev, dt, name, &val,
1124 			    sizeof (val));
1125 		}
1126 		break;
1127 
1128         case DATA_TYPE_BYTE:
1129 		rv = get_uint64(data, &uval);
1130 		if (rv == 0) {
1131 			uint8_t v = uval;
1132 
1133 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1134 		}
1135 		break;
1136 
1137         case DATA_TYPE_UINT8:
1138 		rv = get_uint64(data, &uval);
1139 		if (rv == 0) {
1140 			uint8_t v = uval;
1141 
1142 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1143 		}
1144 		break;
1145 
1146         case DATA_TYPE_UINT16:
1147 		rv = get_uint64(data, &uval);
1148 		if (rv == 0) {
1149 			uint16_t v = uval;
1150 
1151 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1152 		}
1153 		break;
1154 
1155         case DATA_TYPE_UINT32:
1156 		rv = get_uint64(data, &uval);
1157 		if (rv == 0) {
1158 			uint32_t v = uval;
1159 
1160 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1161 		}
1162 		break;
1163 
1164         case DATA_TYPE_UINT64:
1165 		rv = get_uint64(data, &uval);
1166 		if (rv == 0) {
1167 			rv = zfs_nvstore_setter(vdev, dt, name, &uval,
1168 			    sizeof (uval));
1169 		}
1170 		break;
1171 
1172         case DATA_TYPE_STRING:
1173 		rv = zfs_nvstore_setter(vdev, dt, name, data, strlen(data) + 1);
1174 		break;
1175 
1176 	case DATA_TYPE_BOOLEAN_VALUE:
1177 		rv = get_int64(data, &val);
1178 		if (rv == 0) {
1179 			boolean_t v = val;
1180 
1181 			rv = zfs_nvstore_setter(vdev, dt, name, &v, sizeof (v));
1182 		}
1183 
1184 	default:
1185 		rv = EINVAL;
1186 	}
1187 	return (rv);
1188 }
1189 
1190 static int
1191 zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env)
1192 {
1193 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1194 	spa_t *spa;
1195 	nvlist_t *nv;
1196 	int rv;
1197 
1198 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1199 		return (ENOTSUP);
1200 
1201 	if ((spa = spa_find_by_dev(dev)) == NULL)
1202 		return (ENXIO);
1203 
1204 	if (spa->spa_bootenv == NULL)
1205 		return (ENXIO);
1206 
1207 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1208 	    NULL, &nv, NULL) != 0)
1209 		return (ENOENT);
1210 
1211 	rv = nvlist_remove(nv, name, DATA_TYPE_UNKNOWN);
1212 	if (rv == 0) {
1213 		if (nvlist_next_nvpair(nv, NULL) == NULL) {
1214 			rv = nvlist_remove(spa->spa_bootenv, OS_NVSTORE,
1215 			    DATA_TYPE_NVLIST);
1216 		} else {
1217 			rv = nvlist_add_nvlist(spa->spa_bootenv,
1218 			    OS_NVSTORE, nv);
1219 		}
1220 		if (rv == 0)
1221 			rv = zfs_set_bootenv(vdev, spa->spa_bootenv);
1222 	}
1223 
1224 	if (unset_env)
1225 		env_discard(env_getenv(name));
1226 	return (rv);
1227 }
1228 
/*
 * nvstore "unset" callback: remove "name" from the pool's nvstore and also
 * discard the loader environment variable of the same name.
 */
static int
zfs_nvstore_unset(void *vdev, const char *name)
{
	const bool drop_env = true;

	return (zfs_nvstore_unset_impl(vdev, name, drop_env));
}
1234 
1235 static int
1236 zfs_nvstore_print(void *vdev __unused, void *ptr)
1237 {
1238 
1239 	nvpair_print(ptr, 0);
1240 	return (0);
1241 }
1242 
1243 /*
1244  * Create environment variable from nvpair.
1245  * set hook will update nvstore with new value, unset hook will remove
1246  * variable from nvstore.
1247  */
1248 static int
1249 zfs_nvstore_setenv(void *vdev __unused, void *ptr)
1250 {
1251 	nvp_header_t *nvh = ptr;
1252 	nv_string_t *nvp_name, *nvp_value;
1253 	nv_pair_data_t *nvp_data;
1254 	char *name, *value;
1255 	int rv = 0;
1256 
1257 	if (nvh == NULL)
1258 		return (ENOENT);
1259 
1260 	nvp_name = (nv_string_t *)(nvh + 1);
1261 	nvp_data = (nv_pair_data_t *)(&nvp_name->nv_data[0] +
1262 	    NV_ALIGN4(nvp_name->nv_size));
1263 
1264 	if ((name = nvstring_get(nvp_name)) == NULL)
1265 		return (ENOMEM);
1266 
1267 	value = NULL;
1268 	switch (nvp_data->nv_type) {
1269 	case DATA_TYPE_BYTE:
1270 	case DATA_TYPE_UINT8:
1271 		(void) asprintf(&value, "%uc",
1272 		    *(unsigned *)&nvp_data->nv_data[0]);
1273 		if (value == NULL)
1274 			rv = ENOMEM;
1275 		break;
1276 
1277 	case DATA_TYPE_INT8:
1278 		(void) asprintf(&value, "%c", *(int *)&nvp_data->nv_data[0]);
1279 		if (value == NULL)
1280 			rv = ENOMEM;
1281 		break;
1282 
1283 	case DATA_TYPE_INT16:
1284 		(void) asprintf(&value, "%hd", *(short *)&nvp_data->nv_data[0]);
1285 		if (value == NULL)
1286 			rv = ENOMEM;
1287 		break;
1288 
1289 	case DATA_TYPE_UINT16:
1290 		(void) asprintf(&value, "%hu",
1291 		    *(unsigned short *)&nvp_data->nv_data[0]);
1292 		if (value == NULL)
1293 			rv = ENOMEM;
1294 		break;
1295 
1296 	case DATA_TYPE_BOOLEAN_VALUE:
1297 	case DATA_TYPE_INT32:
1298 		(void) asprintf(&value, "%d", *(int *)&nvp_data->nv_data[0]);
1299 		if (value == NULL)
1300 			rv = ENOMEM;
1301 		break;
1302 
1303 	case DATA_TYPE_UINT32:
1304 		(void) asprintf(&value, "%u",
1305 		    *(unsigned *)&nvp_data->nv_data[0]);
1306 		if (value == NULL)
1307 			rv = ENOMEM;
1308 		break;
1309 
1310 	case DATA_TYPE_INT64:
1311 		(void) asprintf(&value, "%jd",
1312 		    (intmax_t)*(int64_t *)&nvp_data->nv_data[0]);
1313 		if (value == NULL)
1314 			rv = ENOMEM;
1315 		break;
1316 
1317 	case DATA_TYPE_UINT64:
1318 		(void) asprintf(&value, "%ju",
1319 		    (uintmax_t)*(uint64_t *)&nvp_data->nv_data[0]);
1320 		if (value == NULL)
1321 			rv = ENOMEM;
1322 		break;
1323 
1324 	case DATA_TYPE_STRING:
1325 		nvp_value = (nv_string_t *)&nvp_data->nv_data[0];
1326 		if ((value = nvstring_get(nvp_value)) == NULL) {
1327 			rv = ENOMEM;
1328 			break;
1329 		}
1330 		break;
1331 
1332 	default:
1333 		rv = EINVAL;
1334 		break;
1335 	}
1336 
1337 	if (value != NULL) {
1338 		rv = env_setenv(name, EV_VOLATILE | EV_NOHOOK, value,
1339 		    zfs_nvstore_sethook, zfs_nvstore_unsethook);
1340 		free(value);
1341 	}
1342 	free(name);
1343 	return (rv);
1344 }
1345 
1346 static int
1347 zfs_nvstore_iterate(void *vdev, int (*cb)(void *, void *))
1348 {
1349 	struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev;
1350 	spa_t *spa;
1351 	nvlist_t *nv;
1352 	nvp_header_t *nvh;
1353 	int rv;
1354 
1355 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1356 		return (ENOTSUP);
1357 
1358 	if ((spa = spa_find_by_dev(dev)) == NULL)
1359 		return (ENXIO);
1360 
1361 	if (spa->spa_bootenv == NULL)
1362 		return (ENXIO);
1363 
1364 	if (nvlist_find(spa->spa_bootenv, OS_NVSTORE, DATA_TYPE_NVLIST,
1365 	    NULL, &nv, NULL) != 0)
1366 		return (ENOENT);
1367 
1368 	rv = 0;
1369 	nvh = NULL;
1370 	while ((nvh = nvlist_next_nvpair(nv, nvh)) != NULL) {
1371 		rv = cb(vdev, nvh);
1372 		if (rv != 0)
1373 			break;
1374 	}
1375 	return (rv);
1376 }
1377 
1378 nvs_callbacks_t nvstore_zfs_cb = {
1379 	.nvs_getter = zfs_nvstore_getter,
1380 	.nvs_setter = zfs_nvstore_setter,
1381 	.nvs_setter_str = zfs_nvstore_setter_str,
1382 	.nvs_unset = zfs_nvstore_unset,
1383 	.nvs_print = zfs_nvstore_print,
1384 	.nvs_iterate = zfs_nvstore_iterate
1385 };
1386 
1387 int
1388 zfs_attach_nvstore(void *vdev)
1389 {
1390 	struct zfs_devdesc *dev = vdev;
1391 	spa_t *spa;
1392 	uint64_t version;
1393 	int rv;
1394 
1395 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1396 		return (ENOTSUP);
1397 
1398 	if ((spa = spa_find_by_dev(dev)) == NULL)
1399 		return (ENXIO);
1400 
1401 	rv = nvlist_find(spa->spa_bootenv, BOOTENV_VERSION, DATA_TYPE_UINT64,
1402 	    NULL, &version, NULL);
1403 
1404 	if (rv != 0 || version != VB_NVLIST) {
1405 		return (ENXIO);
1406 	}
1407 
1408 	dev = malloc(sizeof (*dev));
1409 	if (dev == NULL)
1410 		return (ENOMEM);
1411 	memcpy(dev, vdev, sizeof (*dev));
1412 
1413 	rv = nvstore_init(spa->spa_name, &nvstore_zfs_cb, dev);
1414 	if (rv != 0)
1415 		free(dev);
1416 	else
1417 		rv = zfs_nvstore_iterate(dev, zfs_nvstore_setenv);
1418 	return (rv);
1419 }
1420 
1421 int
1422 zfs_probe_dev(const char *devname, uint64_t *pool_guid)
1423 {
1424 	struct disk_devdesc *dev;
1425 	struct ptable *table;
1426 	struct zfs_probe_args pa;
1427 	uint64_t mediasz;
1428 	int ret;
1429 
1430 	if (pool_guid)
1431 		*pool_guid = 0;
1432 	pa.fd = open(devname, O_RDWR);
1433 	if (pa.fd == -1)
1434 		return (ENXIO);
1435 	/*
1436 	 * We will not probe the whole disk, we can not boot from such
1437 	 * disks and some systems will misreport the disk sizes and will
1438 	 * hang while accessing the disk.
1439 	 */
1440 	if (archsw.arch_getdev((void **)&dev, devname, NULL) == 0) {
1441 		int partition = dev->d_partition;
1442 		int slice = dev->d_slice;
1443 
1444 		free(dev);
1445 		if (partition != D_PARTNONE && slice != D_SLICENONE) {
1446 			ret = zfs_probe(pa.fd, pool_guid);
1447 			if (ret == 0)
1448 				return (0);
1449 		}
1450 	}
1451 
1452 	/* Probe each partition */
1453 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
1454 	if (ret == 0)
1455 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
1456 	if (ret == 0) {
1457 		pa.devname = devname;
1458 		pa.pool_guid = pool_guid;
1459 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
1460 		    zfs_diskread);
1461 		if (table != NULL) {
1462 			ptable_iterate(table, &pa, zfs_probe_partition);
1463 			ptable_close(table);
1464 		}
1465 	}
1466 	close(pa.fd);
1467 	if (pool_guid && *pool_guid == 0)
1468 		ret = ENXIO;
1469 	return (ret);
1470 }
1471 
1472 /*
1473  * Print information about ZFS pools
1474  */
1475 static int
1476 zfs_dev_print(int verbose)
1477 {
1478 	spa_t *spa;
1479 	char line[80];
1480 	int ret = 0;
1481 
1482 	if (STAILQ_EMPTY(&zfs_pools))
1483 		return (0);
1484 
1485 	printf("%s devices:", zfs_dev.dv_name);
1486 	if ((ret = pager_output("\n")) != 0)
1487 		return (ret);
1488 
1489 	if (verbose) {
1490 		return (spa_all_status());
1491 	}
1492 	STAILQ_FOREACH(spa, &zfs_pools, spa_link) {
1493 		snprintf(line, sizeof(line), "    zfs:%s\n", spa->spa_name);
1494 		ret = pager_output(line);
1495 		if (ret != 0)
1496 			break;
1497 	}
1498 	return (ret);
1499 }
1500 
1501 /*
1502  * Attempt to open the pool described by (dev) for use by (f).
1503  */
1504 static int
1505 zfs_dev_open(struct open_file *f, ...)
1506 {
1507 	va_list		args;
1508 	struct zfs_devdesc	*dev;
1509 	struct zfsmount	*mount;
1510 	spa_t		*spa;
1511 	int		rv;
1512 
1513 	va_start(args, f);
1514 	dev = va_arg(args, struct zfs_devdesc *);
1515 	va_end(args);
1516 
1517 	if ((spa = spa_find_by_dev(dev)) == NULL)
1518 		return (ENXIO);
1519 
1520 	mount = malloc(sizeof(*mount));
1521 	if (mount == NULL)
1522 		rv = ENOMEM;
1523 	else
1524 		rv = zfs_mount(spa, dev->root_guid, mount);
1525 	if (rv != 0) {
1526 		free(mount);
1527 		return (rv);
1528 	}
1529 	if (mount->objset.os_type != DMU_OST_ZFS) {
1530 		printf("Unexpected object set type %ju\n",
1531 		    (uintmax_t)mount->objset.os_type);
1532 		free(mount);
1533 		return (EIO);
1534 	}
1535 	f->f_devdata = mount;
1536 	free(dev);
1537 	return (0);
1538 }
1539 
1540 static int
1541 zfs_dev_close(struct open_file *f)
1542 {
1543 
1544 	free(f->f_devdata);
1545 	f->f_devdata = NULL;
1546 	return (0);
1547 }
1548 
1549 static int
1550 zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize)
1551 {
1552 
1553 	return (ENOSYS);
1554 }
1555 
1556 struct devsw zfs_dev = {
1557 	.dv_name = "zfs",
1558 	.dv_type = DEVT_ZFS,
1559 	.dv_init = zfs_dev_init,
1560 	.dv_strategy = zfs_dev_strategy,
1561 	.dv_open = zfs_dev_open,
1562 	.dv_close = zfs_dev_close,
1563 	.dv_ioctl = noioctl,
1564 	.dv_print = zfs_dev_print,
1565 	.dv_cleanup = NULL
1566 };
1567 
1568 int
1569 zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path)
1570 {
1571 	static char	rootname[ZFS_MAXNAMELEN];
1572 	static char	poolname[ZFS_MAXNAMELEN];
1573 	spa_t		*spa;
1574 	const char	*end;
1575 	const char	*np;
1576 	const char	*sep;
1577 	int		rv;
1578 
1579 	np = devspec;
1580 	if (*np != ':')
1581 		return (EINVAL);
1582 	np++;
1583 	end = strrchr(np, ':');
1584 	if (end == NULL)
1585 		return (EINVAL);
1586 	sep = strchr(np, '/');
1587 	if (sep == NULL || sep >= end)
1588 		sep = end;
1589 	memcpy(poolname, np, sep - np);
1590 	poolname[sep - np] = '\0';
1591 	if (sep < end) {
1592 		sep++;
1593 		memcpy(rootname, sep, end - sep);
1594 		rootname[end - sep] = '\0';
1595 	}
1596 	else
1597 		rootname[0] = '\0';
1598 
1599 	spa = spa_find_by_name(poolname);
1600 	if (!spa)
1601 		return (ENXIO);
1602 	dev->pool_guid = spa->spa_guid;
1603 	rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid);
1604 	if (rv != 0)
1605 		return (rv);
1606 	if (path != NULL)
1607 		*path = (*end == '\0') ? end : end + 1;
1608 	dev->dd.d_dev = &zfs_dev;
1609 	return (0);
1610 }
1611 
1612 char *
1613 zfs_fmtdev(void *vdev)
1614 {
1615 	static char		rootname[ZFS_MAXNAMELEN];
1616 	static char		buf[2 * ZFS_MAXNAMELEN + 8];
1617 	struct zfs_devdesc	*dev = (struct zfs_devdesc *)vdev;
1618 	spa_t			*spa;
1619 
1620 	buf[0] = '\0';
1621 	if (dev->dd.d_dev->dv_type != DEVT_ZFS)
1622 		return (buf);
1623 
1624 	/* Do we have any pools? */
1625 	spa = STAILQ_FIRST(&zfs_pools);
1626 	if (spa == NULL)
1627 		return (buf);
1628 
1629 	if (dev->pool_guid == 0)
1630 		dev->pool_guid = spa->spa_guid;
1631 	else
1632 		spa = spa_find_by_guid(dev->pool_guid);
1633 
1634 	if (spa == NULL) {
1635 		printf("ZFS: can't find pool by guid\n");
1636 		return (buf);
1637 	}
1638 	if (dev->root_guid == 0 && zfs_get_root(spa, &dev->root_guid)) {
1639 		printf("ZFS: can't find root filesystem\n");
1640 		return (buf);
1641 	}
1642 	if (zfs_rlookup(spa, dev->root_guid, rootname)) {
1643 		printf("ZFS: can't find filesystem by guid\n");
1644 		return (buf);
1645 	}
1646 
1647 	if (rootname[0] == '\0')
1648 		snprintf(buf, sizeof(buf), "%s:%s:", dev->dd.d_dev->dv_name,
1649 		    spa->spa_name);
1650 	else
1651 		snprintf(buf, sizeof(buf), "%s:%s/%s:", dev->dd.d_dev->dv_name,
1652 		    spa->spa_name, rootname);
1653 	return (buf);
1654 }
1655 
/*
 * Split "pool[/dataset]" into its pool and dataset parts.
 *
 * name		string to split; must not be NULL.
 * poolname	buffer receiving the NUL-terminated pool name; must not be
 *		NULL.
 * size		size of poolname in bytes.
 * dsnamep	when not NULL, receives a pointer to the dataset part inside
 *		name (the text after the first '/'), or to an empty string
 *		when name contains no '/'.
 *
 * Returns 0 on success or EINVAL when the pool name plus its terminator
 * does not fit in size bytes.
 */
static int
split_devname(const char *name, char *poolname, size_t size,
    const char **dsnamep)
{
	const char *slash, *rest;
	size_t poollen;

	ASSERT(name != NULL);
	ASSERT(poolname != NULL);

	slash = strchr(name, '/');
	if (slash == NULL) {
		poollen = strlen(name);
		rest = "";
	} else {
		poollen = slash - name;
		rest = slash + 1;
	}

	if (poollen + 1 > size)
		return (EINVAL);

	/* name holds at least poollen characters, so copy them verbatim. */
	memcpy(poolname, name, poollen);
	poolname[poollen] = '\0';

	if (dsnamep != NULL)
		*dsnamep = rest;

	return (0);
}
1684 
1685 int
1686 zfs_list(const char *name)
1687 {
1688 	static char	poolname[ZFS_MAXNAMELEN];
1689 	uint64_t	objid;
1690 	spa_t		*spa;
1691 	const char	*dsname;
1692 	int		rv;
1693 
1694 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1695 		return (EINVAL);
1696 
1697 	spa = spa_find_by_name(poolname);
1698 	if (!spa)
1699 		return (ENXIO);
1700 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1701 	if (rv != 0)
1702 		return (rv);
1703 
1704 	return (zfs_list_dataset(spa, objid));
1705 }
1706 
1707 void
1708 init_zfs_boot_options(const char *currdev_in)
1709 {
1710 	char poolname[ZFS_MAXNAMELEN];
1711 	char *beroot, *currdev;
1712 	spa_t *spa;
1713 	int currdev_len;
1714 	const char *dsname;
1715 
1716 	currdev = NULL;
1717 	currdev_len = strlen(currdev_in);
1718 	if (currdev_len == 0)
1719 		return;
1720 	if (strncmp(currdev_in, "zfs:", 4) != 0)
1721 		return;
1722 	currdev = strdup(currdev_in);
1723 	if (currdev == NULL)
1724 		return;
1725 	/* Remove the trailing : */
1726 	currdev[currdev_len - 1] = '\0';
1727 
1728 	setenv("zfs_be_active", currdev, 1);
1729 	setenv("zfs_be_currpage", "1", 1);
1730 	/* Remove the last element (current bootenv) */
1731 	beroot = strrchr(currdev, '/');
1732 	if (beroot != NULL)
1733 		beroot[0] = '\0';
1734 	beroot = strchr(currdev, ':') + 1;
1735 	setenv("zfs_be_root", beroot, 1);
1736 
1737 	if (split_devname(beroot, poolname, sizeof(poolname), &dsname) != 0)
1738 		return;
1739 
1740 	spa = spa_find_by_name(poolname);
1741 	if (spa == NULL)
1742 		return;
1743 
1744 	zfs_bootenv_initial("bootenvs", spa, beroot, dsname, 0);
1745 	zfs_checkpoints_initial(spa, beroot, dsname);
1746 
1747 	free(currdev);
1748 }
1749 
1750 static void
1751 zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname)
1752 {
1753 	char envname[32];
1754 
1755 	if (spa->spa_uberblock_checkpoint.ub_checkpoint_txg != 0) {
1756 		snprintf(envname, sizeof(envname), "zpool_checkpoint");
1757 		setenv(envname, name, 1);
1758 
1759 		spa->spa_uberblock = &spa->spa_uberblock_checkpoint;
1760 		spa->spa_mos = &spa->spa_mos_checkpoint;
1761 
1762 		zfs_bootenv_initial("bootenvs_check", spa, name, dsname, 1);
1763 
1764 		spa->spa_uberblock = &spa->spa_uberblock_master;
1765 		spa->spa_mos = &spa->spa_mos_master;
1766 	}
1767 }
1768 
1769 static void
1770 zfs_bootenv_initial(const char *envprefix, spa_t *spa, const char *rootname,
1771    const char *dsname, int checkpoint)
1772 {
1773 	char		envname[32], envval[256];
1774 	uint64_t	objid;
1775 	int		bootenvs_idx, rv;
1776 
1777 	SLIST_INIT(&zfs_be_head);
1778 	zfs_env_count = 0;
1779 
1780 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1781 	if (rv != 0)
1782 		return;
1783 
1784 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1785 	bootenvs_idx = 0;
1786 	/* Populate the initial environment variables */
1787 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1788 		/* Enumerate all bootenvs for general usage */
1789 		snprintf(envname, sizeof(envname), "%s[%d]",
1790 		    envprefix, bootenvs_idx);
1791 		snprintf(envval, sizeof(envval), "zfs:%s%s/%s",
1792 		    checkpoint ? "!" : "", rootname, zfs_be->name);
1793 		rv = setenv(envname, envval, 1);
1794 		if (rv != 0)
1795 			break;
1796 		bootenvs_idx++;
1797 	}
1798 	snprintf(envname, sizeof(envname), "%s_count", envprefix);
1799 	snprintf(envval, sizeof(envval), "%d", bootenvs_idx);
1800 	setenv(envname, envval, 1);
1801 
1802 	/* Clean up the SLIST of ZFS BEs */
1803 	while (!SLIST_EMPTY(&zfs_be_head)) {
1804 		zfs_be = SLIST_FIRST(&zfs_be_head);
1805 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1806 		free(zfs_be->name);
1807 		free(zfs_be);
1808 	}
1809 }
1810 
1811 int
1812 zfs_bootenv(const char *name)
1813 {
1814 	char		poolname[ZFS_MAXNAMELEN], *root;
1815 	const char	*dsname;
1816 	char		becount[4];
1817 	uint64_t	objid;
1818 	spa_t		*spa;
1819 	int		rv, pages, perpage, currpage;
1820 
1821 	if (name == NULL)
1822 		return (EINVAL);
1823 	if ((root = getenv("zfs_be_root")) == NULL)
1824 		return (EINVAL);
1825 
1826 	if (strcmp(name, root) != 0) {
1827 		if (setenv("zfs_be_root", name, 1) != 0)
1828 			return (ENOMEM);
1829 	}
1830 
1831 	SLIST_INIT(&zfs_be_head);
1832 	zfs_env_count = 0;
1833 
1834 	if (split_devname(name, poolname, sizeof(poolname), &dsname) != 0)
1835 		return (EINVAL);
1836 
1837 	spa = spa_find_by_name(poolname);
1838 	if (!spa)
1839 		return (ENXIO);
1840 	rv = zfs_lookup_dataset(spa, dsname, &objid);
1841 	if (rv != 0)
1842 		return (rv);
1843 	rv = zfs_callback_dataset(spa, objid, zfs_belist_add);
1844 
1845 	/* Calculate and store the number of pages of BEs */
1846 	perpage = (ZFS_BE_LAST - ZFS_BE_FIRST + 1);
1847 	pages = (zfs_env_count / perpage) + ((zfs_env_count % perpage) > 0 ? 1 : 0);
1848 	snprintf(becount, 4, "%d", pages);
1849 	if (setenv("zfs_be_pages", becount, 1) != 0)
1850 		return (ENOMEM);
1851 
1852 	/* Roll over the page counter if it has exceeded the maximum */
1853 	currpage = strtol(getenv("zfs_be_currpage"), NULL, 10);
1854 	if (currpage > pages) {
1855 		if (setenv("zfs_be_currpage", "1", 1) != 0)
1856 			return (ENOMEM);
1857 	}
1858 
1859 	/* Populate the menu environment variables */
1860 	zfs_set_env();
1861 
1862 	/* Clean up the SLIST of ZFS BEs */
1863 	while (!SLIST_EMPTY(&zfs_be_head)) {
1864 		zfs_be = SLIST_FIRST(&zfs_be_head);
1865 		SLIST_REMOVE_HEAD(&zfs_be_head, entries);
1866 		free(zfs_be->name);
1867 		free(zfs_be);
1868 	}
1869 
1870 	return (rv);
1871 }
1872 
1873 int
1874 zfs_belist_add(const char *name, uint64_t value __unused)
1875 {
1876 
1877 	/* Skip special datasets that start with a $ character */
1878 	if (strncmp(name, "$", 1) == 0) {
1879 		return (0);
1880 	}
1881 	/* Add the boot environment to the head of the SLIST */
1882 	zfs_be = malloc(sizeof(struct zfs_be_entry));
1883 	if (zfs_be == NULL) {
1884 		return (ENOMEM);
1885 	}
1886 	zfs_be->name = strdup(name);
1887 	if (zfs_be->name == NULL) {
1888 		free(zfs_be);
1889 		return (ENOMEM);
1890 	}
1891 	SLIST_INSERT_HEAD(&zfs_be_head, zfs_be, entries);
1892 	zfs_env_count++;
1893 
1894 	return (0);
1895 }
1896 
1897 int
1898 zfs_set_env(void)
1899 {
1900 	char envname[32], envval[256];
1901 	char *beroot, *pagenum;
1902 	int rv, page, ctr;
1903 
1904 	beroot = getenv("zfs_be_root");
1905 	if (beroot == NULL) {
1906 		return (1);
1907 	}
1908 
1909 	pagenum = getenv("zfs_be_currpage");
1910 	if (pagenum != NULL) {
1911 		page = strtol(pagenum, NULL, 10);
1912 	} else {
1913 		page = 1;
1914 	}
1915 
1916 	ctr = 1;
1917 	rv = 0;
1918 	zfs_env_index = ZFS_BE_FIRST;
1919 	SLIST_FOREACH_SAFE(zfs_be, &zfs_be_head, entries, zfs_be_tmp) {
1920 		/* Skip to the requested page number */
1921 		if (ctr <= ((ZFS_BE_LAST - ZFS_BE_FIRST + 1) * (page - 1))) {
1922 			ctr++;
1923 			continue;
1924 		}
1925 
1926 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1927 		snprintf(envval, sizeof(envval), "%s", zfs_be->name);
1928 		rv = setenv(envname, envval, 1);
1929 		if (rv != 0) {
1930 			break;
1931 		}
1932 
1933 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1934 		rv = setenv(envname, envval, 1);
1935 		if (rv != 0){
1936 			break;
1937 		}
1938 
1939 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1940 		rv = setenv(envname, "set_bootenv", 1);
1941 		if (rv != 0){
1942 			break;
1943 		}
1944 
1945 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1946 		snprintf(envval, sizeof(envval), "zfs:%s/%s", beroot, zfs_be->name);
1947 		rv = setenv(envname, envval, 1);
1948 		if (rv != 0){
1949 			break;
1950 		}
1951 
1952 		zfs_env_index++;
1953 		if (zfs_env_index > ZFS_BE_LAST) {
1954 			break;
1955 		}
1956 
1957 	}
1958 
1959 	for (; zfs_env_index <= ZFS_BE_LAST; zfs_env_index++) {
1960 		snprintf(envname, sizeof(envname), "bootenvmenu_caption[%d]", zfs_env_index);
1961 		(void)unsetenv(envname);
1962 		snprintf(envname, sizeof(envname), "bootenvansi_caption[%d]", zfs_env_index);
1963 		(void)unsetenv(envname);
1964 		snprintf(envname, sizeof(envname), "bootenvmenu_command[%d]", zfs_env_index);
1965 		(void)unsetenv(envname);
1966 		snprintf(envname, sizeof(envname), "bootenv_root[%d]", zfs_env_index);
1967 		(void)unsetenv(envname);
1968 	}
1969 
1970 	return (rv);
1971 }
1972