xref: /illumos-gate/usr/src/boot/i386/gptzfsboot/zfsboot.c (revision ddb365bfc9e868ad24ccdcb0dc91af18b10df082)
1 /*
2  * Copyright (c) 1998 Robert Nordier
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms are freely
6  * permitted provided that the above copyright notice and this
7  * paragraph and the following disclaimer are duplicated in all
8  * such forms.
9  *
10  * This software is provided "AS IS" and without any express or
11  * implied warranties, including, without limitation, the implied
12  * warranties of merchantability and fitness for a particular
13  * purpose.
14  */
15 
16 #include <sys/cdefs.h>
17 #include <stand.h>
18 
19 #include <sys/param.h>
20 #include <sys/errno.h>
21 #include <sys/diskmbr.h>
22 #include <sys/vtoc.h>
23 #include <sys/disk.h>
24 #include <sys/reboot.h>
25 #include <sys/queue.h>
26 #include <sys/multiboot.h>
27 #include <sys/zfs_bootenv.h>
28 
29 #include <machine/bootinfo.h>
30 #include <machine/elf.h>
31 #include <machine/pc/bios.h>
32 
33 #include <stdarg.h>
34 #include <stdbool.h>
35 #include <stddef.h>
36 
37 #include <a.out.h>
38 #include "bootstrap.h"
39 #include "libi386.h"
40 #include <btxv86.h>
41 
42 #include "lib.h"
43 #include "rbx.h"
44 #include "cons.h"
45 #include "bootargs.h"
46 #include "disk.h"
47 #include "part.h"
48 #include "paths.h"
49 
50 #include "libzfs.h"
51 
/* Address that main() reads (via PTOV()) to obtain the BIOS boot device byte. */
#define	ARGS		0x900
/* Number of entries in optstr[] and flags[]. */
#define	NOPT		15
/* Number of entries in dev_maj[]. */
#define	NDEV		3

/*
 * NOTE(review): the BIOS_NUMDRIVES, DRV_* and TYPE_* constants below are not
 * referenced by the code visible in this file; confirm whether they are
 * still needed.
 */
#define	BIOS_NUMDRIVES	0x475
#define	DRV_HARD	0x80
#define	DRV_MASK	0x7f

#define	TYPE_AD		0
#define	TYPE_DA		1
#define	TYPE_MAXHARD	TYPE_DA
#define	TYPE_FD		2
64 
/*
 * Symbol whose address main() uses to place the bottom of the heap when
 * no high heap is available.
 */
extern uint32_t _end;

/*
 * Fake multiboot header to provide versioning and to pass
 * partition start LBA. Partition is either GPT partition or
 * VTOC slice.
 *
 * probe_partition() compares start_sector against the absolute start of
 * each partition to recognise the boot partition.
 */
extern const struct multiboot_header mb_header;
extern uint64_t start_sector;
74 
/*
 * Command line option letters and the RBX_* flag each one toggles.
 * parse_cmd() searches optstr[] for the option character and toggles
 * flags[] at the same index, so the two tables must stay in the same order.
 * optstr[] holds exactly NOPT characters and therefore has no terminating
 * NUL; it must only be indexed, never used as a C string.
 * The options 'P' and 'S' are handled separately in parse_cmd().
 */
static const char optstr[NOPT] = "DhaCcdgmnpqrstv"; /* Also 'P', 'S' */
static const unsigned char flags[NOPT] = {
    RBX_DUAL,
    RBX_SERIAL,
    RBX_ASKNAME,
    RBX_CDROM,
    RBX_CONFIG,
    RBX_KDB,
    RBX_GDB,
    RBX_MUTE,
    RBX_NOINTR,
    RBX_PAUSE,
    RBX_QUIET,
    RBX_DFLTROOT,
    RBX_SINGLE,
    RBX_TEXT_MODE,
    RBX_VERBOSE
};
/* Accumulated boot options; modified by parse_cmd() and passed on by load(). */
uint32_t opts;
94 
/*
 * Paths to try loading before falling back to the boot2 prompt.
 *
 * main() copies each entry into kname[] with memcpy(kname, p, len).
 * len is sizeof of the path macro; for PATH_ZFSLOADER (a string literal)
 * that includes the terminating NUL, so the copy yields a terminated string.
 * NOTE(review): PATH_LOADER is defined outside this file; this assumes it is
 * a string literal as well.
 */
#define	PATH_ZFSLOADER "/boot/zfsloader"
static const struct string {
	const char *p;		/* path text */
	size_t len;		/* number of bytes to copy into kname[] */
} loadpath[] = {
	{ PATH_LOADER, sizeof (PATH_LOADER) },
	{ PATH_ZFSLOADER, sizeof (PATH_ZFSLOADER) }
};
106 
/* Major numbers handed to MAKEBOOTDEV(), indexed by device type (DEVT_*). */
static const unsigned char dev_maj[NDEV] = {30, 4, 2};
/* I/O port of the serial console; main() selects it from the console name. */
extern int sio_port;

/*
 * Current boot device descriptor. Allocated by probe_partition() or
 * i386_getdev(), replaced by mount_root(), and consulted by load().
 */
static struct i386_devdesc *bdev;
/* Command line being parsed; parse_cmd() modifies it in place. */
static char cmd[512];
/* Untouched copy of cmd[] kept for printing after parse_cmd() has run. */
static char cmddup[512];
/* Path of the file load() will try to boot. */
static char kname[1024];
/* Serial console speed; sio_init() is called with 115200 / comspeed. */
static int comspeed = SIOSPD;
/* Boot information block whose physical address is passed to __exec(). */
static struct bootinfo bootinfo;
/* Boot device value built with MAKEBOOTDEV() and passed to __exec(). */
static uint32_t bootdev;
/* Extra ZFS arguments passed to __exec() when booting from a ZFS device. */
static struct zfs_boot_args zfsargs;

extern vm_offset_t high_heap_base;
extern uint32_t	bios_basemem, bios_extmem, high_heap_size;

/* Heap bounds computed in main() and handed to setheap(). */
static char *heap_top;
static char *heap_bottom;

static void i386_zfs_probe(void);
static void load(void);
static int parse_cmd(void);

struct arch_switch archsw;	/* MI/MD interface boundary */
static char boot_devname[2 * ZFS_MAXNAMELEN + 8]; /* disk or pool:dataset */
131 
/*
 * NULL-terminated table of device switch entries. main() calls dv_init on
 * each entry that provides it, and the "status" command in parse_cmd() calls
 * dv_print on each entry that provides it.
 */
struct devsw *devsw[] = {
	&bioshd,
	&zfs_dev,
	NULL
};

/* NULL-terminated table of supported file system implementations. */
struct fs_ops *file_system[] = {
	&zfs_fsops,
	&ufs_fsops,
	&dosfs_fsops,
	NULL
};
144 
/*
 * Function form of the PTOV() macro: returns PTOV(x) for the given address.
 */
caddr_t
ptov(uintptr_t x)
{
	return (PTOV(x));
}
150 
151 int
152 main(void)
153 {
154 	unsigned i;
155 	int fd;
156 	bool auto_boot;
157 	bool nextboot = false;
158 	struct disk_devdesc devdesc;
159 	char *ptr;
160 
161 	bios_getmem();
162 
163 	if (high_heap_size > 0) {
164 		heap_top = PTOV(high_heap_base + high_heap_size);
165 		heap_bottom = PTOV(high_heap_base);
166 	} else {
167 		heap_bottom = (char *)
168 		    (roundup2(__base + (int32_t)&_end, 0x10000) - __base);
169 		heap_top = (char *)PTOV(bios_basemem);
170 	}
171 	setheap(heap_bottom, heap_top);
172 
173 	/*
174 	 * detect ACPI for future reference. This may set console to comconsole
175 	 * if we do have ACPI SPCR table.
176 	 */
177 	biosacpi_detect();
178 	ptr = getenv("console");
179 	if (ptr != NULL && strcmp(ptr, "text") != 0) {
180 		char mode[10];
181 
182 		ioctrl = IO_SERIAL;
183 		snprintf(mode, sizeof (mode), "%s-mode", ptr);
184 
185 		switch (ptr[3]) {
186 		case 'a':
187 			sio_port = 0x3F8;
188 			break;
189 		case 'b':
190 			sio_port = 0x2F8;
191 			break;
192 		case 'c':
193 			sio_port = 0x3E8;
194 			break;
195 		case 'd':
196 			sio_port = 0x2E8;
197 			break;
198 		}
199 		ptr = getenv(mode);
200 		if (ptr != NULL) {
201 			comspeed = strtoul(ptr, NULL, 0);
202 			if (sio_init(115200 / comspeed) != 0)
203 				ioctrl |= IO_KEYBOARD;
204 		}
205 	}
206 
207 	/*
208 	 * Initialise the block cache. Set the upper limit.
209 	 */
210 	bcache_init(32768, 512);
211 
212 	archsw.arch_autoload = NULL;
213 	archsw.arch_getdev = i386_getdev;
214 	archsw.arch_copyin = NULL;
215 	archsw.arch_copyout = NULL;
216 	archsw.arch_readin = NULL;
217 	archsw.arch_isainb = NULL;
218 	archsw.arch_isaoutb = NULL;
219 	archsw.arch_zfs_probe = i386_zfs_probe;
220 
221 	bootinfo.bi_version = BOOTINFO_VERSION;
222 	bootinfo.bi_size = sizeof (bootinfo);
223 	bootinfo.bi_basemem = bios_basemem / 1024;
224 	bootinfo.bi_extmem = bios_extmem / 1024;
225 	bootinfo.bi_memsizes_valid++;
226 	bootinfo.bi_bios_dev = *(uint8_t *)PTOV(ARGS);
227 
228 	/*
229 	 * Set up fall back device name. bd_bios2unit() is not available yet.
230 	 */
231 	if (bootinfo.bi_bios_dev < 0x80)
232 		snprintf(boot_devname, sizeof (boot_devname), "disk%d:",
233 		    bootinfo.bi_bios_dev);
234 	else
235 		snprintf(boot_devname, sizeof (boot_devname), "disk%d:",
236 		    bootinfo.bi_bios_dev - 0x80);
237 
238 	for (i = 0; devsw[i] != NULL; i++)
239 		if (devsw[i]->dv_init != NULL)
240 			(devsw[i]->dv_init)();
241 
242 	disk_parsedev(&devdesc, boot_devname + 4, NULL);
243 
244 	bootdev = MAKEBOOTDEV(dev_maj[DEVT_DISK], devdesc.d_slice + 1,
245 	    devdesc.dd.d_unit,
246 	    devdesc.d_partition >= 0 ? devdesc.d_partition : 0xff);
247 
248 	/*
249 	 * zfs_fmtdev() can be called only after dv_init
250 	 */
251 	if (bdev != NULL && bdev->dd.d_dev->dv_type == DEVT_ZFS) {
252 		/* set up proper device name string for ZFS */
253 		strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname));
254 		if (zfs_get_bootonce(bdev, OS_BOOTONCE, cmd,
255 		    sizeof (cmd)) == 0) {
256 			nvlist_t *benv;
257 
258 			nextboot = true;
259 			memcpy(cmddup, cmd, sizeof (cmd));
260 			if (parse_cmd()) {
261 				printf("failed to parse bootonce command\n");
262 				exit(0);
263 			}
264 			if (!OPT_CHECK(RBX_QUIET))
265 				printf("zfs bootonce: %s\n", cmddup);
266 
267 			if (zfs_get_bootenv(bdev, &benv) == 0) {
268 				nvlist_add_string(benv, OS_BOOTONCE_USED,
269 				    cmddup);
270 				zfs_set_bootenv(bdev, benv);
271 			}
272 			/* Do not process this command twice */
273 			*cmd = 0;
274 		}
275 	}
276 
277 	/* now make sure we have bdev on all cases */
278 	free(bdev);
279 	i386_getdev((void **)&bdev, boot_devname, NULL);
280 
281 	env_setenv("currdev", EV_VOLATILE, boot_devname, i386_setcurrdev,
282 	    env_nounset);
283 
284 	/* Process configuration file */
285 	setenv("screen-#rows", "24", 1);
286 	auto_boot = true;
287 
288 	fd = open(PATH_CONFIG, O_RDONLY);
289 	if (fd == -1)
290 		fd = open(PATH_DOTCONFIG, O_RDONLY);
291 
292 	if (fd != -1) {
293 		ssize_t cmdlen;
294 
295 		if ((cmdlen = read(fd, cmd, sizeof (cmd))) > 0)
296 			cmd[cmdlen] = '\0';
297 		else
298 			*cmd = '\0';
299 		close(fd);
300 	}
301 
302 	if (*cmd) {
303 		/*
304 		 * Note that parse_cmd() is destructive to cmd[] and we also
305 		 * want to honor RBX_QUIET option that could be present in
306 		 * cmd[].
307 		 */
308 		memcpy(cmddup, cmd, sizeof (cmd));
309 		if (parse_cmd())
310 			auto_boot = false;
311 		if (!OPT_CHECK(RBX_QUIET))
312 			printf("%s: %s\n", PATH_CONFIG, cmddup);
313 		/* Do not process this command twice */
314 		*cmd = 0;
315 	}
316 
317 	/* Do not risk waiting at the prompt forever. */
318 	if (nextboot && !auto_boot)
319 		exit(0);
320 
321 	if (auto_boot && !*kname) {
322 		/*
323 		 * Try to exec stage 3 boot loader. If interrupted by a
324 		 * keypress, or in case of failure, drop the user to the
325 		 * boot2 prompt..
326 		 */
327 		auto_boot = false;
328 		for (i = 0; i < nitems(loadpath); i++) {
329 			memcpy(kname, loadpath[i].p, loadpath[i].len);
330 			if (keyhit(3))
331 				break;
332 			load();
333 		}
334 	}
335 	/* Reset to default */
336 	memcpy(kname, loadpath[0].p, loadpath[0].len);
337 
338 	/* Present the user with the boot2 prompt. */
339 
340 	for (;;) {
341 		if (!auto_boot || !OPT_CHECK(RBX_QUIET)) {
342 			printf("\nillumos/x86 boot\n");
343 			printf("Default: %s%s\nboot: ", boot_devname, kname);
344 		}
345 		if (ioctrl & IO_SERIAL)
346 			sio_flush();
347 		if (!auto_boot || keyhit(5))
348 			getstr(cmd, sizeof (cmd));
349 		else if (!auto_boot || !OPT_CHECK(RBX_QUIET))
350 			putchar('\n');
351 		auto_boot = false;
352 		if (parse_cmd())
353 			putchar('\a');
354 		else
355 			load();
356 	}
357 }
358 
/*
 * XXX - Needed for btxld to link the boot2 binary; do not remove.
 *
 * Forwards the exit status to __exit(). Called from main() when a bootonce
 * command cannot be parsed or would otherwise leave the system waiting at
 * the prompt.
 */
void
exit(int x)
{
	__exit(x);
}
365 
366 static void
367 load(void)
368 {
369 	union {
370 		struct exec ex;
371 		Elf32_Ehdr eh;
372 	} hdr;
373 	static Elf32_Phdr ep[2];
374 	static Elf32_Shdr es[2];
375 	caddr_t p;
376 	uint32_t addr, x;
377 	int fd, fmt, i, j;
378 
379 	if ((fd = open(kname, O_RDONLY)) == -1) {
380 		printf("\nCan't find %s\n", kname);
381 		return;
382 	}
383 	if (read(fd, &hdr, sizeof (hdr)) != sizeof (hdr)) {
384 		close(fd);
385 		return;
386 	}
387 	if (N_GETMAGIC(hdr.ex) == ZMAGIC) {
388 		fmt = 0;
389 	} else if (IS_ELF(hdr.eh)) {
390 		fmt = 1;
391 	} else {
392 		printf("Invalid %s\n", "format");
393 		close(fd);
394 		return;
395 	}
396 	if (fmt == 0) {
397 		addr = hdr.ex.a_entry & 0xffffff;
398 		p = PTOV(addr);
399 		lseek(fd, PAGE_SIZE, SEEK_SET);
400 		if (read(fd, p, hdr.ex.a_text) != hdr.ex.a_text) {
401 			close(fd);
402 			return;
403 		}
404 		p += roundup2(hdr.ex.a_text, PAGE_SIZE);
405 		if (read(fd, p, hdr.ex.a_data) != hdr.ex.a_data) {
406 			close(fd);
407 			return;
408 		}
409 		p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE);
410 		bootinfo.bi_symtab = VTOP(p);
411 		memcpy(p, &hdr.ex.a_syms, sizeof (hdr.ex.a_syms));
412 		p += sizeof (hdr.ex.a_syms);
413 		if (hdr.ex.a_syms) {
414 			if (read(fd, p, hdr.ex.a_syms) != hdr.ex.a_syms) {
415 				close(fd);
416 				return;
417 			}
418 			p += hdr.ex.a_syms;
419 			if (read(fd, p, sizeof (int)) != sizeof (int)) {
420 				close(fd);
421 				return;
422 			}
423 			x = *(uint32_t *)p;
424 			p += sizeof (int);
425 			x -= sizeof (int);
426 			if (read(fd, p, x) != x) {
427 				close(fd);
428 				return;
429 			}
430 			p += x;
431 		}
432 	} else {
433 		lseek(fd, hdr.eh.e_phoff, SEEK_SET);
434 		for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) {
435 			if (read(fd, ep + j, sizeof (ep[0])) !=
436 			    sizeof (ep[0])) {
437 				close(fd);
438 				return;
439 			}
440 			if (ep[j].p_type == PT_LOAD)
441 				j++;
442 		}
443 		for (i = 0; i < 2; i++) {
444 			p = PTOV(ep[i].p_paddr & 0xffffff);
445 			lseek(fd, ep[i].p_offset, SEEK_SET);
446 			if (read(fd, p, ep[i].p_filesz) != ep[i].p_filesz) {
447 				close(fd);
448 				return;
449 			}
450 		}
451 		p += roundup2(ep[1].p_memsz, PAGE_SIZE);
452 		bootinfo.bi_symtab = VTOP(p);
453 		if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) {
454 			lseek(fd, hdr.eh.e_shoff +
455 			    sizeof (es[0]) * (hdr.eh.e_shstrndx + 1),
456 			    SEEK_SET);
457 			if (read(fd, &es, sizeof (es)) != sizeof (es)) {
458 				close(fd);
459 				return;
460 			}
461 			for (i = 0; i < 2; i++) {
462 				memcpy(p, &es[i].sh_size,
463 				    sizeof (es[i].sh_size));
464 				p += sizeof (es[i].sh_size);
465 				lseek(fd, es[i].sh_offset, SEEK_SET);
466 				if (read(fd, p, es[i].sh_size) !=
467 				    es[i].sh_size) {
468 					close(fd);
469 					return;
470 				}
471 				p += es[i].sh_size;
472 			}
473 		}
474 		addr = hdr.eh.e_entry & 0xffffff;
475 	}
476 	close(fd);
477 
478 	bootinfo.bi_esymtab = VTOP(p);
479 	bootinfo.bi_kernelname = VTOP(kname);
480 
481 	if (bdev->dd.d_dev->dv_type == DEVT_ZFS) {
482 		zfsargs.size = sizeof (zfsargs);
483 		zfsargs.pool = bdev->d_kind.zfs.pool_guid;
484 		zfsargs.root = bdev->d_kind.zfs.root_guid;
485 		__exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
486 		    bootdev,
487 		    KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG,
488 		    (uint32_t)bdev->d_kind.zfs.pool_guid,
489 		    (uint32_t)(bdev->d_kind.zfs.pool_guid >> 32),
490 		    VTOP(&bootinfo),
491 		    zfsargs);
492 	} else {
493 		__exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK),
494 		    bootdev, 0, 0, 0, VTOP(&bootinfo));
495 	}
496 }
497 
498 static int
499 mount_root(char *arg)
500 {
501 	char *root;
502 	struct i386_devdesc *ddesc;
503 	uint8_t part;
504 
505 	if (asprintf(&root, "%s:", arg) < 0)
506 		return (1);
507 
508 	if (i386_getdev((void **)&ddesc, root, NULL)) {
509 		free(root);
510 		return (1);
511 	}
512 
513 	/* we should have new device descriptor, free old and replace it. */
514 	free(bdev);
515 	bdev = ddesc;
516 	if (bdev->dd.d_dev->dv_type == DEVT_DISK) {
517 		if (bdev->d_kind.biosdisk.partition == -1)
518 			part = 0xff;
519 		else
520 			part = bdev->d_kind.biosdisk.partition;
521 		bootdev = MAKEBOOTDEV(dev_maj[bdev->dd.d_dev->dv_type],
522 		    bdev->d_kind.biosdisk.slice + 1,
523 		    bdev->dd.d_unit, part);
524 		bootinfo.bi_bios_dev = bd_unit2bios(bdev);
525 	}
526 	strncpy(boot_devname, root, sizeof (boot_devname));
527 	setenv("currdev", root, 1);
528 	free(root);
529 	return (0);
530 }
531 
532 static void
533 fs_list(char *arg)
534 {
535 	int fd;
536 	struct dirent *d;
537 	char line[80];
538 
539 	fd = open(arg, O_RDONLY);
540 	if (fd < 0)
541 		return;
542 	pager_open();
543 	while ((d = readdirfd(fd)) != NULL) {
544 		sprintf(line, "%s\n", d->d_name);
545 		if (pager_output(line))
546 			break;
547 	}
548 	pager_close();
549 	close(fd);
550 }
551 
/*
 * Parse the command line in cmd[].
 *
 * The line is split into whitespace separated words, and cmd[] is modified
 * in place (terminators are written over separators). Each word is one of:
 *   -<options>   toggle boot options; also reconfigures the console
 *   ?<path>      list a directory and ZFS datasets, then return -1
 *   status       print device status, then return -1
 *   [dev:]path   optionally switch the boot device, and set kname[]
 *
 * Returns 0 when the whole line was processed and -1 on error or after a
 * command that only displays information.
 */
static int
parse_cmd(void)
{
	char *arg = cmd;
	char *ep, *p, *q;
	const char *cp;
	char line[80];
	int c, i;

	while ((c = *arg++)) {
		if (isspace(c))
			continue;

		/* Find the end of this word and terminate it. */
		for (p = arg; *p != '\0' && !isspace(*p); p++)
			;
		ep = p;
		if (*p != '\0')
			*p++ = '\0';
		if (c == '-') {
			while ((c = *arg++)) {
				if (isspace(c))
					break;

				if (c == 'P') {
					/* Probe for a keyboard. */
					if (*(uint8_t *)PTOV(0x496) & 0x10) {
						cp = "yes";
					} else {
						opts |= OPT_SET(RBX_DUAL);
						opts |= OPT_SET(RBX_SERIAL);
						cp = "no";
					}
					printf("Keyboard: %s\n", cp);
					continue;
				} else if (c == 'S') {
					char *end;

					/* The rest of the word is the speed. */
					errno = 0;
					i = strtol(arg, &end, 10);
					if (errno == 0 &&
					    *arg != '\0' &&
					    *end == '\0' &&
					    i > 0 &&
					    i <= 115200) {
						comspeed = i;
						break;
					} else {
						printf("warning: bad value for "
						    "speed: %s\n", arg);
					}
					arg = end;
					/*
					 * Fall through to error below
					 * ('S' not in optstr[]).
					 */
				}
				/* Look up the option; unknown is an error. */
				for (i = 0; c != optstr[i]; i++)
					if (i == NOPT - 1)
						return (-1);
				opts ^= OPT_SET(flags[i]);
			}
			/* Select the console(s) according to the options. */
			if (OPT_CHECK(RBX_DUAL))
				ioctrl = IO_SERIAL | IO_KEYBOARD;
			else if (OPT_CHECK(RBX_SERIAL))
				ioctrl = IO_SERIAL;
			else
				ioctrl = IO_KEYBOARD;

			if (ioctrl & IO_SERIAL) {
				if (sio_init(115200 / comspeed) != 0)
					ioctrl &= ~IO_SERIAL;
			}
		} else if (c == '?') {
			printf("\n");
			fs_list(arg);
			zfs_list(arg);
			return (-1);
		} else {
			/* Step back so that arg points at the whole word. */
			arg--;

			/*
			 * Report pool status if the command is 'status'. Lets
			 * hope no-one wants to load /status as a kernel.
			 */
			if (strcmp(arg, "status") == 0) {
				pager_open();
				for (i = 0; devsw[i] != NULL; i++) {
					if (devsw[i]->dv_print != NULL) {
						if (devsw[i]->dv_print(1))
							break;
					} else {
						sprintf(line,
						    "%s: (unknown)\n",
						    devsw[i]->dv_name);
						if (pager_output(line))
							break;
					}
				}
				pager_close();
				return (-1);
			}

			/*
			 * If there is a colon, switch pools.
			 * For a "zfs:" name the search starts after that
			 * prefix, so its colon is never taken as the
			 * separator.
			 */
			if (strncmp(arg, "zfs:", 4) == 0)
				q = strrchr(arg + 4, ':');
			else
				q = strrchr(arg, ':');

			if (q != NULL) {
				*q++ = '\0';
				if (mount_root(arg) != 0)
					return (-1);
				arg = q;
			}
			/* What remains, if anything, is the file to load. */
			if ((i = ep - arg)) {
				if ((size_t)i >= sizeof (kname))
					return (-1);
				memcpy(kname, arg, i + 1);
			}
		}
		arg = p;
	}
	return (0);
}
677 
/*
 * probe arguments for partition iterator (see below)
 */
struct probe_args {
	int		fd;		/* open descriptor parttblread() reads from */
	char		*devname;	/* device name; partition names are appended to it */
	uint_t		secsz;		/* sector size in bytes */
	uint64_t	offset;		/* start of the enclosing partition, in sectors */
};
687 
688 /*
689  * simple wrapper around read() to avoid using device specific
690  * strategy() directly.
691  */
692 static int
693 parttblread(void *arg, void *buf, size_t blocks, uint64_t offset)
694 {
695 	struct probe_args *ppa = arg;
696 	size_t size = ppa->secsz * blocks;
697 
698 	lseek(ppa->fd, offset * ppa->secsz, SEEK_SET);
699 	if (read(ppa->fd, buf, size) == size)
700 		return (0);
701 	return (EIO);
702 }
703 
704 /*
705  * scan partition entries to find boot partition starting at start_sector.
706  * in case of MBR partition type PART_SOLARIS2, read VTOC and recurse.
707  */
708 static int
709 probe_partition(void *arg, const char *partname,
710     const struct ptable_entry *part)
711 {
712 	struct probe_args pa, *ppa = arg;
713 	struct ptable *table;
714 	uint64_t *pool_guid_ptr = NULL;
715 	uint64_t pool_guid = 0;
716 	char devname[32];
717 	int len, ret = 0;
718 
719 	len = strlen(ppa->devname);
720 	if (len > sizeof (devname))
721 		len = sizeof (devname);
722 
723 	strncpy(devname, ppa->devname, len - 1);
724 	devname[len - 1] = '\0';
725 	snprintf(devname, sizeof (devname), "%s%s:", devname, partname);
726 
727 	/* filter out partitions *not* used by zfs */
728 	switch (part->type) {
729 	case PART_RESERVED:	/* efi reserverd */
730 	case PART_VTOC_BOOT:	/* vtoc boot area */
731 	case PART_VTOC_SWAP:
732 		return (ret);
733 	default:
734 		break;
735 	}
736 
737 	if (part->type == PART_SOLARIS2) {
738 		pa.offset = part->start;
739 		pa.fd = open(devname, O_RDONLY);
740 		if (pa.fd == -1)
741 			return (ret);
742 		pa.devname = devname;
743 		pa.secsz = ppa->secsz;
744 		table = ptable_open(&pa, part->end - part->start + 1,
745 		    ppa->secsz, parttblread);
746 		if (table != NULL) {
747 			enum ptable_type pt = ptable_gettype(table);
748 
749 			if (pt == PTABLE_VTOC8 || pt == PTABLE_VTOC) {
750 				ret = ptable_iterate(table, &pa,
751 				    probe_partition);
752 				ptable_close(table);
753 				close(pa.fd);
754 				return (ret);
755 			}
756 			ptable_close(table);
757 		}
758 		close(pa.fd);
759 	}
760 
761 	if (ppa->offset + part->start == start_sector) {
762 		/* Ask zfs_probe_dev to provide guid. */
763 		pool_guid_ptr = &pool_guid;
764 		/* Set up boot device name for non-zfs case. */
765 		strncpy(boot_devname, devname, sizeof (boot_devname));
766 	}
767 
768 	ret = zfs_probe_dev(devname, pool_guid_ptr);
769 	if (pool_guid != 0 && bdev == NULL) {
770 		bdev = malloc(sizeof (struct i386_devdesc));
771 		bzero(bdev, sizeof (struct i386_devdesc));
772 		bdev->dd.d_dev = &zfs_dev;
773 		bdev->d_kind.zfs.pool_guid = pool_guid;
774 
775 		/*
776 		 * We can not set up zfs boot device name yet, as the
777 		 * zfs dv_init() is not completed. We will set boot_devname
778 		 * in main, after devsw setup.
779 		 */
780 	}
781 
782 	return (0);
783 }
784 
785 /*
786  * open partition table on disk and scan partition entries to find
787  * boot partition starting at start_sector (recorded by installboot).
788  */
789 static int
790 probe_disk(char *devname)
791 {
792 	struct ptable *table;
793 	struct probe_args pa;
794 	uint64_t mediasz;
795 	int ret;
796 
797 	pa.offset = 0;
798 	pa.devname = devname;
799 	pa.fd = open(devname, O_RDONLY);
800 	if (pa.fd == -1) {
801 		return (ENXIO);
802 	}
803 
804 	ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz);
805 	if (ret == 0)
806 		ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz);
807 	if (ret == 0) {
808 		table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz,
809 		    parttblread);
810 		if (table != NULL) {
811 			ret = ptable_iterate(table, &pa, probe_partition);
812 			ptable_close(table);
813 		}
814 	}
815 	close(pa.fd);
816 	return (ret);
817 }
818 
/*
 * Probe all disks to discover ZFS pools. The idea is to walk all possible
 * disk devices; however, we also need to identify the possible boot pool.
 * For boot pool detection we have the boot disk passed to us by the BIOS,
 * recorded in bootinfo.bi_bios_dev, and the start_sector LBA recorded by
 * installboot.
 *
 * To detect the boot pool, we can not use the generic zfs_probe_dev() on the
 * boot disk; we need to walk its partitions instead, as we have no way to
 * pass start_sector to zfs_probe_dev(). Note that we also need to detect the
 * partition corresponding to the non-zfs case, so here we can set
 * boot_devname for both cases.
 */
830 static void
831 i386_zfs_probe(void)
832 {
833 	char devname[32];
834 	int boot_unit;
835 	struct i386_devdesc dev;
836 
837 	dev.dd.d_dev = &bioshd;
838 	/* Translate bios dev to our unit number. */
839 	boot_unit = bd_bios2unit(bootinfo.bi_bios_dev);
840 
841 	/*
842 	 * Open all the disks we can find and see if we can reconstruct
843 	 * ZFS pools from them.
844 	 */
845 	for (dev.dd.d_unit = 0; bd_unit2bios(&dev) >= 0; dev.dd.d_unit++) {
846 		snprintf(devname, sizeof (devname), "%s%d:", bioshd.dv_name,
847 		    dev.dd.d_unit);
848 		/* If this is not boot disk, use generic probe. */
849 		if (dev.dd.d_unit != boot_unit)
850 			zfs_probe_dev(devname, NULL);
851 		else
852 			probe_disk(devname);
853 	}
854 }
855