1 /*- 2 * Copyright (c) 1998 Robert Nordier 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are freely 6 * permitted provided that the above copyright notice and this 7 * paragraph and the following disclaimer are duplicated in all 8 * such forms. 9 * 10 * This software is provided "AS IS" and without any express or 11 * implied warranties, including, without limitation, the implied 12 * warranties of merchantability and fitness for a particular 13 * purpose. 14 */ 15 16 #include <sys/cdefs.h> 17 #include <stand.h> 18 19 #include <sys/param.h> 20 #include <sys/errno.h> 21 #include <sys/diskmbr.h> 22 #include <sys/vtoc.h> 23 #include <sys/disk.h> 24 #include <sys/reboot.h> 25 #include <sys/queue.h> 26 #include <multiboot.h> 27 28 #include <machine/bootinfo.h> 29 #include <machine/elf.h> 30 #include <machine/pc/bios.h> 31 32 #include <stdarg.h> 33 #include <stddef.h> 34 35 #include <a.out.h> 36 #include "bootstrap.h" 37 #include "libi386.h" 38 #include <btxv86.h> 39 40 #include "lib.h" 41 #include "rbx.h" 42 #include "cons.h" 43 #include "bootargs.h" 44 #include "disk.h" 45 #include "part.h" 46 #include "paths.h" 47 48 #include "libzfs.h" 49 50 #define ARGS 0x900 51 #define NOPT 14 52 #define NDEV 3 53 54 #define BIOS_NUMDRIVES 0x475 55 #define DRV_HARD 0x80 56 #define DRV_MASK 0x7f 57 58 #define TYPE_AD 0 59 #define TYPE_DA 1 60 #define TYPE_MAXHARD TYPE_DA 61 #define TYPE_FD 2 62 63 extern uint32_t _end; 64 65 /* 66 * Fake multiboot header to provide versioning and to pass 67 * partition start LBA. Partition is either GPT partition or 68 * VTOC slice. 69 */ 70 extern const struct multiboot_header mb_header; 71 extern uint64_t start_sector; 72 73 static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ 74 static const unsigned char flags[NOPT] = { 75 RBX_DUAL, 76 RBX_SERIAL, 77 RBX_ASKNAME, 78 RBX_CDROM, 79 RBX_CONFIG, 80 RBX_KDB, 81 RBX_GDB, 82 RBX_MUTE, 83 RBX_NOINTR, 84 RBX_PAUSE, 85 RBX_QUIET, 86 RBX_DFLTROOT, 87 RBX_SINGLE, 88 RBX_VERBOSE 89 }; 90 uint32_t opts; 91 92 static const unsigned char dev_maj[NDEV] = {30, 4, 2}; 93 94 static struct i386_devdesc *bdev; 95 static char cmd[512]; 96 static char cmddup[512]; 97 static char kname[1024]; 98 static int comspeed = SIOSPD; 99 static struct bootinfo bootinfo; 100 static uint32_t bootdev; 101 static struct zfs_boot_args zfsargs; 102 103 extern vm_offset_t high_heap_base; 104 extern uint32_t bios_basemem, bios_extmem, high_heap_size; 105 106 static char *heap_top; 107 static char *heap_bottom; 108 109 static void i386_zfs_probe(void); 110 void exit(int); 111 static void load(void); 112 static int parse_cmd(void); 113 114 struct arch_switch archsw; /* MI/MD interface boundary */ 115 static char boot_devname[2 * ZFS_MAXNAMELEN + 8]; /* disk or pool:dataset */ 116 117 struct devsw *devsw[] = { 118 &biosdisk, 119 &zfs_dev, 120 NULL 121 }; 122 123 struct fs_ops *file_system[] = { 124 &zfs_fsops, 125 &ufs_fsops, 126 &dosfs_fsops, 127 NULL 128 }; 129 130 int 131 main(void) 132 { 133 int auto_boot, i, fd; 134 struct disk_devdesc devdesc; 135 136 bios_getmem(); 137 138 if (high_heap_size > 0) { 139 heap_top = PTOV(high_heap_base + high_heap_size); 140 heap_bottom = PTOV(high_heap_base); 141 } else { 142 heap_bottom = (char *) 143 (roundup2(__base + (int32_t)&_end, 0x10000) - __base); 144 heap_top = (char *) PTOV(bios_basemem); 145 } 146 setheap(heap_bottom, heap_top); 147 148 /* 149 * Initialise the block cache. Set the upper limit. 150 */ 151 bcache_init(32768, 512); 152 153 archsw.arch_autoload = NULL; 154 archsw.arch_getdev = i386_getdev; 155 archsw.arch_copyin = NULL; 156 archsw.arch_copyout = NULL; 157 archsw.arch_readin = NULL; 158 archsw.arch_isainb = NULL; 159 archsw.arch_isaoutb = NULL; 160 archsw.arch_zfs_probe = i386_zfs_probe; 161 162 bootinfo.bi_version = BOOTINFO_VERSION; 163 bootinfo.bi_size = sizeof(bootinfo); 164 bootinfo.bi_basemem = bios_basemem / 1024; 165 bootinfo.bi_extmem = bios_extmem / 1024; 166 bootinfo.bi_memsizes_valid++; 167 bootinfo.bi_bios_dev = *(uint8_t *)PTOV(ARGS); 168 169 /* Set up fall back device name. */ 170 snprintf(boot_devname, sizeof (boot_devname), "disk%d:", 171 bd_bios2unit(bootinfo.bi_bios_dev)); 172 173 for (i = 0; devsw[i] != NULL; i++) 174 if (devsw[i]->dv_init != NULL) 175 (devsw[i]->dv_init)(); 176 177 disk_parsedev(&devdesc, boot_devname+4, NULL); 178 179 bootdev = MAKEBOOTDEV(dev_maj[devdesc.d_type], devdesc.d_slice + 1, 180 devdesc.d_unit, devdesc.d_partition >= 0? devdesc.d_partition:0xff); 181 182 /* 183 * zfs_fmtdev() can be called only after dv_init 184 */ 185 if (bdev != NULL && bdev->d_type == DEVT_ZFS) { 186 /* set up proper device name string for ZFS */ 187 strncpy(boot_devname, zfs_fmtdev(bdev), sizeof (boot_devname)); 188 } 189 190 /* now make sure we have bdev on all cases */ 191 if (bdev != NULL) 192 free(bdev); 193 i386_getdev((void **)&bdev, boot_devname, NULL); 194 195 env_setenv("currdev", EV_VOLATILE, boot_devname, i386_setcurrdev, 196 env_nounset); 197 198 /* Process configuration file */ 199 setenv("LINES", "24", 1); 200 auto_boot = 1; 201 202 fd = open(PATH_CONFIG, O_RDONLY); 203 if (fd == -1) 204 fd = open(PATH_DOTCONFIG, O_RDONLY); 205 206 if (fd != -1) { 207 read(fd, cmd, sizeof(cmd)); 208 close(fd); 209 } 210 211 if (*cmd) { 212 /* 213 * Note that parse_cmd() is destructive to cmd[] and we also want 214 * to honor RBX_QUIET option that could be present in cmd[]. 215 */ 216 memcpy(cmddup, cmd, sizeof(cmd)); 217 if (parse_cmd()) 218 auto_boot = 0; 219 if (!OPT_CHECK(RBX_QUIET)) 220 printf("%s: %s\n", PATH_CONFIG, cmddup); 221 /* Do not process this command twice */ 222 *cmd = 0; 223 } 224 225 /* 226 * Try to exec stage 3 boot loader. If interrupted by a keypress, 227 * or in case of failure, switch off auto boot. 228 */ 229 230 if (auto_boot && !*kname) { 231 memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS)); 232 if (!keyhit(3)) { 233 load(); 234 auto_boot = 0; 235 } 236 } 237 238 /* Present the user with the boot2 prompt. */ 239 240 for (;;) { 241 if (!auto_boot || !OPT_CHECK(RBX_QUIET)) { 242 printf("\nillumos/x86 boot\n"); 243 printf("Default: %s%s\nboot: ", boot_devname, kname); 244 } 245 if (ioctrl & IO_SERIAL) 246 sio_flush(); 247 if (!auto_boot || keyhit(5)) 248 getstr(cmd, sizeof(cmd)); 249 else if (!auto_boot || !OPT_CHECK(RBX_QUIET)) 250 putchar('\n'); 251 auto_boot = 0; 252 if (parse_cmd()) 253 putchar('\a'); 254 else 255 load(); 256 } 257 } 258 259 /* XXX - Needed for btxld to link the boot2 binary; do not remove. */ 260 void 261 exit(int x) 262 { 263 } 264 265 static void 266 load(void) 267 { 268 union { 269 struct exec ex; 270 Elf32_Ehdr eh; 271 } hdr; 272 static Elf32_Phdr ep[2]; 273 static Elf32_Shdr es[2]; 274 caddr_t p; 275 uint32_t addr, x; 276 int fd, fmt, i, j; 277 278 if ((fd = open(kname, O_RDONLY)) == -1) { 279 printf("\nCan't find %s\n", kname); 280 return; 281 } 282 if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) { 283 close(fd); 284 return; 285 } 286 if (N_GETMAGIC(hdr.ex) == ZMAGIC) 287 fmt = 0; 288 else if (IS_ELF(hdr.eh)) 289 fmt = 1; 290 else { 291 printf("Invalid %s\n", "format"); 292 close(fd); 293 return; 294 } 295 if (fmt == 0) { 296 addr = hdr.ex.a_entry & 0xffffff; 297 p = PTOV(addr); 298 lseek(fd, PAGE_SIZE, SEEK_SET); 299 if (read(fd, p, hdr.ex.a_text) != hdr.ex.a_text) { 300 close(fd); 301 return; 302 } 303 p += roundup2(hdr.ex.a_text, PAGE_SIZE); 304 if (read(fd, p, hdr.ex.a_data) != hdr.ex.a_data) { 305 close(fd); 306 return; 307 } 308 p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); 309 bootinfo.bi_symtab = VTOP(p); 310 memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); 311 p += sizeof(hdr.ex.a_syms); 312 if (hdr.ex.a_syms) { 313 if (read(fd, p, hdr.ex.a_syms) != hdr.ex.a_syms) { 314 close(fd); 315 return; 316 } 317 p += hdr.ex.a_syms; 318 if (read(fd, p, sizeof(int)) != sizeof(int)) { 319 close(fd); 320 return; 321 } 322 x = *(uint32_t *)p; 323 p += sizeof(int); 324 x -= sizeof(int); 325 if (read(fd, p, x) != x) { 326 close(fd); 327 return; 328 } 329 p += x; 330 } 331 } else { 332 lseek(fd, hdr.eh.e_phoff, SEEK_SET); 333 for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { 334 if (read(fd, ep + j, sizeof(ep[0])) != sizeof(ep[0])) { 335 close(fd); 336 return; 337 } 338 if (ep[j].p_type == PT_LOAD) 339 j++; 340 } 341 for (i = 0; i < 2; i++) { 342 p = PTOV(ep[i].p_paddr & 0xffffff); 343 lseek(fd, ep[i].p_offset, SEEK_SET); 344 if (read(fd, p, ep[i].p_filesz) != ep[i].p_filesz) { 345 close(fd); 346 return; 347 } 348 } 349 p += roundup2(ep[1].p_memsz, PAGE_SIZE); 350 bootinfo.bi_symtab = VTOP(p); 351 if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { 352 lseek(fd, hdr.eh.e_shoff + sizeof(es[0]) * (hdr.eh.e_shstrndx + 1), 353 SEEK_SET); 354 if (read(fd, &es, sizeof(es)) != sizeof(es)) { 355 close(fd); 356 return; 357 } 358 for (i = 0; i < 2; i++) { 359 memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); 360 p += sizeof(es[i].sh_size); 361 lseek(fd, es[i].sh_offset, SEEK_SET); 362 if (read(fd, p, es[i].sh_size) != es[i].sh_size) { 363 close(fd); 364 return; 365 } 366 p += es[i].sh_size; 367 } 368 } 369 addr = hdr.eh.e_entry & 0xffffff; 370 } 371 close(fd); 372 373 bootinfo.bi_esymtab = VTOP(p); 374 bootinfo.bi_kernelname = VTOP(kname); 375 376 if (bdev->d_type == DEVT_ZFS) { 377 zfsargs.size = sizeof(zfsargs); 378 zfsargs.pool = bdev->d_kind.zfs.pool_guid; 379 zfsargs.root = bdev->d_kind.zfs.root_guid; 380 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), 381 bootdev, 382 KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, 383 (uint32_t) bdev->d_kind.zfs.pool_guid, 384 (uint32_t) (bdev->d_kind.zfs.pool_guid >> 32), 385 VTOP(&bootinfo), 386 zfsargs); 387 } else 388 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), 389 bootdev, 0, 0, 0, VTOP(&bootinfo)); 390 } 391 392 static int 393 mount_root(char *arg) 394 { 395 char *root; 396 struct i386_devdesc *ddesc; 397 uint8_t part; 398 399 root = malloc(strlen(arg) + 2); 400 if (root == NULL) 401 return (1); 402 sprintf(root, "%s:", arg); 403 if (i386_getdev((void **)&ddesc, root, NULL)) { 404 free(root); 405 return (1); 406 } 407 408 /* we should have new device descriptor, free old and replace it. */ 409 if (bdev != NULL) 410 free(bdev); 411 bdev = ddesc; 412 if (bdev->d_type == DEVT_DISK) { 413 if (bdev->d_kind.biosdisk.partition == -1) 414 part = 0xff; 415 else 416 part = bdev->d_kind.biosdisk.partition; 417 bootdev = MAKEBOOTDEV(dev_maj[bdev->d_type], 418 bdev->d_kind.biosdisk.slice + 1, 419 bdev->d_unit, part); 420 bootinfo.bi_bios_dev = bd_unit2bios(bdev->d_unit); 421 } 422 setenv("currdev", root, 1); 423 free(root); 424 return (0); 425 } 426 427 static void 428 fs_list(char *arg) 429 { 430 int fd; 431 struct dirent *d; 432 char line[80]; 433 434 fd = open(arg, O_RDONLY); 435 if (fd < 0) 436 return; 437 pager_open(); 438 while ((d = readdirfd(fd)) != NULL) { 439 sprintf(line, "%s\n", d->d_name); 440 if (pager_output(line)) 441 break; 442 } 443 pager_close(); 444 close(fd); 445 } 446 447 static int 448 parse_cmd(void) 449 { 450 char *arg = cmd; 451 char *ep, *p, *q; 452 const char *cp; 453 char line[80]; 454 int c, i, j; 455 456 while ((c = *arg++)) { 457 if (c == ' ' || c == '\t' || c == '\n') 458 continue; 459 for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); 460 ep = p; 461 if (*p) 462 *p++ = 0; 463 if (c == '-') { 464 while ((c = *arg++)) { 465 if (c == 'P') { 466 if (*(uint8_t *)PTOV(0x496) & 0x10) { 467 cp = "yes"; 468 } else { 469 opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); 470 cp = "no"; 471 } 472 printf("Keyboard: %s\n", cp); 473 continue; 474 } else if (c == 'S') { 475 j = 0; 476 while ((unsigned int)(i = *arg++ - '0') <= 9) 477 j = j * 10 + i; 478 if (j > 0 && i == -'0') { 479 comspeed = j; 480 break; 481 } 482 /* Fall through to error below ('S' not in optstr[]). */ 483 } 484 for (i = 0; c != optstr[i]; i++) 485 if (i == NOPT - 1) 486 return -1; 487 opts ^= OPT_SET(flags[i]); 488 } 489 ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) : 490 OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; 491 if (ioctrl & IO_SERIAL) { 492 if (sio_init(115200 / comspeed) != 0) 493 ioctrl &= ~IO_SERIAL; 494 } 495 } if (c == '?') { 496 printf("\n"); 497 fs_list(arg); 498 zfs_list(arg); 499 return -1; 500 } else { 501 arg--; 502 503 /* 504 * Report pool status if the comment is 'status'. Lets 505 * hope no-one wants to load /status as a kernel. 506 */ 507 if (!strcmp(arg, "status")) { 508 pager_open(); 509 for (i = 0; devsw[i] != NULL; i++) { 510 if (devsw[i]->dv_print != NULL) { 511 sprintf(line, "\n%s devices:\n", devsw[i]->dv_name); 512 if (pager_output(line)) 513 break; 514 if (devsw[i]->dv_print(1)) 515 break; 516 } else { 517 sprintf(line, "%s: (unknown)\n", devsw[i]->dv_name); 518 if (pager_output(line)) 519 break; 520 } 521 } 522 pager_close(); 523 return -1; 524 } 525 526 /* 527 * If there is a colon, switch pools. 528 */ 529 if (strncmp(arg, "zfs:", 4) == 0) 530 q = strchr(arg + 4, ':'); 531 else 532 q = strchr(arg, ':'); 533 if (q) { 534 *q++ = '\0'; 535 if (mount_root(arg) != 0) 536 return -1; 537 arg = q; 538 } 539 if ((i = ep - arg)) { 540 if ((size_t)i >= sizeof(kname)) 541 return -1; 542 memcpy(kname, arg, i + 1); 543 } 544 } 545 arg = p; 546 } 547 return 0; 548 } 549 550 /* 551 * probe arguments for partition iterator (see below) 552 */ 553 struct probe_args { 554 int fd; 555 char *devname; 556 u_int secsz; 557 uint64_t offset; 558 }; 559 560 /* 561 * simple wrapper around read() to avoid using device specific 562 * strategy() directly. 563 */ 564 static int 565 parttblread(void *arg, void *buf, size_t blocks, uint64_t offset) 566 { 567 struct probe_args *ppa = arg; 568 size_t size = ppa->secsz * blocks; 569 570 lseek(ppa->fd, offset * ppa->secsz, SEEK_SET); 571 if (read(ppa->fd, buf, size) == size) 572 return (0); 573 return (EIO); 574 } 575 576 /* 577 * scan partition entries to find boot partition starting at start_sector. 578 * in case of MBR partition type PART_SOLARIS2, read VTOC and recurse. 579 */ 580 static int 581 probe_partition(void *arg, const char *partname, 582 const struct ptable_entry *part) 583 { 584 struct probe_args pa, *ppa = arg; 585 struct ptable *table; 586 uint64_t *pool_guid_ptr = NULL; 587 uint64_t pool_guid = 0; 588 char devname[32]; 589 int len, ret = 0; 590 591 len = strlen(ppa->devname); 592 if (len > sizeof (devname)) 593 len = sizeof (devname); 594 595 strncpy(devname, ppa->devname, len - 1); 596 devname[len - 1] = '\0'; 597 snprintf(devname, sizeof (devname), "%s%s:", devname, partname); 598 599 /* filter out partitions *not* used by zfs */ 600 switch (part->type) { 601 case PART_RESERVED: /* efi reserverd */ 602 case PART_VTOC_BOOT: /* vtoc boot area */ 603 case PART_VTOC_SWAP: 604 return (ret); 605 default: 606 break; 607 } 608 609 if (part->type == PART_SOLARIS2) { 610 pa.offset = part->start; 611 pa.fd = open(devname, O_RDONLY); 612 if (pa.fd == -1) 613 return (ret); 614 pa.devname = devname; 615 pa.secsz = ppa->secsz; 616 table = ptable_open(&pa, part->end - part->start + 1, 617 ppa->secsz, parttblread); 618 if (table != NULL) { 619 ret = ptable_iterate(table, &pa, probe_partition); 620 ptable_close(table); 621 } 622 close(pa.fd); 623 return (ret); 624 } 625 626 if (ppa->offset + part->start == start_sector) { 627 /* Ask zfs_probe_dev to provide guid. */ 628 pool_guid_ptr = &pool_guid; 629 /* Set up boot device name for non-zfs case. */ 630 strncpy(boot_devname, devname, sizeof (boot_devname)); 631 } 632 633 ret = zfs_probe_dev(devname, pool_guid_ptr); 634 if (pool_guid != 0 && bdev == NULL) { 635 bdev = malloc(sizeof (struct i386_devdesc)); 636 bzero(bdev, sizeof (struct i386_devdesc)); 637 bdev->d_type = DEVT_ZFS; 638 bdev->d_dev = &zfs_dev; 639 bdev->d_kind.zfs.pool_guid = pool_guid; 640 641 /* 642 * We can not set up zfs boot device name yet, as the 643 * zfs dv_init() is not completed. We will set boot_devname 644 * in main, after devsw setup. 645 */ 646 } 647 648 return (0); 649 } 650 651 /* 652 * open partition table on disk and scan partition entries to find 653 * boot partition starting at start_sector (recorded by installboot). 654 */ 655 static int 656 probe_disk(char *devname) 657 { 658 struct ptable *table; 659 struct probe_args pa; 660 uint64_t mediasz; 661 int ret; 662 663 pa.offset = 0; 664 pa.devname = devname; 665 pa.fd = open(devname, O_RDONLY); 666 if (pa.fd == -1) { 667 return (ENXIO); 668 } 669 670 ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); 671 if (ret == 0) 672 ret = ioctl(pa.fd, DIOCGSECTORSIZE, &pa.secsz); 673 if (ret == 0) { 674 table = ptable_open(&pa, mediasz / pa.secsz, pa.secsz, 675 parttblread); 676 if (table != NULL) { 677 ret = ptable_iterate(table, &pa, probe_partition); 678 ptable_close(table); 679 } 680 } 681 close(pa.fd); 682 return (ret); 683 } 684 685 /* 686 * Probe all disks to discover ZFS pools. The idea is to walk all possible 687 * disk devices, however, we also need to identify possible boot pool. 688 * For boot pool detection we have boot disk passed us from BIOS, recorded 689 * in bootinfo.bi_bios_dev, and start_sector LBA recorded by installboot. 690 * 691 * To detect boot pool, we can not use generic zfs_probe_dev() on boot disk, 692 * but we need to walk partitions, as we have no way to pass start_sector 693 * to zfs_probe_dev(). Note we do need to detect the partition correcponding 694 * to non-zfs case, so here we can set boot_devname for both cases. 695 */ 696 static void 697 i386_zfs_probe(void) 698 { 699 char devname[32]; 700 int boot_unit, unit; 701 702 /* Translate bios dev to our unit number. */ 703 boot_unit = bd_bios2unit(bootinfo.bi_bios_dev); 704 705 /* 706 * Open all the disks we can find and see if we can reconstruct 707 * ZFS pools from them. 708 */ 709 for (unit = 0; unit < MAXBDDEV; unit++) { 710 if (bd_unit2bios(unit) == -1) 711 break; 712 713 sprintf(devname, "disk%d:", unit); 714 /* If this is not boot disk, use generic probe. */ 715 if (unit != boot_unit) 716 zfs_probe_dev(devname, NULL); 717 else 718 probe_disk(devname); 719 } 720 } 721 722 uint64_t 723 ldi_get_size(void *priv) 724 { 725 int fd = (uintptr_t) priv; 726 uint64_t size; 727 728 ioctl(fd, DIOCGMEDIASIZE, &size); 729 return (size); 730 } 731