1 /*- 2 * Copyright (c) 1998 Robert Nordier 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are freely 6 * permitted provided that the above copyright notice and this 7 * paragraph and the following disclaimer are duplicated in all 8 * such forms. 9 * 10 * This software is provided "AS IS" and without any express or 11 * implied warranties, including, without limitation, the implied 12 * warranties of merchantability and fitness for a particular 13 * purpose. 14 */ 15 16 #include <sys/cdefs.h> 17 __FBSDID("$FreeBSD$"); 18 19 #include <sys/param.h> 20 #include <sys/errno.h> 21 #include <sys/diskmbr.h> 22 #ifdef GPT 23 #include <sys/gpt.h> 24 #endif 25 #include <sys/reboot.h> 26 #include <sys/queue.h> 27 28 #include <machine/bootinfo.h> 29 #include <machine/elf.h> 30 #include <machine/pc/bios.h> 31 32 #include <stdarg.h> 33 #include <stddef.h> 34 35 #include <a.out.h> 36 37 #include <btxv86.h> 38 39 #include "lib.h" 40 #include "rbx.h" 41 #include "drv.h" 42 #include "edd.h" 43 #include "util.h" 44 #include "cons.h" 45 #include "bootargs.h" 46 #include "paths.h" 47 48 #include "libzfs.h" 49 50 #define ARGS 0x900 51 #define NOPT 14 52 #define NDEV 3 53 54 #define BIOS_NUMDRIVES 0x475 55 #define DRV_HARD 0x80 56 #define DRV_MASK 0x7f 57 58 #define TYPE_AD 0 59 #define TYPE_DA 1 60 #define TYPE_MAXHARD TYPE_DA 61 #define TYPE_FD 2 62 63 #define DEV_GELIBOOT_BSIZE 4096 64 65 extern uint32_t _end; 66 67 #ifdef GPT 68 static const uuid_t freebsd_zfs_uuid = GPT_ENT_TYPE_FREEBSD_ZFS; 69 #endif 70 static const char optstr[NOPT] = "DhaCcdgmnpqrsv"; /* Also 'P', 'S' */ 71 static const unsigned char flags[NOPT] = { 72 RBX_DUAL, 73 RBX_SERIAL, 74 RBX_ASKNAME, 75 RBX_CDROM, 76 RBX_CONFIG, 77 RBX_KDB, 78 RBX_GDB, 79 RBX_MUTE, 80 RBX_NOINTR, 81 RBX_PAUSE, 82 RBX_QUIET, 83 RBX_DFLTROOT, 84 RBX_SINGLE, 85 RBX_VERBOSE 86 }; 87 uint32_t opts; 88 89 static const unsigned char dev_maj[NDEV] = {30, 4, 2}; 90 91 static char cmd[512]; 92 static char cmddup[512]; 93 static char kname[1024]; 94 static char rootname[256]; 95 static int comspeed = SIOSPD; 96 static struct bootinfo bootinfo; 97 static uint32_t bootdev; 98 static struct zfs_boot_args zfsargs; 99 static struct zfsmount zfsmount; 100 101 vm_offset_t high_heap_base; 102 uint32_t bios_basemem, bios_extmem, high_heap_size; 103 104 static struct bios_smap smap; 105 106 /* 107 * The minimum amount of memory to reserve in bios_extmem for the heap. 108 */ 109 #define HEAP_MIN (64 * 1024 * 1024) 110 111 static char *heap_next; 112 static char *heap_end; 113 114 /* Buffers that must not span a 64k boundary. */ 115 #define READ_BUF_SIZE 8192 116 struct dmadat { 117 char rdbuf[READ_BUF_SIZE]; /* for reading large things */ 118 char secbuf[READ_BUF_SIZE]; /* for MBR/disklabel */ 119 }; 120 static struct dmadat *dmadat; 121 122 void exit(int); 123 void reboot(void); 124 static void load(void); 125 static int parse_cmd(void); 126 static void bios_getmem(void); 127 void *malloc(size_t n); 128 void free(void *ptr); 129 int main(void); 130 131 void * 132 malloc(size_t n) 133 { 134 char *p = heap_next; 135 if (p + n > heap_end) { 136 printf("malloc failure\n"); 137 for (;;) 138 ; 139 /* NOTREACHED */ 140 return (0); 141 } 142 heap_next += n; 143 return (p); 144 } 145 146 void 147 free(void *ptr) 148 { 149 150 return; 151 } 152 153 static char * 154 strdup(const char *s) 155 { 156 char *p = malloc(strlen(s) + 1); 157 strcpy(p, s); 158 return (p); 159 } 160 161 #ifdef LOADER_GELI_SUPPORT 162 #include "geliboot.c" 163 static char gelipw[GELI_PW_MAXLEN]; 164 static struct keybuf *gelibuf; 165 #endif 166 167 #include "zfsimpl.c" 168 169 /* 170 * Read from a dnode (which must be from a ZPL filesystem). 171 */ 172 static int 173 zfs_read(spa_t *spa, const dnode_phys_t *dnode, off_t *offp, void *start, size_t size) 174 { 175 const znode_phys_t *zp = (const znode_phys_t *) dnode->dn_bonus; 176 size_t n; 177 int rc; 178 179 n = size; 180 if (*offp + n > zp->zp_size) 181 n = zp->zp_size - *offp; 182 183 rc = dnode_read(spa, dnode, *offp, start, n); 184 if (rc) 185 return (-1); 186 *offp += n; 187 188 return (n); 189 } 190 191 /* 192 * Current ZFS pool 193 */ 194 static spa_t *spa; 195 static spa_t *primary_spa; 196 static vdev_t *primary_vdev; 197 198 /* 199 * A wrapper for dskread that doesn't have to worry about whether the 200 * buffer pointer crosses a 64k boundary. 201 */ 202 static int 203 vdev_read(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) 204 { 205 char *p; 206 daddr_t lba, alignlba; 207 off_t diff; 208 unsigned int nb, alignnb; 209 struct dsk *dsk = (struct dsk *) priv; 210 211 if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) 212 return -1; 213 214 p = buf; 215 lba = off / DEV_BSIZE; 216 lba += dsk->start; 217 /* 218 * Align reads to 4k else 4k sector GELIs will not decrypt. 219 * Round LBA down to nearest multiple of DEV_GELIBOOT_BSIZE bytes. 220 */ 221 alignlba = rounddown2(off, DEV_GELIBOOT_BSIZE) / DEV_BSIZE; 222 /* 223 * The read must be aligned to DEV_GELIBOOT_BSIZE bytes relative to the 224 * start of the GELI partition, not the start of the actual disk. 225 */ 226 alignlba += dsk->start; 227 diff = (lba - alignlba) * DEV_BSIZE; 228 229 while (bytes > 0) { 230 nb = bytes / DEV_BSIZE; 231 /* 232 * Ensure that the read size plus the leading offset does not 233 * exceed the size of the read buffer. 234 */ 235 if (nb > (READ_BUF_SIZE - diff) / DEV_BSIZE) 236 nb = (READ_BUF_SIZE - diff) / DEV_BSIZE; 237 /* 238 * Round the number of blocks to read up to the nearest multiple 239 * of DEV_GELIBOOT_BSIZE. 240 */ 241 alignnb = roundup2(nb * DEV_BSIZE + diff, DEV_GELIBOOT_BSIZE) 242 / DEV_BSIZE; 243 244 if (drvread(dsk, dmadat->rdbuf, alignlba, alignnb)) 245 return -1; 246 #ifdef LOADER_GELI_SUPPORT 247 /* decrypt */ 248 if (is_geli(dsk) == 0) { 249 if (geli_read(dsk, ((alignlba - dsk->start) * 250 DEV_BSIZE), dmadat->rdbuf, alignnb * DEV_BSIZE)) 251 return (-1); 252 } 253 #endif 254 memcpy(p, dmadat->rdbuf + diff, nb * DEV_BSIZE); 255 p += nb * DEV_BSIZE; 256 lba += nb; 257 alignlba += alignnb; 258 bytes -= nb * DEV_BSIZE; 259 /* Don't need the leading offset after the first block. */ 260 diff = 0; 261 } 262 263 return 0; 264 } 265 266 static int 267 vdev_write(vdev_t *vdev, void *priv, off_t off, void *buf, size_t bytes) 268 { 269 char *p; 270 daddr_t lba; 271 unsigned int nb; 272 struct dsk *dsk = (struct dsk *) priv; 273 274 if ((off & (DEV_BSIZE - 1)) || (bytes & (DEV_BSIZE - 1))) 275 return -1; 276 277 p = buf; 278 lba = off / DEV_BSIZE; 279 lba += dsk->start; 280 while (bytes > 0) { 281 nb = bytes / DEV_BSIZE; 282 if (nb > READ_BUF_SIZE / DEV_BSIZE) 283 nb = READ_BUF_SIZE / DEV_BSIZE; 284 memcpy(dmadat->rdbuf, p, nb * DEV_BSIZE); 285 if (drvwrite(dsk, dmadat->rdbuf, lba, nb)) 286 return -1; 287 p += nb * DEV_BSIZE; 288 lba += nb; 289 bytes -= nb * DEV_BSIZE; 290 } 291 292 return 0; 293 } 294 295 static int 296 xfsread(const dnode_phys_t *dnode, off_t *offp, void *buf, size_t nbyte) 297 { 298 if ((size_t)zfs_read(spa, dnode, offp, buf, nbyte) != nbyte) { 299 printf("Invalid format\n"); 300 return -1; 301 } 302 return 0; 303 } 304 305 /* 306 * Read Pad2 (formerly "Boot Block Header") area of the first 307 * vdev label of the given vdev. 308 */ 309 static int 310 vdev_read_pad2(vdev_t *vdev, char *buf, size_t size) 311 { 312 blkptr_t bp; 313 char *tmp = zap_scratch; 314 off_t off = offsetof(vdev_label_t, vl_pad2); 315 316 if (size > VDEV_PAD_SIZE) 317 size = VDEV_PAD_SIZE; 318 319 BP_ZERO(&bp); 320 BP_SET_LSIZE(&bp, VDEV_PAD_SIZE); 321 BP_SET_PSIZE(&bp, VDEV_PAD_SIZE); 322 BP_SET_CHECKSUM(&bp, ZIO_CHECKSUM_LABEL); 323 BP_SET_COMPRESS(&bp, ZIO_COMPRESS_OFF); 324 DVA_SET_OFFSET(BP_IDENTITY(&bp), off); 325 if (vdev_read_phys(vdev, &bp, tmp, off, 0)) 326 return (EIO); 327 memcpy(buf, tmp, size); 328 return (0); 329 } 330 331 static int 332 vdev_clear_pad2(vdev_t *vdev) 333 { 334 char *zeroes = zap_scratch; 335 uint64_t *end; 336 off_t off = offsetof(vdev_label_t, vl_pad2); 337 338 memset(zeroes, 0, VDEV_PAD_SIZE); 339 end = (uint64_t *)(zeroes + VDEV_PAD_SIZE); 340 /* ZIO_CHECKSUM_LABEL magic and pre-calcualted checksum for all zeros */ 341 end[-5] = 0x0210da7ab10c7a11; 342 end[-4] = 0x97f48f807f6e2a3f; 343 end[-3] = 0xaf909f1658aacefc; 344 end[-2] = 0xcbd1ea57ff6db48b; 345 end[-1] = 0x6ec692db0d465fab; 346 if (vdev_write(vdev, vdev->v_read_priv, off, zeroes, VDEV_PAD_SIZE)) 347 return (EIO); 348 return (0); 349 } 350 351 static void 352 bios_getmem(void) 353 { 354 uint64_t size; 355 356 /* Parse system memory map */ 357 v86.ebx = 0; 358 do { 359 v86.ctl = V86_FLAGS; 360 v86.addr = 0x15; /* int 0x15 function 0xe820*/ 361 v86.eax = 0xe820; 362 v86.ecx = sizeof(struct bios_smap); 363 v86.edx = SMAP_SIG; 364 v86.es = VTOPSEG(&smap); 365 v86.edi = VTOPOFF(&smap); 366 v86int(); 367 if (V86_CY(v86.efl) || (v86.eax != SMAP_SIG)) 368 break; 369 /* look for a low-memory segment that's large enough */ 370 if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0) && 371 (smap.length >= (512 * 1024))) 372 bios_basemem = smap.length; 373 /* look for the first segment in 'extended' memory */ 374 if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base == 0x100000)) { 375 bios_extmem = smap.length; 376 } 377 378 /* 379 * Look for the largest segment in 'extended' memory beyond 380 * 1MB but below 4GB. 381 */ 382 if ((smap.type == SMAP_TYPE_MEMORY) && (smap.base > 0x100000) && 383 (smap.base < 0x100000000ull)) { 384 size = smap.length; 385 386 /* 387 * If this segment crosses the 4GB boundary, truncate it. 388 */ 389 if (smap.base + size > 0x100000000ull) 390 size = 0x100000000ull - smap.base; 391 392 if (size > high_heap_size) { 393 high_heap_size = size; 394 high_heap_base = smap.base; 395 } 396 } 397 } while (v86.ebx != 0); 398 399 /* Fall back to the old compatibility function for base memory */ 400 if (bios_basemem == 0) { 401 v86.ctl = 0; 402 v86.addr = 0x12; /* int 0x12 */ 403 v86int(); 404 405 bios_basemem = (v86.eax & 0xffff) * 1024; 406 } 407 408 /* Fall back through several compatibility functions for extended memory */ 409 if (bios_extmem == 0) { 410 v86.ctl = V86_FLAGS; 411 v86.addr = 0x15; /* int 0x15 function 0xe801*/ 412 v86.eax = 0xe801; 413 v86int(); 414 if (!V86_CY(v86.efl)) { 415 bios_extmem = ((v86.ecx & 0xffff) + ((v86.edx & 0xffff) * 64)) * 1024; 416 } 417 } 418 if (bios_extmem == 0) { 419 v86.ctl = 0; 420 v86.addr = 0x15; /* int 0x15 function 0x88*/ 421 v86.eax = 0x8800; 422 v86int(); 423 bios_extmem = (v86.eax & 0xffff) * 1024; 424 } 425 426 /* 427 * If we have extended memory and did not find a suitable heap 428 * region in the SMAP, use the last 3MB of 'extended' memory as a 429 * high heap candidate. 430 */ 431 if (bios_extmem >= HEAP_MIN && high_heap_size < HEAP_MIN) { 432 high_heap_size = HEAP_MIN; 433 high_heap_base = bios_extmem + 0x100000 - HEAP_MIN; 434 } 435 } 436 437 /* 438 * Try to detect a device supported by the legacy int13 BIOS 439 */ 440 static int 441 int13probe(int drive) 442 { 443 v86.ctl = V86_FLAGS; 444 v86.addr = 0x13; 445 v86.eax = 0x800; 446 v86.edx = drive; 447 v86int(); 448 449 if (!V86_CY(v86.efl) && /* carry clear */ 450 ((v86.edx & 0xff) != (drive & DRV_MASK))) { /* unit # OK */ 451 if ((v86.ecx & 0x3f) == 0) { /* absurd sector size */ 452 return(0); /* skip device */ 453 } 454 return (1); 455 } 456 return(0); 457 } 458 459 /* 460 * We call this when we find a ZFS vdev - ZFS consumes the dsk 461 * structure so we must make a new one. 462 */ 463 static struct dsk * 464 copy_dsk(struct dsk *dsk) 465 { 466 struct dsk *newdsk; 467 468 newdsk = malloc(sizeof(struct dsk)); 469 *newdsk = *dsk; 470 return (newdsk); 471 } 472 473 /* 474 * Get disk size from eax=0x800 and 0x4800. We need to probe both 475 * because 0x4800 may not be available and we would like to get more 476 * or less correct disk size - if it is possible at all. 477 * Note we do not really want to touch drv.c because that code is shared 478 * with boot2 and we can not afford to grow that code. 479 */ 480 static uint64_t 481 drvsize_ext(struct dsk *dskp) 482 { 483 uint64_t size, tmp; 484 int cyl, hds, sec; 485 486 v86.ctl = V86_FLAGS; 487 v86.addr = 0x13; 488 v86.eax = 0x800; 489 v86.edx = dskp->drive; 490 v86int(); 491 492 /* Don't error out if we get bad sector number, try EDD as well */ 493 if (V86_CY(v86.efl) || /* carry set */ 494 (v86.edx & 0xff) <= (unsigned)(dskp->drive & 0x7f)) /* unit # bad */ 495 return (0); 496 497 cyl = ((v86.ecx & 0xc0) << 2) + ((v86.ecx & 0xff00) >> 8) + 1; 498 /* Convert max head # -> # of heads */ 499 hds = ((v86.edx & 0xff00) >> 8) + 1; 500 sec = v86.ecx & 0x3f; 501 502 size = (uint64_t)cyl * hds * sec; 503 504 /* Determine if we can use EDD with this device. */ 505 v86.ctl = V86_FLAGS; 506 v86.addr = 0x13; 507 v86.eax = 0x4100; 508 v86.edx = dskp->drive; 509 v86.ebx = 0x55aa; 510 v86int(); 511 if (V86_CY(v86.efl) || /* carry set */ 512 (v86.ebx & 0xffff) != 0xaa55 || /* signature */ 513 (v86.ecx & EDD_INTERFACE_FIXED_DISK) == 0) 514 return (size); 515 516 tmp = drvsize(dskp); 517 if (tmp > size) 518 size = tmp; 519 520 return (size); 521 } 522 523 /* 524 * The "layered" ioctl to read disk/partition size. Unfortunately 525 * the zfsboot case is hardest, because we do not have full software 526 * stack available, so we need to do some manual work here. 527 */ 528 uint64_t 529 ldi_get_size(void *priv) 530 { 531 struct dsk *dskp = priv; 532 uint64_t size = dskp->size; 533 534 if (dskp->start == 0) 535 size = drvsize_ext(dskp); 536 537 return (size * DEV_BSIZE); 538 } 539 540 static void 541 probe_drive(struct dsk *dsk) 542 { 543 #ifdef GPT 544 struct gpt_hdr hdr; 545 struct gpt_ent *ent; 546 unsigned part, entries_per_sec; 547 daddr_t slba; 548 #endif 549 #if defined(GPT) || defined(LOADER_GELI_SUPPORT) 550 daddr_t elba; 551 #endif 552 553 struct dos_partition *dp; 554 char *sec; 555 unsigned i; 556 557 /* 558 * If we find a vdev on the whole disk, stop here. 559 */ 560 if (vdev_probe(vdev_read, dsk, NULL) == 0) 561 return; 562 563 #ifdef LOADER_GELI_SUPPORT 564 /* 565 * Taste the disk, if it is GELI encrypted, decrypt it and check to see if 566 * it is a usable vdev then. Otherwise dig 567 * out the partition table and probe each slice/partition 568 * in turn for a vdev or GELI encrypted vdev. 569 */ 570 elba = drvsize_ext(dsk); 571 if (elba > 0) { 572 elba--; 573 } 574 if (geli_taste(vdev_read, dsk, elba) == 0) { 575 if (geli_havekey(dsk) == 0 || geli_passphrase(&gelipw, dsk->unit, 576 ':', 0, dsk) == 0) { 577 if (vdev_probe(vdev_read, dsk, NULL) == 0) { 578 return; 579 } 580 } 581 } 582 #endif /* LOADER_GELI_SUPPORT */ 583 584 sec = dmadat->secbuf; 585 dsk->start = 0; 586 587 #ifdef GPT 588 /* 589 * First check for GPT. 590 */ 591 if (drvread(dsk, sec, 1, 1)) { 592 return; 593 } 594 memcpy(&hdr, sec, sizeof(hdr)); 595 if (memcmp(hdr.hdr_sig, GPT_HDR_SIG, sizeof(hdr.hdr_sig)) != 0 || 596 hdr.hdr_lba_self != 1 || hdr.hdr_revision < 0x00010000 || 597 hdr.hdr_entsz < sizeof(*ent) || DEV_BSIZE % hdr.hdr_entsz != 0) { 598 goto trymbr; 599 } 600 601 /* 602 * Probe all GPT partitions for the presence of ZFS pools. We 603 * return the spa_t for the first we find (if requested). This 604 * will have the effect of booting from the first pool on the 605 * disk. 606 * 607 * If no vdev is found, GELI decrypting the device and try again 608 */ 609 entries_per_sec = DEV_BSIZE / hdr.hdr_entsz; 610 slba = hdr.hdr_lba_table; 611 elba = slba + hdr.hdr_entries / entries_per_sec; 612 while (slba < elba) { 613 dsk->start = 0; 614 if (drvread(dsk, sec, slba, 1)) 615 return; 616 for (part = 0; part < entries_per_sec; part++) { 617 ent = (struct gpt_ent *)(sec + part * hdr.hdr_entsz); 618 if (memcmp(&ent->ent_type, &freebsd_zfs_uuid, 619 sizeof(uuid_t)) == 0) { 620 dsk->start = ent->ent_lba_start; 621 dsk->size = ent->ent_lba_end - ent->ent_lba_start + 1; 622 dsk->slice = part + 1; 623 dsk->part = 255; 624 if (vdev_probe(vdev_read, dsk, NULL) == 0) { 625 /* 626 * This slice had a vdev. We need a new dsk 627 * structure now since the vdev now owns this one. 628 */ 629 dsk = copy_dsk(dsk); 630 } 631 #ifdef LOADER_GELI_SUPPORT 632 else if (geli_taste(vdev_read, dsk, ent->ent_lba_end - 633 ent->ent_lba_start) == 0) { 634 if (geli_havekey(dsk) == 0 || geli_passphrase(&gelipw, 635 dsk->unit, 'p', dsk->slice, dsk) == 0) { 636 /* 637 * This slice has GELI, check it for ZFS. 638 */ 639 if (vdev_probe(vdev_read, dsk, NULL) == 0) { 640 /* 641 * This slice had a vdev. We need a new dsk 642 * structure now since the vdev now owns this one. 643 */ 644 dsk = copy_dsk(dsk); 645 } 646 break; 647 } 648 } 649 #endif /* LOADER_GELI_SUPPORT */ 650 } 651 } 652 slba++; 653 } 654 return; 655 trymbr: 656 #endif /* GPT */ 657 658 if (drvread(dsk, sec, DOSBBSECTOR, 1)) 659 return; 660 dp = (void *)(sec + DOSPARTOFF); 661 662 for (i = 0; i < NDOSPART; i++) { 663 if (!dp[i].dp_typ) 664 continue; 665 dsk->start = dp[i].dp_start; 666 dsk->size = dp[i].dp_size; 667 dsk->slice = i + 1; 668 if (vdev_probe(vdev_read, dsk, NULL) == 0) { 669 dsk = copy_dsk(dsk); 670 } 671 #ifdef LOADER_GELI_SUPPORT 672 else if (geli_taste(vdev_read, dsk, dp[i].dp_size - 673 dp[i].dp_start) == 0) { 674 if (geli_havekey(dsk) == 0 || geli_passphrase(&gelipw, dsk->unit, 675 's', i, dsk) == 0) { 676 /* 677 * This slice has GELI, check it for ZFS. 678 */ 679 if (vdev_probe(vdev_read, dsk, NULL) == 0) { 680 /* 681 * This slice had a vdev. We need a new dsk 682 * structure now since the vdev now owns this one. 683 */ 684 dsk = copy_dsk(dsk); 685 } 686 break; 687 } 688 } 689 #endif /* LOADER_GELI_SUPPORT */ 690 } 691 } 692 693 int 694 main(void) 695 { 696 dnode_phys_t dn; 697 off_t off; 698 struct dsk *dsk; 699 int autoboot, i; 700 int nextboot; 701 int rc; 702 703 dmadat = (void *)(roundup2(__base + (int32_t)&_end, 0x10000) - __base); 704 705 bios_getmem(); 706 707 if (high_heap_size > 0) { 708 heap_end = PTOV(high_heap_base + high_heap_size); 709 heap_next = PTOV(high_heap_base); 710 } else { 711 heap_next = (char *)dmadat + sizeof(*dmadat); 712 heap_end = (char *)PTOV(bios_basemem); 713 } 714 715 dsk = malloc(sizeof(struct dsk)); 716 dsk->drive = *(uint8_t *)PTOV(ARGS); 717 dsk->type = dsk->drive & DRV_HARD ? TYPE_AD : TYPE_FD; 718 dsk->unit = dsk->drive & DRV_MASK; 719 dsk->slice = *(uint8_t *)PTOV(ARGS + 1) + 1; 720 dsk->part = 0; 721 dsk->start = 0; 722 dsk->size = 0; 723 724 bootinfo.bi_version = BOOTINFO_VERSION; 725 bootinfo.bi_size = sizeof(bootinfo); 726 bootinfo.bi_basemem = bios_basemem / 1024; 727 bootinfo.bi_extmem = bios_extmem / 1024; 728 bootinfo.bi_memsizes_valid++; 729 bootinfo.bi_bios_dev = dsk->drive; 730 731 bootdev = MAKEBOOTDEV(dev_maj[dsk->type], 732 dsk->slice, dsk->unit, dsk->part); 733 734 /* Process configuration file */ 735 736 autoboot = 1; 737 738 #ifdef LOADER_GELI_SUPPORT 739 geli_init(); 740 #endif 741 zfs_init(); 742 743 /* 744 * Probe the boot drive first - we will try to boot from whatever 745 * pool we find on that drive. 746 */ 747 probe_drive(dsk); 748 749 /* 750 * Probe the rest of the drives that the bios knows about. This 751 * will find any other available pools and it may fill in missing 752 * vdevs for the boot pool. 753 */ 754 #ifndef VIRTUALBOX 755 for (i = 0; i < *(unsigned char *)PTOV(BIOS_NUMDRIVES); i++) 756 #else 757 for (i = 0; i < MAXBDDEV; i++) 758 #endif 759 { 760 if ((i | DRV_HARD) == *(uint8_t *)PTOV(ARGS)) 761 continue; 762 763 if (!int13probe(i | DRV_HARD)) 764 break; 765 766 dsk = malloc(sizeof(struct dsk)); 767 dsk->drive = i | DRV_HARD; 768 dsk->type = dsk->drive & TYPE_AD; 769 dsk->unit = i; 770 dsk->slice = 0; 771 dsk->part = 0; 772 dsk->start = 0; 773 dsk->size = 0; 774 probe_drive(dsk); 775 } 776 777 /* 778 * The first discovered pool, if any, is the pool. 779 */ 780 spa = spa_get_primary(); 781 if (!spa) { 782 printf("%s: No ZFS pools located, can't boot\n", BOOTPROG); 783 for (;;) 784 ; 785 } 786 787 primary_spa = spa; 788 primary_vdev = spa_get_primary_vdev(spa); 789 790 nextboot = 0; 791 rc = vdev_read_pad2(primary_vdev, cmd, sizeof(cmd)); 792 if (vdev_clear_pad2(primary_vdev)) 793 printf("failed to clear pad2 area of primary vdev\n"); 794 if (rc == 0) { 795 if (*cmd) { 796 /* 797 * We could find an old-style ZFS Boot Block header here. 798 * Simply ignore it. 799 */ 800 if (*(uint64_t *)cmd != 0x2f5b007b10c) { 801 /* 802 * Note that parse() is destructive to cmd[] and we also want 803 * to honor RBX_QUIET option that could be present in cmd[]. 804 */ 805 nextboot = 1; 806 memcpy(cmddup, cmd, sizeof(cmd)); 807 if (parse_cmd()) { 808 printf("failed to parse pad2 area of primary vdev\n"); 809 reboot(); 810 } 811 if (!OPT_CHECK(RBX_QUIET)) 812 printf("zfs nextboot: %s\n", cmddup); 813 } 814 /* Do not process this command twice */ 815 *cmd = 0; 816 } 817 } else 818 printf("failed to read pad2 area of primary vdev\n"); 819 820 /* Mount ZFS only if it's not already mounted via nextboot parsing. */ 821 if (zfsmount.spa == NULL && 822 (zfs_spa_init(spa) != 0 || zfs_mount(spa, 0, &zfsmount) != 0)) { 823 printf("%s: failed to mount default pool %s\n", 824 BOOTPROG, spa->spa_name); 825 autoboot = 0; 826 } else if (zfs_lookup(&zfsmount, PATH_CONFIG, &dn) == 0 || 827 zfs_lookup(&zfsmount, PATH_DOTCONFIG, &dn) == 0) { 828 off = 0; 829 zfs_read(spa, &dn, &off, cmd, sizeof(cmd)); 830 } 831 832 if (*cmd) { 833 /* 834 * Note that parse_cmd() is destructive to cmd[] and we also want 835 * to honor RBX_QUIET option that could be present in cmd[]. 836 */ 837 memcpy(cmddup, cmd, sizeof(cmd)); 838 if (parse_cmd()) 839 autoboot = 0; 840 if (!OPT_CHECK(RBX_QUIET)) 841 printf("%s: %s\n", PATH_CONFIG, cmddup); 842 /* Do not process this command twice */ 843 *cmd = 0; 844 } 845 846 /* Do not risk waiting at the prompt forever. */ 847 if (nextboot && !autoboot) 848 reboot(); 849 850 /* 851 * Try to exec /boot/loader. If interrupted by a keypress, 852 * or in case of failure, try to load a kernel directly instead. 853 */ 854 855 if (autoboot && !*kname) { 856 memcpy(kname, PATH_LOADER_ZFS, sizeof(PATH_LOADER_ZFS)); 857 if (!keyhit(3)) { 858 load(); 859 memcpy(kname, PATH_KERNEL, sizeof(PATH_KERNEL)); 860 } 861 } 862 863 /* Present the user with the boot2 prompt. */ 864 865 for (;;) { 866 if (!autoboot || !OPT_CHECK(RBX_QUIET)) { 867 printf("\nFreeBSD/x86 boot\n"); 868 if (zfs_rlookup(spa, zfsmount.rootobj, rootname) != 0) 869 printf("Default: %s/<0x%llx>:%s\n" 870 "boot: ", 871 spa->spa_name, zfsmount.rootobj, kname); 872 else if (rootname[0] != '\0') 873 printf("Default: %s/%s:%s\n" 874 "boot: ", 875 spa->spa_name, rootname, kname); 876 else 877 printf("Default: %s:%s\n" 878 "boot: ", 879 spa->spa_name, kname); 880 } 881 if (ioctrl & IO_SERIAL) 882 sio_flush(); 883 if (!autoboot || keyhit(5)) 884 getstr(cmd, sizeof(cmd)); 885 else if (!autoboot || !OPT_CHECK(RBX_QUIET)) 886 putchar('\n'); 887 autoboot = 0; 888 if (parse_cmd()) 889 putchar('\a'); 890 else 891 load(); 892 } 893 } 894 895 /* XXX - Needed for btxld to link the boot2 binary; do not remove. */ 896 void 897 exit(int x) 898 { 899 __exit(x); 900 } 901 902 void 903 reboot(void) 904 { 905 __exit(0); 906 } 907 908 static void 909 load(void) 910 { 911 union { 912 struct exec ex; 913 Elf32_Ehdr eh; 914 } hdr; 915 static Elf32_Phdr ep[2]; 916 static Elf32_Shdr es[2]; 917 caddr_t p; 918 dnode_phys_t dn; 919 off_t off; 920 uint32_t addr, x; 921 int fmt, i, j; 922 923 if (zfs_lookup(&zfsmount, kname, &dn)) { 924 printf("\nCan't find %s\n", kname); 925 return; 926 } 927 off = 0; 928 if (xfsread(&dn, &off, &hdr, sizeof(hdr))) 929 return; 930 if (N_GETMAGIC(hdr.ex) == ZMAGIC) 931 fmt = 0; 932 else if (IS_ELF(hdr.eh)) 933 fmt = 1; 934 else { 935 printf("Invalid %s\n", "format"); 936 return; 937 } 938 if (fmt == 0) { 939 addr = hdr.ex.a_entry & 0xffffff; 940 p = PTOV(addr); 941 off = PAGE_SIZE; 942 if (xfsread(&dn, &off, p, hdr.ex.a_text)) 943 return; 944 p += roundup2(hdr.ex.a_text, PAGE_SIZE); 945 if (xfsread(&dn, &off, p, hdr.ex.a_data)) 946 return; 947 p += hdr.ex.a_data + roundup2(hdr.ex.a_bss, PAGE_SIZE); 948 bootinfo.bi_symtab = VTOP(p); 949 memcpy(p, &hdr.ex.a_syms, sizeof(hdr.ex.a_syms)); 950 p += sizeof(hdr.ex.a_syms); 951 if (hdr.ex.a_syms) { 952 if (xfsread(&dn, &off, p, hdr.ex.a_syms)) 953 return; 954 p += hdr.ex.a_syms; 955 if (xfsread(&dn, &off, p, sizeof(int))) 956 return; 957 x = *(uint32_t *)p; 958 p += sizeof(int); 959 x -= sizeof(int); 960 if (xfsread(&dn, &off, p, x)) 961 return; 962 p += x; 963 } 964 } else { 965 off = hdr.eh.e_phoff; 966 for (j = i = 0; i < hdr.eh.e_phnum && j < 2; i++) { 967 if (xfsread(&dn, &off, ep + j, sizeof(ep[0]))) 968 return; 969 if (ep[j].p_type == PT_LOAD) 970 j++; 971 } 972 for (i = 0; i < 2; i++) { 973 p = PTOV(ep[i].p_paddr & 0xffffff); 974 off = ep[i].p_offset; 975 if (xfsread(&dn, &off, p, ep[i].p_filesz)) 976 return; 977 } 978 p += roundup2(ep[1].p_memsz, PAGE_SIZE); 979 bootinfo.bi_symtab = VTOP(p); 980 if (hdr.eh.e_shnum == hdr.eh.e_shstrndx + 3) { 981 off = hdr.eh.e_shoff + sizeof(es[0]) * 982 (hdr.eh.e_shstrndx + 1); 983 if (xfsread(&dn, &off, &es, sizeof(es))) 984 return; 985 for (i = 0; i < 2; i++) { 986 memcpy(p, &es[i].sh_size, sizeof(es[i].sh_size)); 987 p += sizeof(es[i].sh_size); 988 off = es[i].sh_offset; 989 if (xfsread(&dn, &off, p, es[i].sh_size)) 990 return; 991 p += es[i].sh_size; 992 } 993 } 994 addr = hdr.eh.e_entry & 0xffffff; 995 } 996 bootinfo.bi_esymtab = VTOP(p); 997 bootinfo.bi_kernelname = VTOP(kname); 998 zfsargs.size = sizeof(zfsargs); 999 zfsargs.pool = zfsmount.spa->spa_guid; 1000 zfsargs.root = zfsmount.rootobj; 1001 zfsargs.primary_pool = primary_spa->spa_guid; 1002 #ifdef LOADER_GELI_SUPPORT 1003 explicit_bzero(gelipw, sizeof(gelipw)); 1004 gelibuf = malloc(sizeof(struct keybuf) + (GELI_MAX_KEYS * sizeof(struct keybuf_ent))); 1005 geli_fill_keybuf(gelibuf); 1006 zfsargs.notapw = '\0'; 1007 zfsargs.keybuf_sentinel = KEYBUF_SENTINEL; 1008 zfsargs.keybuf = gelibuf; 1009 #else 1010 zfsargs.gelipw[0] = '\0'; 1011 #endif 1012 if (primary_vdev != NULL) 1013 zfsargs.primary_vdev = primary_vdev->v_guid; 1014 else 1015 printf("failed to detect primary vdev\n"); 1016 __exec((caddr_t)addr, RB_BOOTINFO | (opts & RBX_MASK), 1017 bootdev, 1018 KARGS_FLAGS_ZFS | KARGS_FLAGS_EXTARG, 1019 (uint32_t) spa->spa_guid, 1020 (uint32_t) (spa->spa_guid >> 32), 1021 VTOP(&bootinfo), 1022 zfsargs); 1023 } 1024 1025 static int 1026 zfs_mount_ds(char *dsname) 1027 { 1028 uint64_t newroot; 1029 spa_t *newspa; 1030 char *q; 1031 1032 q = strchr(dsname, '/'); 1033 if (q) 1034 *q++ = '\0'; 1035 newspa = spa_find_by_name(dsname); 1036 if (newspa == NULL) { 1037 printf("\nCan't find ZFS pool %s\n", dsname); 1038 return -1; 1039 } 1040 1041 if (zfs_spa_init(newspa)) 1042 return -1; 1043 1044 newroot = 0; 1045 if (q) { 1046 if (zfs_lookup_dataset(newspa, q, &newroot)) { 1047 printf("\nCan't find dataset %s in ZFS pool %s\n", 1048 q, newspa->spa_name); 1049 return -1; 1050 } 1051 } 1052 if (zfs_mount(newspa, newroot, &zfsmount)) { 1053 printf("\nCan't mount ZFS dataset\n"); 1054 return -1; 1055 } 1056 spa = newspa; 1057 return (0); 1058 } 1059 1060 static int 1061 parse_cmd(void) 1062 { 1063 char *arg = cmd; 1064 char *ep, *p, *q; 1065 const char *cp; 1066 int c, i, j; 1067 1068 while ((c = *arg++)) { 1069 if (c == ' ' || c == '\t' || c == '\n') 1070 continue; 1071 for (p = arg; *p && *p != '\n' && *p != ' ' && *p != '\t'; p++); 1072 ep = p; 1073 if (*p) 1074 *p++ = 0; 1075 if (c == '-') { 1076 while ((c = *arg++)) { 1077 if (c == 'P') { 1078 if (*(uint8_t *)PTOV(0x496) & 0x10) { 1079 cp = "yes"; 1080 } else { 1081 opts |= OPT_SET(RBX_DUAL) | OPT_SET(RBX_SERIAL); 1082 cp = "no"; 1083 } 1084 printf("Keyboard: %s\n", cp); 1085 continue; 1086 } else if (c == 'S') { 1087 j = 0; 1088 while ((unsigned int)(i = *arg++ - '0') <= 9) 1089 j = j * 10 + i; 1090 if (j > 0 && i == -'0') { 1091 comspeed = j; 1092 break; 1093 } 1094 /* Fall through to error below ('S' not in optstr[]). */ 1095 } 1096 for (i = 0; c != optstr[i]; i++) 1097 if (i == NOPT - 1) 1098 return -1; 1099 opts ^= OPT_SET(flags[i]); 1100 } 1101 ioctrl = OPT_CHECK(RBX_DUAL) ? (IO_SERIAL|IO_KEYBOARD) : 1102 OPT_CHECK(RBX_SERIAL) ? IO_SERIAL : IO_KEYBOARD; 1103 if (ioctrl & IO_SERIAL) { 1104 if (sio_init(115200 / comspeed) != 0) 1105 ioctrl &= ~IO_SERIAL; 1106 } 1107 } if (c == '?') { 1108 dnode_phys_t dn; 1109 1110 if (zfs_lookup(&zfsmount, arg, &dn) == 0) { 1111 zap_list(spa, &dn); 1112 } 1113 return -1; 1114 } else { 1115 arg--; 1116 1117 /* 1118 * Report pool status if the comment is 'status'. Lets 1119 * hope no-one wants to load /status as a kernel. 1120 */ 1121 if (!strcmp(arg, "status")) { 1122 spa_all_status(); 1123 return -1; 1124 } 1125 1126 /* 1127 * If there is "zfs:" prefix simply ignore it. 1128 */ 1129 if (strncmp(arg, "zfs:", 4) == 0) 1130 arg += 4; 1131 1132 /* 1133 * If there is a colon, switch pools. 1134 */ 1135 q = strchr(arg, ':'); 1136 if (q) { 1137 *q++ = '\0'; 1138 if (zfs_mount_ds(arg) != 0) 1139 return -1; 1140 arg = q; 1141 } 1142 if ((i = ep - arg)) { 1143 if ((size_t)i >= sizeof(kname)) 1144 return -1; 1145 memcpy(kname, arg, i + 1); 1146 } 1147 } 1148 arg = p; 1149 } 1150 return 0; 1151 } 1152