1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright (c) 2010, Intel Corporation. 27 * All rights reserved. 28 * 29 * Copyright 2013 Joyent, Inc. All rights reserved. 30 */ 31 32 /* 33 * This file contains the functionality that mimics the boot operations 34 * on SPARC systems or the old boot.bin/multiboot programs on x86 systems. 35 * The x86 kernel now does everything on its own. 
36 */ 37 38 #include <sys/types.h> 39 #include <sys/bootconf.h> 40 #include <sys/bootsvcs.h> 41 #include <sys/bootinfo.h> 42 #include <sys/multiboot.h> 43 #include <sys/bootvfs.h> 44 #include <sys/bootprops.h> 45 #include <sys/varargs.h> 46 #include <sys/param.h> 47 #include <sys/machparam.h> 48 #include <sys/machsystm.h> 49 #include <sys/archsystm.h> 50 #include <sys/boot_console.h> 51 #include <sys/cmn_err.h> 52 #include <sys/systm.h> 53 #include <sys/promif.h> 54 #include <sys/archsystm.h> 55 #include <sys/x86_archext.h> 56 #include <sys/kobj.h> 57 #include <sys/privregs.h> 58 #include <sys/sysmacros.h> 59 #include <sys/ctype.h> 60 #include <sys/fastboot.h> 61 #ifdef __xpv 62 #include <sys/hypervisor.h> 63 #include <net/if.h> 64 #endif 65 #include <vm/kboot_mmu.h> 66 #include <vm/hat_pte.h> 67 #include <sys/kobj.h> 68 #include <sys/kobj_lex.h> 69 #include <sys/pci_cfgspace_impl.h> 70 #include <sys/fastboot_impl.h> 71 #include <sys/acpi/acconfig.h> 72 #include <sys/acpi/acpi.h> 73 74 static int have_console = 0; /* set once primitive console is initialized */ 75 static char *boot_args = ""; 76 77 /* 78 * Debugging macros 79 */ 80 static uint_t kbm_debug = 0; 81 #define DBG_MSG(s) { if (kbm_debug) bop_printf(NULL, "%s", s); } 82 #define DBG(x) { if (kbm_debug) \ 83 bop_printf(NULL, "%s is %" PRIx64 "\n", #x, (uint64_t)(x)); \ 84 } 85 86 #define PUT_STRING(s) { \ 87 char *cp; \ 88 for (cp = (s); *cp; ++cp) \ 89 bcons_putchar(*cp); \ 90 } 91 92 struct xboot_info *xbootp; /* boot info from "glue" code in low memory */ 93 bootops_t bootop; /* simple bootops we'll pass on to kernel */ 94 struct bsys_mem bm; 95 96 static uintptr_t next_virt; /* next available virtual address */ 97 static paddr_t next_phys; /* next available physical address from dboot */ 98 static paddr_t high_phys = -(paddr_t)1; /* last used physical address */ 99 100 /* 101 * buffer for vsnprintf for console I/O 102 */ 103 #define BUFFERSIZE 512 104 static char buffer[BUFFERSIZE]; 105 /* 106 * stuff 
to store/report/manipulate boot property settings. 107 */ 108 typedef struct bootprop { 109 struct bootprop *bp_next; 110 char *bp_name; 111 uint_t bp_vlen; 112 char *bp_value; 113 } bootprop_t; 114 115 static bootprop_t *bprops = NULL; 116 static char *curr_page = NULL; /* ptr to avail bprop memory */ 117 static int curr_space = 0; /* amount of memory at curr_page */ 118 119 #ifdef __xpv 120 start_info_t *xen_info; 121 shared_info_t *HYPERVISOR_shared_info; 122 #endif 123 124 /* 125 * some allocator statistics 126 */ 127 static ulong_t total_bop_alloc_scratch = 0; 128 static ulong_t total_bop_alloc_kernel = 0; 129 130 static void build_firmware_properties(void); 131 132 static int early_allocation = 1; 133 134 int force_fastreboot = 0; 135 volatile int fastreboot_onpanic = 0; 136 int post_fastreboot = 0; 137 #ifdef __xpv 138 volatile int fastreboot_capable = 0; 139 #else 140 volatile int fastreboot_capable = 1; 141 #endif 142 143 /* 144 * Information saved from current boot for fast reboot. 145 * If the information size exceeds what we have allocated, fast reboot 146 * will not be supported. 147 */ 148 multiboot_info_t saved_mbi; 149 mb_memory_map_t saved_mmap[FASTBOOT_SAVED_MMAP_COUNT]; 150 uint8_t saved_drives[FASTBOOT_SAVED_DRIVES_SIZE]; 151 char saved_cmdline[FASTBOOT_SAVED_CMDLINE_LEN]; 152 int saved_cmdline_len = 0; 153 size_t saved_file_size[FASTBOOT_MAX_FILES_MAP]; 154 155 /* 156 * Turn off fastreboot_onpanic to avoid panic loop. 
157 */ 158 char fastreboot_onpanic_cmdline[FASTBOOT_SAVED_CMDLINE_LEN]; 159 static const char fastreboot_onpanic_args[] = " -B fastreboot_onpanic=0"; 160 161 /* 162 * Pointers to where System Resource Affinity Table (SRAT), System Locality 163 * Information Table (SLIT) and Maximum System Capability Table (MSCT) 164 * are mapped into virtual memory 165 */ 166 ACPI_TABLE_SRAT *srat_ptr = NULL; 167 ACPI_TABLE_SLIT *slit_ptr = NULL; 168 ACPI_TABLE_MSCT *msct_ptr = NULL; 169 170 /* 171 * Arbitrary limit on number of localities we handle; if 172 * this limit is raised to more than UINT16_MAX, make sure 173 * process_slit() knows how to handle it. 174 */ 175 #define SLIT_LOCALITIES_MAX (4096) 176 177 #define SLIT_NUM_PROPNAME "acpi-slit-localities" 178 #define SLIT_PROPNAME "acpi-slit" 179 180 /* 181 * Allocate aligned physical memory at boot time. This allocator allocates 182 * from the highest possible addresses. This avoids exhausting memory that 183 * would be useful for DMA buffers. 184 */ 185 paddr_t 186 do_bop_phys_alloc(uint64_t size, uint64_t align) 187 { 188 paddr_t pa = 0; 189 paddr_t start; 190 paddr_t end; 191 struct memlist *ml = (struct memlist *)xbootp->bi_phys_install; 192 193 /* 194 * Be careful if high memory usage is limited in startup.c 195 * Since there are holes in the low part of the physical address 196 * space we can treat physmem as a pfn (not just a pgcnt) and 197 * get a conservative upper limit. 
198 */ 199 if (physmem != 0 && high_phys > pfn_to_pa(physmem)) 200 high_phys = pfn_to_pa(physmem); 201 202 /* 203 * find the lowest or highest available memory in physinstalled 204 * On 32 bit avoid physmem above 4Gig if PAE isn't enabled 205 */ 206 #if defined(__i386) 207 if (xbootp->bi_use_pae == 0 && high_phys > FOUR_GIG) 208 high_phys = FOUR_GIG; 209 #endif 210 211 /* 212 * find the highest available memory in physinstalled 213 */ 214 size = P2ROUNDUP(size, align); 215 for (; ml; ml = ml->ml_next) { 216 start = P2ROUNDUP(ml->ml_address, align); 217 end = P2ALIGN(ml->ml_address + ml->ml_size, align); 218 if (start < next_phys) 219 start = P2ROUNDUP(next_phys, align); 220 if (end > high_phys) 221 end = P2ALIGN(high_phys, align); 222 223 if (end <= start) 224 continue; 225 if (end - start < size) 226 continue; 227 228 /* 229 * Early allocations need to use low memory, since 230 * physmem might be further limited by bootenv.rc 231 */ 232 if (early_allocation) { 233 if (pa == 0 || start < pa) 234 pa = start; 235 } else { 236 if (end - size > pa) 237 pa = end - size; 238 } 239 } 240 if (pa != 0) { 241 if (early_allocation) 242 next_phys = pa + size; 243 else 244 high_phys = pa; 245 return (pa); 246 } 247 bop_panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64 248 ") Out of memory\n", size, align); 249 /*NOTREACHED*/ 250 } 251 252 uintptr_t 253 alloc_vaddr(size_t size, paddr_t align) 254 { 255 uintptr_t rv; 256 257 next_virt = P2ROUNDUP(next_virt, (uintptr_t)align); 258 rv = (uintptr_t)next_virt; 259 next_virt += size; 260 return (rv); 261 } 262 263 /* 264 * Allocate virtual memory. The size is always rounded up to a multiple 265 * of base pagesize. 
266 */ 267 268 /*ARGSUSED*/ 269 static caddr_t 270 do_bsys_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align) 271 { 272 paddr_t a = align; /* same type as pa for masking */ 273 uint_t pgsize; 274 paddr_t pa; 275 uintptr_t va; 276 ssize_t s; /* the aligned size */ 277 uint_t level; 278 uint_t is_kernel = (virthint != 0); 279 280 if (a < MMU_PAGESIZE) 281 a = MMU_PAGESIZE; 282 else if (!ISP2(a)) 283 prom_panic("do_bsys_alloc() incorrect alignment"); 284 size = P2ROUNDUP(size, MMU_PAGESIZE); 285 286 /* 287 * Use the next aligned virtual address if we weren't given one. 288 */ 289 if (virthint == NULL) { 290 virthint = (caddr_t)alloc_vaddr(size, a); 291 total_bop_alloc_scratch += size; 292 } else { 293 total_bop_alloc_kernel += size; 294 } 295 296 /* 297 * allocate the physical memory 298 */ 299 pa = do_bop_phys_alloc(size, a); 300 301 /* 302 * Add the mappings to the page tables, try large pages first. 303 */ 304 va = (uintptr_t)virthint; 305 s = size; 306 level = 1; 307 pgsize = xbootp->bi_use_pae ? TWO_MEG : FOUR_MEG; 308 if (xbootp->bi_use_largepage && a == pgsize) { 309 while (IS_P2ALIGNED(pa, pgsize) && IS_P2ALIGNED(va, pgsize) && 310 s >= pgsize) { 311 kbm_map(va, pa, level, is_kernel); 312 va += pgsize; 313 pa += pgsize; 314 s -= pgsize; 315 } 316 } 317 318 /* 319 * Map remaining pages use small mappings 320 */ 321 level = 0; 322 pgsize = MMU_PAGESIZE; 323 while (s > 0) { 324 kbm_map(va, pa, level, is_kernel); 325 va += pgsize; 326 pa += pgsize; 327 s -= pgsize; 328 } 329 return (virthint); 330 } 331 332 /* 333 * Free virtual memory - we'll just ignore these. 
334 */ 335 /*ARGSUSED*/ 336 static void 337 do_bsys_free(bootops_t *bop, caddr_t virt, size_t size) 338 { 339 bop_printf(NULL, "do_bsys_free(virt=0x%p, size=0x%lx) ignored\n", 340 (void *)virt, size); 341 } 342 343 /* 344 * Old interface 345 */ 346 /*ARGSUSED*/ 347 static caddr_t 348 do_bsys_ealloc( 349 bootops_t *bop, 350 caddr_t virthint, 351 size_t size, 352 int align, 353 int flags) 354 { 355 prom_panic("unsupported call to BOP_EALLOC()\n"); 356 return (0); 357 } 358 359 360 static void 361 bsetprop(char *name, int nlen, void *value, int vlen) 362 { 363 uint_t size; 364 uint_t need_size; 365 bootprop_t *b; 366 367 /* 368 * align the size to 16 byte boundary 369 */ 370 size = sizeof (bootprop_t) + nlen + 1 + vlen; 371 size = (size + 0xf) & ~0xf; 372 if (size > curr_space) { 373 need_size = (size + (MMU_PAGEOFFSET)) & MMU_PAGEMASK; 374 curr_page = do_bsys_alloc(NULL, 0, need_size, MMU_PAGESIZE); 375 curr_space = need_size; 376 } 377 378 /* 379 * use a bootprop_t at curr_page and link into list 380 */ 381 b = (bootprop_t *)curr_page; 382 curr_page += sizeof (bootprop_t); 383 curr_space -= sizeof (bootprop_t); 384 b->bp_next = bprops; 385 bprops = b; 386 387 /* 388 * follow by name and ending zero byte 389 */ 390 b->bp_name = curr_page; 391 bcopy(name, curr_page, nlen); 392 curr_page += nlen; 393 *curr_page++ = 0; 394 curr_space -= nlen + 1; 395 396 /* 397 * copy in value, but no ending zero byte 398 */ 399 b->bp_value = curr_page; 400 b->bp_vlen = vlen; 401 if (vlen > 0) { 402 bcopy(value, curr_page, vlen); 403 curr_page += vlen; 404 curr_space -= vlen; 405 } 406 407 /* 408 * align new values of curr_page, curr_space 409 */ 410 while (curr_space & 0xf) { 411 ++curr_page; 412 --curr_space; 413 } 414 } 415 416 static void 417 bsetprops(char *name, char *value) 418 { 419 bsetprop(name, strlen(name), value, strlen(value) + 1); 420 } 421 422 static void 423 bsetprop64(char *name, uint64_t value) 424 { 425 bsetprop(name, strlen(name), (void *)&value, sizeof (value)); 
426 } 427 428 static void 429 bsetpropsi(char *name, int value) 430 { 431 char prop_val[32]; 432 433 (void) snprintf(prop_val, sizeof (prop_val), "%d", value); 434 bsetprops(name, prop_val); 435 } 436 437 /* 438 * to find the size of the buffer to allocate 439 */ 440 /*ARGSUSED*/ 441 int 442 do_bsys_getproplen(bootops_t *bop, const char *name) 443 { 444 bootprop_t *b; 445 446 for (b = bprops; b; b = b->bp_next) { 447 if (strcmp(name, b->bp_name) != 0) 448 continue; 449 return (b->bp_vlen); 450 } 451 return (-1); 452 } 453 454 /* 455 * get the value associated with this name 456 */ 457 /*ARGSUSED*/ 458 int 459 do_bsys_getprop(bootops_t *bop, const char *name, void *value) 460 { 461 bootprop_t *b; 462 463 for (b = bprops; b; b = b->bp_next) { 464 if (strcmp(name, b->bp_name) != 0) 465 continue; 466 bcopy(b->bp_value, value, b->bp_vlen); 467 return (0); 468 } 469 return (-1); 470 } 471 472 /* 473 * get the name of the next property in succession from the standalone 474 */ 475 /*ARGSUSED*/ 476 static char * 477 do_bsys_nextprop(bootops_t *bop, char *name) 478 { 479 bootprop_t *b; 480 481 /* 482 * A null name is a special signal for the 1st boot property 483 */ 484 if (name == NULL || strlen(name) == 0) { 485 if (bprops == NULL) 486 return (NULL); 487 return (bprops->bp_name); 488 } 489 490 for (b = bprops; b; b = b->bp_next) { 491 if (name != b->bp_name) 492 continue; 493 b = b->bp_next; 494 if (b == NULL) 495 return (NULL); 496 return (b->bp_name); 497 } 498 return (NULL); 499 } 500 501 /* 502 * Parse numeric value from a string. 
/*
 * Parse numeric value from a string. Understands decimal, hex, octal, - and ~
 *
 * Accepted forms are an optional leading '-' (negate) or '~' (bitwise
 * complement) followed by:
 *	0x... / 0X...	hexadecimal
 *	0...		octal
 *	anything else	decimal
 *
 * On success the result is stored in *retval and 0 is returned. If a
 * character is not a valid digit for the radix, -1 is returned and *retval
 * is left as 0. A string with no digits at all parses as 0. Overflow is
 * not detected; the value wraps modulo 2^64.
 */
static int
parse_value(char *p, uint64_t *retval)
{
	int adjust = 0;
	uint64_t tmp = 0;
	int digit;
	int radix = 10;

	*retval = 0;
	if (*p == '-' || *p == '~')
		adjust = *p++;

	if (*p == '0') {
		/*
		 * Consume only the radix prefix. For octal the prefix is the
		 * single '0' just skipped, so the following character is
		 * already the first digit and must not be skipped as well.
		 */
		++p;
		if (*p == 'x' || *p == 'X') {
			radix = 16;
			++p;
		} else {
			radix = 8;
		}
	}
	while (*p) {
		if ('0' <= *p && *p <= '9')
			digit = *p - '0';
		else if ('a' <= *p && *p <= 'f')
			digit = 10 + *p - 'a';
		else if ('A' <= *p && *p <= 'F')
			digit = 10 + *p - 'A';
		else
			return (-1);
		if (digit >= radix)
			return (-1);
		tmp = tmp * radix + digit;
		++p;
	}

	/* applied to every successfully parsed value, including a bare "0" */
	if (adjust == '-')
		tmp = -tmp;
	else if (adjust == '~')
		tmp = ~tmp;
	*retval = tmp;
	return (0);
}
eeprom(1m) values) 553 * 554 * lines look like one of: 555 * ^$ 556 * ^# comment till end of line 557 * setprop name 'value' 558 * setprop name value 559 * setprop name "value" 560 * 561 * we do single character I/O since this is really just looking at memory 562 */ 563 void 564 boot_prop_finish(void) 565 { 566 int fd; 567 char *line; 568 int c; 569 int bytes_read; 570 char *name; 571 int n_len; 572 char *value; 573 int v_len; 574 char *inputdev; /* these override the command line if serial ports */ 575 char *outputdev; 576 char *consoledev; 577 uint64_t lvalue; 578 int use_xencons = 0; 579 580 #ifdef __xpv 581 if (!DOMAIN_IS_INITDOMAIN(xen_info)) 582 use_xencons = 1; 583 #endif /* __xpv */ 584 585 DBG_MSG("Opening /boot/solaris/bootenv.rc\n"); 586 fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0); 587 DBG(fd); 588 589 line = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE); 590 while (fd >= 0) { 591 592 /* 593 * get a line 594 */ 595 for (c = 0; ; ++c) { 596 bytes_read = BRD_READ(bfs_ops, fd, line + c, 1); 597 if (bytes_read == 0) { 598 if (c == 0) 599 goto done; 600 break; 601 } 602 if (line[c] == '\n') 603 break; 604 } 605 line[c] = 0; 606 607 /* 608 * ignore comment lines 609 */ 610 c = 0; 611 while (ISSPACE(line[c])) 612 ++c; 613 if (line[c] == '#' || line[c] == 0) 614 continue; 615 616 /* 617 * must have "setprop " or "setprop\t" 618 */ 619 if (strncmp(line + c, "setprop ", 8) != 0 && 620 strncmp(line + c, "setprop\t", 8) != 0) 621 continue; 622 c += 8; 623 while (ISSPACE(line[c])) 624 ++c; 625 if (line[c] == 0) 626 continue; 627 628 /* 629 * gather up the property name 630 */ 631 name = line + c; 632 n_len = 0; 633 while (line[c] && !ISSPACE(line[c])) 634 ++n_len, ++c; 635 636 /* 637 * gather up the value, if any 638 */ 639 value = ""; 640 v_len = 0; 641 while (ISSPACE(line[c])) 642 ++c; 643 if (line[c] != 0) { 644 value = line + c; 645 while (line[c] && !ISSPACE(line[c])) 646 ++v_len, ++c; 647 } 648 649 if (v_len >= 2 && value[0] == value[v_len - 
1] && 650 (value[0] == '\'' || value[0] == '"')) { 651 ++value; 652 v_len -= 2; 653 } 654 name[n_len] = 0; 655 if (v_len > 0) 656 value[v_len] = 0; 657 else 658 continue; 659 660 /* 661 * ignore "boot-file" property, it's now meaningless 662 */ 663 if (strcmp(name, "boot-file") == 0) 664 continue; 665 if (strcmp(name, "boot-args") == 0 && 666 strlen(boot_args) > 0) 667 continue; 668 669 /* 670 * If a property was explicitly set on the command line 671 * it will override a setting in bootenv.rc 672 */ 673 if (do_bsys_getproplen(NULL, name) > 0) 674 continue; 675 676 bsetprop(name, n_len, value, v_len + 1); 677 } 678 done: 679 if (fd >= 0) 680 (void) BRD_CLOSE(bfs_ops, fd); 681 682 /* 683 * Check if we have to limit the boot time allocator 684 */ 685 if (do_bsys_getproplen(NULL, "physmem") != -1 && 686 do_bsys_getprop(NULL, "physmem", line) >= 0 && 687 parse_value(line, &lvalue) != -1) { 688 if (0 < lvalue && (lvalue < physmem || physmem == 0)) { 689 physmem = (pgcnt_t)lvalue; 690 DBG(physmem); 691 } 692 } 693 early_allocation = 0; 694 695 /* 696 * check to see if we have to override the default value of the console 697 */ 698 if (!use_xencons) { 699 inputdev = line; 700 v_len = do_bsys_getproplen(NULL, "input-device"); 701 if (v_len > 0) 702 (void) do_bsys_getprop(NULL, "input-device", inputdev); 703 else 704 v_len = 0; 705 inputdev[v_len] = 0; 706 707 outputdev = inputdev + v_len + 1; 708 v_len = do_bsys_getproplen(NULL, "output-device"); 709 if (v_len > 0) 710 (void) do_bsys_getprop(NULL, "output-device", 711 outputdev); 712 else 713 v_len = 0; 714 outputdev[v_len] = 0; 715 716 consoledev = outputdev + v_len + 1; 717 v_len = do_bsys_getproplen(NULL, "console"); 718 if (v_len > 0) { 719 (void) do_bsys_getprop(NULL, "console", consoledev); 720 if (post_fastreboot && 721 strcmp(consoledev, "graphics") == 0) { 722 bsetprops("console", "text"); 723 v_len = strlen("text"); 724 bcopy("text", consoledev, v_len); 725 } 726 } else { 727 v_len = 0; 728 } 729 
consoledev[v_len] = 0; 730 bcons_init2(inputdev, outputdev, consoledev); 731 } else { 732 /* 733 * Ensure console property exists 734 * If not create it as "hypervisor" 735 */ 736 v_len = do_bsys_getproplen(NULL, "console"); 737 if (v_len < 0) 738 bsetprops("console", "hypervisor"); 739 inputdev = outputdev = consoledev = "hypervisor"; 740 bcons_init2(inputdev, outputdev, consoledev); 741 } 742 743 if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) { 744 value = line; 745 bop_printf(NULL, "\nBoot properties:\n"); 746 name = ""; 747 while ((name = do_bsys_nextprop(NULL, name)) != NULL) { 748 bop_printf(NULL, "\t0x%p %s = ", (void *)name, name); 749 (void) do_bsys_getprop(NULL, name, value); 750 v_len = do_bsys_getproplen(NULL, name); 751 bop_printf(NULL, "len=%d ", v_len); 752 value[v_len] = 0; 753 bop_printf(NULL, "%s\n", value); 754 } 755 } 756 } 757 758 /* 759 * print formatted output 760 */ 761 /*PRINTFLIKE2*/ 762 /*ARGSUSED*/ 763 void 764 bop_printf(bootops_t *bop, const char *fmt, ...) 765 { 766 va_list ap; 767 768 if (have_console == 0) 769 return; 770 771 va_start(ap, fmt); 772 (void) vsnprintf(buffer, BUFFERSIZE, fmt, ap); 773 va_end(ap); 774 PUT_STRING(buffer); 775 } 776 777 /* 778 * Another panic() variant; this one can be used even earlier during boot than 779 * prom_panic(). 780 */ 781 /*PRINTFLIKE1*/ 782 void 783 bop_panic(const char *fmt, ...) 
784 { 785 va_list ap; 786 787 va_start(ap, fmt); 788 bop_printf(NULL, fmt, ap); 789 va_end(ap); 790 791 bop_printf(NULL, "\nPress any key to reboot.\n"); 792 (void) bcons_getchar(); 793 bop_printf(NULL, "Resetting...\n"); 794 pc_reset(); 795 } 796 797 /* 798 * Do a real mode interrupt BIOS call 799 */ 800 typedef struct bios_regs { 801 unsigned short ax, bx, cx, dx, si, di, bp, es, ds; 802 } bios_regs_t; 803 typedef int (*bios_func_t)(int, bios_regs_t *); 804 805 /*ARGSUSED*/ 806 static void 807 do_bsys_doint(bootops_t *bop, int intnum, struct bop_regs *rp) 808 { 809 #if defined(__xpv) 810 prom_panic("unsupported call to BOP_DOINT()\n"); 811 #else /* __xpv */ 812 static int firsttime = 1; 813 bios_func_t bios_func = (bios_func_t)(void *)(uintptr_t)0x5000; 814 bios_regs_t br; 815 816 /* 817 * The first time we do this, we have to copy the pre-packaged 818 * low memory bios call code image into place. 819 */ 820 if (firsttime) { 821 extern char bios_image[]; 822 extern uint32_t bios_size; 823 824 bcopy(bios_image, (void *)bios_func, bios_size); 825 firsttime = 0; 826 } 827 828 br.ax = rp->eax.word.ax; 829 br.bx = rp->ebx.word.bx; 830 br.cx = rp->ecx.word.cx; 831 br.dx = rp->edx.word.dx; 832 br.bp = rp->ebp.word.bp; 833 br.si = rp->esi.word.si; 834 br.di = rp->edi.word.di; 835 br.ds = rp->ds; 836 br.es = rp->es; 837 838 DBG_MSG("Doing BIOS call..."); 839 DBG(br.ax); 840 DBG(br.bx); 841 DBG(br.dx); 842 rp->eflags = bios_func(intnum, &br); 843 DBG_MSG("done\n"); 844 845 rp->eax.word.ax = br.ax; 846 rp->ebx.word.bx = br.bx; 847 rp->ecx.word.cx = br.cx; 848 rp->edx.word.dx = br.dx; 849 rp->ebp.word.bp = br.bp; 850 rp->esi.word.si = br.si; 851 rp->edi.word.di = br.di; 852 rp->ds = br.ds; 853 rp->es = br.es; 854 #endif /* __xpv */ 855 } 856 857 static struct boot_syscalls bop_sysp = { 858 bcons_getchar, 859 bcons_putchar, 860 bcons_ischar, 861 }; 862 863 static char *whoami; 864 865 #define BUFLEN 64 866 867 #if defined(__xpv) 868 869 static char namebuf[32]; 870 871 static 
void 872 xen_parse_props(char *s, char *prop_map[], int n_prop) 873 { 874 char **prop_name = prop_map; 875 char *cp = s, *scp; 876 877 do { 878 scp = cp; 879 while ((*cp != NULL) && (*cp != ':')) 880 cp++; 881 882 if ((scp != cp) && (*prop_name != NULL)) { 883 *cp = NULL; 884 bsetprops(*prop_name, scp); 885 } 886 887 cp++; 888 prop_name++; 889 n_prop--; 890 } while (n_prop > 0); 891 } 892 893 #define VBDPATHLEN 64 894 895 /* 896 * parse the 'xpv-root' property to create properties used by 897 * ufs_mountroot. 898 */ 899 static void 900 xen_vbdroot_props(char *s) 901 { 902 char vbdpath[VBDPATHLEN] = "/xpvd/xdf@"; 903 const char lnamefix[] = "/dev/dsk/c0d"; 904 char *pnp; 905 char *prop_p; 906 char mi; 907 short minor; 908 long addr = 0; 909 910 pnp = vbdpath + strlen(vbdpath); 911 prop_p = s + strlen(lnamefix); 912 while ((*prop_p != '\0') && (*prop_p != 's') && (*prop_p != 'p')) 913 addr = addr * 10 + *prop_p++ - '0'; 914 (void) snprintf(pnp, VBDPATHLEN, "%lx", addr); 915 pnp = vbdpath + strlen(vbdpath); 916 if (*prop_p == 's') 917 mi = 'a'; 918 else if (*prop_p == 'p') 919 mi = 'q'; 920 else 921 ASSERT(0); /* shouldn't be here */ 922 prop_p++; 923 ASSERT(*prop_p != '\0'); 924 if (ISDIGIT(*prop_p)) { 925 minor = *prop_p - '0'; 926 prop_p++; 927 if (ISDIGIT(*prop_p)) { 928 minor = minor * 10 + *prop_p - '0'; 929 } 930 } else { 931 /* malformed root path, use 0 as default */ 932 minor = 0; 933 } 934 ASSERT(minor < 16); /* at most 16 partitions */ 935 mi += minor; 936 *pnp++ = ':'; 937 *pnp++ = mi; 938 *pnp++ = '\0'; 939 bsetprops("fstype", "ufs"); 940 bsetprops("bootpath", vbdpath); 941 942 DBG_MSG("VBD bootpath set to "); 943 DBG_MSG(vbdpath); 944 DBG_MSG("\n"); 945 } 946 947 /* 948 * parse the xpv-nfsroot property to create properties used by 949 * nfs_mountroot. 
950 */ 951 static void 952 xen_nfsroot_props(char *s) 953 { 954 char *prop_map[] = { 955 BP_SERVER_IP, /* server IP address */ 956 BP_SERVER_NAME, /* server hostname */ 957 BP_SERVER_PATH, /* root path */ 958 }; 959 int n_prop = sizeof (prop_map) / sizeof (prop_map[0]); 960 961 bsetprop("fstype", 6, "nfs", 4); 962 963 xen_parse_props(s, prop_map, n_prop); 964 965 /* 966 * If a server name wasn't specified, use a default. 967 */ 968 if (do_bsys_getproplen(NULL, BP_SERVER_NAME) == -1) 969 bsetprops(BP_SERVER_NAME, "unknown"); 970 } 971 972 /* 973 * Extract our IP address, etc. from the "xpv-ip" property. 974 */ 975 static void 976 xen_ip_props(char *s) 977 { 978 char *prop_map[] = { 979 BP_HOST_IP, /* IP address */ 980 NULL, /* NFS server IP address (ignored in */ 981 /* favour of xpv-nfsroot) */ 982 BP_ROUTER_IP, /* IP gateway */ 983 BP_SUBNET_MASK, /* IP subnet mask */ 984 "xpv-hostname", /* hostname (ignored) */ 985 BP_NETWORK_INTERFACE, /* interface name */ 986 "xpv-hcp", /* host configuration protocol */ 987 }; 988 int n_prop = sizeof (prop_map) / sizeof (prop_map[0]); 989 char ifname[IFNAMSIZ]; 990 991 xen_parse_props(s, prop_map, n_prop); 992 993 /* 994 * A Linux dom0 administrator expects all interfaces to be 995 * called "ethX", which is not the case here. 996 * 997 * If the interface name specified is "eth0", presume that 998 * this is really intended to be "xnf0" (the first domU -> 999 * dom0 interface for this domain). 
1000 */ 1001 if ((do_bsys_getprop(NULL, BP_NETWORK_INTERFACE, ifname) == 0) && 1002 (strcmp("eth0", ifname) == 0)) { 1003 bsetprops(BP_NETWORK_INTERFACE, "xnf0"); 1004 bop_printf(NULL, 1005 "network interface name 'eth0' replaced with 'xnf0'\n"); 1006 } 1007 } 1008 1009 #else /* __xpv */ 1010 1011 static void 1012 setup_rarp_props(struct sol_netinfo *sip) 1013 { 1014 char buf[BUFLEN]; /* to hold ip/mac addrs */ 1015 uint8_t *val; 1016 1017 val = (uint8_t *)&sip->sn_ciaddr; 1018 (void) snprintf(buf, BUFLEN, "%d.%d.%d.%d", 1019 val[0], val[1], val[2], val[3]); 1020 bsetprops(BP_HOST_IP, buf); 1021 1022 val = (uint8_t *)&sip->sn_siaddr; 1023 (void) snprintf(buf, BUFLEN, "%d.%d.%d.%d", 1024 val[0], val[1], val[2], val[3]); 1025 bsetprops(BP_SERVER_IP, buf); 1026 1027 if (sip->sn_giaddr != 0) { 1028 val = (uint8_t *)&sip->sn_giaddr; 1029 (void) snprintf(buf, BUFLEN, "%d.%d.%d.%d", 1030 val[0], val[1], val[2], val[3]); 1031 bsetprops(BP_ROUTER_IP, buf); 1032 } 1033 1034 if (sip->sn_netmask != 0) { 1035 val = (uint8_t *)&sip->sn_netmask; 1036 (void) snprintf(buf, BUFLEN, "%d.%d.%d.%d", 1037 val[0], val[1], val[2], val[3]); 1038 bsetprops(BP_SUBNET_MASK, buf); 1039 } 1040 1041 if (sip->sn_mactype != 4 || sip->sn_maclen != 6) { 1042 bop_printf(NULL, "unsupported mac type %d, mac len %d\n", 1043 sip->sn_mactype, sip->sn_maclen); 1044 } else { 1045 val = sip->sn_macaddr; 1046 (void) snprintf(buf, BUFLEN, "%x:%x:%x:%x:%x:%x", 1047 val[0], val[1], val[2], val[3], val[4], val[5]); 1048 bsetprops(BP_BOOT_MAC, buf); 1049 } 1050 } 1051 1052 #endif /* __xpv */ 1053 1054 static void 1055 build_panic_cmdline(const char *cmd, int cmdlen) 1056 { 1057 int proplen; 1058 size_t arglen; 1059 1060 arglen = sizeof (fastreboot_onpanic_args); 1061 /* 1062 * If we allready have fastreboot-onpanic set to zero, 1063 * don't add them again. 
1064 */ 1065 if ((proplen = do_bsys_getproplen(NULL, FASTREBOOT_ONPANIC)) > 0 && 1066 proplen <= sizeof (fastreboot_onpanic_cmdline)) { 1067 (void) do_bsys_getprop(NULL, FASTREBOOT_ONPANIC, 1068 fastreboot_onpanic_cmdline); 1069 if (FASTREBOOT_ONPANIC_NOTSET(fastreboot_onpanic_cmdline)) 1070 arglen = 1; 1071 } 1072 1073 /* 1074 * construct fastreboot_onpanic_cmdline 1075 */ 1076 if (cmdlen + arglen > sizeof (fastreboot_onpanic_cmdline)) { 1077 DBG_MSG("Command line too long: clearing " 1078 FASTREBOOT_ONPANIC "\n"); 1079 fastreboot_onpanic = 0; 1080 } else { 1081 bcopy(cmd, fastreboot_onpanic_cmdline, cmdlen); 1082 if (arglen != 1) 1083 bcopy(fastreboot_onpanic_args, 1084 fastreboot_onpanic_cmdline + cmdlen, arglen); 1085 else 1086 fastreboot_onpanic_cmdline[cmdlen] = 0; 1087 } 1088 } 1089 1090 1091 #ifndef __xpv 1092 /* 1093 * Construct boot command line for Fast Reboot 1094 */ 1095 static void 1096 build_fastboot_cmdline(void) 1097 { 1098 saved_cmdline_len = strlen(xbootp->bi_cmdline) + 1; 1099 if (saved_cmdline_len > FASTBOOT_SAVED_CMDLINE_LEN) { 1100 DBG(saved_cmdline_len); 1101 DBG_MSG("Command line too long: clearing fastreboot_capable\n"); 1102 fastreboot_capable = 0; 1103 } else { 1104 bcopy((void *)(xbootp->bi_cmdline), (void *)saved_cmdline, 1105 saved_cmdline_len); 1106 saved_cmdline[saved_cmdline_len - 1] = '\0'; 1107 build_panic_cmdline(saved_cmdline, saved_cmdline_len - 1); 1108 } 1109 } 1110 1111 /* 1112 * Save memory layout, disk drive information, unix and boot archive sizes for 1113 * Fast Reboot. 
1114 */ 1115 static void 1116 save_boot_info(multiboot_info_t *mbi, struct xboot_info *xbi) 1117 { 1118 struct boot_modules *modp; 1119 int i; 1120 1121 bcopy(mbi, &saved_mbi, sizeof (multiboot_info_t)); 1122 if (mbi->mmap_length > sizeof (saved_mmap)) { 1123 DBG_MSG("mbi->mmap_length too big: clearing " 1124 "fastreboot_capable\n"); 1125 fastreboot_capable = 0; 1126 } else { 1127 bcopy((void *)(uintptr_t)mbi->mmap_addr, (void *)saved_mmap, 1128 mbi->mmap_length); 1129 } 1130 1131 if ((mbi->flags & MB_INFO_DRIVE_INFO) != 0) { 1132 if (mbi->drives_length > sizeof (saved_drives)) { 1133 DBG(mbi->drives_length); 1134 DBG_MSG("mbi->drives_length too big: clearing " 1135 "fastreboot_capable\n"); 1136 fastreboot_capable = 0; 1137 } else { 1138 bcopy((void *)(uintptr_t)mbi->drives_addr, 1139 (void *)saved_drives, mbi->drives_length); 1140 } 1141 } else { 1142 saved_mbi.drives_length = 0; 1143 saved_mbi.drives_addr = NULL; 1144 } 1145 1146 /* 1147 * Current file sizes. Used by fastboot.c to figure out how much 1148 * memory to reserve for panic reboot. 1149 * Use the module list from the dboot-constructed xboot_info 1150 * instead of the list referenced by the multiboot structure 1151 * because that structure may not be addressable now. 1152 */ 1153 saved_file_size[FASTBOOT_NAME_UNIX] = FOUR_MEG - PAGESIZE; 1154 for (i = 0, modp = (struct boot_modules *)(uintptr_t)xbi->bi_modules; 1155 i < xbi->bi_module_cnt; i++, modp++) { 1156 saved_file_size[FASTBOOT_NAME_BOOTARCHIVE] += modp->bm_size; 1157 } 1158 } 1159 #endif /* __xpv */ 1160 1161 1162 /* 1163 * 1st pass at building the table of boot properties. This includes: 1164 * - values set on the command line: -B a=x,b=y,c=z .... 1165 * - known values we just compute (ie. from xbootp) 1166 * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values) 1167 * 1168 * the grub command line looked like: 1169 * kernel boot-file [-B prop=value[,prop=value]...] 
[boot-args] 1170 * 1171 * whoami is the same as boot-file 1172 */ 1173 static void 1174 build_boot_properties(void) 1175 { 1176 char *name; 1177 int name_len; 1178 char *value; 1179 int value_len; 1180 struct boot_modules *bm, *rdbm; 1181 char *propbuf; 1182 int quoted = 0; 1183 int boot_arg_len; 1184 uint_t i, midx; 1185 char modid[32]; 1186 #ifndef __xpv 1187 static int stdout_val = 0; 1188 uchar_t boot_device; 1189 char str[3]; 1190 multiboot_info_t *mbi; 1191 int netboot; 1192 struct sol_netinfo *sip; 1193 #endif 1194 1195 /* 1196 * These have to be done first, so that kobj_mount_root() works 1197 */ 1198 DBG_MSG("Building boot properties\n"); 1199 propbuf = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, 0); 1200 DBG((uintptr_t)propbuf); 1201 if (xbootp->bi_module_cnt > 0) { 1202 bm = xbootp->bi_modules; 1203 rdbm = NULL; 1204 for (midx = i = 0; i < xbootp->bi_module_cnt; i++) { 1205 if (bm[i].bm_type == BMT_ROOTFS) { 1206 rdbm = &bm[i]; 1207 continue; 1208 } 1209 if (bm[i].bm_type == BMT_HASH || bm[i].bm_name == NULL) 1210 continue; 1211 1212 (void) snprintf(modid, sizeof (modid), 1213 "module-name-%u", midx); 1214 bsetprops(modid, (char *)bm[i].bm_name); 1215 (void) snprintf(modid, sizeof (modid), 1216 "module-addr-%u", midx); 1217 bsetprop64(modid, (uint64_t)(uintptr_t)bm[i].bm_addr); 1218 (void) snprintf(modid, sizeof (modid), 1219 "module-size-%u", midx); 1220 bsetprop64(modid, (uint64_t)bm[i].bm_size); 1221 ++midx; 1222 } 1223 if (rdbm != NULL) { 1224 bsetprop64("ramdisk_start", 1225 (uint64_t)(uintptr_t)rdbm->bm_addr); 1226 bsetprop64("ramdisk_end", 1227 (uint64_t)(uintptr_t)rdbm->bm_addr + rdbm->bm_size); 1228 } 1229 } 1230 1231 /* 1232 * If there are any boot time modules or hashes present, then disable 1233 * fast reboot. 
1234 */ 1235 if (xbootp->bi_module_cnt > 1) { 1236 fastreboot_disable(FBNS_BOOTMOD); 1237 } 1238 1239 DBG_MSG("Parsing command line for boot properties\n"); 1240 value = xbootp->bi_cmdline; 1241 1242 /* 1243 * allocate memory to collect boot_args into 1244 */ 1245 boot_arg_len = strlen(xbootp->bi_cmdline) + 1; 1246 boot_args = do_bsys_alloc(NULL, NULL, boot_arg_len, MMU_PAGESIZE); 1247 boot_args[0] = 0; 1248 boot_arg_len = 0; 1249 1250 #ifdef __xpv 1251 /* 1252 * Xen puts a lot of device information in front of the kernel name 1253 * let's grab them and make them boot properties. The first 1254 * string w/o an "=" in it will be the boot-file property. 1255 */ 1256 (void) strcpy(namebuf, "xpv-"); 1257 for (;;) { 1258 /* 1259 * get to next property 1260 */ 1261 while (ISSPACE(*value)) 1262 ++value; 1263 name = value; 1264 /* 1265 * look for an "=" 1266 */ 1267 while (*value && !ISSPACE(*value) && *value != '=') { 1268 value++; 1269 } 1270 if (*value != '=') { /* no "=" in the property */ 1271 value = name; 1272 break; 1273 } 1274 name_len = value - name; 1275 value_len = 0; 1276 /* 1277 * skip over the "=" 1278 */ 1279 value++; 1280 while (value[value_len] && !ISSPACE(value[value_len])) { 1281 ++value_len; 1282 } 1283 /* 1284 * build property name with "xpv-" prefix 1285 */ 1286 if (name_len + 4 > 32) { /* skip if name too long */ 1287 value += value_len; 1288 continue; 1289 } 1290 bcopy(name, &namebuf[4], name_len); 1291 name_len += 4; 1292 namebuf[name_len] = 0; 1293 bcopy(value, propbuf, value_len); 1294 propbuf[value_len] = 0; 1295 bsetprops(namebuf, propbuf); 1296 1297 /* 1298 * xpv-root is set to the logical disk name of the xen 1299 * VBD when booting from a disk-based filesystem. 1300 */ 1301 if (strcmp(namebuf, "xpv-root") == 0) 1302 xen_vbdroot_props(propbuf); 1303 /* 1304 * While we're here, if we have a "xpv-nfsroot" property 1305 * then we need to set "fstype" to "nfs" so we mount 1306 * our root from the nfs server. 
Also parse the xpv-nfsroot 1307 * property to create the properties that nfs_mountroot will 1308 * need to find the root and mount it. 1309 */ 1310 if (strcmp(namebuf, "xpv-nfsroot") == 0) 1311 xen_nfsroot_props(propbuf); 1312 1313 if (strcmp(namebuf, "xpv-ip") == 0) 1314 xen_ip_props(propbuf); 1315 value += value_len; 1316 } 1317 #endif 1318 1319 while (ISSPACE(*value)) 1320 ++value; 1321 /* 1322 * value now points at the boot-file 1323 */ 1324 value_len = 0; 1325 while (value[value_len] && !ISSPACE(value[value_len])) 1326 ++value_len; 1327 if (value_len > 0) { 1328 whoami = propbuf; 1329 bcopy(value, whoami, value_len); 1330 whoami[value_len] = 0; 1331 bsetprops("boot-file", whoami); 1332 /* 1333 * strip leading path stuff from whoami, so running from 1334 * PXE/miniroot makes sense. 1335 */ 1336 if (strstr(whoami, "/platform/") != NULL) 1337 whoami = strstr(whoami, "/platform/"); 1338 bsetprops("whoami", whoami); 1339 } 1340 1341 /* 1342 * Values forcibly set boot properties on the command line via -B. 1343 * Allow use of quotes in values. Other stuff goes on kernel 1344 * command line. 1345 */ 1346 name = value + value_len; 1347 while (*name != 0) { 1348 /* 1349 * anything not " -B" is copied to the command line 1350 */ 1351 if (!ISSPACE(name[0]) || name[1] != '-' || name[2] != 'B') { 1352 boot_args[boot_arg_len++] = *name; 1353 boot_args[boot_arg_len] = 0; 1354 ++name; 1355 continue; 1356 } 1357 1358 /* 1359 * skip the " -B" and following white space 1360 */ 1361 name += 3; 1362 while (ISSPACE(*name)) 1363 ++name; 1364 while (*name && !ISSPACE(*name)) { 1365 value = strstr(name, "="); 1366 if (value == NULL) 1367 break; 1368 name_len = value - name; 1369 ++value; 1370 value_len = 0; 1371 quoted = 0; 1372 for (; ; ++value_len) { 1373 if (!value[value_len]) 1374 break; 1375 1376 /* 1377 * is this value quoted? 
1378 */ 1379 if (value_len == 0 && 1380 (value[0] == '\'' || value[0] == '"')) { 1381 quoted = value[0]; 1382 ++value_len; 1383 } 1384 1385 /* 1386 * In the quote accept any character, 1387 * but look for ending quote. 1388 */ 1389 if (quoted) { 1390 if (value[value_len] == quoted) 1391 quoted = 0; 1392 continue; 1393 } 1394 1395 /* 1396 * a comma or white space ends the value 1397 */ 1398 if (value[value_len] == ',' || 1399 ISSPACE(value[value_len])) 1400 break; 1401 } 1402 1403 if (value_len == 0) { 1404 bsetprop(name, name_len, "true", 5); 1405 } else { 1406 char *v = value; 1407 int l = value_len; 1408 if (v[0] == v[l - 1] && 1409 (v[0] == '\'' || v[0] == '"')) { 1410 ++v; 1411 l -= 2; 1412 } 1413 bcopy(v, propbuf, l); 1414 propbuf[l] = '\0'; 1415 bsetprop(name, name_len, propbuf, 1416 l + 1); 1417 } 1418 name = value + value_len; 1419 while (*name == ',') 1420 ++name; 1421 } 1422 } 1423 1424 /* 1425 * set boot-args property 1426 * 1275 name is bootargs, so set 1427 * that too 1428 */ 1429 bsetprops("boot-args", boot_args); 1430 bsetprops("bootargs", boot_args); 1431 1432 #ifndef __xpv 1433 /* 1434 * set the BIOS boot device from GRUB 1435 */ 1436 netboot = 0; 1437 mbi = xbootp->bi_mb_info; 1438 1439 /* 1440 * Build boot command line for Fast Reboot 1441 */ 1442 build_fastboot_cmdline(); 1443 1444 /* 1445 * Save various boot information for Fast Reboot 1446 */ 1447 save_boot_info(mbi, xbootp); 1448 1449 if (mbi != NULL && mbi->flags & MB_INFO_BOOTDEV) { 1450 boot_device = mbi->boot_device >> 24; 1451 if (boot_device == 0x20) 1452 netboot++; 1453 str[0] = (boot_device >> 4) + '0'; 1454 str[1] = (boot_device & 0xf) + '0'; 1455 str[2] = 0; 1456 bsetprops("bios-boot-device", str); 1457 } else { 1458 netboot = 1; 1459 } 1460 1461 /* 1462 * In the netboot case, drives_info is overloaded with the dhcp ack. 1463 * This is not multiboot compliant and requires special pxegrub! 
1464 */ 1465 if (netboot && mbi->drives_length != 0) { 1466 sip = (struct sol_netinfo *)(uintptr_t)mbi->drives_addr; 1467 if (sip->sn_infotype == SN_TYPE_BOOTP) 1468 bsetprop("bootp-response", sizeof ("bootp-response"), 1469 (void *)(uintptr_t)mbi->drives_addr, 1470 mbi->drives_length); 1471 else if (sip->sn_infotype == SN_TYPE_RARP) 1472 setup_rarp_props(sip); 1473 } 1474 bsetprop("stdout", strlen("stdout"), 1475 &stdout_val, sizeof (stdout_val)); 1476 #endif /* __xpv */ 1477 1478 /* 1479 * more conjured up values for made up things.... 1480 */ 1481 #if defined(__xpv) 1482 bsetprops("mfg-name", "i86xpv"); 1483 bsetprops("impl-arch-name", "i86xpv"); 1484 #else 1485 bsetprops("mfg-name", "i86pc"); 1486 bsetprops("impl-arch-name", "i86pc"); 1487 #endif 1488 1489 /* 1490 * Build firmware-provided system properties 1491 */ 1492 build_firmware_properties(); 1493 1494 /* 1495 * XXPV 1496 * 1497 * Find out what these are: 1498 * - cpuid_feature_ecx_include 1499 * - cpuid_feature_ecx_exclude 1500 * - cpuid_feature_edx_include 1501 * - cpuid_feature_edx_exclude 1502 * 1503 * Find out what these are in multiboot: 1504 * - netdev-path 1505 * - fstype 1506 */ 1507 } 1508 1509 #ifdef __xpv 1510 /* 1511 * Under the Hypervisor, memory usable for DMA may be scarce. One 1512 * very likely large pool of DMA friendly memory is occupied by 1513 * the boot_archive, as it was loaded by grub into low MFNs. 1514 * 1515 * Here we free up that memory by copying the boot archive to what are 1516 * likely higher MFN pages and then swapping the mfn/pfn mappings. 
1517 */ 1518 #define PFN_2GIG 0x80000 1519 static void 1520 relocate_boot_archive(void) 1521 { 1522 mfn_t max_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL); 1523 struct boot_modules *bm = xbootp->bi_modules; 1524 uintptr_t va; 1525 pfn_t va_pfn; 1526 mfn_t va_mfn; 1527 caddr_t copy; 1528 pfn_t copy_pfn; 1529 mfn_t copy_mfn; 1530 size_t len; 1531 int slop; 1532 int total = 0; 1533 int relocated = 0; 1534 int mmu_update_return; 1535 mmu_update_t t[2]; 1536 x86pte_t pte; 1537 1538 /* 1539 * If all MFN's are below 2Gig, don't bother doing this. 1540 */ 1541 if (max_mfn < PFN_2GIG) 1542 return; 1543 if (xbootp->bi_module_cnt < 1) { 1544 DBG_MSG("no boot_archive!"); 1545 return; 1546 } 1547 1548 DBG_MSG("moving boot_archive to high MFN memory\n"); 1549 va = (uintptr_t)bm->bm_addr; 1550 len = bm->bm_size; 1551 slop = va & MMU_PAGEOFFSET; 1552 if (slop) { 1553 va += MMU_PAGESIZE - slop; 1554 len -= MMU_PAGESIZE - slop; 1555 } 1556 len = P2ALIGN(len, MMU_PAGESIZE); 1557 1558 /* 1559 * Go through all boot_archive pages, swapping any low MFN pages 1560 * with memory at next_phys. 
1561 */ 1562 while (len != 0) { 1563 ++total; 1564 va_pfn = mmu_btop(va - ONE_GIG); 1565 va_mfn = mfn_list[va_pfn]; 1566 if (mfn_list[va_pfn] < PFN_2GIG) { 1567 copy = kbm_remap_window(next_phys, 1); 1568 bcopy((void *)va, copy, MMU_PAGESIZE); 1569 copy_pfn = mmu_btop(next_phys); 1570 copy_mfn = mfn_list[copy_pfn]; 1571 1572 pte = mfn_to_ma(copy_mfn) | PT_NOCONSIST | PT_VALID; 1573 if (HYPERVISOR_update_va_mapping(va, pte, 1574 UVMF_INVLPG | UVMF_LOCAL)) 1575 bop_panic("relocate_boot_archive(): " 1576 "HYPERVISOR_update_va_mapping() failed"); 1577 1578 mfn_list[va_pfn] = copy_mfn; 1579 mfn_list[copy_pfn] = va_mfn; 1580 1581 t[0].ptr = mfn_to_ma(copy_mfn) | MMU_MACHPHYS_UPDATE; 1582 t[0].val = va_pfn; 1583 t[1].ptr = mfn_to_ma(va_mfn) | MMU_MACHPHYS_UPDATE; 1584 t[1].val = copy_pfn; 1585 if (HYPERVISOR_mmu_update(t, 2, &mmu_update_return, 1586 DOMID_SELF) != 0 || mmu_update_return != 2) 1587 bop_panic("relocate_boot_archive(): " 1588 "HYPERVISOR_mmu_update() failed"); 1589 1590 next_phys += MMU_PAGESIZE; 1591 ++relocated; 1592 } 1593 len -= MMU_PAGESIZE; 1594 va += MMU_PAGESIZE; 1595 } 1596 DBG_MSG("Relocated pages:\n"); 1597 DBG(relocated); 1598 DBG_MSG("Out of total pages:\n"); 1599 DBG(total); 1600 } 1601 #endif /* __xpv */ 1602 1603 #if !defined(__xpv) 1604 /* 1605 * Install a temporary IDT that lets us catch errors in the boot time code. 1606 * We shouldn't get any faults at all while this is installed, so we'll 1607 * just generate a traceback and exit. 
1608 */ 1609 #ifdef __amd64 1610 static const int bcode_sel = B64CODE_SEL; 1611 #else 1612 static const int bcode_sel = B32CODE_SEL; 1613 #endif 1614 1615 /* 1616 * simple description of a stack frame (args are 32 bit only currently) 1617 */ 1618 typedef struct bop_frame { 1619 struct bop_frame *old_frame; 1620 pc_t retaddr; 1621 long arg[1]; 1622 } bop_frame_t; 1623 1624 void 1625 bop_traceback(bop_frame_t *frame) 1626 { 1627 pc_t pc; 1628 int cnt; 1629 char *ksym; 1630 ulong_t off; 1631 #if defined(__i386) 1632 int a; 1633 #endif 1634 1635 bop_printf(NULL, "Stack traceback:\n"); 1636 for (cnt = 0; cnt < 30; ++cnt) { /* up to 30 frames */ 1637 pc = frame->retaddr; 1638 if (pc == 0) 1639 break; 1640 ksym = kobj_getsymname(pc, &off); 1641 if (ksym) 1642 bop_printf(NULL, " %s+%lx", ksym, off); 1643 else 1644 bop_printf(NULL, " 0x%lx", pc); 1645 1646 frame = frame->old_frame; 1647 if (frame == 0) { 1648 bop_printf(NULL, "\n"); 1649 break; 1650 } 1651 #if defined(__i386) 1652 for (a = 0; a < 6; ++a) { /* try for 6 args */ 1653 if ((void *)&frame->arg[a] == (void *)frame->old_frame) 1654 break; 1655 if (a == 0) 1656 bop_printf(NULL, "("); 1657 else 1658 bop_printf(NULL, ","); 1659 bop_printf(NULL, "0x%lx", frame->arg[a]); 1660 } 1661 bop_printf(NULL, ")"); 1662 #endif 1663 bop_printf(NULL, "\n"); 1664 } 1665 } 1666 1667 struct trapframe { 1668 ulong_t error_code; /* optional */ 1669 ulong_t inst_ptr; 1670 ulong_t code_seg; 1671 ulong_t flags_reg; 1672 #ifdef __amd64 1673 ulong_t stk_ptr; 1674 ulong_t stk_seg; 1675 #endif 1676 }; 1677 1678 void 1679 bop_trap(ulong_t *tfp) 1680 { 1681 struct trapframe *tf = (struct trapframe *)tfp; 1682 bop_frame_t fakeframe; 1683 static int depth = 0; 1684 1685 /* 1686 * Check for an infinite loop of traps. 
1687 */ 1688 if (++depth > 2) 1689 bop_panic("Nested trap"); 1690 1691 bop_printf(NULL, "Unexpected trap\n"); 1692 1693 /* 1694 * adjust the tf for optional error_code by detecting the code selector 1695 */ 1696 if (tf->code_seg != bcode_sel) 1697 tf = (struct trapframe *)(tfp - 1); 1698 else 1699 bop_printf(NULL, "error code 0x%lx\n", 1700 tf->error_code & 0xffffffff); 1701 1702 bop_printf(NULL, "instruction pointer 0x%lx\n", tf->inst_ptr); 1703 bop_printf(NULL, "code segment 0x%lx\n", tf->code_seg & 0xffff); 1704 bop_printf(NULL, "flags register 0x%lx\n", tf->flags_reg); 1705 #ifdef __amd64 1706 bop_printf(NULL, "return %%rsp 0x%lx\n", tf->stk_ptr); 1707 bop_printf(NULL, "return %%ss 0x%lx\n", tf->stk_seg & 0xffff); 1708 #endif 1709 1710 /* grab %[er]bp pushed by our code from the stack */ 1711 fakeframe.old_frame = (bop_frame_t *)*(tfp - 3); 1712 fakeframe.retaddr = (pc_t)tf->inst_ptr; 1713 bop_printf(NULL, "Attempting stack backtrace:\n"); 1714 bop_traceback(&fakeframe); 1715 bop_panic("unexpected trap in early boot"); 1716 } 1717 1718 extern void bop_trap_handler(void); 1719 1720 static gate_desc_t *bop_idt; 1721 1722 static desctbr_t bop_idt_info; 1723 1724 static void 1725 bop_idt_init(void) 1726 { 1727 int t; 1728 1729 bop_idt = (gate_desc_t *) 1730 do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE); 1731 bzero(bop_idt, MMU_PAGESIZE); 1732 for (t = 0; t < NIDT; ++t) { 1733 /* 1734 * Note that since boot runs without a TSS, the 1735 * double fault handler cannot use an alternate stack 1736 * (64-bit) or a task gate (32-bit). 1737 */ 1738 set_gatesegd(&bop_idt[t], &bop_trap_handler, bcode_sel, 1739 SDT_SYSIGT, TRP_KPL, 0); 1740 } 1741 bop_idt_info.dtr_limit = (NIDT * sizeof (gate_desc_t)) - 1; 1742 bop_idt_info.dtr_base = (uintptr_t)bop_idt; 1743 wr_idtr(&bop_idt_info); 1744 } 1745 #endif /* !defined(__xpv) */ 1746 1747 /* 1748 * This is where we enter the kernel. 
It dummies up the boot_ops and 1749 * boot_syscalls vectors and jumps off to _kobj_boot() 1750 */ 1751 void 1752 _start(struct xboot_info *xbp) 1753 { 1754 bootops_t *bops = &bootop; 1755 extern void _kobj_boot(); 1756 1757 /* 1758 * 1st off - initialize the console for any error messages 1759 */ 1760 xbootp = xbp; 1761 #ifdef __xpv 1762 HYPERVISOR_shared_info = (void *)xbootp->bi_shared_info; 1763 xen_info = xbootp->bi_xen_start_info; 1764 #endif 1765 1766 #ifndef __xpv 1767 if (*((uint32_t *)(FASTBOOT_SWTCH_PA + FASTBOOT_STACK_OFFSET)) == 1768 FASTBOOT_MAGIC) { 1769 post_fastreboot = 1; 1770 *((uint32_t *)(FASTBOOT_SWTCH_PA + FASTBOOT_STACK_OFFSET)) = 0; 1771 } 1772 #endif 1773 1774 bcons_init((void *)xbootp->bi_cmdline); 1775 have_console = 1; 1776 1777 /* 1778 * enable debugging 1779 */ 1780 if (strstr((char *)xbootp->bi_cmdline, "kbm_debug")) 1781 kbm_debug = 1; 1782 1783 DBG_MSG("\n\n*** Entered Solaris in _start() cmdline is: "); 1784 DBG_MSG((char *)xbootp->bi_cmdline); 1785 DBG_MSG("\n\n\n"); 1786 1787 /* 1788 * physavail is no longer used by startup 1789 */ 1790 bm.physinstalled = xbp->bi_phys_install; 1791 bm.pcimem = xbp->bi_pcimem; 1792 bm.rsvdmem = xbp->bi_rsvdmem; 1793 bm.physavail = NULL; 1794 1795 /* 1796 * initialize the boot time allocator 1797 */ 1798 next_phys = xbootp->bi_next_paddr; 1799 DBG(next_phys); 1800 next_virt = (uintptr_t)xbootp->bi_next_vaddr; 1801 DBG(next_virt); 1802 DBG_MSG("Initializing boot time memory management..."); 1803 #ifdef __xpv 1804 { 1805 xen_platform_parameters_t p; 1806 1807 /* This call shouldn't fail, dboot already did it once. 
*/ 1808 (void) HYPERVISOR_xen_version(XENVER_platform_parameters, &p); 1809 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start); 1810 DBG(xen_virt_start); 1811 } 1812 #endif 1813 kbm_init(xbootp); 1814 DBG_MSG("done\n"); 1815 1816 /* 1817 * Fill in the bootops vector 1818 */ 1819 bops->bsys_version = BO_VERSION; 1820 bops->boot_mem = &bm; 1821 bops->bsys_alloc = do_bsys_alloc; 1822 bops->bsys_free = do_bsys_free; 1823 bops->bsys_getproplen = do_bsys_getproplen; 1824 bops->bsys_getprop = do_bsys_getprop; 1825 bops->bsys_nextprop = do_bsys_nextprop; 1826 bops->bsys_printf = bop_printf; 1827 bops->bsys_doint = do_bsys_doint; 1828 1829 /* 1830 * BOP_EALLOC() is no longer needed 1831 */ 1832 bops->bsys_ealloc = do_bsys_ealloc; 1833 1834 #ifdef __xpv 1835 /* 1836 * On domain 0 we need to free up some physical memory that is 1837 * usable for DMA. Since GRUB loaded the boot_archive, it is 1838 * sitting in low MFN memory. We'll relocated the boot archive 1839 * pages to high PFN memory. 1840 */ 1841 if (DOMAIN_IS_INITDOMAIN(xen_info)) 1842 relocate_boot_archive(); 1843 #endif 1844 1845 #ifndef __xpv 1846 /* 1847 * Install an IDT to catch early pagefaults (shouldn't have any). 1848 * Also needed for kmdb. 
1849 */ 1850 bop_idt_init(); 1851 #endif 1852 1853 /* 1854 * Start building the boot properties from the command line 1855 */ 1856 DBG_MSG("Initializing boot properties:\n"); 1857 build_boot_properties(); 1858 1859 if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) { 1860 char *name; 1861 char *value; 1862 char *cp; 1863 int len; 1864 1865 value = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE); 1866 bop_printf(NULL, "\nBoot properties:\n"); 1867 name = ""; 1868 while ((name = do_bsys_nextprop(NULL, name)) != NULL) { 1869 bop_printf(NULL, "\t0x%p %s = ", (void *)name, name); 1870 (void) do_bsys_getprop(NULL, name, value); 1871 len = do_bsys_getproplen(NULL, name); 1872 bop_printf(NULL, "len=%d ", len); 1873 value[len] = 0; 1874 for (cp = value; *cp; ++cp) { 1875 if (' ' <= *cp && *cp <= '~') 1876 bop_printf(NULL, "%c", *cp); 1877 else 1878 bop_printf(NULL, "-0x%x-", *cp); 1879 } 1880 bop_printf(NULL, "\n"); 1881 } 1882 } 1883 1884 /* 1885 * jump into krtld... 1886 */ 1887 _kobj_boot(&bop_sysp, NULL, bops, NULL); 1888 } 1889 1890 1891 /*ARGSUSED*/ 1892 static caddr_t 1893 no_more_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align) 1894 { 1895 panic("Attempt to bsys_alloc() too late\n"); 1896 return (NULL); 1897 } 1898 1899 /*ARGSUSED*/ 1900 static void 1901 no_more_free(bootops_t *bop, caddr_t virt, size_t size) 1902 { 1903 panic("Attempt to bsys_free() too late\n"); 1904 } 1905 1906 void 1907 bop_no_more_mem(void) 1908 { 1909 DBG(total_bop_alloc_scratch); 1910 DBG(total_bop_alloc_kernel); 1911 bootops->bsys_alloc = no_more_alloc; 1912 bootops->bsys_free = no_more_free; 1913 } 1914 1915 1916 /* 1917 * Set ACPI firmware properties 1918 */ 1919 1920 static caddr_t 1921 vmap_phys(size_t length, paddr_t pa) 1922 { 1923 paddr_t start, end; 1924 caddr_t va; 1925 size_t len, page; 1926 1927 #ifdef __xpv 1928 pa = pfn_to_pa(xen_assign_pfn(mmu_btop(pa))) | (pa & MMU_PAGEOFFSET); 1929 #endif 1930 start = P2ALIGN(pa, MMU_PAGESIZE); 1931 end = 
P2ROUNDUP(pa + length, MMU_PAGESIZE); 1932 len = end - start; 1933 va = (caddr_t)alloc_vaddr(len, MMU_PAGESIZE); 1934 for (page = 0; page < len; page += MMU_PAGESIZE) 1935 kbm_map((uintptr_t)va + page, start + page, 0, 0); 1936 return (va + (pa & MMU_PAGEOFFSET)); 1937 } 1938 1939 static uint8_t 1940 checksum_table(uint8_t *tp, size_t len) 1941 { 1942 uint8_t sum = 0; 1943 1944 while (len-- > 0) 1945 sum += *tp++; 1946 1947 return (sum); 1948 } 1949 1950 static int 1951 valid_rsdp(ACPI_TABLE_RSDP *rp) 1952 { 1953 1954 /* validate the V1.x checksum */ 1955 if (checksum_table((uint8_t *)rp, ACPI_RSDP_CHECKSUM_LENGTH) != 0) 1956 return (0); 1957 1958 /* If pre-ACPI 2.0, this is a valid RSDP */ 1959 if (rp->Revision < 2) 1960 return (1); 1961 1962 /* validate the V2.x checksum */ 1963 if (checksum_table((uint8_t *)rp, ACPI_RSDP_XCHECKSUM_LENGTH) != 0) 1964 return (0); 1965 1966 return (1); 1967 } 1968 1969 /* 1970 * Scan memory range for an RSDP; 1971 * see ACPI 3.0 Spec, 5.2.5.1 1972 */ 1973 static ACPI_TABLE_RSDP * 1974 scan_rsdp(paddr_t start, paddr_t end) 1975 { 1976 size_t len = end - start; 1977 caddr_t ptr; 1978 1979 ptr = vmap_phys(len, start); 1980 while (len > 0) { 1981 if (strncmp(ptr, ACPI_SIG_RSDP, strlen(ACPI_SIG_RSDP)) == 0 && 1982 valid_rsdp((ACPI_TABLE_RSDP *)ptr)) 1983 return ((ACPI_TABLE_RSDP *)ptr); 1984 1985 ptr += ACPI_RSDP_SCAN_STEP; 1986 len -= ACPI_RSDP_SCAN_STEP; 1987 } 1988 1989 return (NULL); 1990 } 1991 1992 /* 1993 * Refer to ACPI 3.0 Spec, section 5.2.5.1 to understand this function 1994 */ 1995 static ACPI_TABLE_RSDP * 1996 find_rsdp() 1997 { 1998 ACPI_TABLE_RSDP *rsdp; 1999 uint16_t *ebda_seg; 2000 paddr_t ebda_addr; 2001 2002 /* 2003 * Get the EBDA segment and scan the first 1K 2004 */ 2005 ebda_seg = (uint16_t *)vmap_phys(sizeof (uint16_t), 2006 ACPI_EBDA_PTR_LOCATION); 2007 ebda_addr = *ebda_seg << 4; 2008 rsdp = scan_rsdp(ebda_addr, ebda_addr + ACPI_EBDA_WINDOW_SIZE); 2009 if (rsdp == NULL) 2010 /* if EBDA doesn't contain RSDP, look 
in BIOS memory */ 2011 rsdp = scan_rsdp(ACPI_HI_RSDP_WINDOW_BASE, 2012 ACPI_HI_RSDP_WINDOW_BASE + ACPI_HI_RSDP_WINDOW_SIZE); 2013 return (rsdp); 2014 } 2015 2016 static ACPI_TABLE_HEADER * 2017 map_fw_table(paddr_t table_addr) 2018 { 2019 ACPI_TABLE_HEADER *tp; 2020 size_t len = MAX(sizeof (*tp), MMU_PAGESIZE); 2021 2022 /* 2023 * Map at least a page; if the table is larger than this, remap it 2024 */ 2025 tp = (ACPI_TABLE_HEADER *)vmap_phys(len, table_addr); 2026 if (tp->Length > len) 2027 tp = (ACPI_TABLE_HEADER *)vmap_phys(tp->Length, table_addr); 2028 return (tp); 2029 } 2030 2031 static ACPI_TABLE_HEADER * 2032 find_fw_table(char *signature) 2033 { 2034 static int revision = 0; 2035 static ACPI_TABLE_XSDT *xsdt; 2036 static int len; 2037 paddr_t xsdt_addr; 2038 ACPI_TABLE_RSDP *rsdp; 2039 ACPI_TABLE_HEADER *tp; 2040 paddr_t table_addr; 2041 int n; 2042 2043 if (strlen(signature) != ACPI_NAME_SIZE) 2044 return (NULL); 2045 2046 /* 2047 * Reading the ACPI 3.0 Spec, section 5.2.5.3 will help 2048 * understand this code. If we haven't already found the RSDT/XSDT, 2049 * revision will be 0. Find the RSDP and check the revision 2050 * to find out whether to use the RSDT or XSDT. If revision is 2051 * 0 or 1, use the RSDT and set internal revision to 1; if it is 2, 2052 * use the XSDT. If the XSDT address is 0, though, fall back to 2053 * revision 1 and use the RSDT. 
2054 */ 2055 if (revision == 0) { 2056 if ((rsdp = find_rsdp()) != NULL) { 2057 revision = rsdp->Revision; 2058 /* 2059 * ACPI 6.0 states that current revision is 2 2060 * from acpi_table_rsdp definition: 2061 * Must be (0) for ACPI 1.0 or (2) for ACPI 2.0+ 2062 */ 2063 if (revision > 2) 2064 revision = 2; 2065 switch (revision) { 2066 case 2: 2067 /* 2068 * Use the XSDT unless BIOS is buggy and 2069 * claims to be rev 2 but has a null XSDT 2070 * address 2071 */ 2072 xsdt_addr = rsdp->XsdtPhysicalAddress; 2073 if (xsdt_addr != 0) 2074 break; 2075 /* FALLTHROUGH */ 2076 case 0: 2077 /* treat RSDP rev 0 as revision 1 internally */ 2078 revision = 1; 2079 /* FALLTHROUGH */ 2080 case 1: 2081 /* use the RSDT for rev 0/1 */ 2082 xsdt_addr = rsdp->RsdtPhysicalAddress; 2083 break; 2084 default: 2085 /* unknown revision */ 2086 revision = 0; 2087 break; 2088 } 2089 } 2090 if (revision == 0) 2091 return (NULL); 2092 2093 /* cache the XSDT info */ 2094 xsdt = (ACPI_TABLE_XSDT *)map_fw_table(xsdt_addr); 2095 len = (xsdt->Header.Length - sizeof (xsdt->Header)) / 2096 ((revision == 1) ? sizeof (uint32_t) : sizeof (uint64_t)); 2097 } 2098 2099 /* 2100 * Scan the table headers looking for a signature match 2101 */ 2102 for (n = 0; n < len; n++) { 2103 ACPI_TABLE_RSDT *rsdt = (ACPI_TABLE_RSDT *)xsdt; 2104 table_addr = (revision == 1) ? 
rsdt->TableOffsetEntry[n] : 2105 xsdt->TableOffsetEntry[n]; 2106 2107 if (table_addr == 0) 2108 continue; 2109 tp = map_fw_table(table_addr); 2110 if (strncmp(tp->Signature, signature, ACPI_NAME_SIZE) == 0) { 2111 return (tp); 2112 } 2113 } 2114 return (NULL); 2115 } 2116 2117 static void 2118 process_mcfg(ACPI_TABLE_MCFG *tp) 2119 { 2120 ACPI_MCFG_ALLOCATION *cfg_baap; 2121 char *cfg_baa_endp; 2122 int64_t ecfginfo[4]; 2123 2124 cfg_baap = (ACPI_MCFG_ALLOCATION *)((uintptr_t)tp + sizeof (*tp)); 2125 cfg_baa_endp = ((char *)tp) + tp->Header.Length; 2126 while ((char *)cfg_baap < cfg_baa_endp) { 2127 if (cfg_baap->Address != 0 && cfg_baap->PciSegment == 0) { 2128 ecfginfo[0] = cfg_baap->Address; 2129 ecfginfo[1] = cfg_baap->PciSegment; 2130 ecfginfo[2] = cfg_baap->StartBusNumber; 2131 ecfginfo[3] = cfg_baap->EndBusNumber; 2132 bsetprop(MCFG_PROPNAME, strlen(MCFG_PROPNAME), 2133 ecfginfo, sizeof (ecfginfo)); 2134 break; 2135 } 2136 cfg_baap++; 2137 } 2138 } 2139 2140 #ifndef __xpv 2141 static void 2142 process_madt_entries(ACPI_TABLE_MADT *tp, uint32_t *cpu_countp, 2143 uint32_t *cpu_possible_countp, uint32_t *cpu_apicid_array) 2144 { 2145 ACPI_SUBTABLE_HEADER *item, *end; 2146 uint32_t cpu_count = 0; 2147 uint32_t cpu_possible_count = 0; 2148 2149 /* 2150 * Determine number of CPUs and keep track of "final" APIC ID 2151 * for each CPU by walking through ACPI MADT processor list 2152 */ 2153 end = (ACPI_SUBTABLE_HEADER *)(tp->Header.Length + (uintptr_t)tp); 2154 item = (ACPI_SUBTABLE_HEADER *)((uintptr_t)tp + sizeof (*tp)); 2155 2156 while (item < end) { 2157 switch (item->Type) { 2158 case ACPI_MADT_TYPE_LOCAL_APIC: { 2159 ACPI_MADT_LOCAL_APIC *cpu = 2160 (ACPI_MADT_LOCAL_APIC *) item; 2161 2162 if (cpu->LapicFlags & ACPI_MADT_ENABLED) { 2163 if (cpu_apicid_array != NULL) 2164 cpu_apicid_array[cpu_count] = cpu->Id; 2165 cpu_count++; 2166 } 2167 cpu_possible_count++; 2168 break; 2169 } 2170 case ACPI_MADT_TYPE_LOCAL_X2APIC: { 2171 ACPI_MADT_LOCAL_X2APIC *cpu = 2172 
(ACPI_MADT_LOCAL_X2APIC *) item; 2173 2174 if (cpu->LapicFlags & ACPI_MADT_ENABLED) { 2175 if (cpu_apicid_array != NULL) 2176 cpu_apicid_array[cpu_count] = 2177 cpu->LocalApicId; 2178 cpu_count++; 2179 } 2180 cpu_possible_count++; 2181 break; 2182 } 2183 default: 2184 if (kbm_debug) 2185 bop_printf(NULL, "MADT type %d\n", item->Type); 2186 break; 2187 } 2188 2189 item = (ACPI_SUBTABLE_HEADER *)((uintptr_t)item + item->Length); 2190 } 2191 if (cpu_countp) 2192 *cpu_countp = cpu_count; 2193 if (cpu_possible_countp) 2194 *cpu_possible_countp = cpu_possible_count; 2195 } 2196 2197 static void 2198 process_madt(ACPI_TABLE_MADT *tp) 2199 { 2200 uint32_t cpu_count = 0; 2201 uint32_t cpu_possible_count = 0; 2202 uint32_t *cpu_apicid_array; /* x2APIC ID is 32bit! */ 2203 2204 if (tp != NULL) { 2205 /* count cpu's */ 2206 process_madt_entries(tp, &cpu_count, &cpu_possible_count, NULL); 2207 2208 cpu_apicid_array = (uint32_t *)do_bsys_alloc(NULL, NULL, 2209 cpu_count * sizeof (*cpu_apicid_array), MMU_PAGESIZE); 2210 if (cpu_apicid_array == NULL) 2211 bop_panic("Not enough memory for APIC ID array"); 2212 2213 /* copy IDs */ 2214 process_madt_entries(tp, NULL, NULL, cpu_apicid_array); 2215 2216 /* 2217 * Make boot property for array of "final" APIC IDs for each 2218 * CPU 2219 */ 2220 bsetprop(BP_CPU_APICID_ARRAY, strlen(BP_CPU_APICID_ARRAY), 2221 cpu_apicid_array, cpu_count * sizeof (*cpu_apicid_array)); 2222 } 2223 2224 /* 2225 * Check whether property plat-max-ncpus is already set. 2226 */ 2227 if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) { 2228 /* 2229 * Set plat-max-ncpus to number of maximum possible CPUs given 2230 * in MADT if it hasn't been set. 2231 * There's no formal way to detect max possible CPUs supported 2232 * by platform according to ACPI spec3.0b. So current CPU 2233 * hotplug implementation expects that all possible CPUs will 2234 * have an entry in MADT table and set plat-max-ncpus to number 2235 * of entries in MADT. 
2236 * With introducing of ACPI4.0, Maximum System Capability Table 2237 * (MSCT) provides maximum number of CPUs supported by platform. 2238 * If MSCT is unavailable, fall back to old way. 2239 */ 2240 if (tp != NULL) 2241 bsetpropsi(PLAT_MAX_NCPUS_NAME, cpu_possible_count); 2242 } 2243 2244 /* 2245 * Set boot property boot-max-ncpus to number of CPUs existing at 2246 * boot time. boot-max-ncpus is mainly used for optimization. 2247 */ 2248 if (tp != NULL) 2249 bsetpropsi(BOOT_MAX_NCPUS_NAME, cpu_count); 2250 2251 /* 2252 * User-set boot-ncpus overrides firmware count 2253 */ 2254 if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0) 2255 return; 2256 2257 /* 2258 * Set boot property boot-ncpus to number of active CPUs given in MADT 2259 * if it hasn't been set yet. 2260 */ 2261 if (tp != NULL) 2262 bsetpropsi(BOOT_NCPUS_NAME, cpu_count); 2263 } 2264 2265 static void 2266 process_srat(ACPI_TABLE_SRAT *tp) 2267 { 2268 ACPI_SUBTABLE_HEADER *item, *end; 2269 int i; 2270 int proc_num, mem_num; 2271 #pragma pack(1) 2272 struct { 2273 uint32_t domain; 2274 uint32_t apic_id; 2275 uint32_t sapic_id; 2276 } processor; 2277 struct { 2278 uint32_t domain; 2279 uint32_t x2apic_id; 2280 } x2apic; 2281 struct { 2282 uint32_t domain; 2283 uint64_t addr; 2284 uint64_t length; 2285 uint32_t flags; 2286 } memory; 2287 #pragma pack() 2288 char prop_name[30]; 2289 uint64_t maxmem = 0; 2290 2291 if (tp == NULL) 2292 return; 2293 2294 proc_num = mem_num = 0; 2295 end = (ACPI_SUBTABLE_HEADER *)(tp->Header.Length + (uintptr_t)tp); 2296 item = (ACPI_SUBTABLE_HEADER *)((uintptr_t)tp + sizeof (*tp)); 2297 while (item < end) { 2298 switch (item->Type) { 2299 case ACPI_SRAT_TYPE_CPU_AFFINITY: { 2300 ACPI_SRAT_CPU_AFFINITY *cpu = 2301 (ACPI_SRAT_CPU_AFFINITY *) item; 2302 2303 if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED)) 2304 break; 2305 processor.domain = cpu->ProximityDomainLo; 2306 for (i = 0; i < 3; i++) 2307 processor.domain += 2308 cpu->ProximityDomainHi[i] << ((i + 1) * 8); 2309 
processor.apic_id = cpu->ApicId; 2310 processor.sapic_id = cpu->LocalSapicEid; 2311 (void) snprintf(prop_name, 30, "acpi-srat-processor-%d", 2312 proc_num); 2313 bsetprop(prop_name, strlen(prop_name), &processor, 2314 sizeof (processor)); 2315 proc_num++; 2316 break; 2317 } 2318 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: { 2319 ACPI_SRAT_MEM_AFFINITY *mem = 2320 (ACPI_SRAT_MEM_AFFINITY *)item; 2321 2322 if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED)) 2323 break; 2324 memory.domain = mem->ProximityDomain; 2325 memory.addr = mem->BaseAddress; 2326 memory.length = mem->Length; 2327 memory.flags = mem->Flags; 2328 (void) snprintf(prop_name, 30, "acpi-srat-memory-%d", 2329 mem_num); 2330 bsetprop(prop_name, strlen(prop_name), &memory, 2331 sizeof (memory)); 2332 if ((mem->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && 2333 (memory.addr + memory.length > maxmem)) { 2334 maxmem = memory.addr + memory.length; 2335 } 2336 mem_num++; 2337 break; 2338 } 2339 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: { 2340 ACPI_SRAT_X2APIC_CPU_AFFINITY *x2cpu = 2341 (ACPI_SRAT_X2APIC_CPU_AFFINITY *) item; 2342 2343 if (!(x2cpu->Flags & ACPI_SRAT_CPU_ENABLED)) 2344 break; 2345 x2apic.domain = x2cpu->ProximityDomain; 2346 x2apic.x2apic_id = x2cpu->ApicId; 2347 (void) snprintf(prop_name, 30, "acpi-srat-processor-%d", 2348 proc_num); 2349 bsetprop(prop_name, strlen(prop_name), &x2apic, 2350 sizeof (x2apic)); 2351 proc_num++; 2352 break; 2353 } 2354 default: 2355 if (kbm_debug) 2356 bop_printf(NULL, "SRAT type %d\n", item->Type); 2357 break; 2358 } 2359 2360 item = (ACPI_SUBTABLE_HEADER *) 2361 (item->Length + (uintptr_t)item); 2362 } 2363 2364 /* 2365 * The maximum physical address calculated from the SRAT table is more 2366 * accurate than that calculated from the MSCT table. 
2367 */ 2368 if (maxmem != 0) { 2369 plat_dr_physmax = btop(maxmem); 2370 } 2371 } 2372 2373 static void 2374 process_slit(ACPI_TABLE_SLIT *tp) 2375 { 2376 2377 /* 2378 * Check the number of localities; if it's too huge, we just 2379 * return and locality enumeration code will handle this later, 2380 * if possible. 2381 * 2382 * Note that the size of the table is the square of the 2383 * number of localities; if the number of localities exceeds 2384 * UINT16_MAX, the table size may overflow an int when being 2385 * passed to bsetprop() below. 2386 */ 2387 if (tp->LocalityCount >= SLIT_LOCALITIES_MAX) 2388 return; 2389 2390 bsetprop(SLIT_NUM_PROPNAME, strlen(SLIT_NUM_PROPNAME), 2391 &tp->LocalityCount, sizeof (tp->LocalityCount)); 2392 bsetprop(SLIT_PROPNAME, strlen(SLIT_PROPNAME), &tp->Entry, 2393 tp->LocalityCount * tp->LocalityCount); 2394 } 2395 2396 static ACPI_TABLE_MSCT * 2397 process_msct(ACPI_TABLE_MSCT *tp) 2398 { 2399 int last_seen = 0; 2400 int proc_num = 0; 2401 ACPI_MSCT_PROXIMITY *item, *end; 2402 extern uint64_t plat_dr_options; 2403 2404 ASSERT(tp != NULL); 2405 2406 end = (ACPI_MSCT_PROXIMITY *)(tp->Header.Length + (uintptr_t)tp); 2407 for (item = (void *)((uintptr_t)tp + tp->ProximityOffset); 2408 item < end; 2409 item = (void *)(item->Length + (uintptr_t)item)) { 2410 /* 2411 * Sanity check according to section 5.2.19.1 of ACPI 4.0. 
2412 * Revision 1 2413 * Length 22 2414 */ 2415 if (item->Revision != 1 || item->Length != 22) { 2416 cmn_err(CE_CONT, 2417 "?boot: unknown proximity domain structure in MSCT " 2418 "with Revision(%d), Length(%d).\n", 2419 (int)item->Revision, (int)item->Length); 2420 return (NULL); 2421 } else if (item->RangeStart > item->RangeEnd) { 2422 cmn_err(CE_CONT, 2423 "?boot: invalid proximity domain structure in MSCT " 2424 "with RangeStart(%u), RangeEnd(%u).\n", 2425 item->RangeStart, item->RangeEnd); 2426 return (NULL); 2427 } else if (item->RangeStart != last_seen) { 2428 /* 2429 * Items must be organized in ascending order of the 2430 * proximity domain enumerations. 2431 */ 2432 cmn_err(CE_CONT, 2433 "?boot: invalid proximity domain structure in MSCT," 2434 " items are not orginized in ascending order.\n"); 2435 return (NULL); 2436 } 2437 2438 /* 2439 * If ProcessorCapacity is 0 then there would be no CPUs in this 2440 * domain. 2441 */ 2442 if (item->ProcessorCapacity != 0) { 2443 proc_num += (item->RangeEnd - item->RangeStart + 1) * 2444 item->ProcessorCapacity; 2445 } 2446 2447 last_seen = item->RangeEnd - item->RangeStart + 1; 2448 /* 2449 * Break out if all proximity domains have been processed. 2450 * Some BIOSes may have unused items at the end of MSCT table. 2451 */ 2452 if (last_seen > tp->MaxProximityDomains) { 2453 break; 2454 } 2455 } 2456 if (last_seen != tp->MaxProximityDomains + 1) { 2457 cmn_err(CE_CONT, 2458 "?boot: invalid proximity domain structure in MSCT, " 2459 "proximity domain count doesn't match.\n"); 2460 return (NULL); 2461 } 2462 2463 /* 2464 * Set plat-max-ncpus property if it hasn't been set yet. 2465 */ 2466 if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) { 2467 if (proc_num != 0) { 2468 bsetpropsi(PLAT_MAX_NCPUS_NAME, proc_num); 2469 } 2470 } 2471 2472 /* 2473 * Use Maximum Physical Address from the MSCT table as upper limit for 2474 * memory hot-adding by default. 
It may be overridden by value from 2475 * the SRAT table or the "plat-dr-physmax" boot option. 2476 */ 2477 plat_dr_physmax = btop(tp->MaxAddress + 1); 2478 2479 /* 2480 * Existence of MSCT implies CPU/memory hotplug-capability for the 2481 * platform. 2482 */ 2483 plat_dr_options |= PLAT_DR_FEATURE_CPU; 2484 plat_dr_options |= PLAT_DR_FEATURE_MEMORY; 2485 2486 return (tp); 2487 } 2488 2489 #else /* __xpv */ 2490 static void 2491 enumerate_xen_cpus() 2492 { 2493 processorid_t id, max_id; 2494 2495 /* 2496 * User-set boot-ncpus overrides enumeration 2497 */ 2498 if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0) 2499 return; 2500 2501 /* 2502 * Probe every possible virtual CPU id and remember the 2503 * highest id present; the count of CPUs is one greater 2504 * than this. This tacitly assumes at least cpu 0 is present. 2505 */ 2506 max_id = 0; 2507 for (id = 0; id < MAX_VIRT_CPUS; id++) 2508 if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0) 2509 max_id = id; 2510 2511 bsetpropsi(BOOT_NCPUS_NAME, max_id+1); 2512 2513 } 2514 #endif /* __xpv */ 2515 2516 static void 2517 build_firmware_properties(void) 2518 { 2519 ACPI_TABLE_HEADER *tp = NULL; 2520 2521 #ifndef __xpv 2522 if ((tp = find_fw_table(ACPI_SIG_MSCT)) != NULL) 2523 msct_ptr = process_msct((ACPI_TABLE_MSCT *)tp); 2524 else 2525 msct_ptr = NULL; 2526 2527 if ((tp = find_fw_table(ACPI_SIG_MADT)) != NULL) 2528 process_madt((ACPI_TABLE_MADT *)tp); 2529 2530 if ((srat_ptr = (ACPI_TABLE_SRAT *) 2531 find_fw_table(ACPI_SIG_SRAT)) != NULL) 2532 process_srat(srat_ptr); 2533 2534 if (slit_ptr = (ACPI_TABLE_SLIT *)find_fw_table(ACPI_SIG_SLIT)) 2535 process_slit(slit_ptr); 2536 2537 tp = find_fw_table(ACPI_SIG_MCFG); 2538 #else /* __xpv */ 2539 enumerate_xen_cpus(); 2540 if (DOMAIN_IS_INITDOMAIN(xen_info)) 2541 tp = find_fw_table(ACPI_SIG_MCFG); 2542 #endif /* __xpv */ 2543 if (tp != NULL) 2544 process_mcfg((ACPI_TABLE_MCFG *)tp); 2545 } 2546 2547 /* 2548 * fake up a boot property for deferred early console output 
2549 * this is used by both graphical boot and the (developer only) 2550 * USB serial console 2551 */ 2552 void * 2553 defcons_init(size_t size) 2554 { 2555 static char *p = NULL; 2556 2557 p = do_bsys_alloc(NULL, NULL, size, MMU_PAGESIZE); 2558 *p = 0; 2559 bsetprop("deferred-console-buf", strlen("deferred-console-buf") + 1, 2560 &p, sizeof (p)); 2561 return (p); 2562 } 2563 2564 /*ARGSUSED*/ 2565 int 2566 boot_compinfo(int fd, struct compinfo *cbp) 2567 { 2568 cbp->iscmp = 0; 2569 cbp->blksize = MAXBSIZE; 2570 return (0); 2571 } 2572 2573 #define BP_MAX_STRLEN 32 2574 2575 /* 2576 * Get value for given boot property 2577 */ 2578 int 2579 bootprop_getval(const char *prop_name, u_longlong_t *prop_value) 2580 { 2581 int boot_prop_len; 2582 char str[BP_MAX_STRLEN]; 2583 u_longlong_t value; 2584 2585 boot_prop_len = BOP_GETPROPLEN(bootops, prop_name); 2586 if (boot_prop_len < 0 || boot_prop_len > sizeof (str) || 2587 BOP_GETPROP(bootops, prop_name, str) < 0 || 2588 kobj_getvalue(str, &value) == -1) 2589 return (-1); 2590 2591 if (prop_value) 2592 *prop_value = value; 2593 2594 return (0); 2595 } 2596