1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2020 Joyent, Inc. 27 */ 28 29 30 #include <sys/types.h> 31 #include <sys/machparam.h> 32 #include <sys/x86_archext.h> 33 #include <sys/systm.h> 34 #include <sys/mach_mmu.h> 35 #include <sys/multiboot.h> 36 #include <sys/multiboot2.h> 37 #include <sys/multiboot2_impl.h> 38 #include <sys/sysmacros.h> 39 #include <sys/framebuffer.h> 40 #include <sys/sha1.h> 41 #include <util/string.h> 42 #include <util/strtolctype.h> 43 #include <sys/efi.h> 44 45 /* 46 * Compile time debug knob. We do not have any early mechanism to control it 47 * as the boot is the earliest mechanism we have, and we do not want to have 48 * it being switched on by default. 
49 */ 50 int dboot_debug = 0; 51 52 #if defined(__xpv) 53 54 #include <sys/hypervisor.h> 55 uintptr_t xen_virt_start; 56 pfn_t *mfn_to_pfn_mapping; 57 58 #else /* !__xpv */ 59 60 extern multiboot_header_t mb_header; 61 extern uint32_t mb2_load_addr; 62 extern int have_cpuid(void); 63 64 #endif /* !__xpv */ 65 66 #include <sys/inttypes.h> 67 #include <sys/bootinfo.h> 68 #include <sys/mach_mmu.h> 69 #include <sys/boot_console.h> 70 71 #include "dboot_asm.h" 72 #include "dboot_printf.h" 73 #include "dboot_xboot.h" 74 #include "dboot_elfload.h" 75 76 #define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2) 77 78 /* 79 * This file contains code that runs to transition us from either a multiboot 80 * compliant loader (32 bit non-paging) or a XPV domain loader to 81 * regular kernel execution. Its task is to setup the kernel memory image 82 * and page tables. 83 * 84 * The code executes as: 85 * - 32 bits under GRUB (for 32 or 64 bit Solaris) 86 * - a 32 bit program for the 32-bit PV hypervisor 87 * - a 64 bit program for the 64-bit PV hypervisor (at least for now) 88 * 89 * Under the PV hypervisor, we must create mappings for any memory beyond the 90 * initial start of day allocation (such as the kernel itself). 91 * 92 * When on the metal, the mapping between maddr_t and paddr_t is 1:1. 93 * Since we are running in real mode, so all such memory is accessible. 94 */ 95 96 /* 97 * Standard bits used in PTE (page level) and PTP (internal levels) 98 */ 99 x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER; 100 x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST; 101 102 /* 103 * This is the target addresses (physical) where the kernel text and data 104 * nucleus pages will be unpacked. On the hypervisor this is actually a 105 * virtual address. 
106 */ 107 paddr_t ktext_phys; 108 uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */ 109 110 static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */ 111 112 /* 113 * The stack is setup in assembler before entering startup_kernel() 114 */ 115 char stack_space[STACK_SIZE]; 116 117 /* 118 * Used to track physical memory allocation 119 */ 120 static paddr_t next_avail_addr = 0; 121 122 #if defined(__xpv) 123 /* 124 * Additional information needed for hypervisor memory allocation. 125 * Only memory up to scratch_end is mapped by page tables. 126 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so 127 * to derive a pfn from a pointer, you subtract mfn_base. 128 */ 129 130 static paddr_t scratch_end = 0; /* we can't write all of mem here */ 131 static paddr_t mfn_base; /* addr corresponding to mfn_list[0] */ 132 start_info_t *xen_info; 133 134 #else /* __xpv */ 135 136 /* 137 * If on the metal, then we have a multiboot loader. 138 */ 139 uint32_t mb_magic; /* magic from boot loader */ 140 uint32_t mb_addr; /* multiboot info package from loader */ 141 int multiboot_version; 142 multiboot_info_t *mb_info; 143 multiboot2_info_header_t *mb2_info; 144 int num_entries; /* mmap entry count */ 145 boolean_t num_entries_set; /* is mmap entry count set */ 146 uintptr_t load_addr; 147 static boot_framebuffer_t framebuffer __aligned(16); 148 static boot_framebuffer_t *fb; 149 150 /* can not be automatic variables because of alignment */ 151 static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID; 152 static efi_guid_t smbios = SMBIOS_TABLE_GUID; 153 static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID; 154 static efi_guid_t acpi1 = ACPI_10_TABLE_GUID; 155 #endif /* __xpv */ 156 157 /* 158 * This contains information passed to the kernel 159 */ 160 struct xboot_info boot_info __aligned(16); 161 struct xboot_info *bi; 162 163 /* 164 * Page table and memory stuff. 
165 */ 166 static paddr_t max_mem; /* maximum memory address */ 167 168 /* 169 * Information about processor MMU 170 */ 171 int amd64_support = 0; 172 int largepage_support = 0; 173 int pae_support = 0; 174 int pge_support = 0; 175 int NX_support = 0; 176 int PAT_support = 0; 177 178 /* 179 * Low 32 bits of kernel entry address passed back to assembler. 180 * When running a 64 bit kernel, the high 32 bits are 0xffffffff. 181 */ 182 uint32_t entry_addr_low; 183 184 /* 185 * Memlists for the kernel. We shouldn't need a lot of these. 186 */ 187 #define MAX_MEMLIST (50) 188 struct boot_memlist memlists[MAX_MEMLIST]; 189 uint_t memlists_used = 0; 190 struct boot_memlist pcimemlists[MAX_MEMLIST]; 191 uint_t pcimemlists_used = 0; 192 struct boot_memlist rsvdmemlists[MAX_MEMLIST]; 193 uint_t rsvdmemlists_used = 0; 194 195 /* 196 * This should match what's in the bootloader. It's arbitrary, but GRUB 197 * in particular has limitations on how much space it can use before it 198 * stops working properly. This should be enough. 199 */ 200 struct boot_modules modules[MAX_BOOT_MODULES]; 201 uint_t modules_used = 0; 202 203 #ifdef __xpv 204 /* 205 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry 206 * definition in Xen source. 207 */ 208 typedef struct { 209 uint32_t base_addr_low; 210 uint32_t base_addr_high; 211 uint32_t length_low; 212 uint32_t length_high; 213 uint32_t type; 214 } mmap_t; 215 216 /* 217 * There is 512KB of scratch area after the boot stack page. 218 * We'll use that for everything except the kernel nucleus pages which are too 219 * big to fit there and are allocated last anyway. 220 */ 221 #define MAXMAPS 100 222 static mmap_t map_buffer[MAXMAPS]; 223 #else 224 typedef mb_memory_map_t mmap_t; 225 #endif 226 227 /* 228 * Debugging macros 229 */ 230 uint_t prom_debug = 0; 231 uint_t map_debug = 0; 232 233 static char noname[2] = "-"; 234 235 /* 236 * Either hypervisor-specific or grub-specific code builds the initial 237 * memlists. 
This code does the sort/merge/link for final use. 238 */ 239 static void 240 sort_physinstall(void) 241 { 242 int i; 243 #if !defined(__xpv) 244 int j; 245 struct boot_memlist tmp; 246 247 /* 248 * Now sort the memlists, in case they weren't in order. 249 * Yeah, this is a bubble sort; small, simple and easy to get right. 250 */ 251 DBG_MSG("Sorting phys-installed list\n"); 252 for (j = memlists_used - 1; j > 0; --j) { 253 for (i = 0; i < j; ++i) { 254 if (memlists[i].addr < memlists[i + 1].addr) 255 continue; 256 tmp = memlists[i]; 257 memlists[i] = memlists[i + 1]; 258 memlists[i + 1] = tmp; 259 } 260 } 261 262 /* 263 * Merge any memlists that don't have holes between them. 264 */ 265 for (i = 0; i <= memlists_used - 1; ++i) { 266 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr) 267 continue; 268 269 if (prom_debug) 270 dboot_printf( 271 "merging mem segs %" PRIx64 "...%" PRIx64 272 " w/ %" PRIx64 "...%" PRIx64 "\n", 273 memlists[i].addr, 274 memlists[i].addr + memlists[i].size, 275 memlists[i + 1].addr, 276 memlists[i + 1].addr + memlists[i + 1].size); 277 278 memlists[i].size += memlists[i + 1].size; 279 for (j = i + 1; j < memlists_used - 1; ++j) 280 memlists[j] = memlists[j + 1]; 281 --memlists_used; 282 DBG(memlists_used); 283 --i; /* after merging we need to reexamine, so do this */ 284 } 285 #endif /* __xpv */ 286 287 if (prom_debug) { 288 dboot_printf("\nFinal memlists:\n"); 289 for (i = 0; i < memlists_used; ++i) { 290 dboot_printf("\t%d: addr=%" PRIx64 " size=%" 291 PRIx64 "\n", i, memlists[i].addr, memlists[i].size); 292 } 293 } 294 295 /* 296 * link together the memlists with native size pointers 297 */ 298 memlists[0].next = 0; 299 memlists[0].prev = 0; 300 for (i = 1; i < memlists_used; ++i) { 301 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1); 302 memlists[i].next = 0; 303 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i); 304 } 305 bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists; 306 
DBG(bi->bi_phys_install); 307 } 308 309 /* 310 * build bios reserved memlists 311 */ 312 static void 313 build_rsvdmemlists(void) 314 { 315 int i; 316 317 rsvdmemlists[0].next = 0; 318 rsvdmemlists[0].prev = 0; 319 for (i = 1; i < rsvdmemlists_used; ++i) { 320 rsvdmemlists[i].prev = 321 (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1); 322 rsvdmemlists[i].next = 0; 323 rsvdmemlists[i - 1].next = 324 (native_ptr_t)(uintptr_t)(rsvdmemlists + i); 325 } 326 bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists; 327 DBG(bi->bi_rsvdmem); 328 } 329 330 #if defined(__xpv) 331 332 /* 333 * halt on the hypervisor after a delay to drain console output 334 */ 335 __NORETURN void 336 dboot_halt(void) 337 { 338 uint_t i = 10000; 339 340 while (--i) 341 (void) HYPERVISOR_yield(); 342 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 343 /* never reached */ 344 for (;;) 345 ; 346 } 347 348 /* 349 * From a machine address, find the corresponding pseudo-physical address. 350 * Pseudo-physical address are contiguous and run from mfn_base in each VM. 351 * Machine addresses are the real underlying hardware addresses. 352 * These are needed for page table entries. Note that this routine is 353 * poorly protected. A bad value of "ma" will cause a page fault. 354 */ 355 paddr_t 356 ma_to_pa(maddr_t ma) 357 { 358 ulong_t pgoff = ma & MMU_PAGEOFFSET; 359 ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)]; 360 paddr_t pa; 361 362 if (pfn >= xen_info->nr_pages) 363 return (-(paddr_t)1); 364 pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff; 365 #ifdef DEBUG 366 if (ma != pa_to_ma(pa)) 367 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", " 368 "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa)); 369 #endif 370 return (pa); 371 } 372 373 /* 374 * From a pseudo-physical address, find the corresponding machine address. 
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	pfn_t pfn;
	ulong_t mfn;

	pfn = mmu_btop(pa - mfn_base);
	if (pa < mfn_base || pfn >= xen_info->nr_pages)
		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
#ifdef DEBUG
	/* cross-check against the reverse translation */
	if (mfn_to_pfn_mapping[mfn] != pfn)
		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
#endif
	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
}

#endif /* __xpv */

/*
 * Read the page table entry at the given index, honoring the active PTE
 * size: 64-bit entries when PAE is enabled, 32-bit entries otherwise.
 */
x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	if (pae_support)
		return (((x86pte_t *)(uintptr_t)table)[index]);
	return (((x86pte32_t *)(uintptr_t)table)[index]);
}

/*
 * Write a page table entry.  Under the hypervisor page tables are
 * write-protected, so the update must go through an mmu_update hypercall;
 * on bare metal we write the entry directly.
 */
/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
#ifdef __xpv
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		dboot_panic("HYPERVISOR_mmu_update() failed");
#else /* __xpv */
	uintptr_t tab_addr = (uintptr_t)table;

	if (pae_support)
		((x86pte_t *)tab_addr)[index] = pteval;
	else
		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
	/* an update to the live top-level table requires a TLB flush */
	if (level == top_level && level == 2)
		reload_cr3();
#endif /* __xpv */
}

/*
 * Allocate a new page table page and install a pointer to it via *pteval.
 * Returns the physical address of the new table.
 */
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);

	if (level == top_level && level == 2)
		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
	else
		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;

#ifdef __xpv
	/* Remove write permission to the new page table. */
	if (HYPERVISOR_update_va_mapping(new_table,
	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("HYP_update_va_mapping error");
#endif

	if (map_debug)
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	return (new_table);
}

/*
 * Return a pointer to the PTE at the given index within the given table.
 */
x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
}

/*
 * dump out the contents of page tables...
 *
 * Walks the table hierarchy iteratively, using save_table/save_index as an
 * explicit stack (via the recursion/next_entry labels) rather than true
 * recursion, and compresses runs of consecutive mappings in the output.
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	/*
	 * NOTE(review): pa/pa1 are uint_t, so physical addresses above 4GB
	 * are truncated in this debug output — confirm that is acceptable.
	 */
	uint_t pa, pa1;
#if !defined(__xpv)
#define	maddr_t paddr_t
#endif /* !__xpv */

	dboot_printf("Finished pagetables:\n");
	table = (char *)(uintptr_t)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, (void *)table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%x\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			/* descend into the next-lower table level */
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 255)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		/* pop back up to the parent table and continue its scan */
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}

/*
 * Add a mapping for the machine page at the given virtual address.
 */
static void
map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
{
	x86pte_t *ptep;
	x86pte_t pteval;

	pteval = ma | pte_bits;
	if (level > 0)
		pteval |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		pteval |= PT_GLOBAL;

	if (map_debug && ma != va)
		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)ma, (uint64_t)va, pteval, level);

#if defined(__xpv)
	/*
	 * see if we can avoid find_pte() on the hypervisor
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	ptep = find_pte(va, NULL, level, 0);

	/*
	 * When paravirtualized, we must use hypervisor calls to modify the
	 * PTE, since paging is active. On real hardware we just write to
	 * the pagetables which aren't in use yet.
	 */
#if defined(__xpv)
	ptep = ptep;	/* shut lint up */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
		    (uint64_t)va, level, (uint64_t)ma, pteval);
#else
	if (va < 1024 * 1024)
		pteval |= PT_NOCACHE;		/* for video RAM */
	if (pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
#endif
}

/*
 * Add a mapping for the physical page at the given virtual address.
 * On bare metal pa_to_ma() is the identity, so this is the same as
 * map_ma_at_va(); under the hypervisor it translates first.
 */
static void
map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
{
	map_ma_at_va(pa_to_ma(pa), va, level);
}

/*
 * This is called to remove start..end from the
 * possible range of PCI addresses.
 */
const uint64_t pci_lo_limit = 0x00100000ul;
const uint64_t pci_hi_limit = 0xfff00000ul;
static void
exclude_from_pci(uint64_t start, uint64_t end)
{
	int i;
	int j;
	struct boot_memlist *ml;

	for (i = 0; i < pcimemlists_used; ++i) {
		ml = &pcimemlists[i];

		/* delete the entire range? */
		if (start <= ml->addr && ml->addr + ml->size <= end) {
			--pcimemlists_used;
			for (j = i; j < pcimemlists_used; ++j)
				pcimemlists[j] = pcimemlists[j + 1];
			--i;	/* to revisit the new one at this index */
		}

		/* split a range? */
		else if (ml->addr < start && end < ml->addr + ml->size) {

			++pcimemlists_used;
			if (pcimemlists_used > MAX_MEMLIST)
				dboot_panic("too many pcimemlists");

			/* open a slot after i, then trim both halves */
			for (j = pcimemlists_used - 1; j > i; --j)
				pcimemlists[j] = pcimemlists[j - 1];
			ml->size = start - ml->addr;

			++ml;
			ml->size = (ml->addr + ml->size) - end;
			ml->addr = end;
			++i;	/* skip on to next one */
		}

		/* cut memory off the start? */
		else if (ml->addr < end && end < ml->addr + ml->size) {
			ml->size -= end - ml->addr;
			ml->addr = end;
		}

		/* cut memory off the end? */
		else if (ml->addr <= start && start < ml->addr + ml->size) {
			ml->size = start - ml->addr;
		}
	}
}

/*
 * During memory allocation, find the highest address not used yet.
 */
static void
check_higher(paddr_t a)
{
	if (a < next_avail_addr)
		return;
	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
	DBG(next_avail_addr);
}

/*
 * Return the number of entries in the loader-supplied memory map,
 * counting them (once) for multiboot 1 or asking the multiboot 2
 * helpers.  The count is cached in num_entries/num_entries_set.
 */
static int
dboot_loader_mmap_entries(void)
{
#if !defined(__xpv)
	if (num_entries_set == B_TRUE)
		return (num_entries);

	switch (multiboot_version) {
	case 1:
		DBG(mb_info->flags);
		/* flag bit 6 indicates a valid mmap_addr/mmap_length pair */
		if (mb_info->flags & 0x40) {
			mb_memory_map_t *mmap;
			caddr32_t mmap_addr;

			DBG(mb_info->mmap_addr);
			DBG(mb_info->mmap_length);
			check_higher(mb_info->mmap_addr + mb_info->mmap_length);

			/*
			 * Entries are variable-sized; each one's size field
			 * excludes the size field itself.
			 */
			for (mmap_addr = mb_info->mmap_addr;
			    mmap_addr < mb_info->mmap_addr +
			    mb_info->mmap_length;
			    mmap_addr += mmap->size + sizeof (mmap->size)) {
				mmap = (mb_memory_map_t *)(uintptr_t)mmap_addr;
				++num_entries;
			}

			num_entries_set = B_TRUE;
		}
		break;
	case 2:
		/* prefer the EFI memory map when the loader provided one */
		num_entries = dboot_multiboot2_efi_mmap_nentries(mb2_info);
		if (num_entries == 0)
			num_entries = dboot_multiboot2_mmap_nentries(mb2_info);
		if (num_entries == 0)
			dboot_panic("No memory map?\n");
		num_entries_set = B_TRUE;
		break;
	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (num_entries);
#else
	return (MAXMAPS);
#endif
}

#if !defined(__xpv)
static uint32_t
dboot_efi_to_smap_type(int index, uint32_t type)
{
	uint64_t addr;

	/*
	 * ACPI 6.1 tells the lower memory should be reported as
	 * normal memory, so we enforce page 0 type even as
	 * vmware maps it as acpi reclaimable.
	 */
	if (dboot_multiboot2_efi_mmap_get_base(mb2_info, index, &addr)) {
		if (addr == 0)
			return (1);
	}

	/*
	 * Translate UEFI memory types to SMAP types.
	 * See "ACPI Specification Release 6.5 Errata A"
	 * Table 15-6 (page 785), UEFI Memory Types and mapping to ACPI address
	 * range types.
	 */

	switch (type) {
	case EfiLoaderCode:
	case EfiLoaderData:
	case EfiBootServicesCode:
	case EfiBootServicesData:
	case EfiConventionalMemory:
		return (1);	/* usable RAM */
	case EfiReservedMemoryType:
	case EfiRuntimeServicesCode:
	case EfiRuntimeServicesData:
	case EfiMemoryMappedIO:
	case EfiMemoryMappedIOPortSpace:
	case EfiPalCode:
	case EfiUnusableMemory:
		return (2);	/* reserved */
	case EfiACPIReclaimMemory:
		return (3);	/* ACPI reclaimable */
	case EfiACPIMemoryNVS:
		return (4);	/* ACPI NVS */
	}

	/* unknown types are treated as reserved */
	return (2);
}
#endif

/*
 * Return the SMAP type of memory map entry "index", walking the
 * multiboot 1 entry list or querying the multiboot 2 helpers
 * (EFI map first, then the BIOS-style map).
 */
static uint32_t
dboot_loader_mmap_get_type(int index)
{
#if !defined(__xpv)
	mb_memory_map_t *mp, *mpend;
	uint32_t type;
	int i;

	switch (multiboot_version) {
	case 1:
		mp = (mb_memory_map_t *)(uintptr_t)mb_info->mmap_addr;
		mpend = (mb_memory_map_t *)(uintptr_t)
		    (mb_info->mmap_addr + mb_info->mmap_length);

		/* variable-sized entries: step through to the index'th one */
		for (i = 0; mp < mpend && i != index; i++)
			mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
			    sizeof (mp->size));
		if (mp >= mpend) {
			dboot_panic("dboot_loader_mmap_get_type(): index "
			    "out of bounds: %d\n", index);
		}
		return (mp->type);

	case 2:
		if (dboot_multiboot2_efi_mmap_get_type(mb2_info, index, &type))
			return (dboot_efi_to_smap_type(index, type));

		if (dboot_multiboot2_mmap_get_type(mb2_info, index, &type))
			return (type);

		dboot_panic("Can not get memory type for %d\n", index);

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
#else
	return (map_buffer[index].type);
#endif
}

/*
 * Return the base address of memory map entry "index".
 */
static uint64_t
dboot_loader_mmap_get_base(int index)
{
#if !defined(__xpv)
	mb_memory_map_t *mp, *mpend;
	uint64_t base;
	int i;

	switch (multiboot_version) {
	case 1:
		mp = (mb_memory_map_t *)mb_info->mmap_addr;
		mpend = (mb_memory_map_t *)
		    (mb_info->mmap_addr + mb_info->mmap_length);

		/* variable-sized entries: step through to the index'th one */
		for (i = 0; mp < mpend && i != index; i++)
			mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
			    sizeof (mp->size));
		if (mp >= mpend) {
			dboot_panic("dboot_loader_mmap_get_base(): index "
			    "out of bounds: %d\n", index);
		}
		/* base is split across two 32-bit fields */
		return (((uint64_t)mp->base_addr_high << 32) +
		    (uint64_t)mp->base_addr_low);

	case 2:
		if (dboot_multiboot2_efi_mmap_get_base(mb2_info, index, &base))
			return (base);

		if (dboot_multiboot2_mmap_get_base(mb2_info, index, &base))
			return (base);

		dboot_panic("Can not get memory address for %d\n", index);

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
#else
	return (((uint64_t)map_buffer[index].base_addr_high << 32) +
	    (uint64_t)map_buffer[index].base_addr_low);
#endif
}

/*
 * Return the length of memory map entry "index".
 */
static uint64_t
dboot_loader_mmap_get_length(int index)
{
#if !defined(__xpv)
	mb_memory_map_t *mp, *mpend;
	uint64_t length;
	int i;

	switch (multiboot_version) {
	case 1:
		mp = (mb_memory_map_t *)mb_info->mmap_addr;
		mpend = (mb_memory_map_t *)
		    (mb_info->mmap_addr + mb_info->mmap_length);

		/* variable-sized entries: step through to the index'th one */
		for (i = 0; mp < mpend && i != index; i++)
			mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
			    sizeof (mp->size));
		if (mp >= mpend) {
			dboot_panic("dboot_loader_mmap_get_length(): index "
			    "out of bounds: %d\n", index);
		}
		/* length is split across two 32-bit fields */
		return (((uint64_t)mp->length_high << 32) +
		    (uint64_t)mp->length_low);

	case 2:
		if (dboot_multiboot2_efi_mmap_get_length(mb2_info,
		    index, &length))
			return (length);

		if (dboot_multiboot2_mmap_get_length(mb2_info,
		    index, &length))
			return (length);

		dboot_panic("Can not get memory length for %d\n", index);

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
#else
	return (((uint64_t)map_buffer[index].length_high << 32) +
	    (uint64_t)map_buffer[index].length_low);
#endif
}

/*
 * Build the list of PCI-usable address ranges: start from the full
 * pci_lo_limit..pci_hi_limit window and punch out every range the
 * loader's memory map reports, then link the survivors for the kernel.
 */
static void
build_pcimemlists(void)
{
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint64_t start;
	uint64_t end;
	int i, num;

	if (prom_debug)
		dboot_printf("building pcimemlists:\n");
	/*
	 * initialize
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	num = dboot_loader_mmap_entries();
	/*
	 * Fill in PCI memlists.
	 */
	for (i = 0; i < num; ++i) {
		start = dboot_loader_mmap_get_base(i);
		end = start + dboot_loader_mmap_get_length(i);

		if (prom_debug)
			dboot_printf("\ttype: %d %" PRIx64 "..%"
			    PRIx64 "\n", dboot_loader_mmap_get_type(i),
			    start, end);

		/*
		 * page align start and end
		 */
		start = (start + page_offset) & ~page_offset;
		end &= ~page_offset;
		if (end <= start)
			continue;

		exclude_from_pci(start, end);
	}

	/*
	 * Finish off the pcimemlist
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
			    PRIx64 "\n", pcimemlists[i].addr,
			    pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	/* link the entries into a doubly-linked list for the kernel */
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}

#if defined(__xpv)
/*
 * Initialize memory allocator stuff from hypervisor-supplied start info.
 */
static void
init_mem_alloc(void)
{
	int local;	/* variables needed to find start region */
	paddr_t scratch_start;
	xen_memory_map_t map;

	DBG_MSG("Entered init_mem_alloc()\n");

	/*
	 * Free memory follows the stack. There's at least 512KB of scratch
	 * space, rounded up to at least 2Mb alignment. That should be enough
	 * for the page tables we'll need to build. The nucleus memory is
	 * allocated last and will be outside the addressable range. We'll
	 * switch to new page tables before we unpack the kernel
	 */
	/* &local approximates the current stack pointer */
	scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
	DBG(scratch_start);
	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
	DBG(scratch_end);

	/*
	 * For paranoia, leave some space between hypervisor data and ours.
	 * Use 500 instead of 512.
	 */
	next_avail_addr = scratch_end - 500 * 1024;
	DBG(next_avail_addr);

	/*
	 * The domain builder gives us at most 1 module
	 */
	DBG(xen_info->mod_len);
	if (xen_info->mod_len > 0) {
		DBG(xen_info->mod_start);
		modules[0].bm_addr =
		    (native_ptr_t)(uintptr_t)xen_info->mod_start;
		modules[0].bm_size = xen_info->mod_len;
		bi->bi_module_cnt = 1;
		bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
	} else {
		bi->bi_module_cnt = 0;
		bi->bi_modules = (native_ptr_t)(uintptr_t)NULL;
	}
	DBG(bi->bi_module_cnt);
	DBG(bi->bi_modules);

	DBG(xen_info->mfn_list);
	DBG(xen_info->nr_pages);
	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
	DBG(max_mem);

	/*
	 * Using pseudo-physical addresses, so only 1 memlist element
	 */
	memlists[0].addr = 0;
	DBG(memlists[0].addr);
	memlists[0].size = max_mem;
	DBG(memlists[0].size);
	memlists_used = 1;
	DBG(memlists_used);

	/*
	 * finish building physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved memlists
	 */
	build_rsvdmemlists();

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * build PCI Memory list
		 */
		map.nr_entries = MAXMAPS;
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(map.buffer, map_buffer);
		if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
			dboot_panic("getting XENMEM_machine_memory_map failed");
		build_pcimemlists();
	}
}

#else /* !__xpv */

/*
 * Multiboot 1 provides no framebuffer tag; record that for console setup.
 */
static void
dboot_multiboot1_xboot_consinfo(void)
{
	fb->framebuffer = 0;
}

/*
 * Locate the multiboot 2 framebuffer tag (if any) for console setup.
 */
static void
dboot_multiboot2_xboot_consinfo(void)
{
	multiboot_tag_framebuffer_t *fbtag;
	fbtag = dboot_multiboot2_find_tag(mb2_info,
	    MULTIBOOT_TAG_TYPE_FRAMEBUFFER);
	fb->framebuffer = (uint64_t)(uintptr_t)fbtag;
}

/*
 * Return the number of boot modules the loader passed in.
 */
static int
dboot_multiboot_modcount(void)
{
	switch (multiboot_version) {
	case 1:
		return (mb_info->mods_count);

	case 2:
		return (dboot_multiboot2_modcount(mb2_info));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

/*
 * Return the start address of boot module "index".
 */
static uint32_t
dboot_multiboot_modstart(int index)
{
	switch (multiboot_version) {
	case 1:
		return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);

	case 2:
		return (dboot_multiboot2_modstart(mb2_info, index));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

/*
 * Return the end address of boot module "index".
 */
static uint32_t
dboot_multiboot_modend(int index)
{
	switch (multiboot_version) {
	case 1:
		return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);

	case 2:
		return (dboot_multiboot2_modend(mb2_info, index));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

/*
 * Return the command line string of boot module "index".
 */
static char *
dboot_multiboot_modcmdline(int index)
{
	switch (multiboot_version) {
	case 1:
		return ((char *)((mb_module_t *)
		    mb_info->mods_addr)[index].mod_name);

	case 2:
		return (dboot_multiboot2_modcmdline(mb2_info, index));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

/*
 * Find the modules used by console setup.
 * Since we need the console to print early boot messages, the console is set up
 * before anything else and therefore we need to pick up the needed modules.
 *
 * Note, we just will search for and if found, will pass the modules
 * to console setup, the proper module list processing will happen later.
 * Currently used modules are boot environment and console font.
 */
static void
dboot_find_console_modules(void)
{
	int i, modcount;
	uint32_t mod_start, mod_end;
	char *cmdline;

	modcount = dboot_multiboot_modcount();
	bi->bi_module_cnt = 0;
	for (i = 0; i < modcount; ++i) {
		cmdline = dboot_multiboot_modcmdline(i);
		if (cmdline == NULL)
			continue;

		/* only font and environment modules are of interest here */
		if (strstr(cmdline, "type=console-font") != NULL)
			modules[bi->bi_module_cnt].bm_type = BMT_FONT;
		else if (strstr(cmdline, "type=environment") != NULL)
			modules[bi->bi_module_cnt].bm_type = BMT_ENV;
		else
			continue;

		mod_start = dboot_multiboot_modstart(i);
		mod_end = dboot_multiboot_modend(i);
		modules[bi->bi_module_cnt].bm_addr =
		    (native_ptr_t)(uintptr_t)mod_start;
		modules[bi->bi_module_cnt].bm_size = mod_end - mod_start;
		modules[bi->bi_module_cnt].bm_name =
		    (native_ptr_t)(uintptr_t)NULL;
		modules[bi->bi_module_cnt].bm_hash =
		    (native_ptr_t)(uintptr_t)NULL;
		bi->bi_module_cnt++;
	}
	if (bi->bi_module_cnt != 0)
		bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
}

/*
 * Fetch the loader's basic (lower/upper) memory sizes.  Returns B_TRUE
 * and fills *lower/*upper when the information is available.
 */
static boolean_t
dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
{
	boolean_t rv = B_FALSE;

	switch (multiboot_version) {
	case 1:
		/* flag bit 0 indicates mem_lower/mem_upper are valid */
		if (mb_info->flags & 0x01) {
			*lower = mb_info->mem_lower;
			*upper = mb_info->mem_upper;
			rv = B_TRUE;
		}
		break;

	case 2:
		return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (rv);
}

/*
 * Convert a single ASCII hex digit to its value.  Note there is no upper
 * bound check on each range, so malformed (non-hex) characters above '0'
 * are not rejected — input is expected to be well-formed hex.
 */
static uint8_t
dboot_a2h(char v)
{
	if (v >= 'a')
		return (v - 'a' + 0xa);
	else if (v >= 'A')
		return (v - 'A' + 0xa);
	else if (v >= '0')
		return (v - '0');
	else
		dboot_panic("bad ASCII hex character %c\n",
v); 1230 1231 return (0); 1232 } 1233 1234 static void 1235 digest_a2h(const char *ascii, uint8_t *digest) 1236 { 1237 unsigned int i; 1238 1239 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1240 digest[i] = dboot_a2h(ascii[i * 2]) << 4; 1241 digest[i] |= dboot_a2h(ascii[i * 2 + 1]); 1242 } 1243 } 1244 1245 /* 1246 * Generate a SHA-1 hash of the first len bytes of image, and compare it with 1247 * the ASCII-format hash found in the 40-byte buffer at ascii. If they 1248 * match, return 0, otherwise -1. This works only for images smaller than 1249 * 4 GB, which should not be a problem. 1250 */ 1251 static int 1252 check_image_hash(uint_t midx) 1253 { 1254 const char *ascii; 1255 const void *image; 1256 size_t len; 1257 SHA1_CTX ctx; 1258 uint8_t digest[SHA1_DIGEST_LENGTH]; 1259 uint8_t baseline[SHA1_DIGEST_LENGTH]; 1260 unsigned int i; 1261 1262 ascii = (const char *)(uintptr_t)modules[midx].bm_hash; 1263 image = (const void *)(uintptr_t)modules[midx].bm_addr; 1264 len = (size_t)modules[midx].bm_size; 1265 1266 digest_a2h(ascii, baseline); 1267 1268 SHA1Init(&ctx); 1269 SHA1Update(&ctx, image, len); 1270 SHA1Final(digest, &ctx); 1271 1272 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1273 if (digest[i] != baseline[i]) 1274 return (-1); 1275 } 1276 1277 return (0); 1278 } 1279 1280 static const char * 1281 type_to_str(boot_module_type_t type) 1282 { 1283 switch (type) { 1284 case BMT_ROOTFS: 1285 return ("rootfs"); 1286 case BMT_FILE: 1287 return ("file"); 1288 case BMT_HASH: 1289 return ("hash"); 1290 case BMT_ENV: 1291 return ("environment"); 1292 case BMT_FONT: 1293 return ("console-font"); 1294 default: 1295 return ("unknown"); 1296 } 1297 } 1298 1299 static void 1300 check_images(void) 1301 { 1302 uint_t i; 1303 char displayhash[SHA1_ASCII_LENGTH + 1]; 1304 1305 for (i = 0; i < modules_used; i++) { 1306 if (prom_debug) { 1307 dboot_printf("module #%d: name %s type %s " 1308 "addr %lx size %lx\n", 1309 i, (char *)(uintptr_t)modules[i].bm_name, 1310 
type_to_str(modules[i].bm_type), 1311 (ulong_t)modules[i].bm_addr, 1312 (ulong_t)modules[i].bm_size); 1313 } 1314 1315 if (modules[i].bm_type == BMT_HASH || 1316 modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) { 1317 DBG_MSG("module has no hash; skipping check\n"); 1318 continue; 1319 } 1320 (void) memcpy(displayhash, 1321 (void *)(uintptr_t)modules[i].bm_hash, 1322 SHA1_ASCII_LENGTH); 1323 displayhash[SHA1_ASCII_LENGTH] = '\0'; 1324 if (prom_debug) { 1325 dboot_printf("checking expected hash [%s]: ", 1326 displayhash); 1327 } 1328 1329 if (check_image_hash(i) != 0) 1330 dboot_panic("hash mismatch!\n"); 1331 else 1332 DBG_MSG("OK\n"); 1333 } 1334 } 1335 1336 /* 1337 * Determine the module's starting address, size, name, and type, and fill the 1338 * boot_modules structure. This structure is used by the bop code, except for 1339 * hashes which are checked prior to transferring control to the kernel. 1340 */ 1341 static void 1342 process_module(int midx) 1343 { 1344 uint32_t mod_start = dboot_multiboot_modstart(midx); 1345 uint32_t mod_end = dboot_multiboot_modend(midx); 1346 char *cmdline = dboot_multiboot_modcmdline(midx); 1347 char *p, *q; 1348 1349 check_higher(mod_end); 1350 if (prom_debug) { 1351 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n", 1352 midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end); 1353 } 1354 1355 if (mod_start > mod_end) { 1356 dboot_panic("module #%d: module start address 0x%lx greater " 1357 "than end address 0x%lx", midx, 1358 (ulong_t)mod_start, (ulong_t)mod_end); 1359 } 1360 1361 /* 1362 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes 1363 * the address of the last valid byte in a module plus 1 as mod_end. 1364 * This is of course a bug; the multiboot specification simply states 1365 * that mod_start and mod_end "contain the start and end addresses of 1366 * the boot module itself" which is pretty obviously not what GRUB is 1367 * doing. 
However, fixing it requires that not only this code be 1368 * changed but also that other code consuming this value and values 1369 * derived from it be fixed, and that the kernel and GRUB must either 1370 * both have the bug or neither. While there are a lot of combinations 1371 * that will work, there are also some that won't, so for simplicity 1372 * we'll just cope with the bug. That means we won't actually hash the 1373 * byte at mod_end, and we will expect that mod_end for the hash file 1374 * itself is one greater than some multiple of 41 (40 bytes of ASCII 1375 * hash plus a newline for each module). We set bm_size to the true 1376 * correct number of bytes in each module, achieving exactly this. 1377 */ 1378 1379 modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start; 1380 modules[midx].bm_size = mod_end - mod_start; 1381 modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline; 1382 modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL; 1383 modules[midx].bm_type = BMT_FILE; 1384 1385 if (cmdline == NULL) { 1386 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname; 1387 return; 1388 } 1389 1390 p = cmdline; 1391 modules[midx].bm_name = 1392 (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r"); 1393 1394 while (p != NULL) { 1395 q = strsep(&p, " \t\f\n\r"); 1396 if (strncmp(q, "name=", 5) == 0) { 1397 if (q[5] != '\0' && !isspace(q[5])) { 1398 modules[midx].bm_name = 1399 (native_ptr_t)(uintptr_t)(q + 5); 1400 } 1401 continue; 1402 } 1403 1404 if (strncmp(q, "type=", 5) == 0) { 1405 if (q[5] == '\0' || isspace(q[5])) 1406 continue; 1407 q += 5; 1408 if (strcmp(q, "rootfs") == 0) { 1409 modules[midx].bm_type = BMT_ROOTFS; 1410 } else if (strcmp(q, "hash") == 0) { 1411 modules[midx].bm_type = BMT_HASH; 1412 } else if (strcmp(q, "environment") == 0) { 1413 modules[midx].bm_type = BMT_ENV; 1414 } else if (strcmp(q, "console-font") == 0) { 1415 modules[midx].bm_type = BMT_FONT; 1416 } else if (strcmp(q, "file") != 0) { 1417 dboot_printf("\tmodule #%d: 
unknown module " 1418 "type '%s'; defaulting to 'file'\n", 1419 midx, q); 1420 } 1421 continue; 1422 } 1423 1424 if (strncmp(q, "hash=", 5) == 0) { 1425 if (q[5] != '\0' && !isspace(q[5])) { 1426 modules[midx].bm_hash = 1427 (native_ptr_t)(uintptr_t)(q + 5); 1428 } 1429 continue; 1430 } 1431 1432 dboot_printf("ignoring unknown option '%s'\n", q); 1433 } 1434 } 1435 1436 /* 1437 * Backward compatibility: if there are exactly one or two modules, both 1438 * of type 'file' and neither with an embedded hash value, we have been 1439 * given the legacy style modules. In this case we need to treat the first 1440 * module as a rootfs and the second as a hash referencing that module. 1441 * Otherwise, even if the configuration is invalid, we assume that the 1442 * operator knows what he's doing or at least isn't being bitten by this 1443 * interface change. 1444 */ 1445 static void 1446 fixup_modules(void) 1447 { 1448 if (modules_used == 0 || modules_used > 2) 1449 return; 1450 1451 if (modules[0].bm_type != BMT_FILE || 1452 (modules_used > 1 && modules[1].bm_type != BMT_FILE)) { 1453 return; 1454 } 1455 1456 if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL || 1457 (modules_used > 1 && 1458 modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL)) { 1459 return; 1460 } 1461 1462 modules[0].bm_type = BMT_ROOTFS; 1463 if (modules_used > 1) { 1464 modules[1].bm_type = BMT_HASH; 1465 modules[1].bm_name = modules[0].bm_name; 1466 } 1467 } 1468 1469 /* 1470 * For modules that do not have assigned hashes but have a separate hash module, 1471 * find the assigned hash module and set the primary module's bm_hash to point 1472 * to the hash data from that module. We will then ignore modules of type 1473 * BMT_HASH from this point forward. 
 */
static void
assign_module_hashes(void)
{
	uint_t i, j;

	/* For each unhashed, non-hash module, look for its hash module. */
	for (i = 0; i < modules_used; i++) {
		if (modules[i].bm_type == BMT_HASH ||
		    modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
			continue;
		}

		/* Hash modules are matched to their target by name. */
		for (j = 0; j < modules_used; j++) {
			if (modules[j].bm_type != BMT_HASH ||
			    strcmp((char *)(uintptr_t)modules[j].bm_name,
			    (char *)(uintptr_t)modules[i].bm_name) != 0) {
				continue;
			}

			/*
			 * A valid hash module must hold at least one
			 * 40-character ASCII SHA-1 string.
			 */
			if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
				dboot_printf("Short hash module of length "
				    "0x%lx bytes; ignoring\n",
				    (ulong_t)modules[j].bm_size);
			} else {
				modules[i].bm_hash = modules[j].bm_addr;
			}
			break;
		}
	}
}

/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 */
static void
dboot_process_modules(void)
{
	int i, modcount;
	extern char _end[];

	DBG_MSG("\nFinding Modules\n");
	modcount = dboot_multiboot_modcount();
	if (modcount > MAX_BOOT_MODULES) {
		dboot_panic("Too many modules (%d) -- the maximum is %d.",
		    modcount, MAX_BOOT_MODULES);
	}
	/*
	 * search the modules to find the last used address
	 * we'll build the module list while we're walking through here
	 */
	check_higher((paddr_t)(uintptr_t)&_end);
	for (i = 0; i < modcount; ++i) {
		process_module(i);
		modules_used++;
	}
	bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = modcount;
	DBG(bi->bi_module_cnt);

	/* Legacy-layout fixup, then hash assignment, then verification. */
	fixup_modules();
	assign_module_hashes();
	check_images();
}

/*
 * We then build the phys_install memlist from the multiboot information.
 */
static void
dboot_process_mmap(void)
{
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint32_t lower, upper, type, t;
	int i, mmap_entries;

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	num_entries = 0;
	num_entries_set = B_FALSE;
	max_mem = 0;
	t = 0;
	if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
		struct boot_memlist *mlist;
		uint_t *indexp;

		for (i = 0; i < mmap_entries; i++) {
			start = dboot_loader_mmap_get_base(i);
			end = start + dboot_loader_mmap_get_length(i);
			type = dboot_loader_mmap_get_type(i);

			if (prom_debug)
				dboot_printf("\ttype: %u %" PRIx64 "..%"
				    PRIx64 "\n", type, start, end);

			/*
			 * page align start and end; usable RAM shrinks
			 * inward (start rounds up, end rounds down)
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			/*
			 * only type 1 is usable RAM; type 2 is reserved;
			 * anything else is ignored
			 */
			switch (type) {
			case 1:
				if (end > max_mem)
					max_mem = end;
				mlist = memlists;
				indexp = &memlists_used;
				break;
			case 2:
				mlist = rsvdmemlists;
				indexp = &rsvdmemlists_used;
				break;
			default:
				continue;
			}

			if (memlists_used > MAX_MEMLIST)
				dboot_panic("too many memlists");
			if (rsvdmemlists_used > MAX_MEMLIST)
				dboot_panic("too many rsvdmemlists");

			/*
			 * Coalesce with the current entry when this range
			 * is the same type and physically contiguous with
			 * it.
			 */
			if (mlist[*indexp].size != 0 &&
			    type == t &&
			    (mlist[*indexp].addr +
			    mlist[*indexp].size) == start) {
				mlist[*indexp].size =
				    end - mlist[*indexp].addr;
				continue;
			}
			/*
			 * do we need new entry?
			 *
			 * NOTE(review): the bound check below looks
			 * off-by-one -- it permits a store at index
			 * MAX_MEMLIST; confirm against the declared
			 * sizes of memlists[]/rsvdmemlists[].
			 */
			if (mlist[*indexp].size != 0) {
				*indexp = *indexp + 1;
				if (*indexp > MAX_MEMLIST)
					continue;
			}

			t = type;
			mlist[*indexp].addr = start;
			mlist[*indexp].size = end - start;
		}

		/* Account for the trailing in-progress entry, if any. */
		if (memlists[memlists_used].size != 0) {
			memlists_used++;
		}
		if (rsvdmemlists[rsvdmemlists_used].size != 0) {
			rsvdmemlists_used++;
		}

		if (prom_debug) {
			for (i = 0; i < memlists_used; i++) {
				dboot_printf("memlists[%u] %"
				    PRIx64 "..%" PRIx64 "\n",
				    i,
				    memlists[i].addr,
				    memlists[i].size);
			}
			for (i = 0; i < rsvdmemlists_used; i++) {
				dboot_printf("rsvdmemlists[%u] %"
				    PRIx64 "..%" PRIx64 "\n",
				    i,
				    rsvdmemlists[i].addr,
				    rsvdmemlists[i].size);
			}
		}

		build_pcimemlists();
	} else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
		/*
		 * No memory map; fall back to the basic lower/upper
		 * memory figures (in KB): conventional memory below
		 * 640K and extended memory above 1M.
		 */
		DBG(lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = lower * 1024;
		++memlists_used;
		DBG(upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = upper * 1024;
		++memlists_used;

		/*
		 * Old platform - assume I/O space at the end of memory.
		 */
		pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
		pcimemlists[0].next = 0;
		pcimemlists[0].prev = 0;
		bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
		DBG(bi->bi_pcimem);
	} else {
		dboot_panic("No memory info from boot loader!!!");
	}

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved mem lists
	 */
	build_rsvdmemlists();
}

/*
 * The highest address is used as the starting point for dboot's simple
 * memory allocator.
1687 * 1688 * Finding the highest address in case of Multiboot 1 protocol is 1689 * quite painful in the sense that some information provided by 1690 * the multiboot info structure points to BIOS data, and some to RAM. 1691 * 1692 * The module list was processed and checked already by dboot_process_modules(), 1693 * so we will check the command line string and the memory map. 1694 * 1695 * This list of to be checked items is based on our current knowledge of 1696 * allocations made by grub1 and will need to be reviewed if there 1697 * are updates about the information provided by Multiboot 1. 1698 * 1699 * In the case of the Multiboot 2, our life is much simpler, as the MB2 1700 * information tag list is one contiguous chunk of memory. 1701 */ 1702 static paddr_t 1703 dboot_multiboot1_highest_addr(void) 1704 { 1705 paddr_t addr = (paddr_t)(uintptr_t)NULL; 1706 char *cmdl = (char *)mb_info->cmdline; 1707 1708 if (mb_info->flags & MB_INFO_CMDLINE) 1709 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1)); 1710 1711 if (mb_info->flags & MB_INFO_MEM_MAP) 1712 addr = MAX(addr, 1713 ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length))); 1714 return (addr); 1715 } 1716 1717 static void 1718 dboot_multiboot_highest_addr(void) 1719 { 1720 paddr_t addr; 1721 1722 switch (multiboot_version) { 1723 case 1: 1724 addr = dboot_multiboot1_highest_addr(); 1725 if (addr != (paddr_t)(uintptr_t)NULL) 1726 check_higher(addr); 1727 break; 1728 case 2: 1729 addr = dboot_multiboot2_highest_addr(mb2_info); 1730 if (addr != (paddr_t)(uintptr_t)NULL) 1731 check_higher(addr); 1732 break; 1733 default: 1734 dboot_panic("Unknown multiboot version: %d\n", 1735 multiboot_version); 1736 break; 1737 } 1738 } 1739 1740 /* 1741 * Walk the boot loader provided information and find the highest free address. 
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");
	dboot_process_modules();
	dboot_process_mmap();
	dboot_multiboot_highest_addr();
}

/*
 * Compare two EFI GUIDs field by field; returns 1 if they are equal,
 * 0 otherwise.
 */
static int
dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2)
{
	int i;

	if (g1->time_low != g2->time_low)
		return (0);
	if (g1->time_mid != g2->time_mid)
		return (0);
	if (g1->time_hi_and_version != g2->time_hi_and_version)
		return (0);
	if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved)
		return (0);
	if (g1->clock_seq_low != g2->clock_seq_low)
		return (0);

	for (i = 0; i < 6; i++) {
		if (g1->node_addr[i] != g2->node_addr[i])
			return (0);
	}
	return (1);
}

/*
 * Walk the 32-bit EFI configuration table, stashing the SMBIOS and ACPI
 * RSDP table pointers into the boot info.  SMBIOS3 is preferred over
 * SMBIOS, and ACPI 2.0+ over ACPI 1.0.
 */
static void
process_efi32(EFI_SYSTEM_TABLE32 *efi)
{
	uint32_t entries;
	EFI_CONFIGURATION_TABLE32 *config;
	efi_guid_t VendorGuid;
	int i;

	entries = efi->NumberOfTableEntries;
	config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
	    efi->ConfigurationTable;

	for (i = 0; i < entries; i++) {
		/* Copy via memcpy; the table entry may be unaligned. */
		(void) memcpy(&VendorGuid, &config[i].VendorGuid,
		    sizeof (VendorGuid));
		if (dboot_same_guids(&VendorGuid, &smbios3)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_smbios == 0 &&
		    dboot_same_guids(&VendorGuid, &smbios)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		/* Prefer acpi v2+ over v1. */
		if (dboot_same_guids(&VendorGuid, &acpi2)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_acpi_rsdp == 0 &&
		    dboot_same_guids(&VendorGuid, &acpi1)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
	}
}

/*
 * 64-bit counterpart of process_efi32(); identical logic over the
 * 64-bit EFI configuration table layout.
 */
static void
process_efi64(EFI_SYSTEM_TABLE64 *efi)
{
	uint64_t entries;
	EFI_CONFIGURATION_TABLE64 *config;
	efi_guid_t VendorGuid;
	int i;

	entries = efi->NumberOfTableEntries;
	config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
	    efi->ConfigurationTable;

	for (i = 0; i < entries; i++) {
		(void) memcpy(&VendorGuid, &config[i].VendorGuid,
		    sizeof (VendorGuid));
		if (dboot_same_guids(&VendorGuid, &smbios3)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_smbios == 0 &&
		    dboot_same_guids(&VendorGuid, &smbios)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		/* Prefer acpi v2+ over v1. */
		if (dboot_same_guids(&VendorGuid, &acpi2)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_acpi_rsdp == 0 &&
		    dboot_same_guids(&VendorGuid, &acpi1)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
	}
}

/*
 * Locate firmware tables (EFI system table, SMBIOS, ACPI RSDP) from the
 * multiboot2 tag list and record them in the boot info.  Multiboot 1
 * provides no firmware tables, so this is a no-op there.
 */
static void
dboot_multiboot_get_fwtables(void)
{
	multiboot_tag_new_acpi_t *nacpitagp;
	multiboot_tag_old_acpi_t *oacpitagp;
	multiboot_tag_efi64_t *efi64tagp = NULL;
	multiboot_tag_efi32_t *efi32tagp = NULL;

	/* no fw tables from multiboot 1 */
	if (multiboot_version != 2)
		return;

	/* Prefer the 64-bit EFI system table when present. */
	efi64tagp = (multiboot_tag_efi64_t *)
	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64);
	if (efi64tagp != NULL) {
		bi->bi_uefi_arch = XBI_UEFI_ARCH_64;
		bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
		    efi64tagp->mb_pointer;
		process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
		    efi64tagp->mb_pointer);
	} else {
		efi32tagp = (multiboot_tag_efi32_t *)
		    dboot_multiboot2_find_tag(mb2_info,
		    MULTIBOOT_TAG_TYPE_EFI32);
		if (efi32tagp != NULL) {
			bi->bi_uefi_arch = XBI_UEFI_ARCH_32;
			bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
			    efi32tagp->mb_pointer;
			process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
			    efi32tagp->mb_pointer);
		}
	}

	/*
	 * The multiboot2 info contains a copy of the RSDP; stash a pointer to
	 * it (see find_rsdp() in fakebop).
	 */
	nacpitagp = (multiboot_tag_new_acpi_t *)
	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_NEW);
	oacpitagp = (multiboot_tag_old_acpi_t *)
	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_OLD);

	if (nacpitagp != NULL) {
		bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
		    &nacpitagp->mb_rsdp[0];
	} else if (oacpitagp != NULL) {
		bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
		    &oacpitagp->mb_rsdp[0];
	}
}

/* print out EFI version string with newline */
static void
dboot_print_efi_version(uint32_t ver)
{
	int rev;

	dboot_printf("%d.", EFI_REV_MAJOR(ver));

	/* The minor revision encodes minor*10 + sub-minor. */
	rev = EFI_REV_MINOR(ver);
	if ((rev % 10) != 0) {
		dboot_printf("%d.%d\n", rev / 10, rev % 10);
	} else {
		dboot_printf("%d\n", rev / 10);
	}
}

/* Dump the 32-bit EFI system table and its configuration entries. */
static void
print_efi32(EFI_SYSTEM_TABLE32 *efi)
{
	uint16_t *data;
	EFI_CONFIGURATION_TABLE32 *conf;
	int i;

	dboot_printf("EFI32 signature: %llx\n",
	    (unsigned long long)efi->Hdr.Signature);
	dboot_printf("EFI system version: ");
	dboot_print_efi_version(efi->Hdr.Revision);
	dboot_printf("EFI system vendor: ");
	/* FirmwareVendor is a NUL-terminated UCS-2 string. */
	data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
	for (i = 0; data[i] != 0; i++)
		dboot_printf("%c", (char)data[i]);
	dboot_printf("\nEFI firmware revision: ");
	dboot_print_efi_version(efi->FirmwareRevision);
	dboot_printf("EFI system table number of entries: %d\n",
	    efi->NumberOfTableEntries);
	conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
	    efi->ConfigurationTable;
	for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
		dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
		    conf[i].VendorGuid.time_low,
		    conf[i].VendorGuid.time_mid,
		    conf[i].VendorGuid.time_hi_and_version,
		    conf[i].VendorGuid.clock_seq_hi_and_reserved,
		    conf[i].VendorGuid.clock_seq_low);
		dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
		    conf[i].VendorGuid.node_addr[0],
		    conf[i].VendorGuid.node_addr[1],
		    conf[i].VendorGuid.node_addr[2],
		    conf[i].VendorGuid.node_addr[3],
		    conf[i].VendorGuid.node_addr[4],
		    conf[i].VendorGuid.node_addr[5]);
	}
}

/* Dump the 64-bit EFI system table and its configuration entries. */
static void
print_efi64(EFI_SYSTEM_TABLE64 *efi)
{
	uint16_t *data;
	EFI_CONFIGURATION_TABLE64 *conf;
	int i;

	dboot_printf("EFI64 signature: %llx\n",
	    (unsigned long long)efi->Hdr.Signature);
	dboot_printf("EFI system version: ");
	dboot_print_efi_version(efi->Hdr.Revision);
	dboot_printf("EFI system vendor: ");
	data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
	for (i = 0; data[i] != 0; i++)
		dboot_printf("%c", (char)data[i]);
	dboot_printf("\nEFI firmware revision: ");
	dboot_print_efi_version(efi->FirmwareRevision);
	dboot_printf("EFI system table number of entries: %" PRIu64 "\n",
	    efi->NumberOfTableEntries);
	conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
	    efi->ConfigurationTable;
	for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
		dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
		    conf[i].VendorGuid.time_low,
		    conf[i].VendorGuid.time_mid,
		    conf[i].VendorGuid.time_hi_and_version,
		    conf[i].VendorGuid.clock_seq_hi_and_reserved,
		    conf[i].VendorGuid.clock_seq_low);
		dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
		    conf[i].VendorGuid.node_addr[0],
		    conf[i].VendorGuid.node_addr[1],
		    conf[i].VendorGuid.node_addr[2],
		    conf[i].VendorGuid.node_addr[3],
		    conf[i].VendorGuid.node_addr[4],
		    conf[i].VendorGuid.node_addr[5]);
	}
}
#endif /* !__xpv */

/*
 * Simple memory allocator, allocates aligned physical memory.
 * Note that startup_kernel() only allocates memory, never frees.
1995 * Memory usage just grows in an upward direction. 1996 */ 1997 static void * 1998 do_mem_alloc(uint32_t size, uint32_t align) 1999 { 2000 uint_t i; 2001 uint64_t best; 2002 uint64_t start; 2003 uint64_t end; 2004 2005 /* 2006 * make sure size is a multiple of pagesize 2007 */ 2008 size = RNDUP(size, MMU_PAGESIZE); 2009 next_avail_addr = RNDUP(next_avail_addr, align); 2010 2011 /* 2012 * XXPV fixme joe 2013 * 2014 * a really large bootarchive that causes you to run out of memory 2015 * may cause this to blow up 2016 */ 2017 /* LINTED E_UNEXPECTED_UINT_PROMOTION */ 2018 best = (uint64_t)-size; 2019 for (i = 0; i < memlists_used; ++i) { 2020 start = memlists[i].addr; 2021 #if defined(__xpv) 2022 start += mfn_base; 2023 #endif 2024 end = start + memlists[i].size; 2025 2026 /* 2027 * did we find the desired address? 2028 */ 2029 if (start <= next_avail_addr && next_avail_addr + size <= end) { 2030 best = next_avail_addr; 2031 goto done; 2032 } 2033 2034 /* 2035 * if not is this address the best so far? 2036 */ 2037 if (start > next_avail_addr && start < best && 2038 RNDUP(start, align) + size <= end) 2039 best = RNDUP(start, align); 2040 } 2041 2042 /* 2043 * We didn't find exactly the address we wanted, due to going off the 2044 * end of a memory region. Return the best found memory address. 2045 */ 2046 done: 2047 next_avail_addr = best + size; 2048 #if defined(__xpv) 2049 if (next_avail_addr > scratch_end) 2050 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: " 2051 "0x%lx", (ulong_t)next_avail_addr, 2052 (ulong_t)scratch_end); 2053 #endif 2054 (void) memset((void *)(uintptr_t)best, 0, size); 2055 return ((void *)(uintptr_t)best); 2056 } 2057 2058 void * 2059 mem_alloc(uint32_t size) 2060 { 2061 return (do_mem_alloc(size, MMU_PAGESIZE)); 2062 } 2063 2064 2065 /* 2066 * Build page tables to map all of memory used so far as well as the kernel. 
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint64_t start;
#if !defined(__xpv)
	uint32_t i;
	uint64_t end;
#endif	/* __xpv */

	/*
	 * If we're on metal, we need to create the top level pagetable.
	 */
#if defined(__xpv)
	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
#else /* __xpv */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
#endif /* __xpv */
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

#if defined(__xpv)
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* If this is a domU we're done. */
		DBG_MSG("\nPage tables constructed\n");
		return;
	}
#endif /* __xpv */

	/*
	 * We need 1:1 mappings for the lower 1M of memory to access
	 * BIOS tables used by a couple of drivers during boot.
	 *
	 * The following code works because our simple memory allocator
	 * only grows usage in an upwards direction.
	 *
	 * Note that by this point in boot some mappings for low memory
	 * may already exist because we've already accessed device in low
	 * memory.  (Specifically the video frame buffer and keyboard
	 * status ports.)  If we're booting on raw hardware then GRUB
	 * created these mappings for us.  If we're booting under a
	 * hypervisor then we went ahead and remapped these devices into
	 * memory allocated within dboot itself.
	 */
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
#if defined(__xpv)
		map_ma_at_va(start, start, 0);
#else /* __xpv */
		map_pa_at_va(start, start, 0);
#endif /* __xpv */
	}

#if !defined(__xpv)

	/*
	 * 1:1 map every page dboot has allocated so far; memlists are
	 * sorted, so we can stop once we pass next_avail_addr.
	 */
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
		end = start + memlists[i].size;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
		if (start >= next_avail_addr)
			break;
	}

	/*
	 * Map framebuffer memory as PT_NOCACHE as this is memory from a
	 * device and therefore must not be cached.
	 */
	if (fb != NULL && fb->framebuffer != 0) {
		multiboot_tag_framebuffer_t *fb_tagp;
		fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
		    fb->framebuffer;

		start = fb_tagp->framebuffer_common.framebuffer_addr;
		end = start + fb_tagp->framebuffer_common.framebuffer_height *
		    fb_tagp->framebuffer_common.framebuffer_pitch;

		if (map_debug)
			dboot_printf("FB 1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		/*
		 * Temporarily add the no-cache PTE bits; pte_bits is a
		 * global consumed by map_pa_at_va(), so restore it below.
		 */
		pte_bits |= PT_NOCACHE;
		if (PAT_support != 0)
			pte_bits |= PT_PAT_4K;

		while (start < end) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
		pte_bits &= ~PT_NOCACHE;
		if (PAT_support != 0)
			pte_bits &= ~PT_PAT_4K;
	}
#endif /* !__xpv */

	DBG_MSG("\nPage tables constructed\n");
}

#define	NO_MULTIBOOT	\
"multiboot is no longer used to boot the Solaris Operating System.\n\
The grub entry should be changed to:\n\
kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
module$ /platform/i86pc/$ISADIR/boot_archive\n\
See http://illumos.org/msg/SUNOS-8000-AK for details.\n"

/*
 * Point the boot info at the framebuffer and console structures and pick
 * up any console-related boot modules (font, environment).
 */
static void
dboot_init_xboot_consinfo(void)
{
	bi = &boot_info;

#if !defined(__xpv)
	fb = &framebuffer;
	bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;

	switch (multiboot_version) {
	case 1:
		dboot_multiboot1_xboot_consinfo();
		break;
	case 2:
		dboot_multiboot2_xboot_consinfo();
		break;
	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	dboot_find_console_modules();
#endif
}

/*
 * Set up basic data from the boot loader.
 * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
 * 32-bit dboot code setup used to set up and start 64-bit kernel.
2237 * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and 2238 * start 64-bit illumos kernel. 2239 */ 2240 static void 2241 dboot_loader_init(void) 2242 { 2243 #if !defined(__xpv) 2244 mb_info = NULL; 2245 mb2_info = NULL; 2246 2247 switch (mb_magic) { 2248 case MB_BOOTLOADER_MAGIC: 2249 multiboot_version = 1; 2250 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr; 2251 #if defined(_BOOT_TARGET_amd64) 2252 load_addr = mb_header.load_addr; 2253 #endif 2254 break; 2255 2256 case MULTIBOOT2_BOOTLOADER_MAGIC: 2257 multiboot_version = 2; 2258 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr; 2259 #if defined(_BOOT_TARGET_amd64) 2260 load_addr = mb2_load_addr; 2261 #endif 2262 break; 2263 2264 default: 2265 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic); 2266 break; 2267 } 2268 #endif /* !defined(__xpv) */ 2269 } 2270 2271 /* Extract the kernel command line from [multi]boot information. */ 2272 static char * 2273 dboot_loader_cmdline(void) 2274 { 2275 char *line = NULL; 2276 2277 #if defined(__xpv) 2278 line = (char *)xen_info->cmd_line; 2279 #else /* __xpv */ 2280 2281 switch (multiboot_version) { 2282 case 1: 2283 if (mb_info->flags & MB_INFO_CMDLINE) 2284 line = (char *)mb_info->cmdline; 2285 break; 2286 2287 case 2: 2288 line = dboot_multiboot2_cmdline(mb2_info); 2289 break; 2290 2291 default: 2292 dboot_panic("Unknown multiboot version: %d\n", 2293 multiboot_version); 2294 break; 2295 } 2296 2297 #endif /* __xpv */ 2298 2299 /* 2300 * Make sure we have valid pointer so the string operations 2301 * will not crash us. 
2302 */ 2303 if (line == NULL) 2304 line = ""; 2305 2306 return (line); 2307 } 2308 2309 static char * 2310 dboot_loader_name(void) 2311 { 2312 #if defined(__xpv) 2313 return (NULL); 2314 #else /* __xpv */ 2315 multiboot_tag_string_t *tag; 2316 2317 switch (multiboot_version) { 2318 case 1: 2319 return ((char *)(uintptr_t)mb_info->boot_loader_name); 2320 2321 case 2: 2322 tag = dboot_multiboot2_find_tag(mb2_info, 2323 MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME); 2324 return (tag->mb_string); 2325 default: 2326 dboot_panic("Unknown multiboot version: %d\n", 2327 multiboot_version); 2328 break; 2329 } 2330 2331 return (NULL); 2332 #endif /* __xpv */ 2333 } 2334 2335 /* 2336 * startup_kernel has a pretty simple job. It builds pagetables which reflect 2337 * 1:1 mappings for all memory in use. It then also adds mappings for 2338 * the kernel nucleus at virtual address of target_kernel_text using large page 2339 * mappings. The page table pages are also accessible at 1:1 mapped 2340 * virtual addresses. 2341 */ 2342 /*ARGSUSED*/ 2343 void 2344 startup_kernel(void) 2345 { 2346 char *cmdline; 2347 char *bootloader; 2348 #if defined(__xpv) 2349 physdev_set_iopl_t set_iopl; 2350 #endif /* __xpv */ 2351 2352 if (dboot_debug == 1) 2353 bcons_init(NULL); /* Set very early console to ttya. */ 2354 dboot_loader_init(); 2355 /* 2356 * At this point we are executing in a 32 bit real mode. 2357 */ 2358 2359 bootloader = dboot_loader_name(); 2360 cmdline = dboot_loader_cmdline(); 2361 2362 #if defined(__xpv) 2363 /* 2364 * For dom0, before we initialize the console subsystem we'll 2365 * need to enable io operations, so set I/O priveldge level to 1. 2366 */ 2367 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 2368 set_iopl.iopl = 1; 2369 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 2370 } 2371 #endif /* __xpv */ 2372 2373 dboot_init_xboot_consinfo(); 2374 bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline; 2375 bcons_init(bi); /* Now we can set the real console. 
*/ 2376 2377 prom_debug = (find_boot_prop("prom_debug") != NULL); 2378 map_debug = (find_boot_prop("map_debug") != NULL); 2379 2380 #if !defined(__xpv) 2381 dboot_multiboot_get_fwtables(); 2382 #endif 2383 DBG_MSG("\n\nillumos prekernel set: "); 2384 DBG_MSG(cmdline); 2385 DBG_MSG("\n"); 2386 2387 if (bootloader != NULL && prom_debug) { 2388 dboot_printf("Kernel loaded by: %s\n", bootloader); 2389 #if !defined(__xpv) 2390 dboot_printf("Using multiboot %d boot protocol.\n", 2391 multiboot_version); 2392 #endif 2393 } 2394 2395 if (strstr(cmdline, "multiboot") != NULL) { 2396 dboot_panic(NO_MULTIBOOT); 2397 } 2398 2399 DBG((uintptr_t)bi); 2400 #if !defined(__xpv) 2401 DBG((uintptr_t)mb_info); 2402 DBG((uintptr_t)mb2_info); 2403 if (mb2_info != NULL) 2404 DBG(mb2_info->mbi_total_size); 2405 DBG(bi->bi_acpi_rsdp); 2406 DBG(bi->bi_acpi_rsdp_copy); 2407 DBG(bi->bi_smbios); 2408 DBG(bi->bi_uefi_arch); 2409 DBG(bi->bi_uefi_systab); 2410 2411 if (bi->bi_uefi_systab && prom_debug) { 2412 if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) { 2413 print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t) 2414 bi->bi_uefi_systab); 2415 } else { 2416 print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t) 2417 bi->bi_uefi_systab); 2418 } 2419 } 2420 #endif 2421 2422 /* 2423 * Need correct target_kernel_text value 2424 */ 2425 target_kernel_text = KERNEL_TEXT; 2426 DBG(target_kernel_text); 2427 2428 #if defined(__xpv) 2429 2430 /* 2431 * XXPV Derive this stuff from CPUID / what the hypervisor has enabled 2432 */ 2433 2434 #if defined(_BOOT_TARGET_amd64) 2435 /* 2436 * 64-bit hypervisor. 
2437 */ 2438 amd64_support = 1; 2439 pae_support = 1; 2440 2441 #else /* _BOOT_TARGET_amd64 */ 2442 2443 /* 2444 * See if we are running on a PAE Hypervisor 2445 */ 2446 { 2447 xen_capabilities_info_t caps; 2448 2449 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0) 2450 dboot_panic("HYPERVISOR_xen_version(caps) failed"); 2451 caps[sizeof (caps) - 1] = 0; 2452 if (prom_debug) 2453 dboot_printf("xen capabilities %s\n", caps); 2454 if (strstr(caps, "x86_32p") != NULL) 2455 pae_support = 1; 2456 } 2457 2458 #endif /* _BOOT_TARGET_amd64 */ 2459 { 2460 xen_platform_parameters_t p; 2461 2462 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0) 2463 dboot_panic("HYPERVISOR_xen_version(parms) failed"); 2464 DBG(p.virt_start); 2465 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start); 2466 } 2467 2468 /* 2469 * The hypervisor loads stuff starting at 1Gig 2470 */ 2471 mfn_base = ONE_GIG; 2472 DBG(mfn_base); 2473 2474 /* 2475 * enable writable page table mode for the hypervisor 2476 */ 2477 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2478 VMASST_TYPE_writable_pagetables) < 0) 2479 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed"); 2480 2481 /* 2482 * check for NX support 2483 */ 2484 if (pae_support) { 2485 uint32_t eax = 0x80000000; 2486 uint32_t edx = get_cpuid_edx(&eax); 2487 2488 if (eax >= 0x80000001) { 2489 eax = 0x80000001; 2490 edx = get_cpuid_edx(&eax); 2491 if (edx & CPUID_AMD_EDX_NX) 2492 NX_support = 1; 2493 } 2494 } 2495 2496 /* 2497 * check for PAT support 2498 */ 2499 { 2500 uint32_t eax = 1; 2501 uint32_t edx = get_cpuid_edx(&eax); 2502 2503 if (edx & CPUID_INTC_EDX_PAT) 2504 PAT_support = 1; 2505 } 2506 #if !defined(_BOOT_TARGET_amd64) 2507 2508 /* 2509 * The 32-bit hypervisor uses segmentation to protect itself from 2510 * guests. 
This means when a guest attempts to install a flat 4GB 2511 * code or data descriptor the 32-bit hypervisor will protect itself 2512 * by silently shrinking the segment such that if the guest attempts 2513 * any access where the hypervisor lives a #gp fault is generated. 2514 * The problem is that some applications expect a full 4GB flat 2515 * segment for their current thread pointer and will use negative 2516 * offset segment wrap around to access data. TLS support in linux 2517 * brand is one example of this. 2518 * 2519 * The 32-bit hypervisor can catch the #gp fault in these cases 2520 * and emulate the access without passing the #gp fault to the guest 2521 * but only if VMASST_TYPE_4gb_segments is explicitly turned on. 2522 * Seems like this should have been the default. 2523 * Either way, we want the hypervisor -- and not Solaris -- to deal 2524 * to deal with emulating these accesses. 2525 */ 2526 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2527 VMASST_TYPE_4gb_segments) < 0) 2528 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed"); 2529 #endif /* !_BOOT_TARGET_amd64 */ 2530 2531 #else /* __xpv */ 2532 2533 /* 2534 * use cpuid to enable MMU features 2535 */ 2536 if (have_cpuid()) { 2537 uint32_t eax, edx; 2538 2539 eax = 1; 2540 edx = get_cpuid_edx(&eax); 2541 if (edx & CPUID_INTC_EDX_PSE) 2542 largepage_support = 1; 2543 if (edx & CPUID_INTC_EDX_PGE) 2544 pge_support = 1; 2545 if (edx & CPUID_INTC_EDX_PAE) 2546 pae_support = 1; 2547 if (edx & CPUID_INTC_EDX_PAT) 2548 PAT_support = 1; 2549 2550 eax = 0x80000000; 2551 edx = get_cpuid_edx(&eax); 2552 if (eax >= 0x80000001) { 2553 eax = 0x80000001; 2554 edx = get_cpuid_edx(&eax); 2555 if (edx & CPUID_AMD_EDX_LM) 2556 amd64_support = 1; 2557 if (edx & CPUID_AMD_EDX_NX) 2558 NX_support = 1; 2559 } 2560 } else { 2561 dboot_printf("cpuid not supported\n"); 2562 } 2563 #endif /* __xpv */ 2564 2565 2566 #if defined(_BOOT_TARGET_amd64) 2567 if (amd64_support == 0) 2568 dboot_panic("long mode not supported, 
rebooting"); 2569 else if (pae_support == 0) 2570 dboot_panic("long mode, but no PAE; rebooting"); 2571 #else 2572 /* 2573 * Allow the command line to over-ride use of PAE for 32 bit. 2574 */ 2575 if (strstr(cmdline, "disablePAE=true") != NULL) { 2576 pae_support = 0; 2577 NX_support = 0; 2578 amd64_support = 0; 2579 } 2580 #endif 2581 2582 /* 2583 * initialize the simple memory allocator 2584 */ 2585 init_mem_alloc(); 2586 2587 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64) 2588 /* 2589 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory 2590 */ 2591 if (max_mem < FOUR_GIG && NX_support == 0) 2592 pae_support = 0; 2593 #endif 2594 2595 /* 2596 * configure mmu information 2597 */ 2598 if (pae_support) { 2599 shift_amt = shift_amt_pae; 2600 ptes_per_table = 512; 2601 pte_size = 8; 2602 lpagesize = TWO_MEG; 2603 #if defined(_BOOT_TARGET_amd64) 2604 top_level = 3; 2605 #else 2606 top_level = 2; 2607 #endif 2608 } else { 2609 pae_support = 0; 2610 NX_support = 0; 2611 shift_amt = shift_amt_nopae; 2612 ptes_per_table = 1024; 2613 pte_size = 4; 2614 lpagesize = FOUR_MEG; 2615 top_level = 1; 2616 } 2617 2618 DBG(PAT_support); 2619 DBG(pge_support); 2620 DBG(NX_support); 2621 DBG(largepage_support); 2622 DBG(amd64_support); 2623 DBG(top_level); 2624 DBG(pte_size); 2625 DBG(ptes_per_table); 2626 DBG(lpagesize); 2627 2628 #if defined(__xpv) 2629 ktext_phys = ONE_GIG; /* from UNIX Mapfile */ 2630 #else 2631 ktext_phys = FOUR_MEG; /* from UNIX Mapfile */ 2632 #endif 2633 2634 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64) 2635 /* 2636 * For grub, copy kernel bits from the ELF64 file to final place. 
2637 */ 2638 DBG_MSG("\nAllocating nucleus pages.\n"); 2639 ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG); 2640 2641 if (ktext_phys == 0) 2642 dboot_panic("failed to allocate aligned kernel memory"); 2643 DBG(load_addr); 2644 if (dboot_elfload64(load_addr) != 0) 2645 dboot_panic("failed to parse kernel ELF image, rebooting"); 2646 #endif 2647 2648 DBG(ktext_phys); 2649 2650 /* 2651 * Allocate page tables. 2652 */ 2653 build_page_tables(); 2654 2655 /* 2656 * return to assembly code to switch to running kernel 2657 */ 2658 entry_addr_low = (uint32_t)target_kernel_text; 2659 DBG(entry_addr_low); 2660 bi->bi_use_largepage = largepage_support; 2661 bi->bi_use_pae = pae_support; 2662 bi->bi_use_pge = pge_support; 2663 bi->bi_use_nx = NX_support; 2664 2665 #if defined(__xpv) 2666 2667 bi->bi_next_paddr = next_avail_addr - mfn_base; 2668 DBG(bi->bi_next_paddr); 2669 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr; 2670 DBG(bi->bi_next_vaddr); 2671 2672 /* 2673 * unmap unused pages in start area to make them available for DMA 2674 */ 2675 while (next_avail_addr < scratch_end) { 2676 (void) HYPERVISOR_update_va_mapping(next_avail_addr, 2677 0, UVMF_INVLPG | UVMF_LOCAL); 2678 next_avail_addr += MMU_PAGESIZE; 2679 } 2680 2681 bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info; 2682 DBG((uintptr_t)HYPERVISOR_shared_info); 2683 bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info; 2684 bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base; 2685 2686 #else /* __xpv */ 2687 2688 bi->bi_next_paddr = next_avail_addr; 2689 DBG(bi->bi_next_paddr); 2690 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr; 2691 DBG(bi->bi_next_vaddr); 2692 bi->bi_mb_version = multiboot_version; 2693 2694 switch (multiboot_version) { 2695 case 1: 2696 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info; 2697 break; 2698 case 2: 2699 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info; 2700 break; 2701 default: 2702 dboot_panic("Unknown multiboot version: 
%d\n", 2703 multiboot_version); 2704 break; 2705 } 2706 bi->bi_top_page_table = (uintptr_t)top_page_table; 2707 2708 #endif /* __xpv */ 2709 2710 bi->bi_kseg_size = FOUR_MEG; 2711 DBG(bi->bi_kseg_size); 2712 2713 #ifndef __xpv 2714 if (map_debug) 2715 dump_tables(); 2716 #endif 2717 2718 DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n"); 2719 2720 #ifndef __xpv 2721 /* Update boot info with FB data */ 2722 fb->cursor.origin.x = fb_info.cursor.origin.x; 2723 fb->cursor.origin.y = fb_info.cursor.origin.y; 2724 fb->cursor.pos.x = fb_info.cursor.pos.x; 2725 fb->cursor.pos.y = fb_info.cursor.pos.y; 2726 fb->cursor.visible = fb_info.cursor.visible; 2727 #endif 2728 } 2729