1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2020 Joyent, Inc. 27 */ 28 29 30 #include <sys/types.h> 31 #include <sys/machparam.h> 32 #include <sys/x86_archext.h> 33 #include <sys/systm.h> 34 #include <sys/mach_mmu.h> 35 #include <sys/multiboot.h> 36 #include <sys/multiboot2.h> 37 #include <sys/multiboot2_impl.h> 38 #include <sys/sysmacros.h> 39 #include <sys/framebuffer.h> 40 #include <sys/sha1.h> 41 #include <util/string.h> 42 #include <util/strtolctype.h> 43 #include <sys/efi.h> 44 45 /* 46 * Compile time debug knob. We do not have any early mechanism to control it 47 * as the boot is the earliest mechanism we have, and we do not want to have 48 * it being switched on by default. 
*/
int dboot_debug = 0;

#if defined(__xpv)

#include <sys/hypervisor.h>
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;

#else /* !__xpv */

extern multiboot_header_t mb_header;
extern uint32_t mb2_load_addr;
extern int have_cpuid(void);

#endif /* !__xpv */

#include <sys/inttypes.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#include <sys/boot_console.h>

#include "dboot_asm.h"
#include "dboot_printf.h"
#include "dboot_xboot.h"
#include "dboot_elfload.h"

/* Two ASCII hex characters are needed for every byte of a SHA-1 digest. */
#define	SHA1_ASCII_LENGTH	(SHA1_DIGEST_LENGTH * 2)

/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or a XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 *	- a 32 bit program for the 32-bit PV hypervisor
 *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running in real mode, all such memory is accessible.
 */

/*
 * Standard bits used in PTE (page level) and PTP (internal levels)
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;

/*
 * These are the target (physical) addresses where the kernel text and data
 * nucleus pages will be unpacked. On the hypervisor this is actually a
 * virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation
 */
static paddr_t next_avail_addr = 0;

#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0;	/* we can't write all of mem here */
static paddr_t mfn_base;	/* addr corresponding to mfn_list[0] */
start_info_t *xen_info;

#else	/* __xpv */

/*
 * If on the metal, then we have a multiboot loader.
 */
uint32_t mb_magic;			/* magic from boot loader */
uint32_t mb_addr;			/* multiboot info package from loader */
int multiboot_version;
multiboot_info_t *mb_info;
multiboot2_info_header_t *mb2_info;
multiboot_tag_mmap_t *mb2_mmap_tagp;
int num_entries;			/* mmap entry count */
boolean_t num_entries_set;		/* is mmap entry count set */
uintptr_t load_addr;
static boot_framebuffer_t framebuffer __aligned(16);
static boot_framebuffer_t *fb;

/* can not be automatic variables because of alignment */
static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID;
static efi_guid_t smbios = SMBIOS_TABLE_GUID;
static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID;
static efi_guid_t acpi1 = ACPI_10_TABLE_GUID;
#endif	/* __xpv */

/*
 * This contains information passed to the kernel
 */
struct xboot_info boot_info __aligned(16);
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;			/* maximum memory address */

/*
 * Information about processor MMU
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;
int PAT_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 * Each array has a matching "_used" counter giving the number of valid
 * leading entries.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

/*
 * This should match what's in the bootloader.  It's arbitrary, but GRUB
 * in particular has limitations on how much space it can use before it
 * stops working properly.  This should be enough.
 */
struct boot_modules modules[MAX_BOOT_MODULES];
uint_t modules_used = 0;

#ifdef __xpv
/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
typedef struct {
	uint32_t	base_addr_low;
	uint32_t	base_addr_high;
	uint32_t	length_low;
	uint32_t	length_high;
	uint32_t	type;
} mmap_t;

/*
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
#define	MAXMAPS	100
static mmap_t map_buffer[MAXMAPS];
#else
typedef mb_memory_map_t mmap_t;
#endif

/*
 * Debugging knobs: non-zero values enable the extra dboot_printf() output
 * guarded by "if (prom_debug)" / "if (map_debug)" in this file.
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

/* placeholder name used for a boot module that has no command line */
static char noname[2] = "-";

/*
 * Either hypervisor-specific or grub-specific code builds the initial
 * memlists.
This code does the sort/merge/link for final use. 239 */ 240 static void 241 sort_physinstall(void) 242 { 243 int i; 244 #if !defined(__xpv) 245 int j; 246 struct boot_memlist tmp; 247 248 /* 249 * Now sort the memlists, in case they weren't in order. 250 * Yeah, this is a bubble sort; small, simple and easy to get right. 251 */ 252 DBG_MSG("Sorting phys-installed list\n"); 253 for (j = memlists_used - 1; j > 0; --j) { 254 for (i = 0; i < j; ++i) { 255 if (memlists[i].addr < memlists[i + 1].addr) 256 continue; 257 tmp = memlists[i]; 258 memlists[i] = memlists[i + 1]; 259 memlists[i + 1] = tmp; 260 } 261 } 262 263 /* 264 * Merge any memlists that don't have holes between them. 265 */ 266 for (i = 0; i <= memlists_used - 1; ++i) { 267 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr) 268 continue; 269 270 if (prom_debug) 271 dboot_printf( 272 "merging mem segs %" PRIx64 "...%" PRIx64 273 " w/ %" PRIx64 "...%" PRIx64 "\n", 274 memlists[i].addr, 275 memlists[i].addr + memlists[i].size, 276 memlists[i + 1].addr, 277 memlists[i + 1].addr + memlists[i + 1].size); 278 279 memlists[i].size += memlists[i + 1].size; 280 for (j = i + 1; j < memlists_used - 1; ++j) 281 memlists[j] = memlists[j + 1]; 282 --memlists_used; 283 DBG(memlists_used); 284 --i; /* after merging we need to reexamine, so do this */ 285 } 286 #endif /* __xpv */ 287 288 if (prom_debug) { 289 dboot_printf("\nFinal memlists:\n"); 290 for (i = 0; i < memlists_used; ++i) { 291 dboot_printf("\t%d: addr=%" PRIx64 " size=%" 292 PRIx64 "\n", i, memlists[i].addr, memlists[i].size); 293 } 294 } 295 296 /* 297 * link together the memlists with native size pointers 298 */ 299 memlists[0].next = 0; 300 memlists[0].prev = 0; 301 for (i = 1; i < memlists_used; ++i) { 302 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1); 303 memlists[i].next = 0; 304 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i); 305 } 306 bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists; 307 
DBG(bi->bi_phys_install); 308 } 309 310 /* 311 * build bios reserved memlists 312 */ 313 static void 314 build_rsvdmemlists(void) 315 { 316 int i; 317 318 rsvdmemlists[0].next = 0; 319 rsvdmemlists[0].prev = 0; 320 for (i = 1; i < rsvdmemlists_used; ++i) { 321 rsvdmemlists[i].prev = 322 (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1); 323 rsvdmemlists[i].next = 0; 324 rsvdmemlists[i - 1].next = 325 (native_ptr_t)(uintptr_t)(rsvdmemlists + i); 326 } 327 bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists; 328 DBG(bi->bi_rsvdmem); 329 } 330 331 #if defined(__xpv) 332 333 /* 334 * halt on the hypervisor after a delay to drain console output 335 */ 336 void 337 dboot_halt(void) 338 { 339 uint_t i = 10000; 340 341 while (--i) 342 (void) HYPERVISOR_yield(); 343 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 344 } 345 346 /* 347 * From a machine address, find the corresponding pseudo-physical address. 348 * Pseudo-physical address are contiguous and run from mfn_base in each VM. 349 * Machine addresses are the real underlying hardware addresses. 350 * These are needed for page table entries. Note that this routine is 351 * poorly protected. A bad value of "ma" will cause a page fault. 352 */ 353 paddr_t 354 ma_to_pa(maddr_t ma) 355 { 356 ulong_t pgoff = ma & MMU_PAGEOFFSET; 357 ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)]; 358 paddr_t pa; 359 360 if (pfn >= xen_info->nr_pages) 361 return (-(paddr_t)1); 362 pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff; 363 #ifdef DEBUG 364 if (ma != pa_to_ma(pa)) 365 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", " 366 "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa)); 367 #endif 368 return (pa); 369 } 370 371 /* 372 * From a pseudo-physical address, find the corresponding machine address. 
373 */ 374 maddr_t 375 pa_to_ma(paddr_t pa) 376 { 377 pfn_t pfn; 378 ulong_t mfn; 379 380 pfn = mmu_btop(pa - mfn_base); 381 if (pa < mfn_base || pfn >= xen_info->nr_pages) 382 dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa); 383 mfn = ((ulong_t *)xen_info->mfn_list)[pfn]; 384 #ifdef DEBUG 385 if (mfn_to_pfn_mapping[mfn] != pfn) 386 dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n", 387 pfn, mfn, mfn_to_pfn_mapping[mfn]); 388 #endif 389 return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET)); 390 } 391 392 #endif /* __xpv */ 393 394 x86pte_t 395 get_pteval(paddr_t table, uint_t index) 396 { 397 if (pae_support) 398 return (((x86pte_t *)(uintptr_t)table)[index]); 399 return (((x86pte32_t *)(uintptr_t)table)[index]); 400 } 401 402 /*ARGSUSED*/ 403 void 404 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 405 { 406 #ifdef __xpv 407 mmu_update_t t; 408 maddr_t mtable = pa_to_ma(table); 409 int retcnt; 410 411 t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE; 412 t.val = pteval; 413 if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1) 414 dboot_panic("HYPERVISOR_mmu_update() failed"); 415 #else /* __xpv */ 416 uintptr_t tab_addr = (uintptr_t)table; 417 418 if (pae_support) 419 ((x86pte_t *)tab_addr)[index] = pteval; 420 else 421 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval; 422 if (level == top_level && level == 2) 423 reload_cr3(); 424 #endif /* __xpv */ 425 } 426 427 paddr_t 428 make_ptable(x86pte_t *pteval, uint_t level) 429 { 430 paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 431 432 if (level == top_level && level == 2) 433 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID; 434 else 435 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits; 436 437 #ifdef __xpv 438 /* Remove write permission to the new page table. 
*/ 439 if (HYPERVISOR_update_va_mapping(new_table, 440 *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL)) 441 dboot_panic("HYP_update_va_mapping error"); 442 #endif 443 444 if (map_debug) 445 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%" 446 PRIx64 "\n", level, (ulong_t)new_table, *pteval); 447 return (new_table); 448 } 449 450 x86pte_t * 451 map_pte(paddr_t table, uint_t index) 452 { 453 return ((x86pte_t *)(uintptr_t)(table + index * pte_size)); 454 } 455 456 /* 457 * dump out the contents of page tables... 458 */ 459 static void 460 dump_tables(void) 461 { 462 uint_t save_index[4]; /* for recursion */ 463 char *save_table[4]; /* for recursion */ 464 uint_t l; 465 uint64_t va; 466 uint64_t pgsize; 467 int index; 468 int i; 469 x86pte_t pteval; 470 char *table; 471 static char *tablist = "\t\t\t"; 472 char *tabs = tablist + 3 - top_level; 473 uint_t pa, pa1; 474 #if !defined(__xpv) 475 #define maddr_t paddr_t 476 #endif /* !__xpv */ 477 478 dboot_printf("Finished pagetables:\n"); 479 table = (char *)(uintptr_t)top_page_table; 480 l = top_level; 481 va = 0; 482 for (index = 0; index < ptes_per_table; ++index) { 483 pgsize = 1ull << shift_amt[l]; 484 if (pae_support) 485 pteval = ((x86pte_t *)table)[index]; 486 else 487 pteval = ((x86pte32_t *)table)[index]; 488 if (pteval == 0) 489 goto next_entry; 490 491 dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64, 492 tabs + l, (void *)table, index, (uint64_t)pteval, va); 493 pa = ma_to_pa(pteval & MMU_PAGEMASK); 494 dboot_printf(" physaddr=%x\n", pa); 495 496 /* 497 * Don't try to walk hypervisor private pagetables 498 */ 499 if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) { 500 save_table[l] = table; 501 save_index[l] = index; 502 --l; 503 index = -1; 504 table = (char *)(uintptr_t) 505 ma_to_pa(pteval & MMU_PAGEMASK); 506 goto recursion; 507 } 508 509 /* 510 * shorten dump for consecutive mappings 511 */ 512 for (i = 1; index + i < ptes_per_table; ++i) { 513 if (pae_support) 514 pteval = 
((x86pte_t *)table)[index + i]; 515 else 516 pteval = ((x86pte32_t *)table)[index + i]; 517 if (pteval == 0) 518 break; 519 pa1 = ma_to_pa(pteval & MMU_PAGEMASK); 520 if (pa1 != pa + i * pgsize) 521 break; 522 } 523 if (i > 2) { 524 dboot_printf("%s...\n", tabs + l); 525 va += pgsize * (i - 2); 526 index += i - 2; 527 } 528 next_entry: 529 va += pgsize; 530 if (l == 3 && index == 256) /* VA hole */ 531 va = 0xffff800000000000ull; 532 recursion: 533 ; 534 } 535 if (l < top_level) { 536 ++l; 537 index = save_index[l]; 538 table = save_table[l]; 539 goto recursion; 540 } 541 } 542 543 /* 544 * Add a mapping for the machine page at the given virtual address. 545 */ 546 static void 547 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level) 548 { 549 x86pte_t *ptep; 550 x86pte_t pteval; 551 552 pteval = ma | pte_bits; 553 if (level > 0) 554 pteval |= PT_PAGESIZE; 555 if (va >= target_kernel_text && pge_support) 556 pteval |= PT_GLOBAL; 557 558 if (map_debug && ma != va) 559 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64 560 " pte=0x%" PRIx64 " l=%d\n", 561 (uint64_t)ma, (uint64_t)va, pteval, level); 562 563 #if defined(__xpv) 564 /* 565 * see if we can avoid find_pte() on the hypervisor 566 */ 567 if (HYPERVISOR_update_va_mapping(va, pteval, 568 UVMF_INVLPG | UVMF_LOCAL) == 0) 569 return; 570 #endif 571 572 /* 573 * Find the pte that will map this address. This creates any 574 * missing intermediate level page tables 575 */ 576 ptep = find_pte(va, NULL, level, 0); 577 578 /* 579 * When paravirtualized, we must use hypervisor calls to modify the 580 * PTE, since paging is active. On real hardware we just write to 581 * the pagetables which aren't in use yet. 
582 */ 583 #if defined(__xpv) 584 ptep = ptep; /* shut lint up */ 585 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL)) 586 dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64 587 " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "", 588 (uint64_t)va, level, (uint64_t)ma, pteval); 589 #else 590 if (va < 1024 * 1024) 591 pteval |= PT_NOCACHE; /* for video RAM */ 592 if (pae_support) 593 *ptep = pteval; 594 else 595 *((x86pte32_t *)ptep) = (x86pte32_t)pteval; 596 #endif 597 } 598 599 /* 600 * Add a mapping for the physical page at the given virtual address. 601 */ 602 static void 603 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level) 604 { 605 map_ma_at_va(pa_to_ma(pa), va, level); 606 } 607 608 /* 609 * This is called to remove start..end from the 610 * possible range of PCI addresses. 611 */ 612 const uint64_t pci_lo_limit = 0x00100000ul; 613 const uint64_t pci_hi_limit = 0xfff00000ul; 614 static void 615 exclude_from_pci(uint64_t start, uint64_t end) 616 { 617 int i; 618 int j; 619 struct boot_memlist *ml; 620 621 for (i = 0; i < pcimemlists_used; ++i) { 622 ml = &pcimemlists[i]; 623 624 /* delete the entire range? */ 625 if (start <= ml->addr && ml->addr + ml->size <= end) { 626 --pcimemlists_used; 627 for (j = i; j < pcimemlists_used; ++j) 628 pcimemlists[j] = pcimemlists[j + 1]; 629 --i; /* to revisit the new one at this index */ 630 } 631 632 /* split a range? */ 633 else if (ml->addr < start && end < ml->addr + ml->size) { 634 635 ++pcimemlists_used; 636 if (pcimemlists_used > MAX_MEMLIST) 637 dboot_panic("too many pcimemlists"); 638 639 for (j = pcimemlists_used - 1; j > i; --j) 640 pcimemlists[j] = pcimemlists[j - 1]; 641 ml->size = start - ml->addr; 642 643 ++ml; 644 ml->size = (ml->addr + ml->size) - end; 645 ml->addr = end; 646 ++i; /* skip on to next one */ 647 } 648 649 /* cut memory off the start? 
*/ 650 else if (ml->addr < end && end < ml->addr + ml->size) { 651 ml->size -= end - ml->addr; 652 ml->addr = end; 653 } 654 655 /* cut memory off the end? */ 656 else if (ml->addr <= start && start < ml->addr + ml->size) { 657 ml->size = start - ml->addr; 658 } 659 } 660 } 661 662 /* 663 * During memory allocation, find the highest address not used yet. 664 */ 665 static void 666 check_higher(paddr_t a) 667 { 668 if (a < next_avail_addr) 669 return; 670 next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE); 671 DBG(next_avail_addr); 672 } 673 674 static int 675 dboot_loader_mmap_entries(void) 676 { 677 #if !defined(__xpv) 678 if (num_entries_set == B_TRUE) 679 return (num_entries); 680 681 switch (multiboot_version) { 682 case 1: 683 DBG(mb_info->flags); 684 if (mb_info->flags & 0x40) { 685 mb_memory_map_t *mmap; 686 caddr32_t mmap_addr; 687 688 DBG(mb_info->mmap_addr); 689 DBG(mb_info->mmap_length); 690 check_higher(mb_info->mmap_addr + mb_info->mmap_length); 691 692 for (mmap_addr = mb_info->mmap_addr; 693 mmap_addr < mb_info->mmap_addr + 694 mb_info->mmap_length; 695 mmap_addr += mmap->size + sizeof (mmap->size)) { 696 mmap = (mb_memory_map_t *)(uintptr_t)mmap_addr; 697 ++num_entries; 698 } 699 700 num_entries_set = B_TRUE; 701 } 702 break; 703 case 2: 704 num_entries_set = B_TRUE; 705 num_entries = dboot_multiboot2_mmap_nentries(mb2_info, 706 mb2_mmap_tagp); 707 break; 708 default: 709 dboot_panic("Unknown multiboot version: %d\n", 710 multiboot_version); 711 break; 712 } 713 return (num_entries); 714 #else 715 return (MAXMAPS); 716 #endif 717 } 718 719 static uint32_t 720 dboot_loader_mmap_get_type(int index) 721 { 722 #if !defined(__xpv) 723 mb_memory_map_t *mp, *mpend; 724 caddr32_t mmap_addr; 725 int i; 726 727 switch (multiboot_version) { 728 case 1: 729 mp = (mb_memory_map_t *)(uintptr_t)mb_info->mmap_addr; 730 mpend = (mb_memory_map_t *)(uintptr_t) 731 (mb_info->mmap_addr + mb_info->mmap_length); 732 733 for (i = 0; mp < mpend && i != index; i++) 734 mp = 
(mb_memory_map_t *)((uintptr_t)mp + mp->size + 735 sizeof (mp->size)); 736 if (mp >= mpend) { 737 dboot_panic("dboot_loader_mmap_get_type(): index " 738 "out of bounds: %d\n", index); 739 } 740 return (mp->type); 741 742 case 2: 743 return (dboot_multiboot2_mmap_get_type(mb2_info, 744 mb2_mmap_tagp, index)); 745 746 default: 747 dboot_panic("Unknown multiboot version: %d\n", 748 multiboot_version); 749 break; 750 } 751 return (0); 752 #else 753 return (map_buffer[index].type); 754 #endif 755 } 756 757 static uint64_t 758 dboot_loader_mmap_get_base(int index) 759 { 760 #if !defined(__xpv) 761 mb_memory_map_t *mp, *mpend; 762 int i; 763 764 switch (multiboot_version) { 765 case 1: 766 mp = (mb_memory_map_t *)mb_info->mmap_addr; 767 mpend = (mb_memory_map_t *) 768 (mb_info->mmap_addr + mb_info->mmap_length); 769 770 for (i = 0; mp < mpend && i != index; i++) 771 mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size + 772 sizeof (mp->size)); 773 if (mp >= mpend) { 774 dboot_panic("dboot_loader_mmap_get_base(): index " 775 "out of bounds: %d\n", index); 776 } 777 return (((uint64_t)mp->base_addr_high << 32) + 778 (uint64_t)mp->base_addr_low); 779 780 case 2: 781 return (dboot_multiboot2_mmap_get_base(mb2_info, 782 mb2_mmap_tagp, index)); 783 784 default: 785 dboot_panic("Unknown multiboot version: %d\n", 786 multiboot_version); 787 break; 788 } 789 return (0); 790 #else 791 return (((uint64_t)map_buffer[index].base_addr_high << 32) + 792 (uint64_t)map_buffer[index].base_addr_low); 793 #endif 794 } 795 796 static uint64_t 797 dboot_loader_mmap_get_length(int index) 798 { 799 #if !defined(__xpv) 800 mb_memory_map_t *mp, *mpend; 801 int i; 802 803 switch (multiboot_version) { 804 case 1: 805 mp = (mb_memory_map_t *)mb_info->mmap_addr; 806 mpend = (mb_memory_map_t *) 807 (mb_info->mmap_addr + mb_info->mmap_length); 808 809 for (i = 0; mp < mpend && i != index; i++) 810 mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size + 811 sizeof (mp->size)); 812 if (mp >= mpend) { 813 
dboot_panic("dboot_loader_mmap_get_length(): index " 814 "out of bounds: %d\n", index); 815 } 816 return (((uint64_t)mp->length_high << 32) + 817 (uint64_t)mp->length_low); 818 819 case 2: 820 return (dboot_multiboot2_mmap_get_length(mb2_info, 821 mb2_mmap_tagp, index)); 822 823 default: 824 dboot_panic("Unknown multiboot version: %d\n", 825 multiboot_version); 826 break; 827 } 828 return (0); 829 #else 830 return (((uint64_t)map_buffer[index].length_high << 32) + 831 (uint64_t)map_buffer[index].length_low); 832 #endif 833 } 834 835 static void 836 build_pcimemlists(void) 837 { 838 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 839 uint64_t start; 840 uint64_t end; 841 int i, num; 842 843 /* 844 * initialize 845 */ 846 pcimemlists[0].addr = pci_lo_limit; 847 pcimemlists[0].size = pci_hi_limit - pci_lo_limit; 848 pcimemlists_used = 1; 849 850 num = dboot_loader_mmap_entries(); 851 /* 852 * Fill in PCI memlists. 853 */ 854 for (i = 0; i < num; ++i) { 855 start = dboot_loader_mmap_get_base(i); 856 end = start + dboot_loader_mmap_get_length(i); 857 858 if (prom_debug) 859 dboot_printf("\ttype: %d %" PRIx64 "..%" 860 PRIx64 "\n", dboot_loader_mmap_get_type(i), 861 start, end); 862 863 /* 864 * page align start and end 865 */ 866 start = (start + page_offset) & ~page_offset; 867 end &= ~page_offset; 868 if (end <= start) 869 continue; 870 871 exclude_from_pci(start, end); 872 } 873 874 /* 875 * Finish off the pcimemlist 876 */ 877 if (prom_debug) { 878 for (i = 0; i < pcimemlists_used; ++i) { 879 dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%" 880 PRIx64 "\n", pcimemlists[i].addr, 881 pcimemlists[i].addr + pcimemlists[i].size); 882 } 883 } 884 pcimemlists[0].next = 0; 885 pcimemlists[0].prev = 0; 886 for (i = 1; i < pcimemlists_used; ++i) { 887 pcimemlists[i].prev = 888 (native_ptr_t)(uintptr_t)(pcimemlists + i - 1); 889 pcimemlists[i].next = 0; 890 pcimemlists[i - 1].next = 891 (native_ptr_t)(uintptr_t)(pcimemlists + i); 892 } 893 bi->bi_pcimem = 
(native_ptr_t)(uintptr_t)pcimemlists; 894 DBG(bi->bi_pcimem); 895 } 896 897 #if defined(__xpv) 898 /* 899 * Initialize memory allocator stuff from hypervisor-supplied start info. 900 */ 901 static void 902 init_mem_alloc(void) 903 { 904 int local; /* variables needed to find start region */ 905 paddr_t scratch_start; 906 xen_memory_map_t map; 907 908 DBG_MSG("Entered init_mem_alloc()\n"); 909 910 /* 911 * Free memory follows the stack. There's at least 512KB of scratch 912 * space, rounded up to at least 2Mb alignment. That should be enough 913 * for the page tables we'll need to build. The nucleus memory is 914 * allocated last and will be outside the addressible range. We'll 915 * switch to new page tables before we unpack the kernel 916 */ 917 scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE); 918 DBG(scratch_start); 919 scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG); 920 DBG(scratch_end); 921 922 /* 923 * For paranoia, leave some space between hypervisor data and ours. 924 * Use 500 instead of 512. 
925 */ 926 next_avail_addr = scratch_end - 500 * 1024; 927 DBG(next_avail_addr); 928 929 /* 930 * The domain builder gives us at most 1 module 931 */ 932 DBG(xen_info->mod_len); 933 if (xen_info->mod_len > 0) { 934 DBG(xen_info->mod_start); 935 modules[0].bm_addr = 936 (native_ptr_t)(uintptr_t)xen_info->mod_start; 937 modules[0].bm_size = xen_info->mod_len; 938 bi->bi_module_cnt = 1; 939 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 940 } else { 941 bi->bi_module_cnt = 0; 942 bi->bi_modules = (native_ptr_t)(uintptr_t)NULL; 943 } 944 DBG(bi->bi_module_cnt); 945 DBG(bi->bi_modules); 946 947 DBG(xen_info->mfn_list); 948 DBG(xen_info->nr_pages); 949 max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT; 950 DBG(max_mem); 951 952 /* 953 * Using pseudo-physical addresses, so only 1 memlist element 954 */ 955 memlists[0].addr = 0; 956 DBG(memlists[0].addr); 957 memlists[0].size = max_mem; 958 DBG(memlists[0].size); 959 memlists_used = 1; 960 DBG(memlists_used); 961 962 /* 963 * finish building physinstall list 964 */ 965 sort_physinstall(); 966 967 /* 968 * build bios reserved memlists 969 */ 970 build_rsvdmemlists(); 971 972 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 973 /* 974 * build PCI Memory list 975 */ 976 map.nr_entries = MAXMAPS; 977 /*LINTED: constant in conditional context*/ 978 set_xen_guest_handle(map.buffer, map_buffer); 979 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0) 980 dboot_panic("getting XENMEM_machine_memory_map failed"); 981 build_pcimemlists(); 982 } 983 } 984 985 #else /* !__xpv */ 986 987 static void 988 dboot_multiboot1_xboot_consinfo(void) 989 { 990 fb->framebuffer = 0; 991 } 992 993 static void 994 dboot_multiboot2_xboot_consinfo(void) 995 { 996 multiboot_tag_framebuffer_t *fbtag; 997 fbtag = dboot_multiboot2_find_tag(mb2_info, 998 MULTIBOOT_TAG_TYPE_FRAMEBUFFER); 999 fb->framebuffer = (uint64_t)(uintptr_t)fbtag; 1000 } 1001 1002 static int 1003 dboot_multiboot_modcount(void) 1004 { 1005 switch (multiboot_version) { 1006 
case 1: 1007 return (mb_info->mods_count); 1008 1009 case 2: 1010 return (dboot_multiboot2_modcount(mb2_info)); 1011 1012 default: 1013 dboot_panic("Unknown multiboot version: %d\n", 1014 multiboot_version); 1015 break; 1016 } 1017 return (0); 1018 } 1019 1020 static uint32_t 1021 dboot_multiboot_modstart(int index) 1022 { 1023 switch (multiboot_version) { 1024 case 1: 1025 return (((mb_module_t *)mb_info->mods_addr)[index].mod_start); 1026 1027 case 2: 1028 return (dboot_multiboot2_modstart(mb2_info, index)); 1029 1030 default: 1031 dboot_panic("Unknown multiboot version: %d\n", 1032 multiboot_version); 1033 break; 1034 } 1035 return (0); 1036 } 1037 1038 static uint32_t 1039 dboot_multiboot_modend(int index) 1040 { 1041 switch (multiboot_version) { 1042 case 1: 1043 return (((mb_module_t *)mb_info->mods_addr)[index].mod_end); 1044 1045 case 2: 1046 return (dboot_multiboot2_modend(mb2_info, index)); 1047 1048 default: 1049 dboot_panic("Unknown multiboot version: %d\n", 1050 multiboot_version); 1051 break; 1052 } 1053 return (0); 1054 } 1055 1056 static char * 1057 dboot_multiboot_modcmdline(int index) 1058 { 1059 switch (multiboot_version) { 1060 case 1: 1061 return ((char *)((mb_module_t *) 1062 mb_info->mods_addr)[index].mod_name); 1063 1064 case 2: 1065 return (dboot_multiboot2_modcmdline(mb2_info, index)); 1066 1067 default: 1068 dboot_panic("Unknown multiboot version: %d\n", 1069 multiboot_version); 1070 break; 1071 } 1072 return (0); 1073 } 1074 1075 /* 1076 * Find the modules used by console setup. 1077 * Since we need the console to print early boot messages, the console is set up 1078 * before anything else and therefore we need to pick up the needed modules. 1079 * 1080 * Note, we just will search for and if found, will pass the modules 1081 * to console setup, the proper module list processing will happen later. 1082 * Currently used modules are boot environment and console font. 
1083 */ 1084 static void 1085 dboot_find_console_modules(void) 1086 { 1087 int i, modcount; 1088 uint32_t mod_start, mod_end; 1089 char *cmdline; 1090 1091 modcount = dboot_multiboot_modcount(); 1092 bi->bi_module_cnt = 0; 1093 for (i = 0; i < modcount; ++i) { 1094 cmdline = dboot_multiboot_modcmdline(i); 1095 if (cmdline == NULL) 1096 continue; 1097 1098 if (strstr(cmdline, "type=console-font") != NULL) 1099 modules[bi->bi_module_cnt].bm_type = BMT_FONT; 1100 else if (strstr(cmdline, "type=environment") != NULL) 1101 modules[bi->bi_module_cnt].bm_type = BMT_ENV; 1102 else 1103 continue; 1104 1105 mod_start = dboot_multiboot_modstart(i); 1106 mod_end = dboot_multiboot_modend(i); 1107 modules[bi->bi_module_cnt].bm_addr = 1108 (native_ptr_t)(uintptr_t)mod_start; 1109 modules[bi->bi_module_cnt].bm_size = mod_end - mod_start; 1110 modules[bi->bi_module_cnt].bm_name = 1111 (native_ptr_t)(uintptr_t)NULL; 1112 modules[bi->bi_module_cnt].bm_hash = 1113 (native_ptr_t)(uintptr_t)NULL; 1114 bi->bi_module_cnt++; 1115 } 1116 if (bi->bi_module_cnt != 0) 1117 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 1118 } 1119 1120 static boolean_t 1121 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper) 1122 { 1123 boolean_t rv = B_FALSE; 1124 1125 switch (multiboot_version) { 1126 case 1: 1127 if (mb_info->flags & 0x01) { 1128 *lower = mb_info->mem_lower; 1129 *upper = mb_info->mem_upper; 1130 rv = B_TRUE; 1131 } 1132 break; 1133 1134 case 2: 1135 return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper)); 1136 1137 default: 1138 dboot_panic("Unknown multiboot version: %d\n", 1139 multiboot_version); 1140 break; 1141 } 1142 return (rv); 1143 } 1144 1145 static uint8_t 1146 dboot_a2h(char v) 1147 { 1148 if (v >= 'a') 1149 return (v - 'a' + 0xa); 1150 else if (v >= 'A') 1151 return (v - 'A' + 0xa); 1152 else if (v >= '0') 1153 return (v - '0'); 1154 else 1155 dboot_panic("bad ASCII hex character %c\n", v); 1156 1157 return (0); 1158 } 1159 1160 static void 1161 
digest_a2h(const char *ascii, uint8_t *digest) 1162 { 1163 unsigned int i; 1164 1165 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1166 digest[i] = dboot_a2h(ascii[i * 2]) << 4; 1167 digest[i] |= dboot_a2h(ascii[i * 2 + 1]); 1168 } 1169 } 1170 1171 /* 1172 * Generate a SHA-1 hash of the first len bytes of image, and compare it with 1173 * the ASCII-format hash found in the 40-byte buffer at ascii. If they 1174 * match, return 0, otherwise -1. This works only for images smaller than 1175 * 4 GB, which should not be a problem. 1176 */ 1177 static int 1178 check_image_hash(uint_t midx) 1179 { 1180 const char *ascii; 1181 const void *image; 1182 size_t len; 1183 SHA1_CTX ctx; 1184 uint8_t digest[SHA1_DIGEST_LENGTH]; 1185 uint8_t baseline[SHA1_DIGEST_LENGTH]; 1186 unsigned int i; 1187 1188 ascii = (const char *)(uintptr_t)modules[midx].bm_hash; 1189 image = (const void *)(uintptr_t)modules[midx].bm_addr; 1190 len = (size_t)modules[midx].bm_size; 1191 1192 digest_a2h(ascii, baseline); 1193 1194 SHA1Init(&ctx); 1195 SHA1Update(&ctx, image, len); 1196 SHA1Final(digest, &ctx); 1197 1198 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1199 if (digest[i] != baseline[i]) 1200 return (-1); 1201 } 1202 1203 return (0); 1204 } 1205 1206 static const char * 1207 type_to_str(boot_module_type_t type) 1208 { 1209 switch (type) { 1210 case BMT_ROOTFS: 1211 return ("rootfs"); 1212 case BMT_FILE: 1213 return ("file"); 1214 case BMT_HASH: 1215 return ("hash"); 1216 case BMT_ENV: 1217 return ("environment"); 1218 case BMT_FONT: 1219 return ("console-font"); 1220 default: 1221 return ("unknown"); 1222 } 1223 } 1224 1225 static void 1226 check_images(void) 1227 { 1228 uint_t i; 1229 char displayhash[SHA1_ASCII_LENGTH + 1]; 1230 1231 for (i = 0; i < modules_used; i++) { 1232 if (prom_debug) { 1233 dboot_printf("module #%d: name %s type %s " 1234 "addr %lx size %lx\n", 1235 i, (char *)(uintptr_t)modules[i].bm_name, 1236 type_to_str(modules[i].bm_type), 1237 (ulong_t)modules[i].bm_addr, 1238 
(ulong_t)modules[i].bm_size); 1239 } 1240 1241 if (modules[i].bm_type == BMT_HASH || 1242 modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) { 1243 DBG_MSG("module has no hash; skipping check\n"); 1244 continue; 1245 } 1246 (void) memcpy(displayhash, 1247 (void *)(uintptr_t)modules[i].bm_hash, 1248 SHA1_ASCII_LENGTH); 1249 displayhash[SHA1_ASCII_LENGTH] = '\0'; 1250 if (prom_debug) { 1251 dboot_printf("checking expected hash [%s]: ", 1252 displayhash); 1253 } 1254 1255 if (check_image_hash(i) != 0) 1256 dboot_panic("hash mismatch!\n"); 1257 else 1258 DBG_MSG("OK\n"); 1259 } 1260 } 1261 1262 /* 1263 * Determine the module's starting address, size, name, and type, and fill the 1264 * boot_modules structure. This structure is used by the bop code, except for 1265 * hashes which are checked prior to transferring control to the kernel. 1266 */ 1267 static void 1268 process_module(int midx) 1269 { 1270 uint32_t mod_start = dboot_multiboot_modstart(midx); 1271 uint32_t mod_end = dboot_multiboot_modend(midx); 1272 char *cmdline = dboot_multiboot_modcmdline(midx); 1273 char *p, *q; 1274 1275 check_higher(mod_end); 1276 if (prom_debug) { 1277 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n", 1278 midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end); 1279 } 1280 1281 if (mod_start > mod_end) { 1282 dboot_panic("module #%d: module start address 0x%lx greater " 1283 "than end address 0x%lx", midx, 1284 (ulong_t)mod_start, (ulong_t)mod_end); 1285 } 1286 1287 /* 1288 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes 1289 * the address of the last valid byte in a module plus 1 as mod_end. 1290 * This is of course a bug; the multiboot specification simply states 1291 * that mod_start and mod_end "contain the start and end addresses of 1292 * the boot module itself" which is pretty obviously not what GRUB is 1293 * doing. 
However, fixing it requires that not only this code be 1294 * changed but also that other code consuming this value and values 1295 * derived from it be fixed, and that the kernel and GRUB must either 1296 * both have the bug or neither. While there are a lot of combinations 1297 * that will work, there are also some that won't, so for simplicity 1298 * we'll just cope with the bug. That means we won't actually hash the 1299 * byte at mod_end, and we will expect that mod_end for the hash file 1300 * itself is one greater than some multiple of 41 (40 bytes of ASCII 1301 * hash plus a newline for each module). We set bm_size to the true 1302 * correct number of bytes in each module, achieving exactly this. 1303 */ 1304 1305 modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start; 1306 modules[midx].bm_size = mod_end - mod_start; 1307 modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline; 1308 modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL; 1309 modules[midx].bm_type = BMT_FILE; 1310 1311 if (cmdline == NULL) { 1312 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname; 1313 return; 1314 } 1315 1316 p = cmdline; 1317 modules[midx].bm_name = 1318 (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r"); 1319 1320 while (p != NULL) { 1321 q = strsep(&p, " \t\f\n\r"); 1322 if (strncmp(q, "name=", 5) == 0) { 1323 if (q[5] != '\0' && !isspace(q[5])) { 1324 modules[midx].bm_name = 1325 (native_ptr_t)(uintptr_t)(q + 5); 1326 } 1327 continue; 1328 } 1329 1330 if (strncmp(q, "type=", 5) == 0) { 1331 if (q[5] == '\0' || isspace(q[5])) 1332 continue; 1333 q += 5; 1334 if (strcmp(q, "rootfs") == 0) { 1335 modules[midx].bm_type = BMT_ROOTFS; 1336 } else if (strcmp(q, "hash") == 0) { 1337 modules[midx].bm_type = BMT_HASH; 1338 } else if (strcmp(q, "environment") == 0) { 1339 modules[midx].bm_type = BMT_ENV; 1340 } else if (strcmp(q, "console-font") == 0) { 1341 modules[midx].bm_type = BMT_FONT; 1342 } else if (strcmp(q, "file") != 0) { 1343 dboot_printf("\tmodule #%d: 
unknown module " 1344 "type '%s'; defaulting to 'file'\n", 1345 midx, q); 1346 } 1347 continue; 1348 } 1349 1350 if (strncmp(q, "hash=", 5) == 0) { 1351 if (q[5] != '\0' && !isspace(q[5])) { 1352 modules[midx].bm_hash = 1353 (native_ptr_t)(uintptr_t)(q + 5); 1354 } 1355 continue; 1356 } 1357 1358 dboot_printf("ignoring unknown option '%s'\n", q); 1359 } 1360 } 1361 1362 /* 1363 * Backward compatibility: if there are exactly one or two modules, both 1364 * of type 'file' and neither with an embedded hash value, we have been 1365 * given the legacy style modules. In this case we need to treat the first 1366 * module as a rootfs and the second as a hash referencing that module. 1367 * Otherwise, even if the configuration is invalid, we assume that the 1368 * operator knows what he's doing or at least isn't being bitten by this 1369 * interface change. 1370 */ 1371 static void 1372 fixup_modules(void) 1373 { 1374 if (modules_used == 0 || modules_used > 2) 1375 return; 1376 1377 if (modules[0].bm_type != BMT_FILE || 1378 modules_used > 1 && modules[1].bm_type != BMT_FILE) { 1379 return; 1380 } 1381 1382 if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL || 1383 modules_used > 1 && 1384 modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL) { 1385 return; 1386 } 1387 1388 modules[0].bm_type = BMT_ROOTFS; 1389 if (modules_used > 1) { 1390 modules[1].bm_type = BMT_HASH; 1391 modules[1].bm_name = modules[0].bm_name; 1392 } 1393 } 1394 1395 /* 1396 * For modules that do not have assigned hashes but have a separate hash module, 1397 * find the assigned hash module and set the primary module's bm_hash to point 1398 * to the hash data from that module. We will then ignore modules of type 1399 * BMT_HASH from this point forward. 
1400 */ 1401 static void 1402 assign_module_hashes(void) 1403 { 1404 uint_t i, j; 1405 1406 for (i = 0; i < modules_used; i++) { 1407 if (modules[i].bm_type == BMT_HASH || 1408 modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) { 1409 continue; 1410 } 1411 1412 for (j = 0; j < modules_used; j++) { 1413 if (modules[j].bm_type != BMT_HASH || 1414 strcmp((char *)(uintptr_t)modules[j].bm_name, 1415 (char *)(uintptr_t)modules[i].bm_name) != 0) { 1416 continue; 1417 } 1418 1419 if (modules[j].bm_size < SHA1_ASCII_LENGTH) { 1420 dboot_printf("Short hash module of length " 1421 "0x%lx bytes; ignoring\n", 1422 (ulong_t)modules[j].bm_size); 1423 } else { 1424 modules[i].bm_hash = modules[j].bm_addr; 1425 } 1426 break; 1427 } 1428 } 1429 } 1430 1431 /* 1432 * Walk through the module information finding the last used address. 1433 * The first available address will become the top level page table. 1434 */ 1435 static void 1436 dboot_process_modules(void) 1437 { 1438 int i, modcount; 1439 extern char _end[]; 1440 1441 DBG_MSG("\nFinding Modules\n"); 1442 modcount = dboot_multiboot_modcount(); 1443 if (modcount > MAX_BOOT_MODULES) { 1444 dboot_panic("Too many modules (%d) -- the maximum is %d.", 1445 modcount, MAX_BOOT_MODULES); 1446 } 1447 /* 1448 * search the modules to find the last used address 1449 * we'll build the module list while we're walking through here 1450 */ 1451 check_higher((paddr_t)(uintptr_t)&_end); 1452 for (i = 0; i < modcount; ++i) { 1453 process_module(i); 1454 modules_used++; 1455 } 1456 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 1457 DBG(bi->bi_modules); 1458 bi->bi_module_cnt = modcount; 1459 DBG(bi->bi_module_cnt); 1460 1461 fixup_modules(); 1462 assign_module_hashes(); 1463 check_images(); 1464 } 1465 1466 /* 1467 * We then build the phys_install memlist from the multiboot information. 
1468 */ 1469 static void 1470 dboot_process_mmap(void) 1471 { 1472 uint64_t start; 1473 uint64_t end; 1474 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 1475 uint32_t lower, upper; 1476 int i, mmap_entries; 1477 1478 /* 1479 * Walk through the memory map from multiboot and build our memlist 1480 * structures. Note these will have native format pointers. 1481 */ 1482 DBG_MSG("\nFinding Memory Map\n"); 1483 num_entries = 0; 1484 num_entries_set = B_FALSE; 1485 max_mem = 0; 1486 if ((mmap_entries = dboot_loader_mmap_entries()) > 0) { 1487 for (i = 0; i < mmap_entries; i++) { 1488 uint32_t type = dboot_loader_mmap_get_type(i); 1489 start = dboot_loader_mmap_get_base(i); 1490 end = start + dboot_loader_mmap_get_length(i); 1491 1492 if (prom_debug) 1493 dboot_printf("\ttype: %d %" PRIx64 "..%" 1494 PRIx64 "\n", type, start, end); 1495 1496 /* 1497 * page align start and end 1498 */ 1499 start = (start + page_offset) & ~page_offset; 1500 end &= ~page_offset; 1501 if (end <= start) 1502 continue; 1503 1504 /* 1505 * only type 1 is usable RAM 1506 */ 1507 switch (type) { 1508 case 1: 1509 if (end > max_mem) 1510 max_mem = end; 1511 memlists[memlists_used].addr = start; 1512 memlists[memlists_used].size = end - start; 1513 ++memlists_used; 1514 if (memlists_used > MAX_MEMLIST) 1515 dboot_panic("too many memlists"); 1516 break; 1517 case 2: 1518 rsvdmemlists[rsvdmemlists_used].addr = start; 1519 rsvdmemlists[rsvdmemlists_used].size = 1520 end - start; 1521 ++rsvdmemlists_used; 1522 if (rsvdmemlists_used > MAX_MEMLIST) 1523 dboot_panic("too many rsvdmemlists"); 1524 break; 1525 default: 1526 continue; 1527 } 1528 } 1529 build_pcimemlists(); 1530 } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) { 1531 DBG(lower); 1532 memlists[memlists_used].addr = 0; 1533 memlists[memlists_used].size = lower * 1024; 1534 ++memlists_used; 1535 DBG(upper); 1536 memlists[memlists_used].addr = 1024 * 1024; 1537 memlists[memlists_used].size = upper * 1024; 1538 
++memlists_used; 1539 1540 /* 1541 * Old platform - assume I/O space at the end of memory. 1542 */ 1543 pcimemlists[0].addr = (upper * 1024) + (1024 * 1024); 1544 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr; 1545 pcimemlists[0].next = 0; 1546 pcimemlists[0].prev = 0; 1547 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 1548 DBG(bi->bi_pcimem); 1549 } else { 1550 dboot_panic("No memory info from boot loader!!!"); 1551 } 1552 1553 /* 1554 * finish processing the physinstall list 1555 */ 1556 sort_physinstall(); 1557 1558 /* 1559 * build bios reserved mem lists 1560 */ 1561 build_rsvdmemlists(); 1562 } 1563 1564 /* 1565 * The highest address is used as the starting point for dboot's simple 1566 * memory allocator. 1567 * 1568 * Finding the highest address in case of Multiboot 1 protocol is 1569 * quite painful in the sense that some information provided by 1570 * the multiboot info structure points to BIOS data, and some to RAM. 1571 * 1572 * The module list was processed and checked already by dboot_process_modules(), 1573 * so we will check the command line string and the memory map. 1574 * 1575 * This list of to be checked items is based on our current knowledge of 1576 * allocations made by grub1 and will need to be reviewed if there 1577 * are updates about the information provided by Multiboot 1. 1578 * 1579 * In the case of the Multiboot 2, our life is much simpler, as the MB2 1580 * information tag list is one contiguous chunk of memory. 
1581 */ 1582 static paddr_t 1583 dboot_multiboot1_highest_addr(void) 1584 { 1585 paddr_t addr = (paddr_t)(uintptr_t)NULL; 1586 char *cmdl = (char *)mb_info->cmdline; 1587 1588 if (mb_info->flags & MB_INFO_CMDLINE) 1589 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1)); 1590 1591 if (mb_info->flags & MB_INFO_MEM_MAP) 1592 addr = MAX(addr, 1593 ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length))); 1594 return (addr); 1595 } 1596 1597 static void 1598 dboot_multiboot_highest_addr(void) 1599 { 1600 paddr_t addr; 1601 1602 switch (multiboot_version) { 1603 case 1: 1604 addr = dboot_multiboot1_highest_addr(); 1605 if (addr != (paddr_t)(uintptr_t)NULL) 1606 check_higher(addr); 1607 break; 1608 case 2: 1609 addr = dboot_multiboot2_highest_addr(mb2_info); 1610 if (addr != (paddr_t)(uintptr_t)NULL) 1611 check_higher(addr); 1612 break; 1613 default: 1614 dboot_panic("Unknown multiboot version: %d\n", 1615 multiboot_version); 1616 break; 1617 } 1618 } 1619 1620 /* 1621 * Walk the boot loader provided information and find the highest free address. 
1622 */ 1623 static void 1624 init_mem_alloc(void) 1625 { 1626 DBG_MSG("Entered init_mem_alloc()\n"); 1627 dboot_process_modules(); 1628 dboot_process_mmap(); 1629 dboot_multiboot_highest_addr(); 1630 } 1631 1632 static int 1633 dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2) 1634 { 1635 int i; 1636 1637 if (g1->time_low != g2->time_low) 1638 return (0); 1639 if (g1->time_mid != g2->time_mid) 1640 return (0); 1641 if (g1->time_hi_and_version != g2->time_hi_and_version) 1642 return (0); 1643 if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved) 1644 return (0); 1645 if (g1->clock_seq_low != g2->clock_seq_low) 1646 return (0); 1647 1648 for (i = 0; i < 6; i++) { 1649 if (g1->node_addr[i] != g2->node_addr[i]) 1650 return (0); 1651 } 1652 return (1); 1653 } 1654 1655 static void 1656 process_efi32(EFI_SYSTEM_TABLE32 *efi) 1657 { 1658 uint32_t entries; 1659 EFI_CONFIGURATION_TABLE32 *config; 1660 efi_guid_t VendorGuid; 1661 int i; 1662 1663 entries = efi->NumberOfTableEntries; 1664 config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t) 1665 efi->ConfigurationTable; 1666 1667 for (i = 0; i < entries; i++) { 1668 (void) memcpy(&VendorGuid, &config[i].VendorGuid, 1669 sizeof (VendorGuid)); 1670 if (dboot_same_guids(&VendorGuid, &smbios3)) { 1671 bi->bi_smbios = (native_ptr_t)(uintptr_t) 1672 config[i].VendorTable; 1673 } 1674 if (bi->bi_smbios == 0 && 1675 dboot_same_guids(&VendorGuid, &smbios)) { 1676 bi->bi_smbios = (native_ptr_t)(uintptr_t) 1677 config[i].VendorTable; 1678 } 1679 if (dboot_same_guids(&VendorGuid, &acpi2)) { 1680 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1681 config[i].VendorTable; 1682 } 1683 if (bi->bi_acpi_rsdp == 0 && 1684 dboot_same_guids(&VendorGuid, &acpi1)) { 1685 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1686 config[i].VendorTable; 1687 } 1688 } 1689 } 1690 1691 static void 1692 process_efi64(EFI_SYSTEM_TABLE64 *efi) 1693 { 1694 uint64_t entries; 1695 EFI_CONFIGURATION_TABLE64 *config; 1696 efi_guid_t VendorGuid; 1697 int i; 1698 
1699 entries = efi->NumberOfTableEntries; 1700 config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t) 1701 efi->ConfigurationTable; 1702 1703 for (i = 0; i < entries; i++) { 1704 (void) memcpy(&VendorGuid, &config[i].VendorGuid, 1705 sizeof (VendorGuid)); 1706 if (dboot_same_guids(&VendorGuid, &smbios3)) { 1707 bi->bi_smbios = (native_ptr_t)(uintptr_t) 1708 config[i].VendorTable; 1709 } 1710 if (bi->bi_smbios == 0 && 1711 dboot_same_guids(&VendorGuid, &smbios)) { 1712 bi->bi_smbios = (native_ptr_t)(uintptr_t) 1713 config[i].VendorTable; 1714 } 1715 /* Prefer acpi v2+ over v1. */ 1716 if (dboot_same_guids(&VendorGuid, &acpi2)) { 1717 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1718 config[i].VendorTable; 1719 } 1720 if (bi->bi_acpi_rsdp == 0 && 1721 dboot_same_guids(&VendorGuid, &acpi1)) { 1722 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1723 config[i].VendorTable; 1724 } 1725 } 1726 } 1727 1728 static void 1729 dboot_multiboot_get_fwtables(void) 1730 { 1731 multiboot_tag_new_acpi_t *nacpitagp; 1732 multiboot_tag_old_acpi_t *oacpitagp; 1733 multiboot_tag_efi64_t *efi64tagp = NULL; 1734 multiboot_tag_efi32_t *efi32tagp = NULL; 1735 1736 /* no fw tables from multiboot 1 */ 1737 if (multiboot_version != 2) 1738 return; 1739 1740 efi64tagp = (multiboot_tag_efi64_t *) 1741 dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64); 1742 if (efi64tagp != NULL) { 1743 bi->bi_uefi_arch = XBI_UEFI_ARCH_64; 1744 bi->bi_uefi_systab = (native_ptr_t)(uintptr_t) 1745 efi64tagp->mb_pointer; 1746 process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t) 1747 efi64tagp->mb_pointer); 1748 } else { 1749 efi32tagp = (multiboot_tag_efi32_t *) 1750 dboot_multiboot2_find_tag(mb2_info, 1751 MULTIBOOT_TAG_TYPE_EFI32); 1752 if (efi32tagp != NULL) { 1753 bi->bi_uefi_arch = XBI_UEFI_ARCH_32; 1754 bi->bi_uefi_systab = (native_ptr_t)(uintptr_t) 1755 efi32tagp->mb_pointer; 1756 process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t) 1757 efi32tagp->mb_pointer); 1758 } 1759 } 1760 1761 /* 1762 * The multiboot2 info 
contains a copy of the RSDP; stash a pointer to 1763 * it (see find_rsdp() in fakebop). 1764 */ 1765 nacpitagp = (multiboot_tag_new_acpi_t *) 1766 dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_NEW); 1767 oacpitagp = (multiboot_tag_old_acpi_t *) 1768 dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_OLD); 1769 1770 if (nacpitagp != NULL) { 1771 bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t) 1772 &nacpitagp->mb_rsdp[0]; 1773 } else if (oacpitagp != NULL) { 1774 bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t) 1775 &oacpitagp->mb_rsdp[0]; 1776 } 1777 } 1778 1779 /* print out EFI version string with newline */ 1780 static void 1781 dboot_print_efi_version(uint32_t ver) 1782 { 1783 int rev; 1784 1785 dboot_printf("%d.", EFI_REV_MAJOR(ver)); 1786 1787 rev = EFI_REV_MINOR(ver); 1788 if ((rev % 10) != 0) { 1789 dboot_printf("%d.%d\n", rev / 10, rev % 10); 1790 } else { 1791 dboot_printf("%d\n", rev / 10); 1792 } 1793 } 1794 1795 static void 1796 print_efi32(EFI_SYSTEM_TABLE32 *efi) 1797 { 1798 uint16_t *data; 1799 EFI_CONFIGURATION_TABLE32 *conf; 1800 int i; 1801 1802 dboot_printf("EFI32 signature: %llx\n", 1803 (unsigned long long)efi->Hdr.Signature); 1804 dboot_printf("EFI system version: "); 1805 dboot_print_efi_version(efi->Hdr.Revision); 1806 dboot_printf("EFI system vendor: "); 1807 data = (uint16_t *)(uintptr_t)efi->FirmwareVendor; 1808 for (i = 0; data[i] != 0; i++) 1809 dboot_printf("%c", (char)data[i]); 1810 dboot_printf("\nEFI firmware revision: "); 1811 dboot_print_efi_version(efi->FirmwareRevision); 1812 dboot_printf("EFI system table number of entries: %d\n", 1813 efi->NumberOfTableEntries); 1814 conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t) 1815 efi->ConfigurationTable; 1816 for (i = 0; i < (int)efi->NumberOfTableEntries; i++) { 1817 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i, 1818 conf[i].VendorGuid.time_low, 1819 conf[i].VendorGuid.time_mid, 1820 conf[i].VendorGuid.time_hi_and_version, 1821 
conf[i].VendorGuid.clock_seq_hi_and_reserved, 1822 conf[i].VendorGuid.clock_seq_low); 1823 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", 1824 conf[i].VendorGuid.node_addr[0], 1825 conf[i].VendorGuid.node_addr[1], 1826 conf[i].VendorGuid.node_addr[2], 1827 conf[i].VendorGuid.node_addr[3], 1828 conf[i].VendorGuid.node_addr[4], 1829 conf[i].VendorGuid.node_addr[5]); 1830 } 1831 } 1832 1833 static void 1834 print_efi64(EFI_SYSTEM_TABLE64 *efi) 1835 { 1836 uint16_t *data; 1837 EFI_CONFIGURATION_TABLE64 *conf; 1838 int i; 1839 1840 dboot_printf("EFI64 signature: %llx\n", 1841 (unsigned long long)efi->Hdr.Signature); 1842 dboot_printf("EFI system version: "); 1843 dboot_print_efi_version(efi->Hdr.Revision); 1844 dboot_printf("EFI system vendor: "); 1845 data = (uint16_t *)(uintptr_t)efi->FirmwareVendor; 1846 for (i = 0; data[i] != 0; i++) 1847 dboot_printf("%c", (char)data[i]); 1848 dboot_printf("\nEFI firmware revision: "); 1849 dboot_print_efi_version(efi->FirmwareRevision); 1850 dboot_printf("EFI system table number of entries: %" PRIu64 "\n", 1851 efi->NumberOfTableEntries); 1852 conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t) 1853 efi->ConfigurationTable; 1854 for (i = 0; i < (int)efi->NumberOfTableEntries; i++) { 1855 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i, 1856 conf[i].VendorGuid.time_low, 1857 conf[i].VendorGuid.time_mid, 1858 conf[i].VendorGuid.time_hi_and_version, 1859 conf[i].VendorGuid.clock_seq_hi_and_reserved, 1860 conf[i].VendorGuid.clock_seq_low); 1861 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", 1862 conf[i].VendorGuid.node_addr[0], 1863 conf[i].VendorGuid.node_addr[1], 1864 conf[i].VendorGuid.node_addr[2], 1865 conf[i].VendorGuid.node_addr[3], 1866 conf[i].VendorGuid.node_addr[4], 1867 conf[i].VendorGuid.node_addr[5]); 1868 } 1869 } 1870 #endif /* !__xpv */ 1871 1872 /* 1873 * Simple memory allocator, allocates aligned physical memory. 1874 * Note that startup_kernel() only allocates memory, never frees. 
1875 * Memory usage just grows in an upward direction. 1876 */ 1877 static void * 1878 do_mem_alloc(uint32_t size, uint32_t align) 1879 { 1880 uint_t i; 1881 uint64_t best; 1882 uint64_t start; 1883 uint64_t end; 1884 1885 /* 1886 * make sure size is a multiple of pagesize 1887 */ 1888 size = RNDUP(size, MMU_PAGESIZE); 1889 next_avail_addr = RNDUP(next_avail_addr, align); 1890 1891 /* 1892 * XXPV fixme joe 1893 * 1894 * a really large bootarchive that causes you to run out of memory 1895 * may cause this to blow up 1896 */ 1897 /* LINTED E_UNEXPECTED_UINT_PROMOTION */ 1898 best = (uint64_t)-size; 1899 for (i = 0; i < memlists_used; ++i) { 1900 start = memlists[i].addr; 1901 #if defined(__xpv) 1902 start += mfn_base; 1903 #endif 1904 end = start + memlists[i].size; 1905 1906 /* 1907 * did we find the desired address? 1908 */ 1909 if (start <= next_avail_addr && next_avail_addr + size <= end) { 1910 best = next_avail_addr; 1911 goto done; 1912 } 1913 1914 /* 1915 * if not is this address the best so far? 1916 */ 1917 if (start > next_avail_addr && start < best && 1918 RNDUP(start, align) + size <= end) 1919 best = RNDUP(start, align); 1920 } 1921 1922 /* 1923 * We didn't find exactly the address we wanted, due to going off the 1924 * end of a memory region. Return the best found memory address. 1925 */ 1926 done: 1927 next_avail_addr = best + size; 1928 #if defined(__xpv) 1929 if (next_avail_addr > scratch_end) 1930 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: " 1931 "0x%lx", (ulong_t)next_avail_addr, 1932 (ulong_t)scratch_end); 1933 #endif 1934 (void) memset((void *)(uintptr_t)best, 0, size); 1935 return ((void *)(uintptr_t)best); 1936 } 1937 1938 void * 1939 mem_alloc(uint32_t size) 1940 { 1941 return (do_mem_alloc(size, MMU_PAGESIZE)); 1942 } 1943 1944 1945 /* 1946 * Build page tables to map all of memory used so far as well as the kernel. 
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint64_t start;
#if !defined(__xpv)
	uint32_t i;
	uint64_t end;
#endif /* !__xpv */

	/*
	 * If we're on metal, we need to create the top level pagetable.
	 * Under the hypervisor the table at xen_info->pt_base is used instead.
	 */
#if defined(__xpv)
	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
#else /* __xpv */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
#endif /* __xpv */
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 * Large pages are mapped at level 1, ordinary pages at level 0.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	/* map ksize bytes of kernel, one psize page at a time */
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

#if defined(__xpv)
	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
		/* If this is a domU we're done. */
		DBG_MSG("\nPage tables constructed\n");
		return;
	}
#endif /* __xpv */

	/*
	 * We need 1:1 mappings for the lower 1M of memory to access
	 * BIOS tables used by a couple of drivers during boot.
	 *
	 * The following code works because our simple memory allocator
	 * only grows usage in an upwards direction.
	 *
	 * Note that by this point in boot some mappings for low memory
	 * may already exist because we've already accessed devices in low
	 * memory.  (Specifically the video frame buffer and keyboard
	 * status ports.)  If we're booting on raw hardware then GRUB
	 * created these mappings for us.  If we're booting under a
	 * hypervisor then we went ahead and remapped these devices into
	 * memory allocated within dboot itself.
	 */
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
#if defined(__xpv)
		map_ma_at_va(start, start, 0);
#else /* __xpv */
		map_pa_at_va(start, start, 0);
#endif /* __xpv */
	}

#if !defined(__xpv)

	/*
	 * 1:1 map every memlist page that lies below next_avail_addr; the
	 * walk stops at the first list that reaches next_avail_addr.
	 */
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
		end = start + memlists[i].size;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
		if (start >= next_avail_addr)
			break;
	}

	/*
	 * Map framebuffer memory as PT_NOCACHE as this is memory from a
	 * device and therefore must not be cached.
	 */
	if (fb != NULL && fb->framebuffer != 0) {
		multiboot_tag_framebuffer_t *fb_tagp;
		fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
		    fb->framebuffer;

		/* the mapped range covers height * pitch bytes */
		start = fb_tagp->framebuffer_common.framebuffer_addr;
		end = start + fb_tagp->framebuffer_common.framebuffer_height *
		    fb_tagp->framebuffer_common.framebuffer_pitch;

		if (map_debug)
			dboot_printf("FB 1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		/*
		 * pte_bits is a global, so the extra bits are set only for
		 * the duration of the framebuffer mappings.
		 */
		pte_bits |= PT_NOCACHE;
		if (PAT_support != 0)
			pte_bits |= PT_PAT_4K;

		while (start < end) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
		/* restore pte_bits for any later mappings */
		pte_bits &= ~PT_NOCACHE;
		if (PAT_support != 0)
			pte_bits &= ~PT_PAT_4K;
	}
#endif /* !__xpv */

	DBG_MSG("\nPage tables constructed\n");
}

/*
 * Message passed to dboot_panic() by startup_kernel() when the kernel
 * command line contains "multiboot".
 */
#define	NO_MULTIBOOT	\
"multiboot is no longer used to boot the Solaris Operating System.\n\
The grub entry should be changed to:\n\
kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
module$ /platform/i86pc/$ISADIR/boot_archive\n\
See http://illumos.org/msg/SUNOS-8000-AK for details.\n"

/*
 * Point bi at the boot_info structure and, when not running under the
 * hypervisor, attach the framebuffer information, gather the console
 * information for the multiboot version in use and record console related
 * modules.
 */
static void
dboot_init_xboot_consinfo(void)
{
	bi = &boot_info;

#if !defined(__xpv)
	fb = &framebuffer;
	bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;

	switch (multiboot_version) {
	case 1:
		dboot_multiboot1_xboot_consinfo();
		break;
	case 2:
		dboot_multiboot2_xboot_consinfo();
		break;
	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	dboot_find_console_modules();
#endif
}

/*
 * Set up basic data from the boot loader.
 * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
 * 32-bit dboot code setup used to set up and start 64-bit kernel.
2117 * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and 2118 * start 64-bit illumos kernel. 2119 */ 2120 static void 2121 dboot_loader_init(void) 2122 { 2123 #if !defined(__xpv) 2124 mb_info = NULL; 2125 mb2_info = NULL; 2126 2127 switch (mb_magic) { 2128 case MB_BOOTLOADER_MAGIC: 2129 multiboot_version = 1; 2130 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr; 2131 #if defined(_BOOT_TARGET_amd64) 2132 load_addr = mb_header.load_addr; 2133 #endif 2134 break; 2135 2136 case MULTIBOOT2_BOOTLOADER_MAGIC: 2137 multiboot_version = 2; 2138 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr; 2139 mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info); 2140 #if defined(_BOOT_TARGET_amd64) 2141 load_addr = mb2_load_addr; 2142 #endif 2143 break; 2144 2145 default: 2146 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic); 2147 break; 2148 } 2149 #endif /* !defined(__xpv) */ 2150 } 2151 2152 /* Extract the kernel command line from [multi]boot information. */ 2153 static char * 2154 dboot_loader_cmdline(void) 2155 { 2156 char *line = NULL; 2157 2158 #if defined(__xpv) 2159 line = (char *)xen_info->cmd_line; 2160 #else /* __xpv */ 2161 2162 switch (multiboot_version) { 2163 case 1: 2164 if (mb_info->flags & MB_INFO_CMDLINE) 2165 line = (char *)mb_info->cmdline; 2166 break; 2167 2168 case 2: 2169 line = dboot_multiboot2_cmdline(mb2_info); 2170 break; 2171 2172 default: 2173 dboot_panic("Unknown multiboot version: %d\n", 2174 multiboot_version); 2175 break; 2176 } 2177 2178 #endif /* __xpv */ 2179 2180 /* 2181 * Make sure we have valid pointer so the string operations 2182 * will not crash us. 
2183 */ 2184 if (line == NULL) 2185 line = ""; 2186 2187 return (line); 2188 } 2189 2190 static char * 2191 dboot_loader_name(void) 2192 { 2193 #if defined(__xpv) 2194 return (NULL); 2195 #else /* __xpv */ 2196 multiboot_tag_string_t *tag; 2197 2198 switch (multiboot_version) { 2199 case 1: 2200 return ((char *)(uintptr_t)mb_info->boot_loader_name); 2201 2202 case 2: 2203 tag = dboot_multiboot2_find_tag(mb2_info, 2204 MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME); 2205 return (tag->mb_string); 2206 default: 2207 dboot_panic("Unknown multiboot version: %d\n", 2208 multiboot_version); 2209 break; 2210 } 2211 2212 return (NULL); 2213 #endif /* __xpv */ 2214 } 2215 2216 /* 2217 * startup_kernel has a pretty simple job. It builds pagetables which reflect 2218 * 1:1 mappings for all memory in use. It then also adds mappings for 2219 * the kernel nucleus at virtual address of target_kernel_text using large page 2220 * mappings. The page table pages are also accessible at 1:1 mapped 2221 * virtual addresses. 2222 */ 2223 /*ARGSUSED*/ 2224 void 2225 startup_kernel(void) 2226 { 2227 char *cmdline; 2228 char *bootloader; 2229 #if defined(__xpv) 2230 physdev_set_iopl_t set_iopl; 2231 #endif /* __xpv */ 2232 2233 if (dboot_debug == 1) 2234 bcons_init(NULL); /* Set very early console to ttya. */ 2235 dboot_loader_init(); 2236 /* 2237 * At this point we are executing in a 32 bit real mode. 2238 */ 2239 2240 bootloader = dboot_loader_name(); 2241 cmdline = dboot_loader_cmdline(); 2242 2243 #if defined(__xpv) 2244 /* 2245 * For dom0, before we initialize the console subsystem we'll 2246 * need to enable io operations, so set I/O priveldge level to 1. 2247 */ 2248 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 2249 set_iopl.iopl = 1; 2250 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 2251 } 2252 #endif /* __xpv */ 2253 2254 dboot_init_xboot_consinfo(); 2255 bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline; 2256 bcons_init(bi); /* Now we can set the real console. 
*/ 2257 2258 prom_debug = (find_boot_prop("prom_debug") != NULL); 2259 map_debug = (find_boot_prop("map_debug") != NULL); 2260 2261 #if !defined(__xpv) 2262 dboot_multiboot_get_fwtables(); 2263 #endif 2264 DBG_MSG("\n\nillumos prekernel set: "); 2265 DBG_MSG(cmdline); 2266 DBG_MSG("\n"); 2267 2268 if (bootloader != NULL && prom_debug) { 2269 dboot_printf("Kernel loaded by: %s\n", bootloader); 2270 #if !defined(__xpv) 2271 dboot_printf("Using multiboot %d boot protocol.\n", 2272 multiboot_version); 2273 #endif 2274 } 2275 2276 if (strstr(cmdline, "multiboot") != NULL) { 2277 dboot_panic(NO_MULTIBOOT); 2278 } 2279 2280 DBG((uintptr_t)bi); 2281 #if !defined(__xpv) 2282 DBG((uintptr_t)mb_info); 2283 DBG((uintptr_t)mb2_info); 2284 if (mb2_info != NULL) 2285 DBG(mb2_info->mbi_total_size); 2286 DBG(bi->bi_acpi_rsdp); 2287 DBG(bi->bi_acpi_rsdp_copy); 2288 DBG(bi->bi_smbios); 2289 DBG(bi->bi_uefi_arch); 2290 DBG(bi->bi_uefi_systab); 2291 2292 if (bi->bi_uefi_systab && prom_debug) { 2293 if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) { 2294 print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t) 2295 bi->bi_uefi_systab); 2296 } else { 2297 print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t) 2298 bi->bi_uefi_systab); 2299 } 2300 } 2301 #endif 2302 2303 /* 2304 * Need correct target_kernel_text value 2305 */ 2306 target_kernel_text = KERNEL_TEXT; 2307 DBG(target_kernel_text); 2308 2309 #if defined(__xpv) 2310 2311 /* 2312 * XXPV Derive this stuff from CPUID / what the hypervisor has enabled 2313 */ 2314 2315 #if defined(_BOOT_TARGET_amd64) 2316 /* 2317 * 64-bit hypervisor. 
2318 */ 2319 amd64_support = 1; 2320 pae_support = 1; 2321 2322 #else /* _BOOT_TARGET_amd64 */ 2323 2324 /* 2325 * See if we are running on a PAE Hypervisor 2326 */ 2327 { 2328 xen_capabilities_info_t caps; 2329 2330 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0) 2331 dboot_panic("HYPERVISOR_xen_version(caps) failed"); 2332 caps[sizeof (caps) - 1] = 0; 2333 if (prom_debug) 2334 dboot_printf("xen capabilities %s\n", caps); 2335 if (strstr(caps, "x86_32p") != NULL) 2336 pae_support = 1; 2337 } 2338 2339 #endif /* _BOOT_TARGET_amd64 */ 2340 { 2341 xen_platform_parameters_t p; 2342 2343 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0) 2344 dboot_panic("HYPERVISOR_xen_version(parms) failed"); 2345 DBG(p.virt_start); 2346 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start); 2347 } 2348 2349 /* 2350 * The hypervisor loads stuff starting at 1Gig 2351 */ 2352 mfn_base = ONE_GIG; 2353 DBG(mfn_base); 2354 2355 /* 2356 * enable writable page table mode for the hypervisor 2357 */ 2358 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2359 VMASST_TYPE_writable_pagetables) < 0) 2360 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed"); 2361 2362 /* 2363 * check for NX support 2364 */ 2365 if (pae_support) { 2366 uint32_t eax = 0x80000000; 2367 uint32_t edx = get_cpuid_edx(&eax); 2368 2369 if (eax >= 0x80000001) { 2370 eax = 0x80000001; 2371 edx = get_cpuid_edx(&eax); 2372 if (edx & CPUID_AMD_EDX_NX) 2373 NX_support = 1; 2374 } 2375 } 2376 2377 /* 2378 * check for PAT support 2379 */ 2380 { 2381 uint32_t eax = 1; 2382 uint32_t edx = get_cpuid_edx(&eax); 2383 2384 if (edx & CPUID_INTC_EDX_PAT) 2385 PAT_support = 1; 2386 } 2387 #if !defined(_BOOT_TARGET_amd64) 2388 2389 /* 2390 * The 32-bit hypervisor uses segmentation to protect itself from 2391 * guests. 
This means when a guest attempts to install a flat 4GB 2392 * code or data descriptor the 32-bit hypervisor will protect itself 2393 * by silently shrinking the segment such that if the guest attempts 2394 * any access where the hypervisor lives a #gp fault is generated. 2395 * The problem is that some applications expect a full 4GB flat 2396 * segment for their current thread pointer and will use negative 2397 * offset segment wrap around to access data. TLS support in linux 2398 * brand is one example of this. 2399 * 2400 * The 32-bit hypervisor can catch the #gp fault in these cases 2401 * and emulate the access without passing the #gp fault to the guest 2402 * but only if VMASST_TYPE_4gb_segments is explicitly turned on. 2403 * Seems like this should have been the default. 2404 * Either way, we want the hypervisor -- and not Solaris -- to deal 2405 * to deal with emulating these accesses. 2406 */ 2407 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2408 VMASST_TYPE_4gb_segments) < 0) 2409 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed"); 2410 #endif /* !_BOOT_TARGET_amd64 */ 2411 2412 #else /* __xpv */ 2413 2414 /* 2415 * use cpuid to enable MMU features 2416 */ 2417 if (have_cpuid()) { 2418 uint32_t eax, edx; 2419 2420 eax = 1; 2421 edx = get_cpuid_edx(&eax); 2422 if (edx & CPUID_INTC_EDX_PSE) 2423 largepage_support = 1; 2424 if (edx & CPUID_INTC_EDX_PGE) 2425 pge_support = 1; 2426 if (edx & CPUID_INTC_EDX_PAE) 2427 pae_support = 1; 2428 if (edx & CPUID_INTC_EDX_PAT) 2429 PAT_support = 1; 2430 2431 eax = 0x80000000; 2432 edx = get_cpuid_edx(&eax); 2433 if (eax >= 0x80000001) { 2434 eax = 0x80000001; 2435 edx = get_cpuid_edx(&eax); 2436 if (edx & CPUID_AMD_EDX_LM) 2437 amd64_support = 1; 2438 if (edx & CPUID_AMD_EDX_NX) 2439 NX_support = 1; 2440 } 2441 } else { 2442 dboot_printf("cpuid not supported\n"); 2443 } 2444 #endif /* __xpv */ 2445 2446 2447 #if defined(_BOOT_TARGET_amd64) 2448 if (amd64_support == 0) 2449 dboot_panic("long mode not supported, 
rebooting"); 2450 else if (pae_support == 0) 2451 dboot_panic("long mode, but no PAE; rebooting"); 2452 #else 2453 /* 2454 * Allow the command line to over-ride use of PAE for 32 bit. 2455 */ 2456 if (strstr(cmdline, "disablePAE=true") != NULL) { 2457 pae_support = 0; 2458 NX_support = 0; 2459 amd64_support = 0; 2460 } 2461 #endif 2462 2463 /* 2464 * initialize the simple memory allocator 2465 */ 2466 init_mem_alloc(); 2467 2468 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64) 2469 /* 2470 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory 2471 */ 2472 if (max_mem < FOUR_GIG && NX_support == 0) 2473 pae_support = 0; 2474 #endif 2475 2476 /* 2477 * configure mmu information 2478 */ 2479 if (pae_support) { 2480 shift_amt = shift_amt_pae; 2481 ptes_per_table = 512; 2482 pte_size = 8; 2483 lpagesize = TWO_MEG; 2484 #if defined(_BOOT_TARGET_amd64) 2485 top_level = 3; 2486 #else 2487 top_level = 2; 2488 #endif 2489 } else { 2490 pae_support = 0; 2491 NX_support = 0; 2492 shift_amt = shift_amt_nopae; 2493 ptes_per_table = 1024; 2494 pte_size = 4; 2495 lpagesize = FOUR_MEG; 2496 top_level = 1; 2497 } 2498 2499 DBG(PAT_support); 2500 DBG(pge_support); 2501 DBG(NX_support); 2502 DBG(largepage_support); 2503 DBG(amd64_support); 2504 DBG(top_level); 2505 DBG(pte_size); 2506 DBG(ptes_per_table); 2507 DBG(lpagesize); 2508 2509 #if defined(__xpv) 2510 ktext_phys = ONE_GIG; /* from UNIX Mapfile */ 2511 #else 2512 ktext_phys = FOUR_MEG; /* from UNIX Mapfile */ 2513 #endif 2514 2515 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64) 2516 /* 2517 * For grub, copy kernel bits from the ELF64 file to final place. 
2518 */ 2519 DBG_MSG("\nAllocating nucleus pages.\n"); 2520 ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG); 2521 2522 if (ktext_phys == 0) 2523 dboot_panic("failed to allocate aligned kernel memory"); 2524 DBG(load_addr); 2525 if (dboot_elfload64(load_addr) != 0) 2526 dboot_panic("failed to parse kernel ELF image, rebooting"); 2527 #endif 2528 2529 DBG(ktext_phys); 2530 2531 /* 2532 * Allocate page tables. 2533 */ 2534 build_page_tables(); 2535 2536 /* 2537 * return to assembly code to switch to running kernel 2538 */ 2539 entry_addr_low = (uint32_t)target_kernel_text; 2540 DBG(entry_addr_low); 2541 bi->bi_use_largepage = largepage_support; 2542 bi->bi_use_pae = pae_support; 2543 bi->bi_use_pge = pge_support; 2544 bi->bi_use_nx = NX_support; 2545 2546 #if defined(__xpv) 2547 2548 bi->bi_next_paddr = next_avail_addr - mfn_base; 2549 DBG(bi->bi_next_paddr); 2550 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr; 2551 DBG(bi->bi_next_vaddr); 2552 2553 /* 2554 * unmap unused pages in start area to make them available for DMA 2555 */ 2556 while (next_avail_addr < scratch_end) { 2557 (void) HYPERVISOR_update_va_mapping(next_avail_addr, 2558 0, UVMF_INVLPG | UVMF_LOCAL); 2559 next_avail_addr += MMU_PAGESIZE; 2560 } 2561 2562 bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info; 2563 DBG((uintptr_t)HYPERVISOR_shared_info); 2564 bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info; 2565 bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base; 2566 2567 #else /* __xpv */ 2568 2569 bi->bi_next_paddr = next_avail_addr; 2570 DBG(bi->bi_next_paddr); 2571 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr; 2572 DBG(bi->bi_next_vaddr); 2573 bi->bi_mb_version = multiboot_version; 2574 2575 switch (multiboot_version) { 2576 case 1: 2577 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info; 2578 break; 2579 case 2: 2580 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info; 2581 break; 2582 default: 2583 dboot_panic("Unknown multiboot version: 
%d\n", 2584 multiboot_version); 2585 break; 2586 } 2587 bi->bi_top_page_table = (uintptr_t)top_page_table; 2588 2589 #endif /* __xpv */ 2590 2591 bi->bi_kseg_size = FOUR_MEG; 2592 DBG(bi->bi_kseg_size); 2593 2594 #ifndef __xpv 2595 if (map_debug) 2596 dump_tables(); 2597 #endif 2598 2599 DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n"); 2600 2601 #ifndef __xpv 2602 /* Update boot info with FB data */ 2603 fb->cursor.origin.x = fb_info.cursor.origin.x; 2604 fb->cursor.origin.y = fb_info.cursor.origin.y; 2605 fb->cursor.pos.x = fb_info.cursor.pos.x; 2606 fb->cursor.pos.y = fb_info.cursor.pos.y; 2607 fb->cursor.visible = fb_info.cursor.visible; 2608 #endif 2609 } 2610