1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * Copyright 2013 Joyent, Inc. All rights reserved. 
27 */ 28 29 30 #include <sys/types.h> 31 #include <sys/machparam.h> 32 #include <sys/x86_archext.h> 33 #include <sys/systm.h> 34 #include <sys/mach_mmu.h> 35 #include <sys/multiboot.h> 36 #include <sys/multiboot2.h> 37 #include <sys/multiboot2_impl.h> 38 #include <sys/sysmacros.h> 39 #include <sys/sha1.h> 40 #include <util/string.h> 41 #include <util/strtolctype.h> 42 43 #if defined(__xpv) 44 45 #include <sys/hypervisor.h> 46 uintptr_t xen_virt_start; 47 pfn_t *mfn_to_pfn_mapping; 48 49 #else /* !__xpv */ 50 51 extern multiboot_header_t mb_header; 52 extern uint32_t mb2_load_addr; 53 extern int have_cpuid(void); 54 55 #endif /* !__xpv */ 56 57 #include <sys/inttypes.h> 58 #include <sys/bootinfo.h> 59 #include <sys/mach_mmu.h> 60 #include <sys/boot_console.h> 61 62 #include "dboot_asm.h" 63 #include "dboot_printf.h" 64 #include "dboot_xboot.h" 65 #include "dboot_elfload.h" 66 67 #define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2) 68 69 /* 70 * This file contains code that runs to transition us from either a multiboot 71 * compliant loader (32 bit non-paging) or a XPV domain loader to 72 * regular kernel execution. Its task is to set up the kernel memory image 73 * and page tables. 74 * 75 * The code executes as: 76 * - 32 bits under GRUB (for 32 or 64 bit Solaris) 77 * - a 32 bit program for the 32-bit PV hypervisor 78 * - a 64 bit program for the 64-bit PV hypervisor (at least for now) 79 * 80 * Under the PV hypervisor, we must create mappings for any memory beyond the 81 * initial start of day allocation (such as the kernel itself). 82 * 83 * When on the metal, the mapping between maddr_t and paddr_t is 1:1. 84 * Since we are running with paging disabled, all such memory is accessible. 
85 */ 86 87 /* 88 * Standard bits used in PTE (page level) and PTP (internal levels) 89 */ 90 x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER; 91 x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST; 92 93 /* 94 * This is the target addresses (physical) where the kernel text and data 95 * nucleus pages will be unpacked. On the hypervisor this is actually a 96 * virtual address. 97 */ 98 paddr_t ktext_phys; 99 uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */ 100 101 static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */ 102 103 /* 104 * The stack is setup in assembler before entering startup_kernel() 105 */ 106 char stack_space[STACK_SIZE]; 107 108 /* 109 * Used to track physical memory allocation 110 */ 111 static paddr_t next_avail_addr = 0; 112 113 #if defined(__xpv) 114 /* 115 * Additional information needed for hypervisor memory allocation. 116 * Only memory up to scratch_end is mapped by page tables. 117 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so 118 * to derive a pfn from a pointer, you subtract mfn_base. 119 */ 120 121 static paddr_t scratch_end = 0; /* we can't write all of mem here */ 122 static paddr_t mfn_base; /* addr corresponding to mfn_list[0] */ 123 start_info_t *xen_info; 124 125 #else /* __xpv */ 126 127 /* 128 * If on the metal, then we have a multiboot loader. 
129 */ 130 uint32_t mb_magic; /* magic from boot loader */ 131 uint32_t mb_addr; /* multiboot info package from loader */ 132 int multiboot_version; 133 multiboot_info_t *mb_info; 134 multiboot2_info_header_t *mb2_info; 135 multiboot_tag_mmap_t *mb2_mmap_tagp; 136 int num_entries; /* mmap entry count */ 137 boolean_t num_entries_set; /* is mmap entry count set */ 138 uintptr_t load_addr; 139 140 #endif /* __xpv */ 141 142 /* 143 * This contains information passed to the kernel 144 */ 145 struct xboot_info boot_info[2]; /* extra space to fix alignment for amd64 */ 146 struct xboot_info *bi; 147 148 /* 149 * Page table and memory stuff. 150 */ 151 static paddr_t max_mem; /* maximum memory address */ 152 153 /* 154 * Information about processor MMU 155 */ 156 int amd64_support = 0; 157 int largepage_support = 0; 158 int pae_support = 0; 159 int pge_support = 0; 160 int NX_support = 0; 161 162 /* 163 * Low 32 bits of kernel entry address passed back to assembler. 164 * When running a 64 bit kernel, the high 32 bits are 0xffffffff. 165 */ 166 uint32_t entry_addr_low; 167 168 /* 169 * Memlists for the kernel. We shouldn't need a lot of these. 170 */ 171 #define MAX_MEMLIST (50) 172 struct boot_memlist memlists[MAX_MEMLIST]; 173 uint_t memlists_used = 0; 174 struct boot_memlist pcimemlists[MAX_MEMLIST]; 175 uint_t pcimemlists_used = 0; 176 struct boot_memlist rsvdmemlists[MAX_MEMLIST]; 177 uint_t rsvdmemlists_used = 0; 178 179 /* 180 * This should match what's in the bootloader. It's arbitrary, but GRUB 181 * in particular has limitations on how much space it can use before it 182 * stops working properly. This should be enough. 183 */ 184 struct boot_modules modules[MAX_BOOT_MODULES]; 185 uint_t modules_used = 0; 186 187 #ifdef __xpv 188 /* 189 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry 190 * definition in Xen source. 
191 */ 192 typedef struct { 193 uint32_t base_addr_low; 194 uint32_t base_addr_high; 195 uint32_t length_low; 196 uint32_t length_high; 197 uint32_t type; 198 } mmap_t; 199 200 /* 201 * There is 512KB of scratch area after the boot stack page. 202 * We'll use that for everything except the kernel nucleus pages which are too 203 * big to fit there and are allocated last anyway. 204 */ 205 #define MAXMAPS 100 206 static mmap_t map_buffer[MAXMAPS]; 207 #else 208 typedef mb_memory_map_t mmap_t; 209 #endif 210 211 /* 212 * Debugging macros 213 */ 214 uint_t prom_debug = 0; 215 uint_t map_debug = 0; 216 217 static char noname[2] = "-"; 218 219 /* 220 * Either hypervisor-specific or grub-specific code builds the initial 221 * memlists. This code does the sort/merge/link for final use. 222 */ 223 static void 224 sort_physinstall(void) 225 { 226 int i; 227 #if !defined(__xpv) 228 int j; 229 struct boot_memlist tmp; 230 231 /* 232 * Now sort the memlists, in case they weren't in order. 233 * Yeah, this is a bubble sort; small, simple and easy to get right. 234 */ 235 DBG_MSG("Sorting phys-installed list\n"); 236 for (j = memlists_used - 1; j > 0; --j) { 237 for (i = 0; i < j; ++i) { 238 if (memlists[i].addr < memlists[i + 1].addr) 239 continue; 240 tmp = memlists[i]; 241 memlists[i] = memlists[i + 1]; 242 memlists[i + 1] = tmp; 243 } 244 } 245 246 /* 247 * Merge any memlists that don't have holes between them. 
248 */ 249 for (i = 0; i <= memlists_used - 1; ++i) { 250 if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr) 251 continue; 252 253 if (prom_debug) 254 dboot_printf( 255 "merging mem segs %" PRIx64 "...%" PRIx64 256 " w/ %" PRIx64 "...%" PRIx64 "\n", 257 memlists[i].addr, 258 memlists[i].addr + memlists[i].size, 259 memlists[i + 1].addr, 260 memlists[i + 1].addr + memlists[i + 1].size); 261 262 memlists[i].size += memlists[i + 1].size; 263 for (j = i + 1; j < memlists_used - 1; ++j) 264 memlists[j] = memlists[j + 1]; 265 --memlists_used; 266 DBG(memlists_used); 267 --i; /* after merging we need to reexamine, so do this */ 268 } 269 #endif /* __xpv */ 270 271 if (prom_debug) { 272 dboot_printf("\nFinal memlists:\n"); 273 for (i = 0; i < memlists_used; ++i) { 274 dboot_printf("\t%d: addr=%" PRIx64 " size=%" 275 PRIx64 "\n", i, memlists[i].addr, memlists[i].size); 276 } 277 } 278 279 /* 280 * link together the memlists with native size pointers 281 */ 282 memlists[0].next = 0; 283 memlists[0].prev = 0; 284 for (i = 1; i < memlists_used; ++i) { 285 memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1); 286 memlists[i].next = 0; 287 memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i); 288 } 289 bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists; 290 DBG(bi->bi_phys_install); 291 } 292 293 /* 294 * build bios reserved memlists 295 */ 296 static void 297 build_rsvdmemlists(void) 298 { 299 int i; 300 301 rsvdmemlists[0].next = 0; 302 rsvdmemlists[0].prev = 0; 303 for (i = 1; i < rsvdmemlists_used; ++i) { 304 rsvdmemlists[i].prev = 305 (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1); 306 rsvdmemlists[i].next = 0; 307 rsvdmemlists[i - 1].next = 308 (native_ptr_t)(uintptr_t)(rsvdmemlists + i); 309 } 310 bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists; 311 DBG(bi->bi_rsvdmem); 312 } 313 314 #if defined(__xpv) 315 316 /* 317 * halt on the hypervisor after a delay to drain console output 318 */ 319 void 320 dboot_halt(void) 321 { 322 
uint_t i = 10000; 323 324 while (--i) 325 (void) HYPERVISOR_yield(); 326 (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 327 } 328 329 /* 330 * From a machine address, find the corresponding pseudo-physical address. 331 * Pseudo-physical address are contiguous and run from mfn_base in each VM. 332 * Machine addresses are the real underlying hardware addresses. 333 * These are needed for page table entries. Note that this routine is 334 * poorly protected. A bad value of "ma" will cause a page fault. 335 */ 336 paddr_t 337 ma_to_pa(maddr_t ma) 338 { 339 ulong_t pgoff = ma & MMU_PAGEOFFSET; 340 ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)]; 341 paddr_t pa; 342 343 if (pfn >= xen_info->nr_pages) 344 return (-(paddr_t)1); 345 pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff; 346 #ifdef DEBUG 347 if (ma != pa_to_ma(pa)) 348 dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", " 349 "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa)); 350 #endif 351 return (pa); 352 } 353 354 /* 355 * From a pseudo-physical address, find the corresponding machine address. 
356 */ 357 maddr_t 358 pa_to_ma(paddr_t pa) 359 { 360 pfn_t pfn; 361 ulong_t mfn; 362 363 pfn = mmu_btop(pa - mfn_base); 364 if (pa < mfn_base || pfn >= xen_info->nr_pages) 365 dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa); 366 mfn = ((ulong_t *)xen_info->mfn_list)[pfn]; 367 #ifdef DEBUG 368 if (mfn_to_pfn_mapping[mfn] != pfn) 369 dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n", 370 pfn, mfn, mfn_to_pfn_mapping[mfn]); 371 #endif 372 return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET)); 373 } 374 375 #endif /* __xpv */ 376 377 x86pte_t 378 get_pteval(paddr_t table, uint_t index) 379 { 380 if (pae_support) 381 return (((x86pte_t *)(uintptr_t)table)[index]); 382 return (((x86pte32_t *)(uintptr_t)table)[index]); 383 } 384 385 /*ARGSUSED*/ 386 void 387 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 388 { 389 #ifdef __xpv 390 mmu_update_t t; 391 maddr_t mtable = pa_to_ma(table); 392 int retcnt; 393 394 t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE; 395 t.val = pteval; 396 if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1) 397 dboot_panic("HYPERVISOR_mmu_update() failed"); 398 #else /* __xpv */ 399 uintptr_t tab_addr = (uintptr_t)table; 400 401 if (pae_support) 402 ((x86pte_t *)tab_addr)[index] = pteval; 403 else 404 ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval; 405 if (level == top_level && level == 2) 406 reload_cr3(); 407 #endif /* __xpv */ 408 } 409 410 paddr_t 411 make_ptable(x86pte_t *pteval, uint_t level) 412 { 413 paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 414 415 if (level == top_level && level == 2) 416 *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID; 417 else 418 *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits; 419 420 #ifdef __xpv 421 /* Remove write permission to the new page table. 
*/ 422 if (HYPERVISOR_update_va_mapping(new_table, 423 *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL)) 424 dboot_panic("HYP_update_va_mapping error"); 425 #endif 426 427 if (map_debug) 428 dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%" 429 PRIx64 "\n", level, (ulong_t)new_table, *pteval); 430 return (new_table); 431 } 432 433 x86pte_t * 434 map_pte(paddr_t table, uint_t index) 435 { 436 return ((x86pte_t *)(uintptr_t)(table + index * pte_size)); 437 } 438 439 /* 440 * dump out the contents of page tables... 441 */ 442 static void 443 dump_tables(void) 444 { 445 uint_t save_index[4]; /* for recursion */ 446 char *save_table[4]; /* for recursion */ 447 uint_t l; 448 uint64_t va; 449 uint64_t pgsize; 450 int index; 451 int i; 452 x86pte_t pteval; 453 char *table; 454 static char *tablist = "\t\t\t"; 455 char *tabs = tablist + 3 - top_level; 456 uint_t pa, pa1; 457 #if !defined(__xpv) 458 #define maddr_t paddr_t 459 #endif /* !__xpv */ 460 461 dboot_printf("Finished pagetables:\n"); 462 table = (char *)(uintptr_t)top_page_table; 463 l = top_level; 464 va = 0; 465 for (index = 0; index < ptes_per_table; ++index) { 466 pgsize = 1ull << shift_amt[l]; 467 if (pae_support) 468 pteval = ((x86pte_t *)table)[index]; 469 else 470 pteval = ((x86pte32_t *)table)[index]; 471 if (pteval == 0) 472 goto next_entry; 473 474 dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64, 475 tabs + l, (void *)table, index, (uint64_t)pteval, va); 476 pa = ma_to_pa(pteval & MMU_PAGEMASK); 477 dboot_printf(" physaddr=%x\n", pa); 478 479 /* 480 * Don't try to walk hypervisor private pagetables 481 */ 482 if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) { 483 save_table[l] = table; 484 save_index[l] = index; 485 --l; 486 index = -1; 487 table = (char *)(uintptr_t) 488 ma_to_pa(pteval & MMU_PAGEMASK); 489 goto recursion; 490 } 491 492 /* 493 * shorten dump for consecutive mappings 494 */ 495 for (i = 1; index + i < ptes_per_table; ++i) { 496 if (pae_support) 497 pteval = 
((x86pte_t *)table)[index + i]; 498 else 499 pteval = ((x86pte32_t *)table)[index + i]; 500 if (pteval == 0) 501 break; 502 pa1 = ma_to_pa(pteval & MMU_PAGEMASK); 503 if (pa1 != pa + i * pgsize) 504 break; 505 } 506 if (i > 2) { 507 dboot_printf("%s...\n", tabs + l); 508 va += pgsize * (i - 2); 509 index += i - 2; 510 } 511 next_entry: 512 va += pgsize; 513 if (l == 3 && index == 256) /* VA hole */ 514 va = 0xffff800000000000ull; 515 recursion: 516 ; 517 } 518 if (l < top_level) { 519 ++l; 520 index = save_index[l]; 521 table = save_table[l]; 522 goto recursion; 523 } 524 } 525 526 /* 527 * Add a mapping for the machine page at the given virtual address. 528 */ 529 static void 530 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level) 531 { 532 x86pte_t *ptep; 533 x86pte_t pteval; 534 535 pteval = ma | pte_bits; 536 if (level > 0) 537 pteval |= PT_PAGESIZE; 538 if (va >= target_kernel_text && pge_support) 539 pteval |= PT_GLOBAL; 540 541 if (map_debug && ma != va) 542 dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64 543 " pte=0x%" PRIx64 " l=%d\n", 544 (uint64_t)ma, (uint64_t)va, pteval, level); 545 546 #if defined(__xpv) 547 /* 548 * see if we can avoid find_pte() on the hypervisor 549 */ 550 if (HYPERVISOR_update_va_mapping(va, pteval, 551 UVMF_INVLPG | UVMF_LOCAL) == 0) 552 return; 553 #endif 554 555 /* 556 * Find the pte that will map this address. This creates any 557 * missing intermediate level page tables 558 */ 559 ptep = find_pte(va, NULL, level, 0); 560 561 /* 562 * When paravirtualized, we must use hypervisor calls to modify the 563 * PTE, since paging is active. On real hardware we just write to 564 * the pagetables which aren't in use yet. 
565 */ 566 #if defined(__xpv) 567 ptep = ptep; /* shut lint up */ 568 if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL)) 569 dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64 570 " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "", 571 (uint64_t)va, level, (uint64_t)ma, pteval); 572 #else 573 if (va < 1024 * 1024) 574 pteval |= PT_NOCACHE; /* for video RAM */ 575 if (pae_support) 576 *ptep = pteval; 577 else 578 *((x86pte32_t *)ptep) = (x86pte32_t)pteval; 579 #endif 580 } 581 582 /* 583 * Add a mapping for the physical page at the given virtual address. 584 */ 585 static void 586 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level) 587 { 588 map_ma_at_va(pa_to_ma(pa), va, level); 589 } 590 591 /* 592 * This is called to remove start..end from the 593 * possible range of PCI addresses. 594 */ 595 const uint64_t pci_lo_limit = 0x00100000ul; 596 const uint64_t pci_hi_limit = 0xfff00000ul; 597 static void 598 exclude_from_pci(uint64_t start, uint64_t end) 599 { 600 int i; 601 int j; 602 struct boot_memlist *ml; 603 604 for (i = 0; i < pcimemlists_used; ++i) { 605 ml = &pcimemlists[i]; 606 607 /* delete the entire range? */ 608 if (start <= ml->addr && ml->addr + ml->size <= end) { 609 --pcimemlists_used; 610 for (j = i; j < pcimemlists_used; ++j) 611 pcimemlists[j] = pcimemlists[j + 1]; 612 --i; /* to revisit the new one at this index */ 613 } 614 615 /* split a range? */ 616 else if (ml->addr < start && end < ml->addr + ml->size) { 617 618 ++pcimemlists_used; 619 if (pcimemlists_used > MAX_MEMLIST) 620 dboot_panic("too many pcimemlists"); 621 622 for (j = pcimemlists_used - 1; j > i; --j) 623 pcimemlists[j] = pcimemlists[j - 1]; 624 ml->size = start - ml->addr; 625 626 ++ml; 627 ml->size = (ml->addr + ml->size) - end; 628 ml->addr = end; 629 ++i; /* skip on to next one */ 630 } 631 632 /* cut memory off the start? 
*/ 633 else if (ml->addr < end && end < ml->addr + ml->size) { 634 ml->size -= end - ml->addr; 635 ml->addr = end; 636 } 637 638 /* cut memory off the end? */ 639 else if (ml->addr <= start && start < ml->addr + ml->size) { 640 ml->size = start - ml->addr; 641 } 642 } 643 } 644 645 /* 646 * During memory allocation, find the highest address not used yet. 647 */ 648 static void 649 check_higher(paddr_t a) 650 { 651 if (a < next_avail_addr) 652 return; 653 next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE); 654 DBG(next_avail_addr); 655 } 656 657 static int 658 dboot_loader_mmap_entries(void) 659 { 660 #if !defined(__xpv) 661 if (num_entries_set == B_TRUE) 662 return (num_entries); 663 664 switch (multiboot_version) { 665 case 1: 666 DBG(mb_info->flags); 667 if (mb_info->flags & 0x40) { 668 mb_memory_map_t *mmap; 669 670 DBG(mb_info->mmap_addr); 671 DBG(mb_info->mmap_length); 672 check_higher(mb_info->mmap_addr + mb_info->mmap_length); 673 674 for (mmap = (mb_memory_map_t *)mb_info->mmap_addr; 675 (uint32_t)mmap < mb_info->mmap_addr + 676 mb_info->mmap_length; 677 mmap = (mb_memory_map_t *)((uint32_t)mmap + 678 mmap->size + sizeof (mmap->size))) 679 ++num_entries; 680 681 num_entries_set = B_TRUE; 682 } 683 break; 684 case 2: 685 num_entries_set = B_TRUE; 686 num_entries = dboot_multiboot2_mmap_nentries(mb2_info, 687 mb2_mmap_tagp); 688 break; 689 default: 690 dboot_panic("Unknown multiboot version: %d\n", 691 multiboot_version); 692 break; 693 } 694 return (num_entries); 695 #else 696 return (MAXMAPS); 697 #endif 698 } 699 700 static uint32_t 701 dboot_loader_mmap_get_type(int index) 702 { 703 #if !defined(__xpv) 704 mb_memory_map_t *mp, *mpend; 705 int i; 706 707 switch (multiboot_version) { 708 case 1: 709 mp = (mb_memory_map_t *)mb_info->mmap_addr; 710 mpend = (mb_memory_map_t *) 711 (mb_info->mmap_addr + mb_info->mmap_length); 712 713 for (i = 0; mp < mpend && i != index; i++) 714 mp = (mb_memory_map_t *)((uint32_t)mp + mp->size + 715 sizeof (mp->size)); 716 if (mp >= 
mpend) { 717 dboot_panic("dboot_loader_mmap_get_type(): index " 718 "out of bounds: %d\n", index); 719 } 720 return (mp->type); 721 722 case 2: 723 return (dboot_multiboot2_mmap_get_type(mb2_info, 724 mb2_mmap_tagp, index)); 725 726 default: 727 dboot_panic("Unknown multiboot version: %d\n", 728 multiboot_version); 729 break; 730 } 731 return (0); 732 #else 733 return (map_buffer[index].type); 734 #endif 735 } 736 737 static uint64_t 738 dboot_loader_mmap_get_base(int index) 739 { 740 #if !defined(__xpv) 741 mb_memory_map_t *mp, *mpend; 742 int i; 743 744 switch (multiboot_version) { 745 case 1: 746 mp = (mb_memory_map_t *)mb_info->mmap_addr; 747 mpend = (mb_memory_map_t *) 748 (mb_info->mmap_addr + mb_info->mmap_length); 749 750 for (i = 0; mp < mpend && i != index; i++) 751 mp = (mb_memory_map_t *)((uint32_t)mp + mp->size + 752 sizeof (mp->size)); 753 if (mp >= mpend) { 754 dboot_panic("dboot_loader_mmap_get_base(): index " 755 "out of bounds: %d\n", index); 756 } 757 return (((uint64_t)mp->base_addr_high << 32) + 758 (uint64_t)mp->base_addr_low); 759 760 case 2: 761 return (dboot_multiboot2_mmap_get_base(mb2_info, 762 mb2_mmap_tagp, index)); 763 764 default: 765 dboot_panic("Unknown multiboot version: %d\n", 766 multiboot_version); 767 break; 768 } 769 return (0); 770 #else 771 return (((uint64_t)map_buffer[index].base_addr_high << 32) + 772 (uint64_t)map_buffer[index].base_addr_low); 773 #endif 774 } 775 776 static uint64_t 777 dboot_loader_mmap_get_length(int index) 778 { 779 #if !defined(__xpv) 780 mb_memory_map_t *mp, *mpend; 781 int i; 782 783 switch (multiboot_version) { 784 case 1: 785 mp = (mb_memory_map_t *)mb_info->mmap_addr; 786 mpend = (mb_memory_map_t *) 787 (mb_info->mmap_addr + mb_info->mmap_length); 788 789 for (i = 0; mp < mpend && i != index; i++) 790 mp = (mb_memory_map_t *)((uint32_t)mp + mp->size + 791 sizeof (mp->size)); 792 if (mp >= mpend) { 793 dboot_panic("dboot_loader_mmap_get_length(): index " 794 "out of bounds: %d\n", index); 795 } 
796 return (((uint64_t)mp->length_high << 32) + 797 (uint64_t)mp->length_low); 798 799 case 2: 800 return (dboot_multiboot2_mmap_get_length(mb2_info, 801 mb2_mmap_tagp, index)); 802 803 default: 804 dboot_panic("Unknown multiboot version: %d\n", 805 multiboot_version); 806 break; 807 } 808 return (0); 809 #else 810 return (((uint64_t)map_buffer[index].length_high << 32) + 811 (uint64_t)map_buffer[index].length_low); 812 #endif 813 } 814 815 static void 816 build_pcimemlists(void) 817 { 818 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 819 uint64_t start; 820 uint64_t end; 821 int i, num; 822 823 /* 824 * initialize 825 */ 826 pcimemlists[0].addr = pci_lo_limit; 827 pcimemlists[0].size = pci_hi_limit - pci_lo_limit; 828 pcimemlists_used = 1; 829 830 num = dboot_loader_mmap_entries(); 831 /* 832 * Fill in PCI memlists. 833 */ 834 for (i = 0; i < num; ++i) { 835 start = dboot_loader_mmap_get_base(i); 836 end = start + dboot_loader_mmap_get_length(i); 837 838 if (prom_debug) 839 dboot_printf("\ttype: %d %" PRIx64 "..%" 840 PRIx64 "\n", dboot_loader_mmap_get_type(i), 841 start, end); 842 843 /* 844 * page align start and end 845 */ 846 start = (start + page_offset) & ~page_offset; 847 end &= ~page_offset; 848 if (end <= start) 849 continue; 850 851 exclude_from_pci(start, end); 852 } 853 854 /* 855 * Finish off the pcimemlist 856 */ 857 if (prom_debug) { 858 for (i = 0; i < pcimemlists_used; ++i) { 859 dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%" 860 PRIx64 "\n", pcimemlists[i].addr, 861 pcimemlists[i].addr + pcimemlists[i].size); 862 } 863 } 864 pcimemlists[0].next = 0; 865 pcimemlists[0].prev = 0; 866 for (i = 1; i < pcimemlists_used; ++i) { 867 pcimemlists[i].prev = 868 (native_ptr_t)(uintptr_t)(pcimemlists + i - 1); 869 pcimemlists[i].next = 0; 870 pcimemlists[i - 1].next = 871 (native_ptr_t)(uintptr_t)(pcimemlists + i); 872 } 873 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 874 DBG(bi->bi_pcimem); 875 } 876 877 #if defined(__xpv) 
878 /* 879 * Initialize memory allocator stuff from hypervisor-supplied start info. 880 */ 881 static void 882 init_mem_alloc(void) 883 { 884 int local; /* variables needed to find start region */ 885 paddr_t scratch_start; 886 xen_memory_map_t map; 887 888 DBG_MSG("Entered init_mem_alloc()\n"); 889 890 /* 891 * Free memory follows the stack. There's at least 512KB of scratch 892 * space, rounded up to at least 2Mb alignment. That should be enough 893 * for the page tables we'll need to build. The nucleus memory is 894 * allocated last and will be outside the addressible range. We'll 895 * switch to new page tables before we unpack the kernel 896 */ 897 scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE); 898 DBG(scratch_start); 899 scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG); 900 DBG(scratch_end); 901 902 /* 903 * For paranoia, leave some space between hypervisor data and ours. 904 * Use 500 instead of 512. 905 */ 906 next_avail_addr = scratch_end - 500 * 1024; 907 DBG(next_avail_addr); 908 909 /* 910 * The domain builder gives us at most 1 module 911 */ 912 DBG(xen_info->mod_len); 913 if (xen_info->mod_len > 0) { 914 DBG(xen_info->mod_start); 915 modules[0].bm_addr = 916 (native_ptr_t)(uintptr_t)xen_info->mod_start; 917 modules[0].bm_size = xen_info->mod_len; 918 bi->bi_module_cnt = 1; 919 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 920 } else { 921 bi->bi_module_cnt = 0; 922 bi->bi_modules = (native_ptr_t)(uintptr_t)NULL; 923 } 924 DBG(bi->bi_module_cnt); 925 DBG(bi->bi_modules); 926 927 DBG(xen_info->mfn_list); 928 DBG(xen_info->nr_pages); 929 max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT; 930 DBG(max_mem); 931 932 /* 933 * Using pseudo-physical addresses, so only 1 memlist element 934 */ 935 memlists[0].addr = 0; 936 DBG(memlists[0].addr); 937 memlists[0].size = max_mem; 938 DBG(memlists[0].size); 939 memlists_used = 1; 940 DBG(memlists_used); 941 942 /* 943 * finish building physinstall list 944 */ 945 
sort_physinstall(); 946 947 /* 948 * build bios reserved memlists 949 */ 950 build_rsvdmemlists(); 951 952 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 953 /* 954 * build PCI Memory list 955 */ 956 map.nr_entries = MAXMAPS; 957 /*LINTED: constant in conditional context*/ 958 set_xen_guest_handle(map.buffer, map_buffer); 959 if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0) 960 dboot_panic("getting XENMEM_machine_memory_map failed"); 961 build_pcimemlists(); 962 } 963 } 964 965 #else /* !__xpv */ 966 967 static void 968 dboot_multiboot1_xboot_consinfo(void) 969 { 970 } 971 972 static void 973 dboot_multiboot2_xboot_consinfo(void) 974 { 975 } 976 977 static int 978 dboot_multiboot_modcount(void) 979 { 980 switch (multiboot_version) { 981 case 1: 982 return (mb_info->mods_count); 983 984 case 2: 985 return (dboot_multiboot2_modcount(mb2_info)); 986 987 default: 988 dboot_panic("Unknown multiboot version: %d\n", 989 multiboot_version); 990 break; 991 } 992 return (0); 993 } 994 995 static uint32_t 996 dboot_multiboot_modstart(int index) 997 { 998 switch (multiboot_version) { 999 case 1: 1000 return (((mb_module_t *)mb_info->mods_addr)[index].mod_start); 1001 1002 case 2: 1003 return (dboot_multiboot2_modstart(mb2_info, index)); 1004 1005 default: 1006 dboot_panic("Unknown multiboot version: %d\n", 1007 multiboot_version); 1008 break; 1009 } 1010 return (0); 1011 } 1012 1013 static uint32_t 1014 dboot_multiboot_modend(int index) 1015 { 1016 switch (multiboot_version) { 1017 case 1: 1018 return (((mb_module_t *)mb_info->mods_addr)[index].mod_end); 1019 1020 case 2: 1021 return (dboot_multiboot2_modend(mb2_info, index)); 1022 1023 default: 1024 dboot_panic("Unknown multiboot version: %d\n", 1025 multiboot_version); 1026 break; 1027 } 1028 return (0); 1029 } 1030 1031 static char * 1032 dboot_multiboot_modcmdline(int index) 1033 { 1034 switch (multiboot_version) { 1035 case 1: 1036 return ((char *)((mb_module_t *) 1037 mb_info->mods_addr)[index].mod_name); 1038 1039 
case 2: 1040 return (dboot_multiboot2_modcmdline(mb2_info, index)); 1041 1042 default: 1043 dboot_panic("Unknown multiboot version: %d\n", 1044 multiboot_version); 1045 break; 1046 } 1047 return (0); 1048 } 1049 1050 /* 1051 * Find the environment module for console setup. 1052 * Since we need the console to print early boot messages, the console is set up 1053 * before anything else and therefore we need to pick up the environment module 1054 * early too. 1055 * 1056 * Note, we just will search for and if found, will pass the env 1057 * module to console setup, the proper module list processing will happen later. 1058 */ 1059 static void 1060 dboot_find_env(void) 1061 { 1062 int i, modcount; 1063 uint32_t mod_start, mod_end; 1064 char *cmdline; 1065 1066 modcount = dboot_multiboot_modcount(); 1067 1068 for (i = 0; i < modcount; ++i) { 1069 cmdline = dboot_multiboot_modcmdline(i); 1070 if (cmdline == NULL) 1071 continue; 1072 1073 if (strstr(cmdline, "type=environment") == NULL) 1074 continue; 1075 1076 mod_start = dboot_multiboot_modstart(i); 1077 mod_end = dboot_multiboot_modend(i); 1078 modules[0].bm_addr = (native_ptr_t)(uintptr_t)mod_start; 1079 modules[0].bm_size = mod_end - mod_start; 1080 modules[0].bm_name = (native_ptr_t)(uintptr_t)NULL; 1081 modules[0].bm_hash = (native_ptr_t)(uintptr_t)NULL; 1082 modules[0].bm_type = BMT_ENV; 1083 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 1084 bi->bi_module_cnt = 1; 1085 return; 1086 } 1087 } 1088 1089 static boolean_t 1090 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper) 1091 { 1092 boolean_t rv = B_FALSE; 1093 1094 switch (multiboot_version) { 1095 case 1: 1096 if (mb_info->flags & 0x01) { 1097 *lower = mb_info->mem_lower; 1098 *upper = mb_info->mem_upper; 1099 rv = B_TRUE; 1100 } 1101 break; 1102 1103 case 2: 1104 return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper)); 1105 1106 default: 1107 dboot_panic("Unknown multiboot version: %d\n", 1108 multiboot_version); 1109 break; 1110 } 1111 
return (rv); 1112 } 1113 1114 static uint8_t 1115 dboot_a2h(char v) 1116 { 1117 if (v >= 'a') 1118 return (v - 'a' + 0xa); 1119 else if (v >= 'A') 1120 return (v - 'A' + 0xa); 1121 else if (v >= '0') 1122 return (v - '0'); 1123 else 1124 dboot_panic("bad ASCII hex character %c\n", v); 1125 1126 return (0); 1127 } 1128 1129 static void 1130 digest_a2h(const char *ascii, uint8_t *digest) 1131 { 1132 unsigned int i; 1133 1134 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1135 digest[i] = dboot_a2h(ascii[i * 2]) << 4; 1136 digest[i] |= dboot_a2h(ascii[i * 2 + 1]); 1137 } 1138 } 1139 1140 /* 1141 * Generate a SHA-1 hash of the first len bytes of image, and compare it with 1142 * the ASCII-format hash found in the 40-byte buffer at ascii. If they 1143 * match, return 0, otherwise -1. This works only for images smaller than 1144 * 4 GB, which should not be a problem. 1145 */ 1146 static int 1147 check_image_hash(uint_t midx) 1148 { 1149 const char *ascii; 1150 const void *image; 1151 size_t len; 1152 SHA1_CTX ctx; 1153 uint8_t digest[SHA1_DIGEST_LENGTH]; 1154 uint8_t baseline[SHA1_DIGEST_LENGTH]; 1155 unsigned int i; 1156 1157 ascii = (const char *)(uintptr_t)modules[midx].bm_hash; 1158 image = (const void *)(uintptr_t)modules[midx].bm_addr; 1159 len = (size_t)modules[midx].bm_size; 1160 1161 digest_a2h(ascii, baseline); 1162 1163 SHA1Init(&ctx); 1164 SHA1Update(&ctx, image, len); 1165 SHA1Final(digest, &ctx); 1166 1167 for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1168 if (digest[i] != baseline[i]) 1169 return (-1); 1170 } 1171 1172 return (0); 1173 } 1174 1175 static const char * 1176 type_to_str(boot_module_type_t type) 1177 { 1178 switch (type) { 1179 case BMT_ROOTFS: 1180 return ("rootfs"); 1181 case BMT_FILE: 1182 return ("file"); 1183 case BMT_HASH: 1184 return ("hash"); 1185 case BMT_ENV: 1186 return ("environment"); 1187 default: 1188 return ("unknown"); 1189 } 1190 } 1191 1192 static void 1193 check_images(void) 1194 { 1195 uint_t i; 1196 char 
displayhash[SHA1_ASCII_LENGTH + 1]; 1197 1198 for (i = 0; i < modules_used; i++) { 1199 if (prom_debug) { 1200 dboot_printf("module #%d: name %s type %s " 1201 "addr %lx size %lx\n", 1202 i, (char *)(uintptr_t)modules[i].bm_name, 1203 type_to_str(modules[i].bm_type), 1204 (ulong_t)modules[i].bm_addr, 1205 (ulong_t)modules[i].bm_size); 1206 } 1207 1208 if (modules[i].bm_type == BMT_HASH || 1209 modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) { 1210 DBG_MSG("module has no hash; skipping check\n"); 1211 continue; 1212 } 1213 (void) memcpy(displayhash, 1214 (void *)(uintptr_t)modules[i].bm_hash, 1215 SHA1_ASCII_LENGTH); 1216 displayhash[SHA1_ASCII_LENGTH] = '\0'; 1217 if (prom_debug) { 1218 dboot_printf("checking expected hash [%s]: ", 1219 displayhash); 1220 } 1221 1222 if (check_image_hash(i) != 0) 1223 dboot_panic("hash mismatch!\n"); 1224 else 1225 DBG_MSG("OK\n"); 1226 } 1227 } 1228 1229 /* 1230 * Determine the module's starting address, size, name, and type, and fill the 1231 * boot_modules structure. This structure is used by the bop code, except for 1232 * hashes which are checked prior to transferring control to the kernel. 1233 */ 1234 static void 1235 process_module(int midx) 1236 { 1237 uint32_t mod_start = dboot_multiboot_modstart(midx); 1238 uint32_t mod_end = dboot_multiboot_modend(midx); 1239 char *cmdline = dboot_multiboot_modcmdline(midx); 1240 char *p, *q; 1241 1242 check_higher(mod_end); 1243 if (prom_debug) { 1244 dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n", 1245 midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end); 1246 } 1247 1248 if (mod_start > mod_end) { 1249 dboot_panic("module #%d: module start address 0x%lx greater " 1250 "than end address 0x%lx", midx, 1251 (ulong_t)mod_start, (ulong_t)mod_end); 1252 } 1253 1254 /* 1255 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes 1256 * the address of the last valid byte in a module plus 1 as mod_end. 
1257 * This is of course a bug; the multiboot specification simply states 1258 * that mod_start and mod_end "contain the start and end addresses of 1259 * the boot module itself" which is pretty obviously not what GRUB is 1260 * doing. However, fixing it requires that not only this code be 1261 * changed but also that other code consuming this value and values 1262 * derived from it be fixed, and that the kernel and GRUB must either 1263 * both have the bug or neither. While there are a lot of combinations 1264 * that will work, there are also some that won't, so for simplicity 1265 * we'll just cope with the bug. That means we won't actually hash the 1266 * byte at mod_end, and we will expect that mod_end for the hash file 1267 * itself is one greater than some multiple of 41 (40 bytes of ASCII 1268 * hash plus a newline for each module). We set bm_size to the true 1269 * correct number of bytes in each module, achieving exactly this. 1270 */ 1271 1272 modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start; 1273 modules[midx].bm_size = mod_end - mod_start; 1274 modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline; 1275 modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL; 1276 modules[midx].bm_type = BMT_FILE; 1277 1278 if (cmdline == NULL) { 1279 modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname; 1280 return; 1281 } 1282 1283 p = cmdline; 1284 modules[midx].bm_name = 1285 (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r"); 1286 1287 while (p != NULL) { 1288 q = strsep(&p, " \t\f\n\r"); 1289 if (strncmp(q, "name=", 5) == 0) { 1290 if (q[5] != '\0' && !isspace(q[5])) { 1291 modules[midx].bm_name = 1292 (native_ptr_t)(uintptr_t)(q + 5); 1293 } 1294 continue; 1295 } 1296 1297 if (strncmp(q, "type=", 5) == 0) { 1298 if (q[5] == '\0' || isspace(q[5])) 1299 continue; 1300 q += 5; 1301 if (strcmp(q, "rootfs") == 0) { 1302 modules[midx].bm_type = BMT_ROOTFS; 1303 } else if (strcmp(q, "hash") == 0) { 1304 modules[midx].bm_type = BMT_HASH; 1305 } else if 
(strcmp(q, "environment") == 0) { 1306 modules[midx].bm_type = BMT_ENV; 1307 } else if (strcmp(q, "file") != 0) { 1308 dboot_printf("\tmodule #%d: unknown module " 1309 "type '%s'; defaulting to 'file'", 1310 midx, q); 1311 } 1312 continue; 1313 } 1314 1315 if (strncmp(q, "hash=", 5) == 0) { 1316 if (q[5] != '\0' && !isspace(q[5])) { 1317 modules[midx].bm_hash = 1318 (native_ptr_t)(uintptr_t)(q + 5); 1319 } 1320 continue; 1321 } 1322 1323 dboot_printf("ignoring unknown option '%s'\n", q); 1324 } 1325 } 1326 1327 /* 1328 * Backward compatibility: if there are exactly one or two modules, both 1329 * of type 'file' and neither with an embedded hash value, we have been 1330 * given the legacy style modules. In this case we need to treat the first 1331 * module as a rootfs and the second as a hash referencing that module. 1332 * Otherwise, even if the configuration is invalid, we assume that the 1333 * operator knows what he's doing or at least isn't being bitten by this 1334 * interface change. 1335 */ 1336 static void 1337 fixup_modules(void) 1338 { 1339 if (modules_used == 0 || modules_used > 2) 1340 return; 1341 1342 if (modules[0].bm_type != BMT_FILE || 1343 modules_used > 1 && modules[1].bm_type != BMT_FILE) { 1344 return; 1345 } 1346 1347 if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL || 1348 modules_used > 1 && 1349 modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL) { 1350 return; 1351 } 1352 1353 modules[0].bm_type = BMT_ROOTFS; 1354 if (modules_used > 1) { 1355 modules[1].bm_type = BMT_HASH; 1356 modules[1].bm_name = modules[0].bm_name; 1357 } 1358 } 1359 1360 /* 1361 * For modules that do not have assigned hashes but have a separate hash module, 1362 * find the assigned hash module and set the primary module's bm_hash to point 1363 * to the hash data from that module. We will then ignore modules of type 1364 * BMT_HASH from this point forward. 
1365 */ 1366 static void 1367 assign_module_hashes(void) 1368 { 1369 uint_t i, j; 1370 1371 for (i = 0; i < modules_used; i++) { 1372 if (modules[i].bm_type == BMT_HASH || 1373 modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) { 1374 continue; 1375 } 1376 1377 for (j = 0; j < modules_used; j++) { 1378 if (modules[j].bm_type != BMT_HASH || 1379 strcmp((char *)(uintptr_t)modules[j].bm_name, 1380 (char *)(uintptr_t)modules[i].bm_name) != 0) { 1381 continue; 1382 } 1383 1384 if (modules[j].bm_size < SHA1_ASCII_LENGTH) { 1385 dboot_printf("Short hash module of length " 1386 "0x%lx bytes; ignoring\n", 1387 (ulong_t)modules[j].bm_size); 1388 } else { 1389 modules[i].bm_hash = modules[j].bm_addr; 1390 } 1391 break; 1392 } 1393 } 1394 } 1395 1396 /* 1397 * Walk through the module information finding the last used address. 1398 * The first available address will become the top level page table. 1399 */ 1400 static void 1401 dboot_process_modules(void) 1402 { 1403 int i, modcount; 1404 extern char _end[]; 1405 1406 DBG_MSG("\nFinding Modules\n"); 1407 modcount = dboot_multiboot_modcount(); 1408 if (modcount > MAX_BOOT_MODULES) { 1409 dboot_panic("Too many modules (%d) -- the maximum is %d.", 1410 modcount, MAX_BOOT_MODULES); 1411 } 1412 /* 1413 * search the modules to find the last used address 1414 * we'll build the module list while we're walking through here 1415 */ 1416 check_higher((paddr_t)(uintptr_t)&_end); 1417 for (i = 0; i < modcount; ++i) { 1418 process_module(i); 1419 modules_used++; 1420 } 1421 bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 1422 DBG(bi->bi_modules); 1423 bi->bi_module_cnt = modcount; 1424 DBG(bi->bi_module_cnt); 1425 1426 fixup_modules(); 1427 assign_module_hashes(); 1428 check_images(); 1429 } 1430 1431 /* 1432 * We then build the phys_install memlist from the multiboot information. 
1433 */ 1434 static void 1435 dboot_process_mmap(void) 1436 { 1437 uint64_t start; 1438 uint64_t end; 1439 uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 1440 uint32_t lower, upper; 1441 int i, mmap_entries; 1442 1443 /* 1444 * Walk through the memory map from multiboot and build our memlist 1445 * structures. Note these will have native format pointers. 1446 */ 1447 DBG_MSG("\nFinding Memory Map\n"); 1448 num_entries = 0; 1449 num_entries_set = B_FALSE; 1450 max_mem = 0; 1451 if ((mmap_entries = dboot_loader_mmap_entries()) > 0) { 1452 for (i = 0; i < mmap_entries; i++) { 1453 uint32_t type = dboot_loader_mmap_get_type(i); 1454 start = dboot_loader_mmap_get_base(i); 1455 end = start + dboot_loader_mmap_get_length(i); 1456 1457 if (prom_debug) 1458 dboot_printf("\ttype: %d %" PRIx64 "..%" 1459 PRIx64 "\n", type, start, end); 1460 1461 /* 1462 * page align start and end 1463 */ 1464 start = (start + page_offset) & ~page_offset; 1465 end &= ~page_offset; 1466 if (end <= start) 1467 continue; 1468 1469 /* 1470 * only type 1 is usable RAM 1471 */ 1472 switch (type) { 1473 case 1: 1474 if (end > max_mem) 1475 max_mem = end; 1476 memlists[memlists_used].addr = start; 1477 memlists[memlists_used].size = end - start; 1478 ++memlists_used; 1479 if (memlists_used > MAX_MEMLIST) 1480 dboot_panic("too many memlists"); 1481 break; 1482 case 2: 1483 rsvdmemlists[rsvdmemlists_used].addr = start; 1484 rsvdmemlists[rsvdmemlists_used].size = 1485 end - start; 1486 ++rsvdmemlists_used; 1487 if (rsvdmemlists_used > MAX_MEMLIST) 1488 dboot_panic("too many rsvdmemlists"); 1489 break; 1490 default: 1491 continue; 1492 } 1493 } 1494 build_pcimemlists(); 1495 } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) { 1496 DBG(lower); 1497 memlists[memlists_used].addr = 0; 1498 memlists[memlists_used].size = lower * 1024; 1499 ++memlists_used; 1500 DBG(upper); 1501 memlists[memlists_used].addr = 1024 * 1024; 1502 memlists[memlists_used].size = upper * 1024; 1503 
++memlists_used; 1504 1505 /* 1506 * Old platform - assume I/O space at the end of memory. 1507 */ 1508 pcimemlists[0].addr = (upper * 1024) + (1024 * 1024); 1509 pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr; 1510 pcimemlists[0].next = 0; 1511 pcimemlists[0].prev = 0; 1512 bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 1513 DBG(bi->bi_pcimem); 1514 } else { 1515 dboot_panic("No memory info from boot loader!!!"); 1516 } 1517 1518 /* 1519 * finish processing the physinstall list 1520 */ 1521 sort_physinstall(); 1522 1523 /* 1524 * build bios reserved mem lists 1525 */ 1526 build_rsvdmemlists(); 1527 } 1528 1529 /* 1530 * The highest address is used as the starting point for dboot's simple 1531 * memory allocator. 1532 * 1533 * Finding the highest address in case of Multiboot 1 protocol is 1534 * quite painful in the sense that some information provided by 1535 * the multiboot info structure points to BIOS data, and some to RAM. 1536 * 1537 * The module list was processed and checked already by dboot_process_modules(), 1538 * so we will check the command line string and the memory map. 1539 * 1540 * This list of to be checked items is based on our current knowledge of 1541 * allocations made by grub1 and will need to be reviewed if there 1542 * are updates about the information provided by Multiboot 1. 1543 * 1544 * In the case of the Multiboot 2, our life is much simpler, as the MB2 1545 * information tag list is one contiguous chunk of memory. 
1546 */ 1547 static paddr_t 1548 dboot_multiboot1_highest_addr(void) 1549 { 1550 paddr_t addr = (paddr_t)(uintptr_t)NULL; 1551 char *cmdl = (char *)mb_info->cmdline; 1552 1553 if (mb_info->flags & MB_INFO_CMDLINE) 1554 addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1)); 1555 1556 if (mb_info->flags & MB_INFO_MEM_MAP) 1557 addr = MAX(addr, 1558 ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length))); 1559 return (addr); 1560 } 1561 1562 static void 1563 dboot_multiboot_highest_addr(void) 1564 { 1565 paddr_t addr; 1566 1567 switch (multiboot_version) { 1568 case 1: 1569 addr = dboot_multiboot1_highest_addr(); 1570 if (addr != (paddr_t)(uintptr_t)NULL) 1571 check_higher(addr); 1572 break; 1573 case 2: 1574 addr = dboot_multiboot2_highest_addr(mb2_info); 1575 if (addr != (paddr_t)(uintptr_t)NULL) 1576 check_higher(addr); 1577 break; 1578 default: 1579 dboot_panic("Unknown multiboot version: %d\n", 1580 multiboot_version); 1581 break; 1582 } 1583 } 1584 1585 /* 1586 * Walk the boot loader provided information and find the highest free address. 
1587 */ 1588 static void 1589 init_mem_alloc(void) 1590 { 1591 DBG_MSG("Entered init_mem_alloc()\n"); 1592 dboot_process_modules(); 1593 dboot_process_mmap(); 1594 dboot_multiboot_highest_addr(); 1595 } 1596 1597 static void 1598 dboot_multiboot_get_fwtables(void) 1599 { 1600 multiboot_tag_new_acpi_t *nacpitagp; 1601 multiboot_tag_old_acpi_t *oacpitagp; 1602 1603 /* no fw tables from multiboot 1 */ 1604 if (multiboot_version != 2) 1605 return; 1606 1607 /* only provide SMBIOS pointer in case of UEFI */ 1608 bi->bi_smbios = (native_ptr_t)(uintptr_t)NULL; 1609 1610 nacpitagp = (multiboot_tag_new_acpi_t *) 1611 dboot_multiboot2_find_tag(mb2_info, 1612 MULTIBOOT_TAG_TYPE_ACPI_NEW); 1613 oacpitagp = (multiboot_tag_old_acpi_t *) 1614 dboot_multiboot2_find_tag(mb2_info, 1615 MULTIBOOT_TAG_TYPE_ACPI_OLD); 1616 1617 if (nacpitagp != NULL) { 1618 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1619 &nacpitagp->mb_rsdp[0]; 1620 } else if (oacpitagp != NULL) { 1621 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1622 &oacpitagp->mb_rsdp[0]; 1623 } else { 1624 bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)NULL; 1625 } 1626 } 1627 #endif /* !__xpv */ 1628 1629 /* 1630 * Simple memory allocator, allocates aligned physical memory. 1631 * Note that startup_kernel() only allocates memory, never frees. 1632 * Memory usage just grows in an upward direction. 
1633 */ 1634 static void * 1635 do_mem_alloc(uint32_t size, uint32_t align) 1636 { 1637 uint_t i; 1638 uint64_t best; 1639 uint64_t start; 1640 uint64_t end; 1641 1642 /* 1643 * make sure size is a multiple of pagesize 1644 */ 1645 size = RNDUP(size, MMU_PAGESIZE); 1646 next_avail_addr = RNDUP(next_avail_addr, align); 1647 1648 /* 1649 * XXPV fixme joe 1650 * 1651 * a really large bootarchive that causes you to run out of memory 1652 * may cause this to blow up 1653 */ 1654 /* LINTED E_UNEXPECTED_UINT_PROMOTION */ 1655 best = (uint64_t)-size; 1656 for (i = 0; i < memlists_used; ++i) { 1657 start = memlists[i].addr; 1658 #if defined(__xpv) 1659 start += mfn_base; 1660 #endif 1661 end = start + memlists[i].size; 1662 1663 /* 1664 * did we find the desired address? 1665 */ 1666 if (start <= next_avail_addr && next_avail_addr + size <= end) { 1667 best = next_avail_addr; 1668 goto done; 1669 } 1670 1671 /* 1672 * if not is this address the best so far? 1673 */ 1674 if (start > next_avail_addr && start < best && 1675 RNDUP(start, align) + size <= end) 1676 best = RNDUP(start, align); 1677 } 1678 1679 /* 1680 * We didn't find exactly the address we wanted, due to going off the 1681 * end of a memory region. Return the best found memory address. 1682 */ 1683 done: 1684 next_avail_addr = best + size; 1685 #if defined(__xpv) 1686 if (next_avail_addr > scratch_end) 1687 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: " 1688 "0x%lx", (ulong_t)next_avail_addr, 1689 (ulong_t)scratch_end); 1690 #endif 1691 (void) memset((void *)(uintptr_t)best, 0, size); 1692 return ((void *)(uintptr_t)best); 1693 } 1694 1695 void * 1696 mem_alloc(uint32_t size) 1697 { 1698 return (do_mem_alloc(size, MMU_PAGESIZE)); 1699 } 1700 1701 1702 /* 1703 * Build page tables to map all of memory used so far as well as the kernel. 
1704 */ 1705 static void 1706 build_page_tables(void) 1707 { 1708 uint32_t psize; 1709 uint32_t level; 1710 uint32_t off; 1711 uint64_t start; 1712 #if !defined(__xpv) 1713 uint32_t i; 1714 uint64_t end; 1715 #endif /* __xpv */ 1716 1717 /* 1718 * If we're on metal, we need to create the top level pagetable. 1719 */ 1720 #if defined(__xpv) 1721 top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base; 1722 #else /* __xpv */ 1723 top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 1724 #endif /* __xpv */ 1725 DBG((uintptr_t)top_page_table); 1726 1727 /* 1728 * Determine if we'll use large mappings for kernel, then map it. 1729 */ 1730 if (largepage_support) { 1731 psize = lpagesize; 1732 level = 1; 1733 } else { 1734 psize = MMU_PAGESIZE; 1735 level = 0; 1736 } 1737 1738 DBG_MSG("Mapping kernel\n"); 1739 DBG(ktext_phys); 1740 DBG(target_kernel_text); 1741 DBG(ksize); 1742 DBG(psize); 1743 for (off = 0; off < ksize; off += psize) 1744 map_pa_at_va(ktext_phys + off, target_kernel_text + off, level); 1745 1746 /* 1747 * The kernel will need a 1 page window to work with page tables 1748 */ 1749 bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 1750 DBG(bi->bi_pt_window); 1751 bi->bi_pte_to_pt_window = 1752 (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0); 1753 DBG(bi->bi_pte_to_pt_window); 1754 1755 #if defined(__xpv) 1756 if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 1757 /* If this is a domU we're done. */ 1758 DBG_MSG("\nPage tables constructed\n"); 1759 return; 1760 } 1761 #endif /* __xpv */ 1762 1763 /* 1764 * We need 1:1 mappings for the lower 1M of memory to access 1765 * BIOS tables used by a couple of drivers during boot. 1766 * 1767 * The following code works because our simple memory allocator 1768 * only grows usage in an upwards direction. 1769 * 1770 * Note that by this point in boot some mappings for low memory 1771 * may already exist because we've already accessed device in low 1772 * memory. 
(Specifically the video frame buffer and keyboard 1773 * status ports.) If we're booting on raw hardware then GRUB 1774 * created these mappings for us. If we're booting under a 1775 * hypervisor then we went ahead and remapped these devices into 1776 * memory allocated within dboot itself. 1777 */ 1778 if (map_debug) 1779 dboot_printf("1:1 map pa=0..1Meg\n"); 1780 for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) { 1781 #if defined(__xpv) 1782 map_ma_at_va(start, start, 0); 1783 #else /* __xpv */ 1784 map_pa_at_va(start, start, 0); 1785 #endif /* __xpv */ 1786 } 1787 1788 #if !defined(__xpv) 1789 for (i = 0; i < memlists_used; ++i) { 1790 start = memlists[i].addr; 1791 1792 end = start + memlists[i].size; 1793 1794 if (map_debug) 1795 dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n", 1796 start, end); 1797 while (start < end && start < next_avail_addr) { 1798 map_pa_at_va(start, start, 0); 1799 start += MMU_PAGESIZE; 1800 } 1801 } 1802 #endif /* !__xpv */ 1803 1804 DBG_MSG("\nPage tables constructed\n"); 1805 } 1806 1807 #define NO_MULTIBOOT \ 1808 "multiboot is no longer used to boot the Solaris Operating System.\n\ 1809 The grub entry should be changed to:\n\ 1810 kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\ 1811 module$ /platform/i86pc/$ISADIR/boot_archive\n\ 1812 See http://illumos.org/msg/SUNOS-8000-AK for details.\n" 1813 1814 static void 1815 dboot_init_xboot_consinfo(void) 1816 { 1817 uintptr_t addr; 1818 /* 1819 * boot info must be 16 byte aligned for 64 bit kernel ABI 1820 */ 1821 addr = (uintptr_t)boot_info; 1822 addr = (addr + 0xf) & ~0xf; 1823 bi = (struct xboot_info *)addr; 1824 1825 #if !defined(__xpv) 1826 switch (multiboot_version) { 1827 case 1: 1828 dboot_multiboot1_xboot_consinfo(); 1829 break; 1830 case 2: 1831 dboot_multiboot2_xboot_consinfo(); 1832 break; 1833 default: 1834 dboot_panic("Unknown multiboot version: %d\n", 1835 multiboot_version); 1836 break; 1837 } 1838 /* 1839 * Lookup environment module for the console. 
Complete module list 1840 * will be built after console setup. 1841 */ 1842 dboot_find_env(); 1843 #endif 1844 } 1845 1846 /* 1847 * Set up basic data from the boot loader. 1848 * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support 1849 * 32-bit dboot code setup used to set up and start 64-bit kernel. 1850 * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and 1851 * start 64-bit illumos kernel. 1852 */ 1853 static void 1854 dboot_loader_init(void) 1855 { 1856 #if !defined(__xpv) 1857 mb_info = NULL; 1858 mb2_info = NULL; 1859 1860 switch (mb_magic) { 1861 case MB_BOOTLOADER_MAGIC: 1862 multiboot_version = 1; 1863 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr; 1864 #if defined(_BOOT_TARGET_amd64) 1865 load_addr = mb_header.load_addr; 1866 #endif 1867 break; 1868 1869 case MULTIBOOT2_BOOTLOADER_MAGIC: 1870 multiboot_version = 2; 1871 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr; 1872 mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info); 1873 #if defined(_BOOT_TARGET_amd64) 1874 load_addr = mb2_load_addr; 1875 #endif 1876 break; 1877 1878 default: 1879 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic); 1880 break; 1881 } 1882 #endif /* !defined(__xpv) */ 1883 } 1884 1885 /* Extract the kernel command line from [multi]boot information. */ 1886 static char * 1887 dboot_loader_cmdline(void) 1888 { 1889 char *line = NULL; 1890 1891 #if defined(__xpv) 1892 line = (char *)xen_info->cmd_line; 1893 #else /* __xpv */ 1894 1895 switch (multiboot_version) { 1896 case 1: 1897 if (mb_info->flags & MB_INFO_CMDLINE) 1898 line = (char *)mb_info->cmdline; 1899 break; 1900 1901 case 2: 1902 line = dboot_multiboot2_cmdline(mb2_info); 1903 break; 1904 1905 default: 1906 dboot_panic("Unknown multiboot version: %d\n", 1907 multiboot_version); 1908 break; 1909 } 1910 1911 #endif /* __xpv */ 1912 1913 /* 1914 * Make sure we have valid pointer so the string operations 1915 * will not crash us. 
1916 */ 1917 if (line == NULL) 1918 line = ""; 1919 1920 return (line); 1921 } 1922 1923 static char * 1924 dboot_loader_name(void) 1925 { 1926 #if defined(__xpv) 1927 return (NULL); 1928 #else /* __xpv */ 1929 multiboot_tag_string_t *tag; 1930 1931 switch (multiboot_version) { 1932 case 1: 1933 return ((char *)mb_info->boot_loader_name); 1934 1935 case 2: 1936 tag = dboot_multiboot2_find_tag(mb2_info, 1937 MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME); 1938 return (tag->mb_string); 1939 default: 1940 dboot_panic("Unknown multiboot version: %d\n", 1941 multiboot_version); 1942 break; 1943 } 1944 1945 return (NULL); 1946 #endif /* __xpv */ 1947 } 1948 /* 1949 * startup_kernel has a pretty simple job. It builds pagetables which reflect 1950 * 1:1 mappings for all memory in use. It then also adds mappings for 1951 * the kernel nucleus at virtual address of target_kernel_text using large page 1952 * mappings. The page table pages are also accessible at 1:1 mapped 1953 * virtual addresses. 1954 */ 1955 /*ARGSUSED*/ 1956 void 1957 startup_kernel(void) 1958 { 1959 char *cmdline; 1960 char *bootloader; 1961 #if defined(__xpv) 1962 physdev_set_iopl_t set_iopl; 1963 #endif /* __xpv */ 1964 1965 dboot_loader_init(); 1966 /* 1967 * At this point we are executing in a 32 bit real mode. 1968 */ 1969 1970 bootloader = dboot_loader_name(); 1971 cmdline = dboot_loader_cmdline(); 1972 1973 #if defined(__xpv) 1974 /* 1975 * For dom0, before we initialize the console subsystem we'll 1976 * need to enable io operations, so set I/O priveldge level to 1. 
1977 */ 1978 if (DOMAIN_IS_INITDOMAIN(xen_info)) { 1979 set_iopl.iopl = 1; 1980 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1981 } 1982 #endif /* __xpv */ 1983 1984 dboot_init_xboot_consinfo(); 1985 bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline; 1986 bcons_init(bi); 1987 1988 prom_debug = (find_boot_prop("prom_debug") != NULL); 1989 map_debug = (find_boot_prop("map_debug") != NULL); 1990 1991 #if !defined(__xpv) 1992 dboot_multiboot_get_fwtables(); 1993 #endif 1994 DBG_MSG("\n\nillumos prekernel set: "); 1995 DBG_MSG(cmdline); 1996 DBG_MSG("\n"); 1997 1998 if (bootloader != NULL && prom_debug) { 1999 dboot_printf("Kernel loaded by: %s\n", bootloader); 2000 #if !defined(__xpv) 2001 dboot_printf("Using multiboot %d boot protocol.\n", 2002 multiboot_version); 2003 #endif 2004 } 2005 2006 if (strstr(cmdline, "multiboot") != NULL) { 2007 dboot_panic(NO_MULTIBOOT); 2008 } 2009 2010 DBG((uintptr_t)bi); 2011 #if !defined(__xpv) 2012 DBG((uintptr_t)mb_info); 2013 DBG((uintptr_t)mb2_info); 2014 if (mb2_info != NULL) 2015 DBG(mb2_info->mbi_total_size); 2016 DBG(bi->bi_acpi_rsdp); 2017 DBG(bi->bi_smbios); 2018 #endif 2019 2020 /* 2021 * Need correct target_kernel_text value 2022 */ 2023 #if defined(_BOOT_TARGET_amd64) 2024 target_kernel_text = KERNEL_TEXT_amd64; 2025 #elif defined(__xpv) 2026 target_kernel_text = KERNEL_TEXT_i386_xpv; 2027 #else 2028 target_kernel_text = KERNEL_TEXT_i386; 2029 #endif 2030 DBG(target_kernel_text); 2031 2032 #if defined(__xpv) 2033 2034 /* 2035 * XXPV Derive this stuff from CPUID / what the hypervisor has enabled 2036 */ 2037 2038 #if defined(_BOOT_TARGET_amd64) 2039 /* 2040 * 64-bit hypervisor. 
2041 */ 2042 amd64_support = 1; 2043 pae_support = 1; 2044 2045 #else /* _BOOT_TARGET_amd64 */ 2046 2047 /* 2048 * See if we are running on a PAE Hypervisor 2049 */ 2050 { 2051 xen_capabilities_info_t caps; 2052 2053 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0) 2054 dboot_panic("HYPERVISOR_xen_version(caps) failed"); 2055 caps[sizeof (caps) - 1] = 0; 2056 if (prom_debug) 2057 dboot_printf("xen capabilities %s\n", caps); 2058 if (strstr(caps, "x86_32p") != NULL) 2059 pae_support = 1; 2060 } 2061 2062 #endif /* _BOOT_TARGET_amd64 */ 2063 { 2064 xen_platform_parameters_t p; 2065 2066 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0) 2067 dboot_panic("HYPERVISOR_xen_version(parms) failed"); 2068 DBG(p.virt_start); 2069 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start); 2070 } 2071 2072 /* 2073 * The hypervisor loads stuff starting at 1Gig 2074 */ 2075 mfn_base = ONE_GIG; 2076 DBG(mfn_base); 2077 2078 /* 2079 * enable writable page table mode for the hypervisor 2080 */ 2081 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2082 VMASST_TYPE_writable_pagetables) < 0) 2083 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed"); 2084 2085 /* 2086 * check for NX support 2087 */ 2088 if (pae_support) { 2089 uint32_t eax = 0x80000000; 2090 uint32_t edx = get_cpuid_edx(&eax); 2091 2092 if (eax >= 0x80000001) { 2093 eax = 0x80000001; 2094 edx = get_cpuid_edx(&eax); 2095 if (edx & CPUID_AMD_EDX_NX) 2096 NX_support = 1; 2097 } 2098 } 2099 2100 #if !defined(_BOOT_TARGET_amd64) 2101 2102 /* 2103 * The 32-bit hypervisor uses segmentation to protect itself from 2104 * guests. This means when a guest attempts to install a flat 4GB 2105 * code or data descriptor the 32-bit hypervisor will protect itself 2106 * by silently shrinking the segment such that if the guest attempts 2107 * any access where the hypervisor lives a #gp fault is generated. 
2108 * The problem is that some applications expect a full 4GB flat 2109 * segment for their current thread pointer and will use negative 2110 * offset segment wrap around to access data. TLS support in linux 2111 * brand is one example of this. 2112 * 2113 * The 32-bit hypervisor can catch the #gp fault in these cases 2114 * and emulate the access without passing the #gp fault to the guest 2115 * but only if VMASST_TYPE_4gb_segments is explicitly turned on. 2116 * Seems like this should have been the default. 2117 * Either way, we want the hypervisor -- and not Solaris -- to deal 2118 * to deal with emulating these accesses. 2119 */ 2120 if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2121 VMASST_TYPE_4gb_segments) < 0) 2122 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed"); 2123 #endif /* !_BOOT_TARGET_amd64 */ 2124 2125 #else /* __xpv */ 2126 2127 /* 2128 * use cpuid to enable MMU features 2129 */ 2130 if (have_cpuid()) { 2131 uint32_t eax, edx; 2132 2133 eax = 1; 2134 edx = get_cpuid_edx(&eax); 2135 if (edx & CPUID_INTC_EDX_PSE) 2136 largepage_support = 1; 2137 if (edx & CPUID_INTC_EDX_PGE) 2138 pge_support = 1; 2139 if (edx & CPUID_INTC_EDX_PAE) 2140 pae_support = 1; 2141 2142 eax = 0x80000000; 2143 edx = get_cpuid_edx(&eax); 2144 if (eax >= 0x80000001) { 2145 eax = 0x80000001; 2146 edx = get_cpuid_edx(&eax); 2147 if (edx & CPUID_AMD_EDX_LM) 2148 amd64_support = 1; 2149 if (edx & CPUID_AMD_EDX_NX) 2150 NX_support = 1; 2151 } 2152 } else { 2153 dboot_printf("cpuid not supported\n"); 2154 } 2155 #endif /* __xpv */ 2156 2157 2158 #if defined(_BOOT_TARGET_amd64) 2159 if (amd64_support == 0) 2160 dboot_panic("long mode not supported, rebooting"); 2161 else if (pae_support == 0) 2162 dboot_panic("long mode, but no PAE; rebooting"); 2163 #else 2164 /* 2165 * Allow the command line to over-ride use of PAE for 32 bit. 
2166 */ 2167 if (strstr(cmdline, "disablePAE=true") != NULL) { 2168 pae_support = 0; 2169 NX_support = 0; 2170 amd64_support = 0; 2171 } 2172 #endif 2173 2174 /* 2175 * initialize the simple memory allocator 2176 */ 2177 init_mem_alloc(); 2178 2179 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64) 2180 /* 2181 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory 2182 */ 2183 if (max_mem < FOUR_GIG && NX_support == 0) 2184 pae_support = 0; 2185 #endif 2186 2187 /* 2188 * configure mmu information 2189 */ 2190 if (pae_support) { 2191 shift_amt = shift_amt_pae; 2192 ptes_per_table = 512; 2193 pte_size = 8; 2194 lpagesize = TWO_MEG; 2195 #if defined(_BOOT_TARGET_amd64) 2196 top_level = 3; 2197 #else 2198 top_level = 2; 2199 #endif 2200 } else { 2201 pae_support = 0; 2202 NX_support = 0; 2203 shift_amt = shift_amt_nopae; 2204 ptes_per_table = 1024; 2205 pte_size = 4; 2206 lpagesize = FOUR_MEG; 2207 top_level = 1; 2208 } 2209 2210 DBG(pge_support); 2211 DBG(NX_support); 2212 DBG(largepage_support); 2213 DBG(amd64_support); 2214 DBG(top_level); 2215 DBG(pte_size); 2216 DBG(ptes_per_table); 2217 DBG(lpagesize); 2218 2219 #if defined(__xpv) 2220 ktext_phys = ONE_GIG; /* from UNIX Mapfile */ 2221 #else 2222 ktext_phys = FOUR_MEG; /* from UNIX Mapfile */ 2223 #endif 2224 2225 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64) 2226 /* 2227 * For grub, copy kernel bits from the ELF64 file to final place. 2228 */ 2229 DBG_MSG("\nAllocating nucleus pages.\n"); 2230 ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG); 2231 if (ktext_phys == 0) 2232 dboot_panic("failed to allocate aligned kernel memory"); 2233 DBG(load_addr); 2234 if (dboot_elfload64(load_addr) != 0) 2235 dboot_panic("failed to parse kernel ELF image, rebooting"); 2236 #endif 2237 2238 DBG(ktext_phys); 2239 2240 /* 2241 * Allocate page tables. 
2242 */ 2243 build_page_tables(); 2244 2245 /* 2246 * return to assembly code to switch to running kernel 2247 */ 2248 entry_addr_low = (uint32_t)target_kernel_text; 2249 DBG(entry_addr_low); 2250 bi->bi_use_largepage = largepage_support; 2251 bi->bi_use_pae = pae_support; 2252 bi->bi_use_pge = pge_support; 2253 bi->bi_use_nx = NX_support; 2254 2255 #if defined(__xpv) 2256 2257 bi->bi_next_paddr = next_avail_addr - mfn_base; 2258 DBG(bi->bi_next_paddr); 2259 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr; 2260 DBG(bi->bi_next_vaddr); 2261 2262 /* 2263 * unmap unused pages in start area to make them available for DMA 2264 */ 2265 while (next_avail_addr < scratch_end) { 2266 (void) HYPERVISOR_update_va_mapping(next_avail_addr, 2267 0, UVMF_INVLPG | UVMF_LOCAL); 2268 next_avail_addr += MMU_PAGESIZE; 2269 } 2270 2271 bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info; 2272 DBG((uintptr_t)HYPERVISOR_shared_info); 2273 bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info; 2274 bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base; 2275 2276 #else /* __xpv */ 2277 2278 bi->bi_next_paddr = next_avail_addr; 2279 DBG(bi->bi_next_paddr); 2280 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr; 2281 DBG(bi->bi_next_vaddr); 2282 bi->bi_mb_version = multiboot_version; 2283 2284 switch (multiboot_version) { 2285 case 1: 2286 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info; 2287 break; 2288 case 2: 2289 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info; 2290 break; 2291 default: 2292 dboot_panic("Unknown multiboot version: %d\n", 2293 multiboot_version); 2294 break; 2295 } 2296 bi->bi_top_page_table = (uintptr_t)top_page_table; 2297 2298 #endif /* __xpv */ 2299 2300 bi->bi_kseg_size = FOUR_MEG; 2301 DBG(bi->bi_kseg_size); 2302 2303 #ifndef __xpv 2304 if (map_debug) 2305 dump_tables(); 2306 #endif 2307 2308 DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n"); 2309 } 2310