/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/machparam.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/mach_mmu.h>

#include <sys/multiboot.h>

extern multiboot_header_t mb_header;
extern int have_cpuid(void);
extern uint32_t get_cpuid_edx(uint32_t *eax);

#include <sys/inttypes.h>
#include <sys/bootinfo.h>
#include <sys/boot_console.h>

#include "dboot_printf.h"
#include "dboot_xboot.h"
#include "dboot_elfload.h"

/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit, non-paging) or a Xen domain loader to regular
 * kernel execution. Its task is to set up the kernel memory image and page
 * tables.
 *
 * The code executes as:
 * - a 32 bit program under GRUB (for 32 or 64 bit Solaris)
 * - a 32 bit program for 32 bit Xen
 * - a 64 bit program for 64 bit Xen (at least that's my assumption for now)
 *
 * Under Xen, we must create mappings for any memory beyond the initial
 * start of day allocation (such as the kernel itself).
 *
 * When not under Xen, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running with paging disabled, all such memory is directly
 * accessible.
 */

/*
 * Standard bits used in PTE (page level) and PTP (internal levels)
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_USER | PT_WRITABLE;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_MOD | PT_NOCONSIST | PT_WRITABLE;
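
/*
 * For illustration only: a 4K PTE built from pte_bits for the physical
 * page at 0xb8000 would be (0xb8000 | pte_bits), i.e. a present,
 * writable mapping with REF/MOD pre-set and marked PT_NOCONSIST so the
 * kernel's HAT never tries to manage it.
 */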

/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On Xen this is actually a virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel().
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation
 */
static paddr_t next_avail_addr = 0;

multiboot_info_t *mb_info;

/*
 * This contains information passed to the kernel
 */
struct xboot_info boot_info[2];	/* extra space to fix alignment for amd64 */
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 */
static uint64_t max_mem;	/* maximum memory address */

/*
 * Information about processor MMU
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;

#define	MAX_MODULES (10)
struct boot_modules modules[MAX_MODULES];
uint_t modules_used = 0;

/*
 * Debugging macros
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

/*
 * The Xen/Grub specific code builds the initial memlists. This code does
 * the sort/merge/link needed for final use.
 */
static void
sort_physinstall(void)
{
	int i;
	int j;
	struct boot_memlist tmp;

	/*
	 * Now sort the memlists, in case they weren't in order.
	 * Yeah, this is a bubble sort; small, simple and easy to get right.
	 */
	DBG_MSG("Sorting phys-installed list\n");
	for (j = memlists_used - 1; j > 0; --j) {
		for (i = 0; i < j; ++i) {
			if (memlists[i].addr < memlists[i + 1].addr)
				continue;
			tmp = memlists[i];
			memlists[i] = memlists[i + 1];
			memlists[i + 1] = tmp;
		}
	}

	/*
	 * Merge any memlists that don't have holes between them.
	 * Note we stop at the next-to-last entry, since the comparison
	 * looks one entry ahead.
	 */
	for (i = 0; i < memlists_used - 1; ++i) {
		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
			continue;

		if (prom_debug)
			dboot_printf(
			    "merging mem segs %" PRIx64 "...%" PRIx64
			    " w/ %" PRIx64 "...%" PRIx64 "\n",
			    memlists[i].addr,
			    memlists[i].addr + memlists[i].size,
			    memlists[i + 1].addr,
			    memlists[i + 1].addr + memlists[i + 1].size);

		memlists[i].size += memlists[i + 1].size;
		for (j = i + 1; j < memlists_used - 1; ++j)
			memlists[j] = memlists[j + 1];
		--memlists_used;
		DBG(memlists_used);
		--i;	/* after merging we need to reexamine, so do this */
	}

	if (prom_debug) {
		dboot_printf("\nFinal memlists:\n");
		for (i = 0; i < memlists_used; ++i) {
			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
			    PRIx64 "\n", i, memlists[i].addr,
			    memlists[i].size);
		}
	}

	/*
	 * link together the memlists with native size pointers
	 */
	memlists[0].next = 0;
	memlists[0].prev = 0;
	for (i = 1; i < memlists_used; ++i) {
		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
		memlists[i].next = 0;
		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
	}
	bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
	DBG(bi->bi_phys_install);
}

x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	if (pae_support)
		return (((x86pte_t *)(uintptr_t)table)[index]);
	return (((x86pte32_t *)(uintptr_t)table)[index]);
}

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	uintptr_t tab_addr = (uintptr_t)table;

	if (pae_support)
		((x86pte_t *)tab_addr)[index] = pteval;
	else
		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;

	/*
	 * In 32 bit PAE mode the CPU caches the 4 top level (PDPT) entries
	 * when %cr3 is loaded, so a change to the top level table only takes
	 * effect after %cr3 is reloaded.
	 */
	if (level == top_level && level == 2)
		reload_cr3();
}
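
#if 0	/* illustrative only -- a minimal sketch, not compiled */
/*
 * How a single page table slot is read and rewritten through the
 * accessors above; pae_support selects 8 byte vs. 4 byte entries.
 * The table address here is made up.
 */
static void
pte_rmw_example(void)
{
	paddr_t table = 0x1000;		/* hypothetical table page */
	x86pte_t old = get_pteval(table, 0);

	set_pteval(table, 0, 0, old | PT_WRITABLE);
}
#endif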

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);

	/*
	 * The 32 bit PAE top level table (the PDPT) only allows the
	 * valid bit to be set, hence the special case.
	 */
	if (level == top_level && level == 2)
		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
	else
		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;

	if (map_debug)
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
}

#if 0	/* useful if debugging */
/*
 * dump out the contents of page tables...
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	paddr_t pa, pa1;

	dboot_printf("Finished pagetables:\n");
	table = (char *)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %lx[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, (ulong_t)(uintptr_t)table, index,
		    (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%" PRIx64 "\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 256)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}
#endif

/*
 * Add a mapping for the physical page at the given virtual address.
 */
static void
map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
{
	x86pte_t *ptep;
	x86pte_t pteval;

	pteval = pa_to_ma(pa) | pte_bits;
	if (level > 0)
		pteval |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		pteval |= PT_GLOBAL;
	if (va < 1024 * 1024)
		pteval |= PT_NOCACHE;	/* for video RAM */

	if (map_debug && pa != va)
		dboot_printf("mapping pa=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)pa, (uint64_t)va, pteval, level);

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables.
	 */
	ptep = find_pte(va, NULL, level, 0);

	/*
	 * On Xen we must use hypervisor calls to modify the PTE, since
	 * paging is active. On real hardware we just write to the pagetables,
	 * which aren't in use yet.
	 */
	if (pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
}
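
#if 0	/* illustrative only -- a minimal sketch, not compiled */
/*
 * Example uses of the mapping primitive above: identity-map the VGA
 * frame buffer with one 4K page, then map a kernel large page.
 */
static void
mapping_example(void)
{
	map_pa_at_va(0xa0000, 0xa0000, 0);		/* 4K page, level 0 */
	map_pa_at_va(ktext_phys, target_kernel_text, 1); /* 2M/4M, level 1 */
}
#endif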

/*
 * Track the highest address in use so far; the next allocation will be
 * made above it.
 */
static void
check_higher(paddr_t a)
{
	if (a < next_avail_addr)
		return;
	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
	DBG(next_avail_addr);
}

/*
 * This is called to remove start..end from the
 * possible range of PCI addresses.
 */
const uint64_t pci_lo_limit = 0x00100000ul;
const uint64_t pci_hi_limit = 0xfff00000ul;

static void
exclude_from_pci(uint64_t start, uint64_t end)
{
	int i;
	int j;
	struct boot_memlist *ml;

	for (i = 0; i < pcimemlists_used; ++i) {
		ml = &pcimemlists[i];

		/* delete the entire range? */
		if (start <= ml->addr && ml->addr + ml->size <= end) {
			--pcimemlists_used;
			for (j = i; j < pcimemlists_used; ++j)
				pcimemlists[j] = pcimemlists[j + 1];
			--i;	/* to revisit the new one at this index */
		}

		/* split a range? */
		else if (ml->addr < start && end < ml->addr + ml->size) {

			++pcimemlists_used;
			if (pcimemlists_used > MAX_MEMLIST)
				dboot_panic("too many pcimemlists");

			for (j = pcimemlists_used - 1; j > i; --j)
				pcimemlists[j] = pcimemlists[j - 1];
			ml->size = start - ml->addr;

			++ml;
			ml->size = (ml->addr + ml->size) - end;
			ml->addr = end;
			++i;	/* skip on to next one */
		}

		/* cut memory off the start? */
		else if (ml->addr < end && end < ml->addr + ml->size) {
			ml->size -= end - ml->addr;
			ml->addr = end;
		}

		/* cut memory off the end? */
		else if (ml->addr <= start && start < ml->addr + ml->size) {
			ml->size = start - ml->addr;
		}
	}
}
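
/*
 * A worked example of the exclusion logic (illustrative only): starting
 * from the single default entry [pci_lo_limit .. pci_hi_limit), a call
 * such as
 *
 *	exclude_from_pci(0x40000000, 0x50000000);
 *
 * splits it into two entries,
 *	[0x00100000 .. 0x40000000) and [0x50000000 .. 0xfff00000).
 */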

/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 *
 * We then build the phys_install memlist from the multiboot information.
 */
static void
init_mem_alloc(void)
{
	mb_memory_map_t *mmap;
	mb_module_t *mod;
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	extern char _end[];
	int i;

	DBG_MSG("Entered init_mem_alloc()\n");
	DBG((uintptr_t)mb_info);

	/*
	 * search the modules to find the last used address;
	 * we'll build the module list while we're walking through here
	 */
	DBG_MSG("\nFinding Modules\n");
	check_higher((paddr_t)(uintptr_t)&_end);
	for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
	    i < mb_info->mods_count;
	    ++mod, ++i) {
		if (i >= MAX_MODULES)
			dboot_panic("too many modules");
		if (prom_debug) {
			dboot_printf("\tmodule #%d: %s at: 0x%lx, len 0x%lx\n",
			    i, (char *)(mod->mod_name),
			    (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
		}
		modules[i].bm_addr = mod->mod_start;
		modules[i].bm_size = mod->mod_end - mod->mod_start;

		check_higher(mod->mod_end);
	}
	bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = mb_info->mods_count;
	DBG(bi->bi_module_cnt);

	/*
	 * start out by assuming PCI can use all physical addresses
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	DBG(mb_info->flags);
	max_mem = 0;
	if (mb_info->flags & 0x40) {	/* full memory map is available */
		DBG(mb_info->mmap_addr);
		DBG(mb_info->mmap_length);
		check_higher(mb_info->mmap_addr + mb_info->mmap_length);

		for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
		    (uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length;
		    mmap = (mb_memory_map_t *)((uint32_t)mmap + mmap->size
		    + sizeof (mmap->size))) {

			start = ((uint64_t)mmap->base_addr_high << 32) +
			    mmap->base_addr_low;
			end = start + ((uint64_t)mmap->length_high << 32) +
			    mmap->length_low;

			if (prom_debug)
				dboot_printf("\ttype: %d %" PRIx64 "..%"
				    PRIx64 "\n", mmap->type, start, end);

			/*
			 * page align start and end
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			exclude_from_pci(start, end);

			/*
			 * only type 1 is usable RAM
			 */
			if (mmap->type != 1)
				continue;

			if (end > max_mem)
				max_mem = end;

			if (memlists_used >= MAX_MEMLIST)
				dboot_panic("too many memlists");
			memlists[memlists_used].addr = start;
			memlists[memlists_used].size = end - start;
			++memlists_used;
		}
	} else if (mb_info->flags & 0x01) {	/* only mem_lower/mem_upper */
		DBG(mb_info->mem_lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = mb_info->mem_lower * 1024;
		++memlists_used;
		DBG(mb_info->mem_upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = mb_info->mem_upper * 1024;
		++memlists_used;
		exclude_from_pci(memlists[0].addr,
		    memlists[0].addr + memlists[0].size);
		exclude_from_pci(memlists[1].addr,
		    memlists[1].addr + memlists[1].size);
	} else {
		dboot_panic("No memory info from boot loader!!!\n");
	}

	check_higher(bi->bi_cmdline);

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * Finish off the pcimemlists
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
			    PRIx64 "\n", pcimemlists[i].addr,
			    pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}

/*
 * Simple memory allocator, allocates aligned physical memory.
 * Note that startup_kernel() only allocates memory, never frees.
 * Memory usage just grows in an upward direction.
 */
static void *
do_mem_alloc(uint32_t size, uint32_t align)
{
	uint_t i;
	uint64_t best;
	uint64_t start;
	uint64_t end;

	/*
	 * make sure size is a multiple of pagesize
	 */
	size = RNDUP(size, MMU_PAGESIZE);
	next_avail_addr = RNDUP(next_avail_addr, align);

	/*
	 * a really large bootarchive that causes you to run out of memory
	 * may cause this to blow up
	 */
	/* LINTED E_UNEXPECTED_UINT_PROMOTION */
	best = (uint64_t)-size;
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
		end = start + memlists[i].size;

		/*
		 * did we find the desired address?
		 */
		if (start <= next_avail_addr && next_avail_addr + size <= end) {
			best = next_avail_addr;
			goto done;
		}

		/*
		 * if not, is this address the best so far?
		 */
		if (start > next_avail_addr && start < best &&
		    RNDUP(start, align) + size <= end)
			best = RNDUP(start, align);
	}

	/*
	 * We didn't find exactly the address we wanted, due to going off the
	 * end of a memory region. Return the best found memory address.
	 */
done:
	next_avail_addr = best + size;
	(void) memset((void *)(uintptr_t)best, 0, size);
	return ((void *)(uintptr_t)best);
}

void *
mem_alloc(uint32_t size)
{
	return (do_mem_alloc(size, MMU_PAGESIZE));
}
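
#if 0	/* illustrative only -- a minimal sketch, not compiled */
/*
 * Allocator usage: every allocation is zeroed, page aligned and
 * permanent; next_avail_addr only ever moves upward.
 */
static void
alloc_example(void)
{
	void *ptable = mem_alloc(MMU_PAGESIZE);	/* one pagetable page */
	void *nucleus = do_mem_alloc(ksize, FOUR_MEG);	/* 4M aligned */
}
#endif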

/*
 * Build page tables to map all of memory used so far as well as the kernel.
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint32_t i;
	uint64_t start;
	uint64_t end;
	uint64_t next_mapping;

	/*
	 * If we're not using Xen, we need to create the top level pagetable.
	 */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for the kernel, then map it.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

	/*
	 * Under multiboot we need 1:1 mappings for all of low memory, which
	 * includes our pagetables. The following code works because our
	 * simple memory allocator only grows usage in an upwards direction.
	 *
	 * We map *all* possible addresses below 1 Meg, since things like
	 * the video RAM are down there.
	 *
	 * Skip memory between 1M and _start; this acts as a reserve
	 * of memory usable for DMA.
	 */
	next_mapping = (uintptr_t)_start & MMU_PAGEMASK;
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE)
		map_pa_at_va(start, start, 0);

	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
		end = memlists[i].addr + memlists[i].size;
		if (start < next_mapping)
			start = next_mapping;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
	}

	DBG_MSG("\nPage tables constructed\n");
}
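
/*
 * For reference (illustrative, assuming a 64 bit kernel under GRUB), the
 * address space after build_page_tables() looks roughly like:
 *
 *	0 .. 1M			1:1, PT_NOCACHE (video RAM et al.)
 *	_start .. next_avail	1:1 (dboot, modules, pagetables)
 *	KERNEL_TEXT .. +8M	-> ktext_phys, large pages if supported
 */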

#define	NO_MULTIBOOT	\
"multiboot is no longer used to boot the Solaris Operating System.\n\
The grub entry should be changed to:\n\
kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
module$ /platform/i86pc/$ISADIR/boot_archive\n\
See http://www.sun.com/msg/SUNOS-8000-AK for details.\n"

/*
 * startup_kernel() has a pretty simple job. It builds pagetables which
 * reflect 1:1 mappings for all memory in use. It then adds mappings for
 * the kernel nucleus at the virtual address target_kernel_text, using
 * large page mappings where possible. The page table pages themselves
 * remain accessible at their 1:1 mapped virtual addresses.
 */
/*ARGSUSED*/
void
startup_kernel(void)
{
	char *cmdline;
	uintptr_t addr;

	/*
	 * At this point we are executing in 32 bit protected mode with
	 * paging disabled.
	 */
	cmdline = (char *)mb_info->cmdline;
	prom_debug = (strstr(cmdline, "prom_debug") != NULL);
	map_debug = (strstr(cmdline, "map_debug") != NULL);
	bcons_init(cmdline);
	DBG_MSG("\n\nSolaris prekernel set: ");
	DBG_MSG(cmdline);
	DBG_MSG("\n");

	if (strstr(cmdline, "multiboot") != NULL) {
		dboot_panic(NO_MULTIBOOT);
	}

	/*
	 * boot info must be 16 byte aligned for 64 bit kernel ABI
	 */
	addr = (uintptr_t)boot_info;
	addr = (addr + 0xf) & ~0xf;
	bi = (struct xboot_info *)addr;
	DBG((uintptr_t)bi);
	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;

	/*
	 * Need correct target_kernel_text value
	 */
#if defined(_BOOT_TARGET_amd64)
	target_kernel_text = KERNEL_TEXT_amd64;
#else
	target_kernel_text = KERNEL_TEXT_i386;
#endif
	DBG(target_kernel_text);

	/*
	 * use cpuid to enable MMU features
	 */
	if (have_cpuid()) {
		uint32_t eax, edx;

		eax = 1;
		edx = get_cpuid_edx(&eax);
		if (edx & CPUID_INTC_EDX_PSE)
			largepage_support = 1;
		if (edx & CPUID_INTC_EDX_PGE)
			pge_support = 1;
		if (edx & CPUID_INTC_EDX_PAE)
			pae_support = 1;

		eax = 0x80000000;
		edx = get_cpuid_edx(&eax);
		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_LM)
				amd64_support = 1;
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	} else {
		dboot_printf("cpuid not supported\n");
	}

#if defined(_BOOT_TARGET_amd64)
	if (amd64_support == 0)
		dboot_panic("long mode not supported, rebooting\n");
	else if (pae_support == 0)
		dboot_panic("long mode requires PAE, rebooting\n");
#endif

	/*
	 * initialize our memory allocator
	 */
	init_mem_alloc();

	/*
	 * configure mmu information
	 */
#if !defined(_BOOT_TARGET_amd64)
	if (pae_support && (max_mem > FOUR_GIG || NX_support)) {
#endif
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#if defined(_BOOT_TARGET_amd64)
		top_level = 3;
#else
		top_level = 2;
#endif
#if !defined(_BOOT_TARGET_amd64)
	} else {
		pae_support = 0;
		NX_support = 0;
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}
#endif

	DBG(pge_support);
	DBG(NX_support);
	DBG(largepage_support);
	DBG(amd64_support);
	DBG(top_level);
	DBG(pte_size);
	DBG(ptes_per_table);
	DBG(lpagesize);

	ktext_phys = FOUR_MEG;	/* from UNIX Mapfile */

#if defined(_BOOT_TARGET_amd64)
	/*
	 * For grub, copy kernel bits from the ELF64 file to the final place.
	 */
	DBG_MSG("\nAllocating nucleus pages.\n");
	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
	if (ktext_phys == 0)
		dboot_panic("failed to allocate aligned kernel memory\n");
	if (dboot_elfload64(mb_header.load_addr) != 0)
		dboot_panic("failed to parse kernel ELF image, rebooting\n");
#endif
	DBG(ktext_phys);
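
	/*
	 * At this point every physical address below next_avail_addr is
	 * spoken for: dboot itself, the boot loader data, the modules and
	 * (on amd64) the relocated nucleus. All that remains is page table
	 * construction and the handoff state for the kernel.
	 */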

	/*
	 * Allocate page tables.
	 */
	build_page_tables();

	/*
	 * return to assembly code to switch to running kernel
	 */
	entry_addr_low = (uint32_t)target_kernel_text;
	DBG(entry_addr_low);
	bi->bi_use_largepage = largepage_support;
	bi->bi_use_pae = pae_support;
	bi->bi_use_pge = pge_support;
	bi->bi_use_nx = NX_support;
	bi->bi_next_paddr = next_avail_addr;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);
	bi->bi_mb_info = (uintptr_t)mb_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table;

	bi->bi_kseg_size = FOUR_MEG;
	DBG(bi->bi_kseg_size);

#if 0	/* useful if debugging initial page tables */
	if (prom_debug)
		dump_tables();
#endif

	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
}