1ae115bc7Smrj /* 2ae115bc7Smrj * CDDL HEADER START 3ae115bc7Smrj * 4ae115bc7Smrj * The contents of this file are subject to the terms of the 5ae115bc7Smrj * Common Development and Distribution License (the "License"). 6ae115bc7Smrj * You may not use this file except in compliance with the License. 7ae115bc7Smrj * 8ae115bc7Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9ae115bc7Smrj * or http://www.opensolaris.org/os/licensing. 10ae115bc7Smrj * See the License for the specific language governing permissions 11ae115bc7Smrj * and limitations under the License. 12ae115bc7Smrj * 13ae115bc7Smrj * When distributing Covered Code, include this CDDL HEADER in each 14ae115bc7Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15ae115bc7Smrj * If applicable, add the following below this CDDL HEADER, with the 16ae115bc7Smrj * fields enclosed by brackets "[]" replaced with your own identifying 17ae115bc7Smrj * information: Portions Copyright [yyyy] [name of copyright owner] 18ae115bc7Smrj * 19ae115bc7Smrj * CDDL HEADER END 20ae115bc7Smrj */ 21ae115bc7Smrj 22ae115bc7Smrj /* 23f34a7178SJoe Bonasera * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24ae115bc7Smrj * Use is subject to license terms. 25e65d07eeSKeith Wesolowski * 260181461bSKeith M Wesolowski * Copyright 2013 Joyent, Inc. All rights reserved. 27ae115bc7Smrj */ 28ae115bc7Smrj 29ae115bc7Smrj 30ae115bc7Smrj #include <sys/types.h> 31ae115bc7Smrj #include <sys/machparam.h> 32ae115bc7Smrj #include <sys/x86_archext.h> 33ae115bc7Smrj #include <sys/systm.h> 34ae115bc7Smrj #include <sys/mach_mmu.h> 35ae115bc7Smrj #include <sys/multiboot.h> 36d2670fc4SToomas Soome #include <sys/multiboot2.h> 37d2670fc4SToomas Soome #include <sys/multiboot2_impl.h> 38d2670fc4SToomas Soome #include <sys/sysmacros.h> 39e65d07eeSKeith Wesolowski #include <sys/sha1.h> 400181461bSKeith M Wesolowski #include <util/string.h> 410181461bSKeith M Wesolowski #include <util/strtolctype.h> 42ae115bc7Smrj 43843e1988Sjohnlev #if defined(__xpv) 44843e1988Sjohnlev 45843e1988Sjohnlev #include <sys/hypervisor.h> 46843e1988Sjohnlev uintptr_t xen_virt_start; 47843e1988Sjohnlev pfn_t *mfn_to_pfn_mapping; 48843e1988Sjohnlev 49843e1988Sjohnlev #else /* !__xpv */ 50843e1988Sjohnlev 51ae115bc7Smrj extern multiboot_header_t mb_header; 52d2670fc4SToomas Soome extern uint32_t mb2_load_addr; 53ae115bc7Smrj extern int have_cpuid(void); 54843e1988Sjohnlev 55843e1988Sjohnlev #endif /* !__xpv */ 56ae115bc7Smrj 57ae115bc7Smrj #include <sys/inttypes.h> 58ae115bc7Smrj #include <sys/bootinfo.h> 59ae115bc7Smrj #include <sys/mach_mmu.h> 60ae115bc7Smrj #include <sys/boot_console.h> 61ae115bc7Smrj 62843e1988Sjohnlev #include "dboot_asm.h" 63ae115bc7Smrj #include "dboot_printf.h" 64ae115bc7Smrj #include "dboot_xboot.h" 65ae115bc7Smrj #include "dboot_elfload.h" 66ae115bc7Smrj 67e65d07eeSKeith Wesolowski #define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2) 68e65d07eeSKeith Wesolowski 69ae115bc7Smrj /* 70ae115bc7Smrj * This file contains code that runs to transition us from either a multiboot 71843e1988Sjohnlev * compliant loader (32 bit non-paging) or a XPV domain loader to 72843e1988Sjohnlev * regular kernel execution. Its task is to setup the kernel memory image 73843e1988Sjohnlev * and page tables. 74ae115bc7Smrj * 75ae115bc7Smrj * The code executes as: 76ae115bc7Smrj * - 32 bits under GRUB (for 32 or 64 bit Solaris) 77843e1988Sjohnlev * - a 32 bit program for the 32-bit PV hypervisor 78843e1988Sjohnlev * - a 64 bit program for the 64-bit PV hypervisor (at least for now) 79ae115bc7Smrj * 80843e1988Sjohnlev * Under the PV hypervisor, we must create mappings for any memory beyond the 81843e1988Sjohnlev * initial start of day allocation (such as the kernel itself). 82ae115bc7Smrj * 83843e1988Sjohnlev * When on the metal, the mapping between maddr_t and paddr_t is 1:1. 84ae115bc7Smrj * Since we are running in real mode, so all such memory is accessible. 85ae115bc7Smrj */ 86ae115bc7Smrj 87ae115bc7Smrj /* 88ae115bc7Smrj * Standard bits used in PTE (page level) and PTP (internal levels) 89ae115bc7Smrj */ 90843e1988Sjohnlev x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER; 91843e1988Sjohnlev x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST; 92ae115bc7Smrj 93ae115bc7Smrj /* 94ae115bc7Smrj * This is the target addresses (physical) where the kernel text and data 95843e1988Sjohnlev * nucleus pages will be unpacked. On the hypervisor this is actually a 96843e1988Sjohnlev * virtual address. 97ae115bc7Smrj */ 98ae115bc7Smrj paddr_t ktext_phys; 99ae115bc7Smrj uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */ 100ae115bc7Smrj 101ae115bc7Smrj static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */ 102ae115bc7Smrj 103ae115bc7Smrj /* 104ae115bc7Smrj * The stack is setup in assembler before entering startup_kernel() 105ae115bc7Smrj */ 106ae115bc7Smrj char stack_space[STACK_SIZE]; 107ae115bc7Smrj 108ae115bc7Smrj /* 109ae115bc7Smrj * Used to track physical memory allocation 110ae115bc7Smrj */ 111ae115bc7Smrj static paddr_t next_avail_addr = 0; 112ae115bc7Smrj 113843e1988Sjohnlev #if defined(__xpv) 114843e1988Sjohnlev /* 115843e1988Sjohnlev * Additional information needed for hypervisor memory allocation. 116843e1988Sjohnlev * Only memory up to scratch_end is mapped by page tables. 117843e1988Sjohnlev * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so 118843e1988Sjohnlev * to derive a pfn from a pointer, you subtract mfn_base. 119843e1988Sjohnlev */ 120843e1988Sjohnlev 121843e1988Sjohnlev static paddr_t scratch_end = 0; /* we can't write all of mem here */ 122843e1988Sjohnlev static paddr_t mfn_base; /* addr corresponding to mfn_list[0] */ 123843e1988Sjohnlev start_info_t *xen_info; 124843e1988Sjohnlev 125843e1988Sjohnlev #else /* __xpv */ 126843e1988Sjohnlev 127843e1988Sjohnlev /* 128843e1988Sjohnlev * If on the metal, then we have a multiboot loader. 129843e1988Sjohnlev */ 130d2670fc4SToomas Soome uint32_t mb_magic; /* magic from boot loader */ 131d2670fc4SToomas Soome uint32_t mb_addr; /* multiboot info package from loader */ 132d2670fc4SToomas Soome int multiboot_version; 133ae115bc7Smrj multiboot_info_t *mb_info; 134d2670fc4SToomas Soome multiboot2_info_header_t *mb2_info; 135d2670fc4SToomas Soome multiboot_tag_mmap_t *mb2_mmap_tagp; 136d2670fc4SToomas Soome int num_entries; /* mmap entry count */ 137d2670fc4SToomas Soome boolean_t num_entries_set; /* is mmap entry count set */ 138d2670fc4SToomas Soome uintptr_t load_addr; 139ae115bc7Smrj 140843e1988Sjohnlev #endif /* __xpv */ 141843e1988Sjohnlev 142ae115bc7Smrj /* 143ae115bc7Smrj * This contains information passed to the kernel 144ae115bc7Smrj */ 145ae115bc7Smrj struct xboot_info boot_info[2]; /* extra space to fix alignement for amd64 */ 146ae115bc7Smrj struct xboot_info *bi; 147ae115bc7Smrj 148ae115bc7Smrj /* 149ae115bc7Smrj * Page table and memory stuff. 150ae115bc7Smrj */ 151843e1988Sjohnlev static paddr_t max_mem; /* maximum memory address */ 152ae115bc7Smrj 153ae115bc7Smrj /* 154ae115bc7Smrj * Information about processor MMU 155ae115bc7Smrj */ 156ae115bc7Smrj int amd64_support = 0; 157ae115bc7Smrj int largepage_support = 0; 158ae115bc7Smrj int pae_support = 0; 159ae115bc7Smrj int pge_support = 0; 160ae115bc7Smrj int NX_support = 0; 161ae115bc7Smrj 162ae115bc7Smrj /* 163ae115bc7Smrj * Low 32 bits of kernel entry address passed back to assembler. 164ae115bc7Smrj * When running a 64 bit kernel, the high 32 bits are 0xffffffff. 165ae115bc7Smrj */ 166ae115bc7Smrj uint32_t entry_addr_low; 167ae115bc7Smrj 168ae115bc7Smrj /* 169ae115bc7Smrj * Memlists for the kernel. We shouldn't need a lot of these. 170ae115bc7Smrj */ 171c9464e8bSjosephb #define MAX_MEMLIST (50) 172ae115bc7Smrj struct boot_memlist memlists[MAX_MEMLIST]; 173ae115bc7Smrj uint_t memlists_used = 0; 174c9464e8bSjosephb struct boot_memlist pcimemlists[MAX_MEMLIST]; 175c9464e8bSjosephb uint_t pcimemlists_used = 0; 1761de082f7SVikram Hegde struct boot_memlist rsvdmemlists[MAX_MEMLIST]; 1771de082f7SVikram Hegde uint_t rsvdmemlists_used = 0; 178ae115bc7Smrj 1790181461bSKeith M Wesolowski /* 1800181461bSKeith M Wesolowski * This should match what's in the bootloader. It's arbitrary, but GRUB 1810181461bSKeith M Wesolowski * in particular has limitations on how much space it can use before it 1820181461bSKeith M Wesolowski * stops working properly. This should be enough. 1830181461bSKeith M Wesolowski */ 1840181461bSKeith M Wesolowski struct boot_modules modules[MAX_BOOT_MODULES]; 185ae115bc7Smrj uint_t modules_used = 0; 186ae115bc7Smrj 187d2670fc4SToomas Soome #ifdef __xpv 188d2670fc4SToomas Soome /* 189d2670fc4SToomas Soome * Xen strips the size field out of the mb_memory_map_t, see struct e820entry 190d2670fc4SToomas Soome * definition in Xen source. 191d2670fc4SToomas Soome */ 192d2670fc4SToomas Soome typedef struct { 193d2670fc4SToomas Soome uint32_t base_addr_low; 194d2670fc4SToomas Soome uint32_t base_addr_high; 195d2670fc4SToomas Soome uint32_t length_low; 196d2670fc4SToomas Soome uint32_t length_high; 197d2670fc4SToomas Soome uint32_t type; 198d2670fc4SToomas Soome } mmap_t; 199d2670fc4SToomas Soome 200d2670fc4SToomas Soome /* 201d2670fc4SToomas Soome * There is 512KB of scratch area after the boot stack page. 202d2670fc4SToomas Soome * We'll use that for everything except the kernel nucleus pages which are too 203d2670fc4SToomas Soome * big to fit there and are allocated last anyway. 204d2670fc4SToomas Soome */ 205d2670fc4SToomas Soome #define MAXMAPS 100 206d2670fc4SToomas Soome static mmap_t map_buffer[MAXMAPS]; 207d2670fc4SToomas Soome #else 208d2670fc4SToomas Soome typedef mb_memory_map_t mmap_t; 209d2670fc4SToomas Soome #endif 210d2670fc4SToomas Soome 211ae115bc7Smrj /* 212ae115bc7Smrj * Debugging macros 213ae115bc7Smrj */ 214ae115bc7Smrj uint_t prom_debug = 0; 215ae115bc7Smrj uint_t map_debug = 0; 216ae115bc7Smrj 2170181461bSKeith M Wesolowski static char noname[2] = "-"; 2180181461bSKeith M Wesolowski 219ae115bc7Smrj /* 220843e1988Sjohnlev * Either hypervisor-specific or grub-specific code builds the initial 221843e1988Sjohnlev * memlists. This code does the sort/merge/link for final use. 222ae115bc7Smrj */ 223ae115bc7Smrj static void 224ae115bc7Smrj sort_physinstall(void) 225ae115bc7Smrj { 226ae115bc7Smrj int i; 227843e1988Sjohnlev #if !defined(__xpv) 228ae115bc7Smrj int j; 229ae115bc7Smrj struct boot_memlist tmp; 230ae115bc7Smrj 231ae115bc7Smrj /* 232ae115bc7Smrj * Now sort the memlists, in case they weren't in order. 233ae115bc7Smrj * Yeah, this is a bubble sort; small, simple and easy to get right. 234ae115bc7Smrj */ 235ae115bc7Smrj DBG_MSG("Sorting phys-installed list\n"); 236ae115bc7Smrj for (j = memlists_used - 1; j > 0; --j) { 237ae115bc7Smrj for (i = 0; i < j; ++i) { 238ae115bc7Smrj if (memlists[i].addr < memlists[i + 1].addr) 239ae115bc7Smrj continue; 240ae115bc7Smrj tmp = memlists[i]; 241ae115bc7Smrj memlists[i] = memlists[i + 1]; 242ae115bc7Smrj memlists[i + 1] = tmp; 243ae115bc7Smrj } 244ae115bc7Smrj } 245ae115bc7Smrj 246ae115bc7Smrj /* 247ae115bc7Smrj * Merge any memlists that don't have holes between them. 248ae115bc7Smrj */ 249ae115bc7Smrj for (i = 0; i <= memlists_used - 1; ++i) { 250ae115bc7Smrj if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr) 251ae115bc7Smrj continue; 252ae115bc7Smrj 253ae115bc7Smrj if (prom_debug) 254ae115bc7Smrj dboot_printf( 255ae115bc7Smrj "merging mem segs %" PRIx64 "...%" PRIx64 256ae115bc7Smrj " w/ %" PRIx64 "...%" PRIx64 "\n", 257ae115bc7Smrj memlists[i].addr, 258ae115bc7Smrj memlists[i].addr + memlists[i].size, 259ae115bc7Smrj memlists[i + 1].addr, 260ae115bc7Smrj memlists[i + 1].addr + memlists[i + 1].size); 261ae115bc7Smrj 262ae115bc7Smrj memlists[i].size += memlists[i + 1].size; 263ae115bc7Smrj for (j = i + 1; j < memlists_used - 1; ++j) 264ae115bc7Smrj memlists[j] = memlists[j + 1]; 265ae115bc7Smrj --memlists_used; 266ae115bc7Smrj DBG(memlists_used); 267ae115bc7Smrj --i; /* after merging we need to reexamine, so do this */ 268ae115bc7Smrj } 269843e1988Sjohnlev #endif /* __xpv */ 270ae115bc7Smrj 271ae115bc7Smrj if (prom_debug) { 272ae115bc7Smrj dboot_printf("\nFinal memlists:\n"); 273ae115bc7Smrj for (i = 0; i < memlists_used; ++i) { 274ae115bc7Smrj dboot_printf("\t%d: addr=%" PRIx64 " size=%" 275ae115bc7Smrj PRIx64 "\n", i, memlists[i].addr, memlists[i].size); 276ae115bc7Smrj } 277ae115bc7Smrj } 278ae115bc7Smrj 279ae115bc7Smrj /* 280ae115bc7Smrj * link together the memlists with native size pointers 281ae115bc7Smrj */ 282ae115bc7Smrj memlists[0].next = 0; 283ae115bc7Smrj memlists[0].prev = 0; 284ae115bc7Smrj for (i = 1; i < memlists_used; ++i) { 285ae115bc7Smrj memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1); 286ae115bc7Smrj memlists[i].next = 0; 287ae115bc7Smrj memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i); 288ae115bc7Smrj } 289c909a41bSRichard Lowe bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists; 290ae115bc7Smrj DBG(bi->bi_phys_install); 291ae115bc7Smrj } 292ae115bc7Smrj 2931de082f7SVikram Hegde /* 2941de082f7SVikram Hegde * build bios reserved memlists 2951de082f7SVikram Hegde */ 2961de082f7SVikram Hegde static void 2971de082f7SVikram Hegde build_rsvdmemlists(void) 2981de082f7SVikram Hegde { 2991de082f7SVikram Hegde int i; 3001de082f7SVikram Hegde 3011de082f7SVikram Hegde rsvdmemlists[0].next = 0; 3021de082f7SVikram Hegde rsvdmemlists[0].prev = 0; 3031de082f7SVikram Hegde for (i = 1; i < rsvdmemlists_used; ++i) { 3041de082f7SVikram Hegde rsvdmemlists[i].prev = 3051de082f7SVikram Hegde (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1); 3061de082f7SVikram Hegde rsvdmemlists[i].next = 0; 3071de082f7SVikram Hegde rsvdmemlists[i - 1].next = 3081de082f7SVikram Hegde (native_ptr_t)(uintptr_t)(rsvdmemlists + i); 3091de082f7SVikram Hegde } 310c909a41bSRichard Lowe bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists; 3111de082f7SVikram Hegde DBG(bi->bi_rsvdmem); 3121de082f7SVikram Hegde } 3131de082f7SVikram Hegde 314843e1988Sjohnlev #if defined(__xpv) 315843e1988Sjohnlev 316843e1988Sjohnlev /* 317843e1988Sjohnlev * halt on the hypervisor after a delay to drain console output 318843e1988Sjohnlev */ 319843e1988Sjohnlev void 320843e1988Sjohnlev dboot_halt(void) 321843e1988Sjohnlev { 322843e1988Sjohnlev uint_t i = 10000; 323843e1988Sjohnlev 324843e1988Sjohnlev while (--i) 325c1374a13SSurya Prakki (void) HYPERVISOR_yield(); 326c1374a13SSurya Prakki (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff); 327843e1988Sjohnlev } 328843e1988Sjohnlev 329843e1988Sjohnlev /* 330843e1988Sjohnlev * From a machine address, find the corresponding pseudo-physical address. 331843e1988Sjohnlev * Pseudo-physical address are contiguous and run from mfn_base in each VM. 332843e1988Sjohnlev * Machine addresses are the real underlying hardware addresses. 333843e1988Sjohnlev * These are needed for page table entries. Note that this routine is 334843e1988Sjohnlev * poorly protected. A bad value of "ma" will cause a page fault. 335843e1988Sjohnlev */ 336843e1988Sjohnlev paddr_t 337843e1988Sjohnlev ma_to_pa(maddr_t ma) 338843e1988Sjohnlev { 339843e1988Sjohnlev ulong_t pgoff = ma & MMU_PAGEOFFSET; 340843e1988Sjohnlev ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)]; 341843e1988Sjohnlev paddr_t pa; 342843e1988Sjohnlev 343843e1988Sjohnlev if (pfn >= xen_info->nr_pages) 344843e1988Sjohnlev return (-(paddr_t)1); 345843e1988Sjohnlev pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff; 346843e1988Sjohnlev #ifdef DEBUG 347843e1988Sjohnlev if (ma != pa_to_ma(pa)) 348843e1988Sjohnlev dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", " 349843e1988Sjohnlev "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa)); 350843e1988Sjohnlev #endif 351843e1988Sjohnlev return (pa); 352843e1988Sjohnlev } 353843e1988Sjohnlev 354843e1988Sjohnlev /* 355843e1988Sjohnlev * From a pseudo-physical address, find the corresponding machine address. 356843e1988Sjohnlev */ 357843e1988Sjohnlev maddr_t 358843e1988Sjohnlev pa_to_ma(paddr_t pa) 359843e1988Sjohnlev { 360843e1988Sjohnlev pfn_t pfn; 361843e1988Sjohnlev ulong_t mfn; 362843e1988Sjohnlev 363843e1988Sjohnlev pfn = mmu_btop(pa - mfn_base); 364843e1988Sjohnlev if (pa < mfn_base || pfn >= xen_info->nr_pages) 365843e1988Sjohnlev dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa); 366843e1988Sjohnlev mfn = ((ulong_t *)xen_info->mfn_list)[pfn]; 367843e1988Sjohnlev #ifdef DEBUG 368843e1988Sjohnlev if (mfn_to_pfn_mapping[mfn] != pfn) 369843e1988Sjohnlev dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n", 370843e1988Sjohnlev pfn, mfn, mfn_to_pfn_mapping[mfn]); 371843e1988Sjohnlev #endif 372843e1988Sjohnlev return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET)); 373843e1988Sjohnlev } 374843e1988Sjohnlev 375843e1988Sjohnlev #endif /* __xpv */ 376843e1988Sjohnlev 377ae115bc7Smrj x86pte_t 378ae115bc7Smrj get_pteval(paddr_t table, uint_t index) 379ae115bc7Smrj { 380ae115bc7Smrj if (pae_support) 381ae115bc7Smrj return (((x86pte_t *)(uintptr_t)table)[index]); 382ae115bc7Smrj return (((x86pte32_t *)(uintptr_t)table)[index]); 383ae115bc7Smrj } 384ae115bc7Smrj 385ae115bc7Smrj /*ARGSUSED*/ 386ae115bc7Smrj void 387ae115bc7Smrj set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval) 388ae115bc7Smrj { 389843e1988Sjohnlev #ifdef __xpv 390843e1988Sjohnlev mmu_update_t t; 391843e1988Sjohnlev maddr_t mtable = pa_to_ma(table); 392843e1988Sjohnlev int retcnt; 393843e1988Sjohnlev 394843e1988Sjohnlev t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE; 395843e1988Sjohnlev t.val = pteval; 396843e1988Sjohnlev if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1) 397843e1988Sjohnlev dboot_panic("HYPERVISOR_mmu_update() failed"); 398843e1988Sjohnlev #else /* __xpv */ 399ae115bc7Smrj uintptr_t tab_addr = (uintptr_t)table; 400ae115bc7Smrj 401ae115bc7Smrj if (pae_support) 402ae115bc7Smrj ((x86pte_t *)tab_addr)[index] = pteval; 403ae115bc7Smrj else 404ae115bc7Smrj ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval; 405ae115bc7Smrj if (level == top_level && level == 2) 406ae115bc7Smrj reload_cr3(); 407843e1988Sjohnlev #endif /* __xpv */ 408ae115bc7Smrj } 409ae115bc7Smrj 410ae115bc7Smrj paddr_t 411ae115bc7Smrj make_ptable(x86pte_t *pteval, uint_t level) 412ae115bc7Smrj { 413ae115bc7Smrj paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 414ae115bc7Smrj 415ae115bc7Smrj if (level == top_level && level == 2) 416ae115bc7Smrj *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID; 417ae115bc7Smrj else 418ae115bc7Smrj *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits; 419ae115bc7Smrj 420843e1988Sjohnlev #ifdef __xpv 421843e1988Sjohnlev /* Remove write permission to the new page table. */ 422843e1988Sjohnlev if (HYPERVISOR_update_va_mapping(new_table, 423843e1988Sjohnlev *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL)) 424843e1988Sjohnlev dboot_panic("HYP_update_va_mapping error"); 425843e1988Sjohnlev #endif 426843e1988Sjohnlev 427ae115bc7Smrj if (map_debug) 428ae115bc7Smrj dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%" 429ae115bc7Smrj PRIx64 "\n", level, (ulong_t)new_table, *pteval); 430ae115bc7Smrj return (new_table); 431ae115bc7Smrj } 432ae115bc7Smrj 433ae115bc7Smrj x86pte_t * 434ae115bc7Smrj map_pte(paddr_t table, uint_t index) 435ae115bc7Smrj { 436ae115bc7Smrj return ((x86pte_t *)(uintptr_t)(table + index * pte_size)); 437ae115bc7Smrj } 438ae115bc7Smrj 43919397407SSherry Moore /* 44019397407SSherry Moore * dump out the contents of page tables... 44119397407SSherry Moore */ 44219397407SSherry Moore static void 44319397407SSherry Moore dump_tables(void) 44419397407SSherry Moore { 44519397407SSherry Moore uint_t save_index[4]; /* for recursion */ 44619397407SSherry Moore char *save_table[4]; /* for recursion */ 44719397407SSherry Moore uint_t l; 44819397407SSherry Moore uint64_t va; 44919397407SSherry Moore uint64_t pgsize; 45019397407SSherry Moore int index; 45119397407SSherry Moore int i; 45219397407SSherry Moore x86pte_t pteval; 45319397407SSherry Moore char *table; 45419397407SSherry Moore static char *tablist = "\t\t\t"; 45519397407SSherry Moore char *tabs = tablist + 3 - top_level; 45619397407SSherry Moore uint_t pa, pa1; 457843e1988Sjohnlev #if !defined(__xpv) 458843e1988Sjohnlev #define maddr_t paddr_t 459843e1988Sjohnlev #endif /* !__xpv */ 460843e1988Sjohnlev 46119397407SSherry Moore dboot_printf("Finished pagetables:\n"); 46219397407SSherry Moore table = (char *)(uintptr_t)top_page_table; 46319397407SSherry Moore l = top_level; 46419397407SSherry Moore va = 0; 46519397407SSherry Moore for (index = 0; index < ptes_per_table; ++index) { 46619397407SSherry Moore pgsize = 1ull << shift_amt[l]; 46719397407SSherry Moore if (pae_support) 46819397407SSherry Moore pteval = ((x86pte_t *)table)[index]; 46919397407SSherry Moore else 47019397407SSherry Moore pteval = ((x86pte32_t *)table)[index]; 47119397407SSherry Moore if (pteval == 0) 47219397407SSherry Moore goto next_entry; 47319397407SSherry Moore 47419397407SSherry Moore dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64, 475c1374a13SSurya Prakki tabs + l, (void *)table, index, (uint64_t)pteval, va); 47619397407SSherry Moore pa = ma_to_pa(pteval & MMU_PAGEMASK); 47719397407SSherry Moore dboot_printf(" physaddr=%x\n", pa); 47819397407SSherry Moore 47919397407SSherry Moore /* 48019397407SSherry Moore * Don't try to walk hypervisor private pagetables 48119397407SSherry Moore */ 48219397407SSherry Moore if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) { 48319397407SSherry Moore save_table[l] = table; 48419397407SSherry Moore save_index[l] = index; 48519397407SSherry Moore --l; 48619397407SSherry Moore index = -1; 48719397407SSherry Moore table = (char *)(uintptr_t) 48819397407SSherry Moore ma_to_pa(pteval & MMU_PAGEMASK); 48919397407SSherry Moore goto recursion; 49019397407SSherry Moore } 49119397407SSherry Moore 49219397407SSherry Moore /* 49319397407SSherry Moore * shorten dump for consecutive mappings 49419397407SSherry Moore */ 49519397407SSherry Moore for (i = 1; index + i < ptes_per_table; ++i) { 49619397407SSherry Moore if (pae_support) 49719397407SSherry Moore pteval = ((x86pte_t *)table)[index + i]; 49819397407SSherry Moore else 49919397407SSherry Moore pteval = ((x86pte32_t *)table)[index + i]; 50019397407SSherry Moore if (pteval == 0) 50119397407SSherry Moore break; 50219397407SSherry Moore pa1 = ma_to_pa(pteval & MMU_PAGEMASK); 50319397407SSherry Moore if (pa1 != pa + i * pgsize) 50419397407SSherry Moore break; 50519397407SSherry Moore } 50619397407SSherry Moore if (i > 2) { 50719397407SSherry Moore dboot_printf("%s...\n", tabs + l); 50819397407SSherry Moore va += pgsize * (i - 2); 50919397407SSherry Moore index += i - 2; 51019397407SSherry Moore } 51119397407SSherry Moore next_entry: 51219397407SSherry Moore va += pgsize; 51319397407SSherry Moore if (l == 3 && index == 256) /* VA hole */ 51419397407SSherry Moore va = 0xffff800000000000ull; 51519397407SSherry Moore recursion: 51619397407SSherry Moore ; 51719397407SSherry Moore } 51819397407SSherry Moore if (l < top_level) { 51919397407SSherry Moore ++l; 52019397407SSherry Moore index = save_index[l]; 52119397407SSherry Moore table = save_table[l]; 52219397407SSherry Moore goto recursion; 52319397407SSherry Moore } 52419397407SSherry Moore } 52519397407SSherry Moore 526ae115bc7Smrj /* 527843e1988Sjohnlev * Add a mapping for the machine page at the given virtual address. 528ae115bc7Smrj */ 529ae115bc7Smrj static void 530843e1988Sjohnlev map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level) 531ae115bc7Smrj { 532ae115bc7Smrj x86pte_t *ptep; 533ae115bc7Smrj x86pte_t pteval; 534ae115bc7Smrj 535843e1988Sjohnlev pteval = ma | pte_bits; 536ae115bc7Smrj if (level > 0) 537ae115bc7Smrj pteval |= PT_PAGESIZE; 538ae115bc7Smrj if (va >= target_kernel_text && pge_support) 539ae115bc7Smrj pteval |= PT_GLOBAL; 540ae115bc7Smrj 541843e1988Sjohnlev if (map_debug && ma != va) 542843e1988Sjohnlev dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64 543ae115bc7Smrj " pte=0x%" PRIx64 " l=%d\n", 544843e1988Sjohnlev (uint64_t)ma, (uint64_t)va, pteval, level); 545843e1988Sjohnlev 546843e1988Sjohnlev #if defined(__xpv) 547843e1988Sjohnlev /* 548843e1988Sjohnlev * see if we can avoid find_pte() on the hypervisor 549843e1988Sjohnlev */ 550843e1988Sjohnlev if (HYPERVISOR_update_va_mapping(va, pteval, 551843e1988Sjohnlev UVMF_INVLPG | UVMF_LOCAL) == 0) 552843e1988Sjohnlev return; 553843e1988Sjohnlev #endif 554ae115bc7Smrj 555ae115bc7Smrj /* 556ae115bc7Smrj * Find the pte that will map this address. This creates any 557ae115bc7Smrj * missing intermediate level page tables 558ae115bc7Smrj */ 559ae115bc7Smrj ptep = find_pte(va, NULL, level, 0); 560ae115bc7Smrj 561ae115bc7Smrj /* 562843e1988Sjohnlev * When paravirtualized, we must use hypervisor calls to modify the 563843e1988Sjohnlev * PTE, since paging is active. On real hardware we just write to 564843e1988Sjohnlev * the pagetables which aren't in use yet. 565ae115bc7Smrj */ 566843e1988Sjohnlev #if defined(__xpv) 567843e1988Sjohnlev ptep = ptep; /* shut lint up */ 568843e1988Sjohnlev if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL)) 569843e1988Sjohnlev dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64 570843e1988Sjohnlev " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "", 571843e1988Sjohnlev (uint64_t)va, level, (uint64_t)ma, pteval); 572843e1988Sjohnlev #else 573ae115bc7Smrj if (va < 1024 * 1024) 574ae115bc7Smrj pteval |= PT_NOCACHE; /* for video RAM */ 575ae115bc7Smrj if (pae_support) 576ae115bc7Smrj *ptep = pteval; 577ae115bc7Smrj else 578ae115bc7Smrj *((x86pte32_t *)ptep) = (x86pte32_t)pteval; 579843e1988Sjohnlev #endif 580ae115bc7Smrj } 581ae115bc7Smrj 582ae115bc7Smrj /* 583843e1988Sjohnlev * Add a mapping for the physical page at the given virtual address. 584ae115bc7Smrj */ 585ae115bc7Smrj static void 586843e1988Sjohnlev map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level) 587ae115bc7Smrj { 588843e1988Sjohnlev map_ma_at_va(pa_to_ma(pa), va, level); 589ae115bc7Smrj } 590ae115bc7Smrj 591ae115bc7Smrj /* 592c9464e8bSjosephb * This is called to remove start..end from the 593c9464e8bSjosephb * possible range of PCI addresses. 594c9464e8bSjosephb */ 595c9464e8bSjosephb const uint64_t pci_lo_limit = 0x00100000ul; 596c9464e8bSjosephb const uint64_t pci_hi_limit = 0xfff00000ul; 597c9464e8bSjosephb static void 598c9464e8bSjosephb exclude_from_pci(uint64_t start, uint64_t end) 599c9464e8bSjosephb { 600c9464e8bSjosephb int i; 601c9464e8bSjosephb int j; 602c9464e8bSjosephb struct boot_memlist *ml; 603c9464e8bSjosephb 604c9464e8bSjosephb for (i = 0; i < pcimemlists_used; ++i) { 605c9464e8bSjosephb ml = &pcimemlists[i]; 606c9464e8bSjosephb 607c9464e8bSjosephb /* delete the entire range? */ 608c9464e8bSjosephb if (start <= ml->addr && ml->addr + ml->size <= end) { 609c9464e8bSjosephb --pcimemlists_used; 610c9464e8bSjosephb for (j = i; j < pcimemlists_used; ++j) 611c9464e8bSjosephb pcimemlists[j] = pcimemlists[j + 1]; 612c9464e8bSjosephb --i; /* to revisit the new one at this index */ 613c9464e8bSjosephb } 614c9464e8bSjosephb 615c9464e8bSjosephb /* split a range? */ 616c9464e8bSjosephb else if (ml->addr < start && end < ml->addr + ml->size) { 617c9464e8bSjosephb 618c9464e8bSjosephb ++pcimemlists_used; 619c9464e8bSjosephb if (pcimemlists_used > MAX_MEMLIST) 620c9464e8bSjosephb dboot_panic("too many pcimemlists"); 621c9464e8bSjosephb 622c9464e8bSjosephb for (j = pcimemlists_used - 1; j > i; --j) 623c9464e8bSjosephb pcimemlists[j] = pcimemlists[j - 1]; 624c9464e8bSjosephb ml->size = start - ml->addr; 625c9464e8bSjosephb 626c9464e8bSjosephb ++ml; 627c9464e8bSjosephb ml->size = (ml->addr + ml->size) - end; 628c9464e8bSjosephb ml->addr = end; 629c9464e8bSjosephb ++i; /* skip on to next one */ 630c9464e8bSjosephb } 631c9464e8bSjosephb 632c9464e8bSjosephb /* cut memory off the start? */ 633c9464e8bSjosephb else if (ml->addr < end && end < ml->addr + ml->size) { 634c9464e8bSjosephb ml->size -= end - ml->addr; 635c9464e8bSjosephb ml->addr = end; 636c9464e8bSjosephb } 637c9464e8bSjosephb 638c9464e8bSjosephb /* cut memory off the end? */ 639c9464e8bSjosephb else if (ml->addr <= start && start < ml->addr + ml->size) { 640c9464e8bSjosephb ml->size = start - ml->addr; 641c9464e8bSjosephb } 642c9464e8bSjosephb } 643c9464e8bSjosephb } 644c9464e8bSjosephb 645c9464e8bSjosephb /* 646d2670fc4SToomas Soome * During memory allocation, find the highest address not used yet. 647843e1988Sjohnlev */ 648d2670fc4SToomas Soome static void 649d2670fc4SToomas Soome check_higher(paddr_t a) 650d2670fc4SToomas Soome { 651d2670fc4SToomas Soome if (a < next_avail_addr) 652d2670fc4SToomas Soome return; 653d2670fc4SToomas Soome next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE); 654d2670fc4SToomas Soome DBG(next_avail_addr); 655d2670fc4SToomas Soome } 656d2670fc4SToomas Soome 657d2670fc4SToomas Soome static int 658d2670fc4SToomas Soome dboot_loader_mmap_entries(void) 659d2670fc4SToomas Soome { 660d2670fc4SToomas Soome #if !defined(__xpv) 661d2670fc4SToomas Soome if (num_entries_set == B_TRUE) 662d2670fc4SToomas Soome return (num_entries); 663d2670fc4SToomas Soome 664d2670fc4SToomas Soome switch (multiboot_version) { 665d2670fc4SToomas Soome case 1: 666d2670fc4SToomas Soome DBG(mb_info->flags); 667d2670fc4SToomas Soome if (mb_info->flags & 0x40) { 668d2670fc4SToomas Soome mb_memory_map_t *mmap; 669d2670fc4SToomas Soome 670d2670fc4SToomas Soome DBG(mb_info->mmap_addr); 671d2670fc4SToomas Soome DBG(mb_info->mmap_length); 672d2670fc4SToomas Soome check_higher(mb_info->mmap_addr + mb_info->mmap_length); 673d2670fc4SToomas Soome 674d2670fc4SToomas Soome for (mmap = (mb_memory_map_t *)mb_info->mmap_addr; 675d2670fc4SToomas Soome (uint32_t)mmap < mb_info->mmap_addr + 676d2670fc4SToomas Soome mb_info->mmap_length; 677d2670fc4SToomas Soome mmap = (mb_memory_map_t *)((uint32_t)mmap + 678d2670fc4SToomas Soome mmap->size + sizeof (mmap->size))) 679d2670fc4SToomas Soome ++num_entries; 680d2670fc4SToomas Soome 681d2670fc4SToomas Soome num_entries_set = B_TRUE; 682d2670fc4SToomas Soome } 683d2670fc4SToomas Soome break; 684d2670fc4SToomas Soome case 2: 685d2670fc4SToomas Soome num_entries_set = B_TRUE; 686d2670fc4SToomas Soome num_entries = dboot_multiboot2_mmap_nentries(mb2_info, 687d2670fc4SToomas Soome mb2_mmap_tagp); 688d2670fc4SToomas Soome break; 689d2670fc4SToomas Soome default: 690d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 691d2670fc4SToomas Soome multiboot_version); 692d2670fc4SToomas Soome break; 693d2670fc4SToomas Soome } 694d2670fc4SToomas Soome return (num_entries); 695843e1988Sjohnlev #else 696d2670fc4SToomas Soome return (MAXMAPS); 697843e1988Sjohnlev #endif 698d2670fc4SToomas Soome } 699d2670fc4SToomas Soome 700d2670fc4SToomas Soome static uint32_t 701d2670fc4SToomas Soome dboot_loader_mmap_get_type(int index) 702d2670fc4SToomas Soome { 703d2670fc4SToomas Soome #if !defined(__xpv) 704d2670fc4SToomas Soome mb_memory_map_t *mp, *mpend; 705d2670fc4SToomas Soome int i; 706d2670fc4SToomas Soome 707d2670fc4SToomas Soome switch (multiboot_version) { 708d2670fc4SToomas Soome case 1: 709d2670fc4SToomas Soome mp = (mb_memory_map_t *)mb_info->mmap_addr; 710d2670fc4SToomas Soome mpend = (mb_memory_map_t *) 711d2670fc4SToomas Soome (mb_info->mmap_addr + mb_info->mmap_length); 712d2670fc4SToomas Soome 713d2670fc4SToomas Soome for (i = 0; mp < mpend && i != index; i++) 714d2670fc4SToomas Soome mp = (mb_memory_map_t *)((uint32_t)mp + mp->size + 715d2670fc4SToomas Soome sizeof (mp->size)); 716d2670fc4SToomas Soome if (mp >= mpend) { 717d2670fc4SToomas Soome dboot_panic("dboot_loader_mmap_get_type(): index " 718d2670fc4SToomas Soome "out of bounds: %d\n", index); 719d2670fc4SToomas Soome } 720d2670fc4SToomas Soome return (mp->type); 721d2670fc4SToomas Soome 722d2670fc4SToomas Soome case 2: 723d2670fc4SToomas Soome return (dboot_multiboot2_mmap_get_type(mb2_info, 724d2670fc4SToomas Soome mb2_mmap_tagp, index)); 725d2670fc4SToomas Soome 726d2670fc4SToomas Soome default: 727d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 728d2670fc4SToomas Soome multiboot_version); 729d2670fc4SToomas Soome break; 730d2670fc4SToomas Soome } 731d2670fc4SToomas Soome return (0); 732d2670fc4SToomas Soome #else 733d2670fc4SToomas Soome return (map_buffer[index].type); 734d2670fc4SToomas Soome #endif 735d2670fc4SToomas Soome } 736d2670fc4SToomas Soome 737d2670fc4SToomas Soome static uint64_t 738d2670fc4SToomas Soome dboot_loader_mmap_get_base(int index) 739d2670fc4SToomas Soome { 740d2670fc4SToomas Soome #if !defined(__xpv) 741d2670fc4SToomas Soome mb_memory_map_t *mp, *mpend; 742d2670fc4SToomas Soome int i; 743d2670fc4SToomas Soome 744d2670fc4SToomas Soome switch (multiboot_version) { 745d2670fc4SToomas Soome case 1: 746d2670fc4SToomas Soome mp = (mb_memory_map_t *)mb_info->mmap_addr; 747d2670fc4SToomas Soome mpend = (mb_memory_map_t *) 748d2670fc4SToomas Soome (mb_info->mmap_addr + mb_info->mmap_length); 749d2670fc4SToomas Soome 750d2670fc4SToomas Soome for (i = 0; mp < mpend && i != index; i++) 751d2670fc4SToomas Soome mp = (mb_memory_map_t *)((uint32_t)mp + mp->size + 752d2670fc4SToomas Soome sizeof (mp->size)); 753d2670fc4SToomas Soome if (mp >= mpend) { 754d2670fc4SToomas Soome dboot_panic("dboot_loader_mmap_get_base(): index " 755d2670fc4SToomas Soome "out of bounds: %d\n", index); 756d2670fc4SToomas Soome } 757d2670fc4SToomas Soome return (((uint64_t)mp->base_addr_high << 32) + 758d2670fc4SToomas Soome (uint64_t)mp->base_addr_low); 759d2670fc4SToomas Soome 760d2670fc4SToomas Soome case 2: 761d2670fc4SToomas Soome return (dboot_multiboot2_mmap_get_base(mb2_info, 762d2670fc4SToomas Soome mb2_mmap_tagp, index)); 763d2670fc4SToomas Soome 764d2670fc4SToomas Soome default: 765d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 766d2670fc4SToomas Soome multiboot_version); 767d2670fc4SToomas Soome break; 768d2670fc4SToomas Soome } 769d2670fc4SToomas Soome return (0); 770d2670fc4SToomas Soome #else 771d2670fc4SToomas Soome return (((uint64_t)map_buffer[index].base_addr_high << 32) + 772d2670fc4SToomas Soome (uint64_t)map_buffer[index].base_addr_low); 773d2670fc4SToomas Soome #endif 774d2670fc4SToomas Soome } 775d2670fc4SToomas Soome 776d2670fc4SToomas Soome static uint64_t 777d2670fc4SToomas Soome dboot_loader_mmap_get_length(int index) 778d2670fc4SToomas Soome { 779d2670fc4SToomas Soome #if !defined(__xpv) 780d2670fc4SToomas Soome mb_memory_map_t *mp, *mpend; 781d2670fc4SToomas Soome int i; 782d2670fc4SToomas Soome 783d2670fc4SToomas Soome switch (multiboot_version) { 784d2670fc4SToomas Soome case 1: 785d2670fc4SToomas Soome mp = (mb_memory_map_t *)mb_info->mmap_addr; 786d2670fc4SToomas Soome mpend = (mb_memory_map_t *) 787d2670fc4SToomas Soome (mb_info->mmap_addr + mb_info->mmap_length); 788d2670fc4SToomas Soome 789d2670fc4SToomas Soome for (i = 0; mp < mpend && i != index; i++) 790d2670fc4SToomas Soome mp = (mb_memory_map_t *)((uint32_t)mp + mp->size + 791d2670fc4SToomas Soome sizeof (mp->size)); 792d2670fc4SToomas Soome if (mp >= mpend) { 793d2670fc4SToomas Soome dboot_panic("dboot_loader_mmap_get_length(): index " 794d2670fc4SToomas Soome "out of bounds: %d\n", index); 795d2670fc4SToomas Soome } 796d2670fc4SToomas Soome return (((uint64_t)mp->length_high << 32) + 797d2670fc4SToomas Soome (uint64_t)mp->length_low); 798d2670fc4SToomas Soome 799d2670fc4SToomas Soome case 2: 800d2670fc4SToomas Soome return (dboot_multiboot2_mmap_get_length(mb2_info, 801d2670fc4SToomas Soome mb2_mmap_tagp, index)); 802d2670fc4SToomas Soome 803d2670fc4SToomas Soome default: 804d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 805d2670fc4SToomas Soome multiboot_version); 806d2670fc4SToomas Soome break; 807d2670fc4SToomas Soome } 808d2670fc4SToomas Soome return (0); 809d2670fc4SToomas Soome #else 810d2670fc4SToomas Soome return (((uint64_t)map_buffer[index].length_high << 32) + 811d2670fc4SToomas Soome (uint64_t)map_buffer[index].length_low); 812d2670fc4SToomas Soome #endif 813d2670fc4SToomas Soome } 814843e1988Sjohnlev 815843e1988Sjohnlev static void 816d2670fc4SToomas Soome build_pcimemlists(void) 817843e1988Sjohnlev { 818843e1988Sjohnlev uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 819843e1988Sjohnlev uint64_t start; 820843e1988Sjohnlev uint64_t end; 821d2670fc4SToomas Soome int i, num; 822843e1988Sjohnlev 823843e1988Sjohnlev /* 824843e1988Sjohnlev * initialize 825843e1988Sjohnlev */ 826843e1988Sjohnlev pcimemlists[0].addr = pci_lo_limit; 827843e1988Sjohnlev pcimemlists[0].size = pci_hi_limit - pci_lo_limit; 828843e1988Sjohnlev pcimemlists_used = 1; 829843e1988Sjohnlev 830d2670fc4SToomas Soome num = dboot_loader_mmap_entries(); 831843e1988Sjohnlev /* 832843e1988Sjohnlev * Fill in PCI memlists. 833843e1988Sjohnlev */ 834d2670fc4SToomas Soome for (i = 0; i < num; ++i) { 835d2670fc4SToomas Soome start = dboot_loader_mmap_get_base(i); 836d2670fc4SToomas Soome end = start + dboot_loader_mmap_get_length(i); 837843e1988Sjohnlev 838843e1988Sjohnlev if (prom_debug) 839843e1988Sjohnlev dboot_printf("\ttype: %d %" PRIx64 "..%" 840d2670fc4SToomas Soome PRIx64 "\n", dboot_loader_mmap_get_type(i), 841d2670fc4SToomas Soome start, end); 842843e1988Sjohnlev 843843e1988Sjohnlev /* 844843e1988Sjohnlev * page align start and end 845843e1988Sjohnlev */ 846843e1988Sjohnlev start = (start + page_offset) & ~page_offset; 847843e1988Sjohnlev end &= ~page_offset; 848843e1988Sjohnlev if (end <= start) 849843e1988Sjohnlev continue; 850843e1988Sjohnlev 851843e1988Sjohnlev exclude_from_pci(start, end); 852843e1988Sjohnlev } 853843e1988Sjohnlev 854843e1988Sjohnlev /* 855843e1988Sjohnlev * Finish off the pcimemlist 856843e1988Sjohnlev */ 857843e1988Sjohnlev if (prom_debug) { 858843e1988Sjohnlev for (i = 0; i < pcimemlists_used; ++i) { 859843e1988Sjohnlev dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%" 860843e1988Sjohnlev PRIx64 "\n", pcimemlists[i].addr, 861843e1988Sjohnlev pcimemlists[i].addr + pcimemlists[i].size); 862843e1988Sjohnlev } 863843e1988Sjohnlev } 864843e1988Sjohnlev pcimemlists[0].next = 0; 865843e1988Sjohnlev pcimemlists[0].prev = 0; 866843e1988Sjohnlev for (i = 1; i < pcimemlists_used; ++i) { 867843e1988Sjohnlev pcimemlists[i].prev = 868843e1988Sjohnlev (native_ptr_t)(uintptr_t)(pcimemlists + i - 1); 869843e1988Sjohnlev pcimemlists[i].next = 0; 870843e1988Sjohnlev pcimemlists[i - 1].next = 871843e1988Sjohnlev (native_ptr_t)(uintptr_t)(pcimemlists + i); 872843e1988Sjohnlev } 873c909a41bSRichard Lowe bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 874843e1988Sjohnlev DBG(bi->bi_pcimem); 875843e1988Sjohnlev } 876843e1988Sjohnlev 877843e1988Sjohnlev #if defined(__xpv) 878843e1988Sjohnlev /* 879843e1988Sjohnlev * Initialize memory allocator stuff from hypervisor-supplied start info. 880843e1988Sjohnlev */ 881843e1988Sjohnlev static void 882843e1988Sjohnlev init_mem_alloc(void) 883843e1988Sjohnlev { 884843e1988Sjohnlev int local; /* variables needed to find start region */ 885843e1988Sjohnlev paddr_t scratch_start; 886843e1988Sjohnlev xen_memory_map_t map; 887843e1988Sjohnlev 888843e1988Sjohnlev DBG_MSG("Entered init_mem_alloc()\n"); 889843e1988Sjohnlev 890843e1988Sjohnlev /* 891843e1988Sjohnlev * Free memory follows the stack. There's at least 512KB of scratch 892843e1988Sjohnlev * space, rounded up to at least 2Mb alignment. That should be enough 893843e1988Sjohnlev * for the page tables we'll need to build. The nucleus memory is 894843e1988Sjohnlev * allocated last and will be outside the addressible range. We'll 895843e1988Sjohnlev * switch to new page tables before we unpack the kernel 896843e1988Sjohnlev */ 897843e1988Sjohnlev scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE); 898843e1988Sjohnlev DBG(scratch_start); 899843e1988Sjohnlev scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG); 900843e1988Sjohnlev DBG(scratch_end); 901843e1988Sjohnlev 902843e1988Sjohnlev /* 903843e1988Sjohnlev * For paranoia, leave some space between hypervisor data and ours. 904843e1988Sjohnlev * Use 500 instead of 512. 905843e1988Sjohnlev */ 906843e1988Sjohnlev next_avail_addr = scratch_end - 500 * 1024; 907843e1988Sjohnlev DBG(next_avail_addr); 908843e1988Sjohnlev 909843e1988Sjohnlev /* 910843e1988Sjohnlev * The domain builder gives us at most 1 module 911843e1988Sjohnlev */ 912843e1988Sjohnlev DBG(xen_info->mod_len); 913843e1988Sjohnlev if (xen_info->mod_len > 0) { 914843e1988Sjohnlev DBG(xen_info->mod_start); 915843e1988Sjohnlev modules[0].bm_addr = xen_info->mod_start; 916843e1988Sjohnlev modules[0].bm_size = xen_info->mod_len; 917843e1988Sjohnlev bi->bi_module_cnt = 1; 918843e1988Sjohnlev bi->bi_modules = (native_ptr_t)modules; 919843e1988Sjohnlev } else { 920843e1988Sjohnlev bi->bi_module_cnt = 0; 921843e1988Sjohnlev bi->bi_modules = NULL; 922843e1988Sjohnlev } 923843e1988Sjohnlev DBG(bi->bi_module_cnt); 924843e1988Sjohnlev DBG(bi->bi_modules); 925843e1988Sjohnlev 926843e1988Sjohnlev DBG(xen_info->mfn_list); 927843e1988Sjohnlev DBG(xen_info->nr_pages); 928843e1988Sjohnlev max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT; 929843e1988Sjohnlev DBG(max_mem); 930843e1988Sjohnlev 931843e1988Sjohnlev /* 932843e1988Sjohnlev * Using pseudo-physical addresses, so only 1 memlist element 933843e1988Sjohnlev */ 934843e1988Sjohnlev memlists[0].addr = 0; 935843e1988Sjohnlev DBG(memlists[0].addr); 936843e1988Sjohnlev memlists[0].size = max_mem; 937843e1988Sjohnlev DBG(memlists[0].size); 938843e1988Sjohnlev memlists_used = 1; 939843e1988Sjohnlev DBG(memlists_used); 940843e1988Sjohnlev 941843e1988Sjohnlev /* 942843e1988Sjohnlev * finish building physinstall list 943843e1988Sjohnlev */ 944843e1988Sjohnlev sort_physinstall(); 945843e1988Sjohnlev 9461de082f7SVikram Hegde /* 9471de082f7SVikram Hegde * build bios reserved memlists 9481de082f7SVikram Hegde */ 9491de082f7SVikram Hegde build_rsvdmemlists(); 9501de082f7SVikram Hegde 951843e1988Sjohnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) { 952843e1988Sjohnlev /* 953843e1988Sjohnlev * build PCI Memory list 954843e1988Sjohnlev */ 955843e1988Sjohnlev map.nr_entries = MAXMAPS; 956843e1988Sjohnlev /*LINTED: constant in conditional context*/ 957843e1988Sjohnlev set_xen_guest_handle(map.buffer, map_buffer); 958843e1988Sjohnlev if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0) 959843e1988Sjohnlev dboot_panic("getting XENMEM_machine_memory_map failed"); 960d2670fc4SToomas Soome build_pcimemlists(); 961843e1988Sjohnlev } 962843e1988Sjohnlev } 963843e1988Sjohnlev 964843e1988Sjohnlev #else /* !__xpv */ 965843e1988Sjohnlev 966d2670fc4SToomas Soome static void 967d2670fc4SToomas Soome dboot_multiboot1_xboot_consinfo(void) 968d2670fc4SToomas Soome { 969d2670fc4SToomas Soome } 970d2670fc4SToomas Soome 971d2670fc4SToomas Soome static void 972d2670fc4SToomas Soome dboot_multiboot2_xboot_consinfo(void) 973d2670fc4SToomas Soome { 974d2670fc4SToomas Soome } 975d2670fc4SToomas Soome 976d2670fc4SToomas Soome static int 977d2670fc4SToomas Soome dboot_multiboot_modcount(void) 978d2670fc4SToomas Soome { 979d2670fc4SToomas Soome switch (multiboot_version) { 980d2670fc4SToomas Soome case 1: 981d2670fc4SToomas Soome return (mb_info->mods_count); 982d2670fc4SToomas Soome 983d2670fc4SToomas Soome case 2: 984d2670fc4SToomas Soome return (dboot_multiboot2_modcount(mb2_info)); 985d2670fc4SToomas Soome 986d2670fc4SToomas Soome default: 987d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 988d2670fc4SToomas Soome multiboot_version); 989d2670fc4SToomas Soome break; 990d2670fc4SToomas Soome } 991d2670fc4SToomas Soome return (0); 992d2670fc4SToomas Soome } 993d2670fc4SToomas Soome 994d2670fc4SToomas Soome static uint32_t 995d2670fc4SToomas Soome dboot_multiboot_modstart(int index) 996d2670fc4SToomas Soome { 997d2670fc4SToomas Soome switch (multiboot_version) { 998d2670fc4SToomas Soome case 1: 999d2670fc4SToomas Soome return (((mb_module_t *)mb_info->mods_addr)[index].mod_start); 1000d2670fc4SToomas Soome 1001d2670fc4SToomas Soome case 2: 1002d2670fc4SToomas Soome return (dboot_multiboot2_modstart(mb2_info, index)); 1003d2670fc4SToomas Soome 1004d2670fc4SToomas Soome default: 1005d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1006d2670fc4SToomas Soome multiboot_version); 1007d2670fc4SToomas Soome break; 1008d2670fc4SToomas Soome } 1009d2670fc4SToomas Soome return (0); 1010d2670fc4SToomas Soome } 1011d2670fc4SToomas Soome 1012d2670fc4SToomas Soome static uint32_t 1013d2670fc4SToomas Soome dboot_multiboot_modend(int index) 1014d2670fc4SToomas Soome { 1015d2670fc4SToomas Soome switch (multiboot_version) { 1016d2670fc4SToomas Soome case 1: 1017d2670fc4SToomas Soome return (((mb_module_t *)mb_info->mods_addr)[index].mod_end); 1018d2670fc4SToomas Soome 1019d2670fc4SToomas Soome case 2: 1020d2670fc4SToomas Soome return (dboot_multiboot2_modend(mb2_info, index)); 1021d2670fc4SToomas Soome 1022d2670fc4SToomas Soome default: 1023d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1024d2670fc4SToomas Soome multiboot_version); 1025d2670fc4SToomas Soome break; 1026d2670fc4SToomas Soome } 1027d2670fc4SToomas Soome return (0); 1028d2670fc4SToomas Soome } 1029d2670fc4SToomas Soome 1030d2670fc4SToomas Soome static char * 1031d2670fc4SToomas Soome dboot_multiboot_modcmdline(int index) 1032d2670fc4SToomas Soome { 1033d2670fc4SToomas Soome switch (multiboot_version) { 1034d2670fc4SToomas Soome case 1: 1035d2670fc4SToomas Soome return ((char *)((mb_module_t *) 1036d2670fc4SToomas Soome mb_info->mods_addr)[index].mod_name); 1037d2670fc4SToomas Soome 1038d2670fc4SToomas Soome case 2: 1039d2670fc4SToomas Soome return (dboot_multiboot2_modcmdline(mb2_info, index)); 1040d2670fc4SToomas Soome 1041d2670fc4SToomas Soome default: 1042d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1043d2670fc4SToomas Soome multiboot_version); 1044d2670fc4SToomas Soome break; 1045d2670fc4SToomas Soome } 1046d2670fc4SToomas Soome return (0); 1047d2670fc4SToomas Soome } 1048d2670fc4SToomas Soome 1049*b9a86732SToomas Soome /* 1050*b9a86732SToomas Soome * Find the environment module for console setup. 1051*b9a86732SToomas Soome * Since we need the console to print early boot messages, the console is set up 1052*b9a86732SToomas Soome * before anything else and therefore we need to pick up the environment module 1053*b9a86732SToomas Soome * early too. 1054*b9a86732SToomas Soome * 1055*b9a86732SToomas Soome * Note, we just will search for and if found, will pass the env 1056*b9a86732SToomas Soome * module to console setup, the proper module list processing will happen later. 1057*b9a86732SToomas Soome */ 1058*b9a86732SToomas Soome static void 1059*b9a86732SToomas Soome dboot_find_env(void) 1060*b9a86732SToomas Soome { 1061*b9a86732SToomas Soome int i, modcount; 1062*b9a86732SToomas Soome uint32_t mod_start, mod_end; 1063*b9a86732SToomas Soome char *cmdline; 1064*b9a86732SToomas Soome 1065*b9a86732SToomas Soome modcount = dboot_multiboot_modcount(); 1066*b9a86732SToomas Soome 1067*b9a86732SToomas Soome for (i = 0; i < modcount; ++i) { 1068*b9a86732SToomas Soome cmdline = dboot_multiboot_modcmdline(i); 1069*b9a86732SToomas Soome if (cmdline == NULL) 1070*b9a86732SToomas Soome continue; 1071*b9a86732SToomas Soome 1072*b9a86732SToomas Soome if (strstr(cmdline, "type=environment") == NULL) 1073*b9a86732SToomas Soome continue; 1074*b9a86732SToomas Soome 1075*b9a86732SToomas Soome mod_start = dboot_multiboot_modstart(i); 1076*b9a86732SToomas Soome mod_end = dboot_multiboot_modend(i); 1077*b9a86732SToomas Soome modules[0].bm_addr = mod_start; 1078*b9a86732SToomas Soome modules[0].bm_size = mod_end - mod_start; 1079*b9a86732SToomas Soome modules[0].bm_name = NULL; 1080*b9a86732SToomas Soome modules[0].bm_hash = NULL; 1081*b9a86732SToomas Soome modules[0].bm_type = BMT_ENV; 1082*b9a86732SToomas Soome bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 1083*b9a86732SToomas Soome bi->bi_module_cnt = 1; 1084*b9a86732SToomas Soome return; 1085*b9a86732SToomas Soome } 1086*b9a86732SToomas Soome } 1087*b9a86732SToomas Soome 1088d2670fc4SToomas Soome static boolean_t 1089d2670fc4SToomas Soome dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper) 1090d2670fc4SToomas Soome { 1091d2670fc4SToomas Soome boolean_t rv = B_FALSE; 1092d2670fc4SToomas Soome 1093d2670fc4SToomas Soome switch (multiboot_version) { 1094d2670fc4SToomas Soome case 1: 1095d2670fc4SToomas Soome if (mb_info->flags & 0x01) { 1096d2670fc4SToomas Soome *lower = mb_info->mem_lower; 1097d2670fc4SToomas Soome *upper = mb_info->mem_upper; 1098d2670fc4SToomas Soome rv = B_TRUE; 1099d2670fc4SToomas Soome } 1100d2670fc4SToomas Soome break; 1101d2670fc4SToomas Soome 1102d2670fc4SToomas Soome case 2: 1103d2670fc4SToomas Soome return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper)); 1104d2670fc4SToomas Soome 1105d2670fc4SToomas Soome default: 1106d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1107d2670fc4SToomas Soome multiboot_version); 1108d2670fc4SToomas Soome break; 1109d2670fc4SToomas Soome } 1110d2670fc4SToomas Soome return (rv); 1111d2670fc4SToomas Soome } 1112d2670fc4SToomas Soome 1113e65d07eeSKeith Wesolowski static uint8_t 1114e65d07eeSKeith Wesolowski dboot_a2h(char v) 1115e65d07eeSKeith Wesolowski { 1116e65d07eeSKeith Wesolowski if (v >= 'a') 1117e65d07eeSKeith Wesolowski return (v - 'a' + 0xa); 1118e65d07eeSKeith Wesolowski else if (v >= 'A') 1119e65d07eeSKeith Wesolowski return (v - 'A' + 0xa); 1120e65d07eeSKeith Wesolowski else if (v >= '0') 1121e65d07eeSKeith Wesolowski return (v - '0'); 1122e65d07eeSKeith Wesolowski else 1123e65d07eeSKeith Wesolowski dboot_panic("bad ASCII hex character %c\n", v); 1124e65d07eeSKeith Wesolowski 1125e65d07eeSKeith Wesolowski return (0); 1126e65d07eeSKeith Wesolowski } 1127e65d07eeSKeith Wesolowski 1128e65d07eeSKeith Wesolowski static void 1129e65d07eeSKeith Wesolowski digest_a2h(const char *ascii, uint8_t *digest) 1130e65d07eeSKeith Wesolowski { 1131e65d07eeSKeith Wesolowski unsigned int i; 1132e65d07eeSKeith Wesolowski 1133e65d07eeSKeith Wesolowski for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1134e65d07eeSKeith Wesolowski digest[i] = dboot_a2h(ascii[i * 2]) << 4; 1135e65d07eeSKeith Wesolowski digest[i] |= dboot_a2h(ascii[i * 2 + 1]); 1136e65d07eeSKeith Wesolowski } 1137e65d07eeSKeith Wesolowski } 1138e65d07eeSKeith Wesolowski 1139e65d07eeSKeith Wesolowski /* 1140e65d07eeSKeith Wesolowski * Generate a SHA-1 hash of the first len bytes of image, and compare it with 1141e65d07eeSKeith Wesolowski * the ASCII-format hash found in the 40-byte buffer at ascii. If they 1142e65d07eeSKeith Wesolowski * match, return 0, otherwise -1. This works only for images smaller than 1143e65d07eeSKeith Wesolowski * 4 GB, which should not be a problem. 1144e65d07eeSKeith Wesolowski */ 1145e65d07eeSKeith Wesolowski static int 11460181461bSKeith M Wesolowski check_image_hash(uint_t midx) 1147e65d07eeSKeith Wesolowski { 11480181461bSKeith M Wesolowski const char *ascii; 11490181461bSKeith M Wesolowski const void *image; 11500181461bSKeith M Wesolowski size_t len; 1151e65d07eeSKeith Wesolowski SHA1_CTX ctx; 1152e65d07eeSKeith Wesolowski uint8_t digest[SHA1_DIGEST_LENGTH]; 1153e65d07eeSKeith Wesolowski uint8_t baseline[SHA1_DIGEST_LENGTH]; 1154e65d07eeSKeith Wesolowski unsigned int i; 1155e65d07eeSKeith Wesolowski 11560181461bSKeith M Wesolowski ascii = (const char *)(uintptr_t)modules[midx].bm_hash; 11570181461bSKeith M Wesolowski image = (const void *)(uintptr_t)modules[midx].bm_addr; 11580181461bSKeith M Wesolowski len = (size_t)modules[midx].bm_size; 11590181461bSKeith M Wesolowski 1160e65d07eeSKeith Wesolowski digest_a2h(ascii, baseline); 1161e65d07eeSKeith Wesolowski 1162e65d07eeSKeith Wesolowski SHA1Init(&ctx); 1163e65d07eeSKeith Wesolowski SHA1Update(&ctx, image, len); 1164e65d07eeSKeith Wesolowski SHA1Final(digest, &ctx); 1165e65d07eeSKeith Wesolowski 1166e65d07eeSKeith Wesolowski for (i = 0; i < SHA1_DIGEST_LENGTH; i++) { 1167e65d07eeSKeith Wesolowski if (digest[i] != baseline[i]) 1168e65d07eeSKeith Wesolowski return (-1); 1169e65d07eeSKeith Wesolowski } 1170e65d07eeSKeith Wesolowski 1171e65d07eeSKeith Wesolowski return (0); 1172e65d07eeSKeith Wesolowski } 1173e65d07eeSKeith Wesolowski 11740181461bSKeith M Wesolowski static const char * 11750181461bSKeith M Wesolowski type_to_str(boot_module_type_t type) 11760181461bSKeith M Wesolowski { 11770181461bSKeith M Wesolowski switch (type) { 11780181461bSKeith M Wesolowski case BMT_ROOTFS: 11790181461bSKeith M Wesolowski return ("rootfs"); 11800181461bSKeith M Wesolowski case BMT_FILE: 11810181461bSKeith M Wesolowski return ("file"); 11820181461bSKeith M Wesolowski case BMT_HASH: 11830181461bSKeith M Wesolowski return ("hash"); 1184*b9a86732SToomas Soome case BMT_ENV: 1185*b9a86732SToomas Soome return ("environment"); 11860181461bSKeith M Wesolowski default: 11870181461bSKeith M Wesolowski return ("unknown"); 11880181461bSKeith M Wesolowski } 11890181461bSKeith M Wesolowski } 11900181461bSKeith M Wesolowski 1191e65d07eeSKeith Wesolowski static void 1192e65d07eeSKeith Wesolowski check_images(void) 1193e65d07eeSKeith Wesolowski { 11940181461bSKeith M Wesolowski uint_t i; 1195e65d07eeSKeith Wesolowski char displayhash[SHA1_ASCII_LENGTH + 1]; 11960181461bSKeith M Wesolowski 11970181461bSKeith M Wesolowski for (i = 0; i < modules_used; i++) { 11980181461bSKeith M Wesolowski if (prom_debug) { 11990181461bSKeith M Wesolowski dboot_printf("module #%d: name %s type %s " 12000181461bSKeith M Wesolowski "addr %lx size %lx\n", 12010181461bSKeith M Wesolowski i, (char *)(uintptr_t)modules[i].bm_name, 12020181461bSKeith M Wesolowski type_to_str(modules[i].bm_type), 12030181461bSKeith M Wesolowski (ulong_t)modules[i].bm_addr, 12040181461bSKeith M Wesolowski (ulong_t)modules[i].bm_size); 12050181461bSKeith M Wesolowski } 12060181461bSKeith M Wesolowski 12070181461bSKeith M Wesolowski if (modules[i].bm_type == BMT_HASH || 12080181461bSKeith M Wesolowski modules[i].bm_hash == NULL) { 12090181461bSKeith M Wesolowski DBG_MSG("module has no hash; skipping check\n"); 12100181461bSKeith M Wesolowski continue; 12110181461bSKeith M Wesolowski } 12120181461bSKeith M Wesolowski (void) memcpy(displayhash, 12130181461bSKeith M Wesolowski (void *)(uintptr_t)modules[i].bm_hash, 12140181461bSKeith M Wesolowski SHA1_ASCII_LENGTH); 12150181461bSKeith M Wesolowski displayhash[SHA1_ASCII_LENGTH] = '\0'; 12160181461bSKeith M Wesolowski if (prom_debug) { 12170181461bSKeith M Wesolowski dboot_printf("checking expected hash [%s]: ", 12180181461bSKeith M Wesolowski displayhash); 12190181461bSKeith M Wesolowski } 12200181461bSKeith M Wesolowski 12210181461bSKeith M Wesolowski if (check_image_hash(i) != 0) 12220181461bSKeith M Wesolowski dboot_panic("hash mismatch!\n"); 12230181461bSKeith M Wesolowski else 12240181461bSKeith M Wesolowski DBG_MSG("OK\n"); 12250181461bSKeith M Wesolowski } 12260181461bSKeith M Wesolowski } 12270181461bSKeith M Wesolowski 12280181461bSKeith M Wesolowski /* 12290181461bSKeith M Wesolowski * Determine the module's starting address, size, name, and type, and fill the 12300181461bSKeith M Wesolowski * boot_modules structure. This structure is used by the bop code, except for 12310181461bSKeith M Wesolowski * hashes which are checked prior to transferring control to the kernel. 12320181461bSKeith M Wesolowski */ 12330181461bSKeith M Wesolowski static void 1234d2670fc4SToomas Soome process_module(int midx) 12350181461bSKeith M Wesolowski { 1236d2670fc4SToomas Soome uint32_t mod_start = dboot_multiboot_modstart(midx); 1237d2670fc4SToomas Soome uint32_t mod_end = dboot_multiboot_modend(midx); 1238d2670fc4SToomas Soome char *cmdline = dboot_multiboot_modcmdline(midx); 12390181461bSKeith M Wesolowski char *p, *q; 12400181461bSKeith M Wesolowski 1241d2670fc4SToomas Soome check_higher(mod_end); 12420181461bSKeith M Wesolowski if (prom_debug) { 12430181461bSKeith M Wesolowski dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n", 1244d2670fc4SToomas Soome midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end); 12450181461bSKeith M Wesolowski } 12460181461bSKeith M Wesolowski 1247d2670fc4SToomas Soome if (mod_start > mod_end) { 12480181461bSKeith M Wesolowski dboot_panic("module #%d: module start address 0x%lx greater " 12490181461bSKeith M Wesolowski "than end address 0x%lx", midx, 1250d2670fc4SToomas Soome (ulong_t)mod_start, (ulong_t)mod_end); 12510181461bSKeith M Wesolowski } 1252e65d07eeSKeith Wesolowski 1253e65d07eeSKeith Wesolowski /* 1254e65d07eeSKeith Wesolowski * A brief note on lengths and sizes: GRUB, for reasons unknown, passes 1255e65d07eeSKeith Wesolowski * the address of the last valid byte in a module plus 1 as mod_end. 1256e65d07eeSKeith Wesolowski * This is of course a bug; the multiboot specification simply states 1257e65d07eeSKeith Wesolowski * that mod_start and mod_end "contain the start and end addresses of 1258e65d07eeSKeith Wesolowski * the boot module itself" which is pretty obviously not what GRUB is 1259e65d07eeSKeith Wesolowski * doing. However, fixing it requires that not only this code be 1260e65d07eeSKeith Wesolowski * changed but also that other code consuming this value and values 1261e65d07eeSKeith Wesolowski * derived from it be fixed, and that the kernel and GRUB must either 1262e65d07eeSKeith Wesolowski * both have the bug or neither. While there are a lot of combinations 1263e65d07eeSKeith Wesolowski * that will work, there are also some that won't, so for simplicity 1264e65d07eeSKeith Wesolowski * we'll just cope with the bug. That means we won't actually hash the 1265e65d07eeSKeith Wesolowski * byte at mod_end, and we will expect that mod_end for the hash file 1266e65d07eeSKeith Wesolowski * itself is one greater than some multiple of 41 (40 bytes of ASCII 12670181461bSKeith M Wesolowski * hash plus a newline for each module). We set bm_size to the true 12680181461bSKeith M Wesolowski * correct number of bytes in each module, achieving exactly this. 1269e65d07eeSKeith Wesolowski */ 1270e65d07eeSKeith Wesolowski 1271d2670fc4SToomas Soome modules[midx].bm_addr = mod_start; 1272d2670fc4SToomas Soome modules[midx].bm_size = mod_end - mod_start; 1273d2670fc4SToomas Soome modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline; 12740181461bSKeith M Wesolowski modules[midx].bm_hash = NULL; 12750181461bSKeith M Wesolowski modules[midx].bm_type = BMT_FILE; 12760181461bSKeith M Wesolowski 1277d2670fc4SToomas Soome if (cmdline == NULL) { 12780181461bSKeith M Wesolowski modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname; 1279e65d07eeSKeith Wesolowski return; 1280e65d07eeSKeith Wesolowski } 1281e65d07eeSKeith Wesolowski 1282d2670fc4SToomas Soome p = cmdline; 12830181461bSKeith M Wesolowski modules[midx].bm_name = 12840181461bSKeith M Wesolowski (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r"); 12850181461bSKeith M Wesolowski 12860181461bSKeith M Wesolowski while (p != NULL) { 12870181461bSKeith M Wesolowski q = strsep(&p, " \t\f\n\r"); 12880181461bSKeith M Wesolowski if (strncmp(q, "name=", 5) == 0) { 12890181461bSKeith M Wesolowski if (q[5] != '\0' && !isspace(q[5])) { 12900181461bSKeith M Wesolowski modules[midx].bm_name = 12910181461bSKeith M Wesolowski (native_ptr_t)(uintptr_t)(q + 5); 12920181461bSKeith M Wesolowski } 12930181461bSKeith M Wesolowski continue; 12940181461bSKeith M Wesolowski } 12950181461bSKeith M Wesolowski 12960181461bSKeith M Wesolowski if (strncmp(q, "type=", 5) == 0) { 12970181461bSKeith M Wesolowski if (q[5] == '\0' || isspace(q[5])) 12980181461bSKeith M Wesolowski continue; 12990181461bSKeith M Wesolowski q += 5; 13000181461bSKeith M Wesolowski if (strcmp(q, "rootfs") == 0) { 13010181461bSKeith M Wesolowski modules[midx].bm_type = BMT_ROOTFS; 13020181461bSKeith M Wesolowski } else if (strcmp(q, "hash") == 0) { 13030181461bSKeith M Wesolowski modules[midx].bm_type = BMT_HASH; 1304*b9a86732SToomas Soome } else if (strcmp(q, "environment") == 0) { 1305*b9a86732SToomas Soome modules[midx].bm_type = BMT_ENV; 13060181461bSKeith M Wesolowski } else if (strcmp(q, "file") != 0) { 13070181461bSKeith M Wesolowski dboot_printf("\tmodule #%d: unknown module " 13080181461bSKeith M Wesolowski "type '%s'; defaulting to 'file'", 13090181461bSKeith M Wesolowski midx, q); 13100181461bSKeith M Wesolowski } 13110181461bSKeith M Wesolowski continue; 13120181461bSKeith M Wesolowski } 13130181461bSKeith M Wesolowski 13140181461bSKeith M Wesolowski if (strncmp(q, "hash=", 5) == 0) { 13150181461bSKeith M Wesolowski if (q[5] != '\0' && !isspace(q[5])) { 13160181461bSKeith M Wesolowski modules[midx].bm_hash = 13170181461bSKeith M Wesolowski (native_ptr_t)(uintptr_t)(q + 5); 13180181461bSKeith M Wesolowski } 13190181461bSKeith M Wesolowski continue; 13200181461bSKeith M Wesolowski } 13210181461bSKeith M Wesolowski 13220181461bSKeith M Wesolowski dboot_printf("ignoring unknown option '%s'\n", q); 13230181461bSKeith M Wesolowski } 13240181461bSKeith M Wesolowski } 13250181461bSKeith M Wesolowski 13260181461bSKeith M Wesolowski /* 13270181461bSKeith M Wesolowski * Backward compatibility: if there are exactly one or two modules, both 13280181461bSKeith M Wesolowski * of type 'file' and neither with an embedded hash value, we have been 13290181461bSKeith M Wesolowski * given the legacy style modules. In this case we need to treat the first 13300181461bSKeith M Wesolowski * module as a rootfs and the second as a hash referencing that module. 13310181461bSKeith M Wesolowski * Otherwise, even if the configuration is invalid, we assume that the 13320181461bSKeith M Wesolowski * operator knows what he's doing or at least isn't being bitten by this 13330181461bSKeith M Wesolowski * interface change. 13340181461bSKeith M Wesolowski */ 13350181461bSKeith M Wesolowski static void 13360181461bSKeith M Wesolowski fixup_modules(void) 13370181461bSKeith M Wesolowski { 13380181461bSKeith M Wesolowski if (modules_used == 0 || modules_used > 2) 13390181461bSKeith M Wesolowski return; 13400181461bSKeith M Wesolowski 13410181461bSKeith M Wesolowski if (modules[0].bm_type != BMT_FILE || 13420181461bSKeith M Wesolowski modules_used > 1 && modules[1].bm_type != BMT_FILE) { 13430181461bSKeith M Wesolowski return; 13440181461bSKeith M Wesolowski } 13450181461bSKeith M Wesolowski 13460181461bSKeith M Wesolowski if (modules[0].bm_hash != NULL || 13470181461bSKeith M Wesolowski modules_used > 1 && modules[1].bm_hash != NULL) { 13480181461bSKeith M Wesolowski return; 13490181461bSKeith M Wesolowski } 13500181461bSKeith M Wesolowski 13510181461bSKeith M Wesolowski modules[0].bm_type = BMT_ROOTFS; 13520181461bSKeith M Wesolowski if (modules_used > 1) { 13530181461bSKeith M Wesolowski modules[1].bm_type = BMT_HASH; 13540181461bSKeith M Wesolowski modules[1].bm_name = modules[0].bm_name; 13550181461bSKeith M Wesolowski } 13560181461bSKeith M Wesolowski } 13570181461bSKeith M Wesolowski 13580181461bSKeith M Wesolowski /* 13590181461bSKeith M Wesolowski * For modules that do not have assigned hashes but have a separate hash module, 13600181461bSKeith M Wesolowski * find the assigned hash module and set the primary module's bm_hash to point 13610181461bSKeith M Wesolowski * to the hash data from that module. We will then ignore modules of type 13620181461bSKeith M Wesolowski * BMT_HASH from this point forward. 13630181461bSKeith M Wesolowski */ 13640181461bSKeith M Wesolowski static void 13650181461bSKeith M Wesolowski assign_module_hashes(void) 13660181461bSKeith M Wesolowski { 13670181461bSKeith M Wesolowski uint_t i, j; 13680181461bSKeith M Wesolowski 13690181461bSKeith M Wesolowski for (i = 0; i < modules_used; i++) { 13700181461bSKeith M Wesolowski if (modules[i].bm_type == BMT_HASH || 13710181461bSKeith M Wesolowski modules[i].bm_hash != NULL) { 13720181461bSKeith M Wesolowski continue; 13730181461bSKeith M Wesolowski } 13740181461bSKeith M Wesolowski 13750181461bSKeith M Wesolowski for (j = 0; j < modules_used; j++) { 13760181461bSKeith M Wesolowski if (modules[j].bm_type != BMT_HASH || 13770181461bSKeith M Wesolowski strcmp((char *)(uintptr_t)modules[j].bm_name, 13780181461bSKeith M Wesolowski (char *)(uintptr_t)modules[i].bm_name) != 0) { 13790181461bSKeith M Wesolowski continue; 13800181461bSKeith M Wesolowski } 13810181461bSKeith M Wesolowski 13820181461bSKeith M Wesolowski if (modules[j].bm_size < SHA1_ASCII_LENGTH) { 13830181461bSKeith M Wesolowski dboot_printf("Short hash module of length " 13840181461bSKeith M Wesolowski "0x%lx bytes; ignoring\n", 13850181461bSKeith M Wesolowski (ulong_t)modules[j].bm_size); 13860181461bSKeith M Wesolowski } else { 13870181461bSKeith M Wesolowski modules[i].bm_hash = modules[j].bm_addr; 13880181461bSKeith M Wesolowski } 1389e65d07eeSKeith Wesolowski break; 1390e65d07eeSKeith Wesolowski } 1391e65d07eeSKeith Wesolowski } 1392e65d07eeSKeith Wesolowski } 1393e65d07eeSKeith Wesolowski 1394843e1988Sjohnlev /* 1395ae115bc7Smrj * Walk through the module information finding the last used address. 1396ae115bc7Smrj * The first available address will become the top level page table. 1397ae115bc7Smrj */ 1398ae115bc7Smrj static void 1399d2670fc4SToomas Soome dboot_process_modules(void) 1400ae115bc7Smrj { 1401d2670fc4SToomas Soome int i, modcount; 1402ae115bc7Smrj extern char _end[]; 1403ae115bc7Smrj 1404d2670fc4SToomas Soome DBG_MSG("\nFinding Modules\n"); 1405d2670fc4SToomas Soome modcount = dboot_multiboot_modcount(); 1406d2670fc4SToomas Soome if (modcount > MAX_BOOT_MODULES) { 14075420b805SSeth Goldberg dboot_panic("Too many modules (%d) -- the maximum is %d.", 1408d2670fc4SToomas Soome modcount, MAX_BOOT_MODULES); 14095420b805SSeth Goldberg } 1410ae115bc7Smrj /* 1411ae115bc7Smrj * search the modules to find the last used address 1412ae115bc7Smrj * we'll build the module list while we're walking through here 1413ae115bc7Smrj */ 1414c909a41bSRichard Lowe check_higher((paddr_t)(uintptr_t)&_end); 1415d2670fc4SToomas Soome for (i = 0; i < modcount; ++i) { 1416d2670fc4SToomas Soome process_module(i); 1417d2670fc4SToomas Soome modules_used++; 1418ae115bc7Smrj } 1419c909a41bSRichard Lowe bi->bi_modules = (native_ptr_t)(uintptr_t)modules; 1420ae115bc7Smrj DBG(bi->bi_modules); 1421d2670fc4SToomas Soome bi->bi_module_cnt = modcount; 1422ae115bc7Smrj DBG(bi->bi_module_cnt); 1423ae115bc7Smrj 14240181461bSKeith M Wesolowski fixup_modules(); 14250181461bSKeith M Wesolowski assign_module_hashes(); 1426e65d07eeSKeith Wesolowski check_images(); 1427d2670fc4SToomas Soome } 1428d2670fc4SToomas Soome 1429d2670fc4SToomas Soome /* 1430d2670fc4SToomas Soome * We then build the phys_install memlist from the multiboot information. 1431d2670fc4SToomas Soome */ 1432d2670fc4SToomas Soome static void 1433d2670fc4SToomas Soome dboot_process_mmap(void) 1434d2670fc4SToomas Soome { 1435d2670fc4SToomas Soome uint64_t start; 1436d2670fc4SToomas Soome uint64_t end; 1437d2670fc4SToomas Soome uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */ 1438d2670fc4SToomas Soome uint32_t lower, upper; 1439d2670fc4SToomas Soome int i, mmap_entries; 1440e65d07eeSKeith Wesolowski 1441ae115bc7Smrj /* 1442ae115bc7Smrj * Walk through the memory map from multiboot and build our memlist 1443ae115bc7Smrj * structures. Note these will have native format pointers. 1444ae115bc7Smrj */ 1445ae115bc7Smrj DBG_MSG("\nFinding Memory Map\n"); 1446d2670fc4SToomas Soome num_entries = 0; 1447d2670fc4SToomas Soome num_entries_set = B_FALSE; 1448ae115bc7Smrj max_mem = 0; 1449d2670fc4SToomas Soome if ((mmap_entries = dboot_loader_mmap_entries()) > 0) { 1450d2670fc4SToomas Soome for (i = 0; i < mmap_entries; i++) { 1451d2670fc4SToomas Soome uint32_t type = dboot_loader_mmap_get_type(i); 1452d2670fc4SToomas Soome start = dboot_loader_mmap_get_base(i); 1453d2670fc4SToomas Soome end = start + dboot_loader_mmap_get_length(i); 1454ae115bc7Smrj 1455c9464e8bSjosephb if (prom_debug) 1456ae115bc7Smrj dboot_printf("\ttype: %d %" PRIx64 "..%" 1457d2670fc4SToomas Soome PRIx64 "\n", type, start, end); 1458ae115bc7Smrj 1459ae115bc7Smrj /* 1460ae115bc7Smrj * page align start and end 1461ae115bc7Smrj */ 1462ae115bc7Smrj start = (start + page_offset) & ~page_offset; 1463ae115bc7Smrj end &= ~page_offset; 1464ae115bc7Smrj if (end <= start) 1465ae115bc7Smrj continue; 1466ae115bc7Smrj 1467c9464e8bSjosephb /* 1468c9464e8bSjosephb * only type 1 is usable RAM 1469c9464e8bSjosephb */ 1470d2670fc4SToomas Soome switch (type) { 14711de082f7SVikram Hegde case 1: 1472ae115bc7Smrj if (end > max_mem) 1473ae115bc7Smrj max_mem = end; 1474ae115bc7Smrj memlists[memlists_used].addr = start; 1475ae115bc7Smrj memlists[memlists_used].size = end - start; 1476c9464e8bSjosephb ++memlists_used; 1477c9464e8bSjosephb if (memlists_used > MAX_MEMLIST) 1478c9464e8bSjosephb dboot_panic("too many memlists"); 14791de082f7SVikram Hegde break; 14801de082f7SVikram Hegde case 2: 14811de082f7SVikram Hegde rsvdmemlists[rsvdmemlists_used].addr = start; 14821de082f7SVikram Hegde rsvdmemlists[rsvdmemlists_used].size = 14831de082f7SVikram Hegde end - start; 14841de082f7SVikram Hegde ++rsvdmemlists_used; 14851de082f7SVikram Hegde if (rsvdmemlists_used > MAX_MEMLIST) 14861de082f7SVikram Hegde dboot_panic("too many rsvdmemlists"); 14871de082f7SVikram Hegde break; 14881de082f7SVikram Hegde default: 14891de082f7SVikram Hegde continue; 14901de082f7SVikram Hegde } 1491ae115bc7Smrj } 1492d2670fc4SToomas Soome build_pcimemlists(); 1493d2670fc4SToomas Soome } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) { 1494d2670fc4SToomas Soome DBG(lower); 1495ae115bc7Smrj memlists[memlists_used].addr = 0; 1496d2670fc4SToomas Soome memlists[memlists_used].size = lower * 1024; 1497ae115bc7Smrj ++memlists_used; 1498d2670fc4SToomas Soome DBG(upper); 1499ae115bc7Smrj memlists[memlists_used].addr = 1024 * 1024; 1500d2670fc4SToomas Soome memlists[memlists_used].size = upper * 1024; 1501ae115bc7Smrj ++memlists_used; 1502843e1988Sjohnlev 1503843e1988Sjohnlev /* 1504843e1988Sjohnlev * Old platform - assume I/O space at the end of memory. 1505843e1988Sjohnlev */ 1506d2670fc4SToomas Soome pcimemlists[0].addr = (upper * 1024) + (1024 * 1024); 1507843e1988Sjohnlev pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr; 1508843e1988Sjohnlev pcimemlists[0].next = 0; 1509843e1988Sjohnlev pcimemlists[0].prev = 0; 1510c909a41bSRichard Lowe bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists; 1511843e1988Sjohnlev DBG(bi->bi_pcimem); 1512ae115bc7Smrj } else { 1513843e1988Sjohnlev dboot_panic("No memory info from boot loader!!!"); 1514ae115bc7Smrj } 1515ae115bc7Smrj 1516ae115bc7Smrj /* 1517ae115bc7Smrj * finish processing the physinstall list 1518ae115bc7Smrj */ 1519ae115bc7Smrj sort_physinstall(); 15201de082f7SVikram Hegde 15211de082f7SVikram Hegde /* 15221de082f7SVikram Hegde * build bios reserved mem lists 15231de082f7SVikram Hegde */ 15241de082f7SVikram Hegde build_rsvdmemlists(); 1525c9464e8bSjosephb } 1526d2670fc4SToomas Soome 1527d2670fc4SToomas Soome /* 1528d2670fc4SToomas Soome * The highest address is used as the starting point for dboot's simple 1529d2670fc4SToomas Soome * memory allocator. 1530d2670fc4SToomas Soome * 1531d2670fc4SToomas Soome * Finding the highest address in case of Multiboot 1 protocol is 1532d2670fc4SToomas Soome * quite painful in the sense that some information provided by 1533d2670fc4SToomas Soome * the multiboot info structure points to BIOS data, and some to RAM. 1534d2670fc4SToomas Soome * 1535d2670fc4SToomas Soome * The module list was processed and checked already by dboot_process_modules(), 1536d2670fc4SToomas Soome * so we will check the command line string and the memory map. 1537d2670fc4SToomas Soome * 1538d2670fc4SToomas Soome * This list of to be checked items is based on our current knowledge of 1539d2670fc4SToomas Soome * allocations made by grub1 and will need to be reviewed if there 1540d2670fc4SToomas Soome * are updates about the information provided by Multiboot 1. 1541d2670fc4SToomas Soome * 1542d2670fc4SToomas Soome * In the case of the Multiboot 2, our life is much simpler, as the MB2 1543d2670fc4SToomas Soome * information tag list is one contiguous chunk of memory. 1544d2670fc4SToomas Soome */ 1545d2670fc4SToomas Soome static paddr_t 1546d2670fc4SToomas Soome dboot_multiboot1_highest_addr(void) 1547d2670fc4SToomas Soome { 1548d2670fc4SToomas Soome paddr_t addr = NULL; 1549d2670fc4SToomas Soome char *cmdl = (char *)mb_info->cmdline; 1550d2670fc4SToomas Soome 1551d2670fc4SToomas Soome if (mb_info->flags & MB_INFO_CMDLINE) 1552d2670fc4SToomas Soome addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1)); 1553d2670fc4SToomas Soome 1554d2670fc4SToomas Soome if (mb_info->flags & MB_INFO_MEM_MAP) 1555d2670fc4SToomas Soome addr = MAX(addr, 1556d2670fc4SToomas Soome ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length))); 1557d2670fc4SToomas Soome return (addr); 1558d2670fc4SToomas Soome } 1559d2670fc4SToomas Soome 1560d2670fc4SToomas Soome static void 1561d2670fc4SToomas Soome dboot_multiboot_highest_addr(void) 1562d2670fc4SToomas Soome { 1563d2670fc4SToomas Soome paddr_t addr; 1564d2670fc4SToomas Soome 1565d2670fc4SToomas Soome switch (multiboot_version) { 1566d2670fc4SToomas Soome case 1: 1567d2670fc4SToomas Soome addr = dboot_multiboot1_highest_addr(); 1568d2670fc4SToomas Soome if (addr != NULL) 1569d2670fc4SToomas Soome check_higher(addr); 1570d2670fc4SToomas Soome break; 1571d2670fc4SToomas Soome case 2: 1572d2670fc4SToomas Soome addr = dboot_multiboot2_highest_addr(mb2_info); 1573d2670fc4SToomas Soome if (addr != NULL) 1574d2670fc4SToomas Soome check_higher(addr); 1575d2670fc4SToomas Soome break; 1576d2670fc4SToomas Soome default: 1577d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1578d2670fc4SToomas Soome multiboot_version); 1579d2670fc4SToomas Soome break; 1580d2670fc4SToomas Soome } 1581d2670fc4SToomas Soome } 1582d2670fc4SToomas Soome 1583d2670fc4SToomas Soome /* 1584d2670fc4SToomas Soome * Walk the boot loader provided information and find the highest free address. 1585d2670fc4SToomas Soome */ 1586d2670fc4SToomas Soome static void 1587d2670fc4SToomas Soome init_mem_alloc(void) 1588d2670fc4SToomas Soome { 1589d2670fc4SToomas Soome DBG_MSG("Entered init_mem_alloc()\n"); 1590d2670fc4SToomas Soome dboot_process_modules(); 1591d2670fc4SToomas Soome dboot_process_mmap(); 1592d2670fc4SToomas Soome dboot_multiboot_highest_addr(); 1593d2670fc4SToomas Soome } 1594d2670fc4SToomas Soome 1595d2670fc4SToomas Soome static void 1596d2670fc4SToomas Soome dboot_multiboot_get_fwtables(void) 1597d2670fc4SToomas Soome { 1598d2670fc4SToomas Soome multiboot_tag_new_acpi_t *nacpitagp; 1599d2670fc4SToomas Soome multiboot_tag_old_acpi_t *oacpitagp; 1600d2670fc4SToomas Soome 1601d2670fc4SToomas Soome /* no fw tables from multiboot 1 */ 1602d2670fc4SToomas Soome if (multiboot_version != 2) 1603d2670fc4SToomas Soome return; 1604d2670fc4SToomas Soome 1605d2670fc4SToomas Soome nacpitagp = (multiboot_tag_new_acpi_t *) 1606d2670fc4SToomas Soome dboot_multiboot2_find_tag(mb2_info, 1607d2670fc4SToomas Soome MULTIBOOT_TAG_TYPE_ACPI_NEW); 1608d2670fc4SToomas Soome oacpitagp = (multiboot_tag_old_acpi_t *) 1609d2670fc4SToomas Soome dboot_multiboot2_find_tag(mb2_info, 1610d2670fc4SToomas Soome MULTIBOOT_TAG_TYPE_ACPI_OLD); 1611d2670fc4SToomas Soome 1612d2670fc4SToomas Soome if (nacpitagp != NULL) { 1613d2670fc4SToomas Soome bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1614d2670fc4SToomas Soome &nacpitagp->mb_rsdp[0]; 1615d2670fc4SToomas Soome } else if (oacpitagp != NULL) { 1616d2670fc4SToomas Soome bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t) 1617d2670fc4SToomas Soome &oacpitagp->mb_rsdp[0]; 1618d2670fc4SToomas Soome } else { 1619d2670fc4SToomas Soome bi->bi_acpi_rsdp = NULL; 1620d2670fc4SToomas Soome } 1621d2670fc4SToomas Soome } 1622843e1988Sjohnlev #endif /* !__xpv */ 1623ae115bc7Smrj 1624ae115bc7Smrj /* 1625ae115bc7Smrj * Simple memory allocator, allocates aligned physical memory. 1626ae115bc7Smrj * Note that startup_kernel() only allocates memory, never frees. 1627ae115bc7Smrj * Memory usage just grows in an upward direction. 1628ae115bc7Smrj */ 1629ae115bc7Smrj static void * 1630ae115bc7Smrj do_mem_alloc(uint32_t size, uint32_t align) 1631ae115bc7Smrj { 1632ae115bc7Smrj uint_t i; 1633ae115bc7Smrj uint64_t best; 1634ae115bc7Smrj uint64_t start; 1635ae115bc7Smrj uint64_t end; 1636ae115bc7Smrj 1637ae115bc7Smrj /* 1638ae115bc7Smrj * make sure size is a multiple of pagesize 1639ae115bc7Smrj */ 1640ae115bc7Smrj size = RNDUP(size, MMU_PAGESIZE); 1641ae115bc7Smrj next_avail_addr = RNDUP(next_avail_addr, align); 1642ae115bc7Smrj 1643ae115bc7Smrj /* 1644843e1988Sjohnlev * XXPV fixme joe 1645843e1988Sjohnlev * 1646ae115bc7Smrj * a really large bootarchive that causes you to run out of memory 1647ae115bc7Smrj * may cause this to blow up 1648ae115bc7Smrj */ 1649ae115bc7Smrj /* LINTED E_UNEXPECTED_UINT_PROMOTION */ 1650ae115bc7Smrj best = (uint64_t)-size; 1651ae115bc7Smrj for (i = 0; i < memlists_used; ++i) { 1652ae115bc7Smrj start = memlists[i].addr; 1653843e1988Sjohnlev #if defined(__xpv) 1654843e1988Sjohnlev start += mfn_base; 1655843e1988Sjohnlev #endif 1656ae115bc7Smrj end = start + memlists[i].size; 1657ae115bc7Smrj 1658ae115bc7Smrj /* 1659ae115bc7Smrj * did we find the desired address? 1660ae115bc7Smrj */ 1661ae115bc7Smrj if (start <= next_avail_addr && next_avail_addr + size <= end) { 1662ae115bc7Smrj best = next_avail_addr; 1663ae115bc7Smrj goto done; 1664ae115bc7Smrj } 1665ae115bc7Smrj 1666ae115bc7Smrj /* 1667ae115bc7Smrj * if not is this address the best so far? 1668ae115bc7Smrj */ 1669ae115bc7Smrj if (start > next_avail_addr && start < best && 1670ae115bc7Smrj RNDUP(start, align) + size <= end) 1671ae115bc7Smrj best = RNDUP(start, align); 1672ae115bc7Smrj } 1673ae115bc7Smrj 1674ae115bc7Smrj /* 1675ae115bc7Smrj * We didn't find exactly the address we wanted, due to going off the 1676ae115bc7Smrj * end of a memory region. Return the best found memory address. 1677ae115bc7Smrj */ 1678ae115bc7Smrj done: 1679ae115bc7Smrj next_avail_addr = best + size; 1680843e1988Sjohnlev #if defined(__xpv) 1681843e1988Sjohnlev if (next_avail_addr > scratch_end) 1682843e1988Sjohnlev dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: " 1683843e1988Sjohnlev "0x%lx", (ulong_t)next_avail_addr, 1684843e1988Sjohnlev (ulong_t)scratch_end); 1685843e1988Sjohnlev #endif 1686ae115bc7Smrj (void) memset((void *)(uintptr_t)best, 0, size); 1687ae115bc7Smrj return ((void *)(uintptr_t)best); 1688ae115bc7Smrj } 1689ae115bc7Smrj 1690ae115bc7Smrj void * 1691ae115bc7Smrj mem_alloc(uint32_t size) 1692ae115bc7Smrj { 1693ae115bc7Smrj return (do_mem_alloc(size, MMU_PAGESIZE)); 1694ae115bc7Smrj } 1695ae115bc7Smrj 1696ae115bc7Smrj 1697ae115bc7Smrj /* 1698ae115bc7Smrj * Build page tables to map all of memory used so far as well as the kernel. 1699ae115bc7Smrj */ 1700ae115bc7Smrj static void 1701ae115bc7Smrj build_page_tables(void) 1702ae115bc7Smrj { 1703ae115bc7Smrj uint32_t psize; 1704ae115bc7Smrj uint32_t level; 1705ae115bc7Smrj uint32_t off; 1706ae115bc7Smrj uint64_t start; 1707843e1988Sjohnlev #if !defined(__xpv) 1708843e1988Sjohnlev uint32_t i; 1709ae115bc7Smrj uint64_t end; 1710843e1988Sjohnlev #endif /* __xpv */ 1711ae115bc7Smrj 1712ae115bc7Smrj /* 1713843e1988Sjohnlev * If we're on metal, we need to create the top level pagetable. 1714ae115bc7Smrj */ 1715843e1988Sjohnlev #if defined(__xpv) 1716843e1988Sjohnlev top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base; 1717843e1988Sjohnlev #else /* __xpv */ 1718ae115bc7Smrj top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE); 1719843e1988Sjohnlev #endif /* __xpv */ 1720ae115bc7Smrj DBG((uintptr_t)top_page_table); 1721ae115bc7Smrj 1722ae115bc7Smrj /* 1723ae115bc7Smrj * Determine if we'll use large mappings for kernel, then map it. 1724ae115bc7Smrj */ 1725ae115bc7Smrj if (largepage_support) { 1726ae115bc7Smrj psize = lpagesize; 1727ae115bc7Smrj level = 1; 1728ae115bc7Smrj } else { 1729ae115bc7Smrj psize = MMU_PAGESIZE; 1730ae115bc7Smrj level = 0; 1731ae115bc7Smrj } 1732ae115bc7Smrj 1733ae115bc7Smrj DBG_MSG("Mapping kernel\n"); 1734ae115bc7Smrj DBG(ktext_phys); 1735ae115bc7Smrj DBG(target_kernel_text); 1736ae115bc7Smrj DBG(ksize); 1737ae115bc7Smrj DBG(psize); 1738ae115bc7Smrj for (off = 0; off < ksize; off += psize) 1739ae115bc7Smrj map_pa_at_va(ktext_phys + off, target_kernel_text + off, level); 1740ae115bc7Smrj 1741ae115bc7Smrj /* 1742ae115bc7Smrj * The kernel will need a 1 page window to work with page tables 1743ae115bc7Smrj */ 1744ae115bc7Smrj bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE); 1745ae115bc7Smrj DBG(bi->bi_pt_window); 1746ae115bc7Smrj bi->bi_pte_to_pt_window = 1747ae115bc7Smrj (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0); 1748ae115bc7Smrj DBG(bi->bi_pte_to_pt_window); 1749ae115bc7Smrj 1750843e1988Sjohnlev #if defined(__xpv) 1751843e1988Sjohnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) { 1752843e1988Sjohnlev /* If this is a domU we're done. */ 1753843e1988Sjohnlev DBG_MSG("\nPage tables constructed\n"); 1754843e1988Sjohnlev return; 1755843e1988Sjohnlev } 1756843e1988Sjohnlev #endif /* __xpv */ 1757843e1988Sjohnlev 1758ae115bc7Smrj /* 1759843e1988Sjohnlev * We need 1:1 mappings for the lower 1M of memory to access 1760843e1988Sjohnlev * BIOS tables used by a couple of drivers during boot. 1761ae115bc7Smrj * 1762843e1988Sjohnlev * The following code works because our simple memory allocator 1763843e1988Sjohnlev * only grows usage in an upwards direction. 1764ae115bc7Smrj * 1765843e1988Sjohnlev * Note that by this point in boot some mappings for low memory 1766843e1988Sjohnlev * may already exist because we've already accessed device in low 1767843e1988Sjohnlev * memory. (Specifically the video frame buffer and keyboard 1768843e1988Sjohnlev * status ports.) If we're booting on raw hardware then GRUB 1769843e1988Sjohnlev * created these mappings for us. If we're booting under a 1770843e1988Sjohnlev * hypervisor then we went ahead and remapped these devices into 1771843e1988Sjohnlev * memory allocated within dboot itself. 1772843e1988Sjohnlev */ 1773843e1988Sjohnlev if (map_debug) 1774843e1988Sjohnlev dboot_printf("1:1 map pa=0..1Meg\n"); 1775843e1988Sjohnlev for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) { 1776843e1988Sjohnlev #if defined(__xpv) 1777843e1988Sjohnlev map_ma_at_va(start, start, 0); 1778843e1988Sjohnlev #else /* __xpv */ 1779843e1988Sjohnlev map_pa_at_va(start, start, 0); 1780843e1988Sjohnlev #endif /* __xpv */ 1781843e1988Sjohnlev } 1782843e1988Sjohnlev 1783843e1988Sjohnlev #if !defined(__xpv) 1784ae115bc7Smrj for (i = 0; i < memlists_used; ++i) { 1785ae115bc7Smrj start = memlists[i].addr; 1786ae115bc7Smrj 1787ae115bc7Smrj end = start + memlists[i].size; 1788ae115bc7Smrj 1789ae115bc7Smrj if (map_debug) 1790ae115bc7Smrj dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n", 1791ae115bc7Smrj start, end); 1792ae115bc7Smrj while (start < end && start < next_avail_addr) { 1793ae115bc7Smrj map_pa_at_va(start, start, 0); 1794ae115bc7Smrj start += MMU_PAGESIZE; 1795ae115bc7Smrj } 1796ae115bc7Smrj } 1797843e1988Sjohnlev #endif /* !__xpv */ 1798ae115bc7Smrj 1799ae115bc7Smrj DBG_MSG("\nPage tables constructed\n"); 1800ae115bc7Smrj } 1801ae115bc7Smrj 1802ae115bc7Smrj #define NO_MULTIBOOT \ 1803ae115bc7Smrj "multiboot is no longer used to boot the Solaris Operating System.\n\ 1804ae115bc7Smrj The grub entry should be changed to:\n\ 1805ae115bc7Smrj kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\ 1806ae115bc7Smrj module$ /platform/i86pc/$ISADIR/boot_archive\n\ 1807654b400cSJoshua M. Clulow See http://illumos.org/msg/SUNOS-8000-AK for details.\n" 1808ae115bc7Smrj 1809d2670fc4SToomas Soome static void 1810d2670fc4SToomas Soome dboot_init_xboot_consinfo(void) 1811d2670fc4SToomas Soome { 1812d2670fc4SToomas Soome uintptr_t addr; 1813d2670fc4SToomas Soome /* 1814d2670fc4SToomas Soome * boot info must be 16 byte aligned for 64 bit kernel ABI 1815d2670fc4SToomas Soome */ 1816d2670fc4SToomas Soome addr = (uintptr_t)boot_info; 1817d2670fc4SToomas Soome addr = (addr + 0xf) & ~0xf; 1818d2670fc4SToomas Soome bi = (struct xboot_info *)addr; 1819d2670fc4SToomas Soome 1820d2670fc4SToomas Soome #if !defined(__xpv) 1821d2670fc4SToomas Soome switch (multiboot_version) { 1822d2670fc4SToomas Soome case 1: 1823d2670fc4SToomas Soome dboot_multiboot1_xboot_consinfo(); 1824d2670fc4SToomas Soome break; 1825d2670fc4SToomas Soome case 2: 1826d2670fc4SToomas Soome dboot_multiboot2_xboot_consinfo(); 1827d2670fc4SToomas Soome break; 1828d2670fc4SToomas Soome default: 1829d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1830d2670fc4SToomas Soome multiboot_version); 1831d2670fc4SToomas Soome break; 1832d2670fc4SToomas Soome } 1833*b9a86732SToomas Soome /* 1834*b9a86732SToomas Soome * Lookup environment module for the console. Complete module list 1835*b9a86732SToomas Soome * will be built after console setup. 1836*b9a86732SToomas Soome */ 1837*b9a86732SToomas Soome dboot_find_env(); 1838d2670fc4SToomas Soome #endif 1839d2670fc4SToomas Soome } 1840d2670fc4SToomas Soome 1841d2670fc4SToomas Soome /* 1842d2670fc4SToomas Soome * Set up basic data from the boot loader. 1843d2670fc4SToomas Soome * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support 1844d2670fc4SToomas Soome * 32-bit dboot code setup used to set up and start 64-bit kernel. 1845d2670fc4SToomas Soome * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and 1846d2670fc4SToomas Soome * start 64-bit illumos kernel. 1847d2670fc4SToomas Soome */ 1848d2670fc4SToomas Soome static void 1849d2670fc4SToomas Soome dboot_loader_init(void) 1850d2670fc4SToomas Soome { 1851d2670fc4SToomas Soome #if !defined(__xpv) 1852d2670fc4SToomas Soome mb_info = NULL; 1853d2670fc4SToomas Soome mb2_info = NULL; 1854d2670fc4SToomas Soome 1855d2670fc4SToomas Soome switch (mb_magic) { 1856d2670fc4SToomas Soome case MB_BOOTLOADER_MAGIC: 1857d2670fc4SToomas Soome multiboot_version = 1; 1858d2670fc4SToomas Soome mb_info = (multiboot_info_t *)(uintptr_t)mb_addr; 1859d2670fc4SToomas Soome #if defined(_BOOT_TARGET_amd64) 1860d2670fc4SToomas Soome load_addr = mb_header.load_addr; 1861d2670fc4SToomas Soome #endif 1862d2670fc4SToomas Soome break; 1863d2670fc4SToomas Soome 1864d2670fc4SToomas Soome case MULTIBOOT2_BOOTLOADER_MAGIC: 1865d2670fc4SToomas Soome multiboot_version = 2; 1866d2670fc4SToomas Soome mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr; 1867d2670fc4SToomas Soome mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info); 1868d2670fc4SToomas Soome #if defined(_BOOT_TARGET_amd64) 1869d2670fc4SToomas Soome load_addr = mb2_load_addr; 1870d2670fc4SToomas Soome #endif 1871d2670fc4SToomas Soome break; 1872d2670fc4SToomas Soome 1873d2670fc4SToomas Soome default: 1874d2670fc4SToomas Soome dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic); 1875d2670fc4SToomas Soome break; 1876d2670fc4SToomas Soome } 1877d2670fc4SToomas Soome #endif /* !defined(__xpv) */ 1878d2670fc4SToomas Soome } 1879d2670fc4SToomas Soome 1880d2670fc4SToomas Soome /* Extract the kernel command line from [multi]boot information. */ 1881d2670fc4SToomas Soome static char * 1882d2670fc4SToomas Soome dboot_loader_cmdline(void) 1883d2670fc4SToomas Soome { 1884d2670fc4SToomas Soome char *line = NULL; 1885d2670fc4SToomas Soome 1886d2670fc4SToomas Soome #if defined(__xpv) 1887d2670fc4SToomas Soome line = (char *)xen_info->cmd_line; 1888d2670fc4SToomas Soome #else /* __xpv */ 1889d2670fc4SToomas Soome 1890d2670fc4SToomas Soome switch (multiboot_version) { 1891d2670fc4SToomas Soome case 1: 1892d2670fc4SToomas Soome if (mb_info->flags & MB_INFO_CMDLINE) 1893d2670fc4SToomas Soome line = (char *)mb_info->cmdline; 1894d2670fc4SToomas Soome break; 1895d2670fc4SToomas Soome 1896d2670fc4SToomas Soome case 2: 1897d2670fc4SToomas Soome line = dboot_multiboot2_cmdline(mb2_info); 1898d2670fc4SToomas Soome break; 1899d2670fc4SToomas Soome 1900d2670fc4SToomas Soome default: 1901d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1902d2670fc4SToomas Soome multiboot_version); 1903d2670fc4SToomas Soome break; 1904d2670fc4SToomas Soome } 1905d2670fc4SToomas Soome 1906d2670fc4SToomas Soome #endif /* __xpv */ 1907d2670fc4SToomas Soome 1908d2670fc4SToomas Soome /* 1909d2670fc4SToomas Soome * Make sure we have valid pointer so the string operations 1910d2670fc4SToomas Soome * will not crash us. 1911d2670fc4SToomas Soome */ 1912d2670fc4SToomas Soome if (line == NULL) 1913d2670fc4SToomas Soome line = ""; 1914d2670fc4SToomas Soome 1915d2670fc4SToomas Soome return (line); 1916d2670fc4SToomas Soome } 1917d2670fc4SToomas Soome 1918d2670fc4SToomas Soome static char * 1919d2670fc4SToomas Soome dboot_loader_name(void) 1920d2670fc4SToomas Soome { 1921d2670fc4SToomas Soome #if defined(__xpv) 1922d2670fc4SToomas Soome return (NULL); 1923d2670fc4SToomas Soome #else /* __xpv */ 1924d2670fc4SToomas Soome multiboot_tag_string_t *tag; 1925d2670fc4SToomas Soome 1926d2670fc4SToomas Soome switch (multiboot_version) { 1927d2670fc4SToomas Soome case 1: 1928d2670fc4SToomas Soome return ((char *)mb_info->boot_loader_name); 1929d2670fc4SToomas Soome 1930d2670fc4SToomas Soome case 2: 1931d2670fc4SToomas Soome tag = dboot_multiboot2_find_tag(mb2_info, 1932d2670fc4SToomas Soome MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME); 1933d2670fc4SToomas Soome return (tag->mb_string); 1934d2670fc4SToomas Soome default: 1935d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 1936d2670fc4SToomas Soome multiboot_version); 1937d2670fc4SToomas Soome break; 1938d2670fc4SToomas Soome } 1939d2670fc4SToomas Soome 1940d2670fc4SToomas Soome return (NULL); 1941d2670fc4SToomas Soome #endif /* __xpv */ 1942d2670fc4SToomas Soome } 1943ae115bc7Smrj /* 1944ae115bc7Smrj * startup_kernel has a pretty simple job. It builds pagetables which reflect 1945ae115bc7Smrj * 1:1 mappings for all memory in use. It then also adds mappings for 1946ae115bc7Smrj * the kernel nucleus at virtual address of target_kernel_text using large page 1947ae115bc7Smrj * mappings. The page table pages are also accessible at 1:1 mapped 1948ae115bc7Smrj * virtual addresses. 1949ae115bc7Smrj */ 1950ae115bc7Smrj /*ARGSUSED*/ 1951ae115bc7Smrj void 1952ae115bc7Smrj startup_kernel(void) 1953ae115bc7Smrj { 1954ae115bc7Smrj char *cmdline; 1955d2670fc4SToomas Soome char *bootloader; 1956843e1988Sjohnlev #if defined(__xpv) 1957843e1988Sjohnlev physdev_set_iopl_t set_iopl; 1958843e1988Sjohnlev #endif /* __xpv */ 1959ae115bc7Smrj 1960d2670fc4SToomas Soome dboot_loader_init(); 1961ae115bc7Smrj /* 1962ae115bc7Smrj * At this point we are executing in a 32 bit real mode. 1963ae115bc7Smrj */ 1964d2670fc4SToomas Soome 1965d2670fc4SToomas Soome bootloader = dboot_loader_name(); 1966d2670fc4SToomas Soome cmdline = dboot_loader_cmdline(); 1967843e1988Sjohnlev 1968843e1988Sjohnlev #if defined(__xpv) 1969843e1988Sjohnlev /* 1970843e1988Sjohnlev * For dom0, before we initialize the console subsystem we'll 1971843e1988Sjohnlev * need to enable io operations, so set I/O priveldge level to 1. 1972843e1988Sjohnlev */ 1973843e1988Sjohnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) { 1974843e1988Sjohnlev set_iopl.iopl = 1; 1975843e1988Sjohnlev (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); 1976843e1988Sjohnlev } 1977843e1988Sjohnlev #endif /* __xpv */ 1978843e1988Sjohnlev 1979d2670fc4SToomas Soome dboot_init_xboot_consinfo(); 1980d2670fc4SToomas Soome bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline; 1981*b9a86732SToomas Soome bcons_init(bi); 1982*b9a86732SToomas Soome 1983*b9a86732SToomas Soome prom_debug = (find_boot_prop("prom_debug") != NULL); 1984*b9a86732SToomas Soome map_debug = (find_boot_prop("map_debug") != NULL); 1985d2670fc4SToomas Soome 1986d2670fc4SToomas Soome #if !defined(__xpv) 1987d2670fc4SToomas Soome dboot_multiboot_get_fwtables(); 1988d2670fc4SToomas Soome #endif 1989d2670fc4SToomas Soome DBG_MSG("\n\nillumos prekernel set: "); 1990ae115bc7Smrj DBG_MSG(cmdline); 1991ae115bc7Smrj DBG_MSG("\n"); 1992ae115bc7Smrj 1993d2670fc4SToomas Soome if (bootloader != NULL && prom_debug) { 1994d2670fc4SToomas Soome dboot_printf("Kernel loaded by: %s\n", bootloader); 1995d2670fc4SToomas Soome #if !defined(__xpv) 1996d2670fc4SToomas Soome dboot_printf("Using multiboot %d boot protocol.\n", 1997d2670fc4SToomas Soome multiboot_version); 1998d2670fc4SToomas Soome #endif 1999d2670fc4SToomas Soome } 2000d2670fc4SToomas Soome 2001ae115bc7Smrj if (strstr(cmdline, "multiboot") != NULL) { 2002ae115bc7Smrj dboot_panic(NO_MULTIBOOT); 2003ae115bc7Smrj } 2004ae115bc7Smrj 2005ae115bc7Smrj DBG((uintptr_t)bi); 2006d2670fc4SToomas Soome #if !defined(__xpv) 2007d2670fc4SToomas Soome DBG((uintptr_t)mb_info); 2008d2670fc4SToomas Soome DBG((uintptr_t)mb2_info); 2009d2670fc4SToomas Soome if (mb2_info != NULL) 2010d2670fc4SToomas Soome DBG(mb2_info->mbi_total_size); 2011d2670fc4SToomas Soome DBG(bi->bi_acpi_rsdp); 2012d2670fc4SToomas Soome #endif 2013ae115bc7Smrj 2014ae115bc7Smrj /* 2015ae115bc7Smrj * Need correct target_kernel_text value 2016ae115bc7Smrj */ 2017ae115bc7Smrj #if defined(_BOOT_TARGET_amd64) 2018ae115bc7Smrj target_kernel_text = KERNEL_TEXT_amd64; 2019843e1988Sjohnlev #elif defined(__xpv) 2020843e1988Sjohnlev target_kernel_text = KERNEL_TEXT_i386_xpv; 2021ae115bc7Smrj #else 2022ae115bc7Smrj target_kernel_text = KERNEL_TEXT_i386; 2023ae115bc7Smrj #endif 2024ae115bc7Smrj DBG(target_kernel_text); 2025ae115bc7Smrj 2026843e1988Sjohnlev #if defined(__xpv) 2027843e1988Sjohnlev 2028843e1988Sjohnlev /* 2029843e1988Sjohnlev * XXPV Derive this stuff from CPUID / what the hypervisor has enabled 2030843e1988Sjohnlev */ 2031843e1988Sjohnlev 2032843e1988Sjohnlev #if defined(_BOOT_TARGET_amd64) 2033843e1988Sjohnlev /* 2034843e1988Sjohnlev * 64-bit hypervisor. 2035843e1988Sjohnlev */ 2036843e1988Sjohnlev amd64_support = 1; 2037843e1988Sjohnlev pae_support = 1; 2038843e1988Sjohnlev 2039843e1988Sjohnlev #else /* _BOOT_TARGET_amd64 */ 2040843e1988Sjohnlev 2041843e1988Sjohnlev /* 2042843e1988Sjohnlev * See if we are running on a PAE Hypervisor 2043843e1988Sjohnlev */ 2044843e1988Sjohnlev { 2045843e1988Sjohnlev xen_capabilities_info_t caps; 2046843e1988Sjohnlev 2047843e1988Sjohnlev if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0) 2048843e1988Sjohnlev dboot_panic("HYPERVISOR_xen_version(caps) failed"); 2049843e1988Sjohnlev caps[sizeof (caps) - 1] = 0; 2050843e1988Sjohnlev if (prom_debug) 2051843e1988Sjohnlev dboot_printf("xen capabilities %s\n", caps); 2052843e1988Sjohnlev if (strstr(caps, "x86_32p") != NULL) 2053843e1988Sjohnlev pae_support = 1; 2054843e1988Sjohnlev } 2055843e1988Sjohnlev 2056843e1988Sjohnlev #endif /* _BOOT_TARGET_amd64 */ 2057843e1988Sjohnlev { 2058843e1988Sjohnlev xen_platform_parameters_t p; 2059843e1988Sjohnlev 2060843e1988Sjohnlev if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0) 2061843e1988Sjohnlev dboot_panic("HYPERVISOR_xen_version(parms) failed"); 2062843e1988Sjohnlev DBG(p.virt_start); 2063843e1988Sjohnlev mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start); 2064843e1988Sjohnlev } 2065843e1988Sjohnlev 2066843e1988Sjohnlev /* 2067843e1988Sjohnlev * The hypervisor loads stuff starting at 1Gig 2068843e1988Sjohnlev */ 2069843e1988Sjohnlev mfn_base = ONE_GIG; 2070843e1988Sjohnlev DBG(mfn_base); 2071843e1988Sjohnlev 2072843e1988Sjohnlev /* 2073843e1988Sjohnlev * enable writable page table mode for the hypervisor 2074843e1988Sjohnlev */ 2075843e1988Sjohnlev if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2076843e1988Sjohnlev VMASST_TYPE_writable_pagetables) < 0) 2077843e1988Sjohnlev dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed"); 2078843e1988Sjohnlev 2079843e1988Sjohnlev /* 2080843e1988Sjohnlev * check for NX support 2081843e1988Sjohnlev */ 2082843e1988Sjohnlev if (pae_support) { 2083843e1988Sjohnlev uint32_t eax = 0x80000000; 2084843e1988Sjohnlev uint32_t edx = get_cpuid_edx(&eax); 2085843e1988Sjohnlev 2086843e1988Sjohnlev if (eax >= 0x80000001) { 2087843e1988Sjohnlev eax = 0x80000001; 2088843e1988Sjohnlev edx = get_cpuid_edx(&eax); 2089843e1988Sjohnlev if (edx & CPUID_AMD_EDX_NX) 2090843e1988Sjohnlev NX_support = 1; 2091843e1988Sjohnlev } 2092843e1988Sjohnlev } 2093843e1988Sjohnlev 2094843e1988Sjohnlev #if !defined(_BOOT_TARGET_amd64) 2095843e1988Sjohnlev 2096843e1988Sjohnlev /* 2097843e1988Sjohnlev * The 32-bit hypervisor uses segmentation to protect itself from 2098843e1988Sjohnlev * guests. This means when a guest attempts to install a flat 4GB 2099843e1988Sjohnlev * code or data descriptor the 32-bit hypervisor will protect itself 2100843e1988Sjohnlev * by silently shrinking the segment such that if the guest attempts 2101843e1988Sjohnlev * any access where the hypervisor lives a #gp fault is generated. 2102843e1988Sjohnlev * The problem is that some applications expect a full 4GB flat 2103843e1988Sjohnlev * segment for their current thread pointer and will use negative 2104843e1988Sjohnlev * offset segment wrap around to access data. TLS support in linux 2105843e1988Sjohnlev * brand is one example of this. 2106843e1988Sjohnlev * 2107843e1988Sjohnlev * The 32-bit hypervisor can catch the #gp fault in these cases 2108843e1988Sjohnlev * and emulate the access without passing the #gp fault to the guest 2109843e1988Sjohnlev * but only if VMASST_TYPE_4gb_segments is explicitly turned on. 2110843e1988Sjohnlev * Seems like this should have been the default. 2111843e1988Sjohnlev * Either way, we want the hypervisor -- and not Solaris -- to deal 2112843e1988Sjohnlev * to deal with emulating these accesses. 2113843e1988Sjohnlev */ 2114843e1988Sjohnlev if (HYPERVISOR_vm_assist(VMASST_CMD_enable, 2115843e1988Sjohnlev VMASST_TYPE_4gb_segments) < 0) 2116843e1988Sjohnlev dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed"); 2117843e1988Sjohnlev #endif /* !_BOOT_TARGET_amd64 */ 2118843e1988Sjohnlev 2119843e1988Sjohnlev #else /* __xpv */ 2120843e1988Sjohnlev 2121ae115bc7Smrj /* 2122ae115bc7Smrj * use cpuid to enable MMU features 2123ae115bc7Smrj */ 2124ae115bc7Smrj if (have_cpuid()) { 2125ae115bc7Smrj uint32_t eax, edx; 2126ae115bc7Smrj 2127ae115bc7Smrj eax = 1; 2128ae115bc7Smrj edx = get_cpuid_edx(&eax); 2129ae115bc7Smrj if (edx & CPUID_INTC_EDX_PSE) 2130ae115bc7Smrj largepage_support = 1; 2131ae115bc7Smrj if (edx & CPUID_INTC_EDX_PGE) 2132ae115bc7Smrj pge_support = 1; 2133ae115bc7Smrj if (edx & CPUID_INTC_EDX_PAE) 2134ae115bc7Smrj pae_support = 1; 2135ae115bc7Smrj 2136ae115bc7Smrj eax = 0x80000000; 2137ae115bc7Smrj edx = get_cpuid_edx(&eax); 2138ae115bc7Smrj if (eax >= 0x80000001) { 2139ae115bc7Smrj eax = 0x80000001; 2140ae115bc7Smrj edx = get_cpuid_edx(&eax); 2141ae115bc7Smrj if (edx & CPUID_AMD_EDX_LM) 2142ae115bc7Smrj amd64_support = 1; 2143ae115bc7Smrj if (edx & CPUID_AMD_EDX_NX) 2144ae115bc7Smrj NX_support = 1; 2145ae115bc7Smrj } 2146ae115bc7Smrj } else { 2147ae115bc7Smrj dboot_printf("cpuid not supported\n"); 2148ae115bc7Smrj } 2149843e1988Sjohnlev #endif /* __xpv */ 2150843e1988Sjohnlev 2151ae115bc7Smrj 2152ae115bc7Smrj #if defined(_BOOT_TARGET_amd64) 2153ae115bc7Smrj if (amd64_support == 0) 2154843e1988Sjohnlev dboot_panic("long mode not supported, rebooting"); 2155ae115bc7Smrj else if (pae_support == 0) 2156843e1988Sjohnlev dboot_panic("long mode, but no PAE; rebooting"); 2157843e1988Sjohnlev #else 2158843e1988Sjohnlev /* 2159843e1988Sjohnlev * Allow the command line to over-ride use of PAE for 32 bit. 2160843e1988Sjohnlev */ 2161843e1988Sjohnlev if (strstr(cmdline, "disablePAE=true") != NULL) { 2162843e1988Sjohnlev pae_support = 0; 2163843e1988Sjohnlev NX_support = 0; 2164843e1988Sjohnlev amd64_support = 0; 2165843e1988Sjohnlev } 2166ae115bc7Smrj #endif 2167ae115bc7Smrj 2168ae115bc7Smrj /* 2169843e1988Sjohnlev * initialize the simple memory allocator 2170ae115bc7Smrj */ 2171ae115bc7Smrj init_mem_alloc(); 2172ae115bc7Smrj 2173843e1988Sjohnlev #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64) 2174843e1988Sjohnlev /* 2175843e1988Sjohnlev * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory 2176843e1988Sjohnlev */ 2177843e1988Sjohnlev if (max_mem < FOUR_GIG && NX_support == 0) 2178843e1988Sjohnlev pae_support = 0; 2179843e1988Sjohnlev #endif 2180843e1988Sjohnlev 2181ae115bc7Smrj /* 2182ae115bc7Smrj * configure mmu information 2183ae115bc7Smrj */ 2184843e1988Sjohnlev if (pae_support) { 2185ae115bc7Smrj shift_amt = shift_amt_pae; 2186ae115bc7Smrj ptes_per_table = 512; 2187ae115bc7Smrj pte_size = 8; 2188ae115bc7Smrj lpagesize = TWO_MEG; 2189ae115bc7Smrj #if defined(_BOOT_TARGET_amd64) 2190ae115bc7Smrj top_level = 3; 2191ae115bc7Smrj #else 2192ae115bc7Smrj top_level = 2; 2193ae115bc7Smrj #endif 2194ae115bc7Smrj } else { 2195ae115bc7Smrj pae_support = 0; 2196ae115bc7Smrj NX_support = 0; 2197ae115bc7Smrj shift_amt = shift_amt_nopae; 2198ae115bc7Smrj ptes_per_table = 1024; 2199ae115bc7Smrj pte_size = 4; 2200ae115bc7Smrj lpagesize = FOUR_MEG; 2201ae115bc7Smrj top_level = 1; 2202ae115bc7Smrj } 2203ae115bc7Smrj 2204ae115bc7Smrj DBG(pge_support); 2205ae115bc7Smrj DBG(NX_support); 2206ae115bc7Smrj DBG(largepage_support); 2207ae115bc7Smrj DBG(amd64_support); 2208ae115bc7Smrj DBG(top_level); 2209ae115bc7Smrj DBG(pte_size); 2210ae115bc7Smrj DBG(ptes_per_table); 2211ae115bc7Smrj DBG(lpagesize); 2212ae115bc7Smrj 2213843e1988Sjohnlev #if defined(__xpv) 2214843e1988Sjohnlev ktext_phys = ONE_GIG; /* from UNIX Mapfile */ 2215843e1988Sjohnlev #else 2216ae115bc7Smrj ktext_phys = FOUR_MEG; /* from UNIX Mapfile */ 2217843e1988Sjohnlev #endif 2218ae115bc7Smrj 2219843e1988Sjohnlev #if !defined(__xpv) && defined(_BOOT_TARGET_amd64) 2220ae115bc7Smrj /* 2221ae115bc7Smrj * For grub, copy kernel bits from the ELF64 file to final place. 2222ae115bc7Smrj */ 2223ae115bc7Smrj DBG_MSG("\nAllocating nucleus pages.\n"); 2224ae115bc7Smrj ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG); 2225ae115bc7Smrj if (ktext_phys == 0) 2226843e1988Sjohnlev dboot_panic("failed to allocate aligned kernel memory"); 2227d2670fc4SToomas Soome DBG(load_addr); 2228d2670fc4SToomas Soome if (dboot_elfload64(load_addr) != 0) 2229843e1988Sjohnlev dboot_panic("failed to parse kernel ELF image, rebooting"); 2230ae115bc7Smrj #endif 2231843e1988Sjohnlev 2232ae115bc7Smrj DBG(ktext_phys); 2233ae115bc7Smrj 2234ae115bc7Smrj /* 2235ae115bc7Smrj * Allocate page tables. 2236ae115bc7Smrj */ 2237ae115bc7Smrj build_page_tables(); 2238ae115bc7Smrj 2239ae115bc7Smrj /* 2240ae115bc7Smrj * return to assembly code to switch to running kernel 2241ae115bc7Smrj */ 2242ae115bc7Smrj entry_addr_low = (uint32_t)target_kernel_text; 2243ae115bc7Smrj DBG(entry_addr_low); 2244ae115bc7Smrj bi->bi_use_largepage = largepage_support; 2245ae115bc7Smrj bi->bi_use_pae = pae_support; 2246ae115bc7Smrj bi->bi_use_pge = pge_support; 2247ae115bc7Smrj bi->bi_use_nx = NX_support; 2248843e1988Sjohnlev 2249843e1988Sjohnlev #if defined(__xpv) 2250843e1988Sjohnlev 2251843e1988Sjohnlev bi->bi_next_paddr = next_avail_addr - mfn_base; 2252843e1988Sjohnlev DBG(bi->bi_next_paddr); 2253843e1988Sjohnlev bi->bi_next_vaddr = (native_ptr_t)next_avail_addr; 2254843e1988Sjohnlev DBG(bi->bi_next_vaddr); 2255843e1988Sjohnlev 2256843e1988Sjohnlev /* 2257843e1988Sjohnlev * unmap unused pages in start area to make them available for DMA 2258843e1988Sjohnlev */ 2259843e1988Sjohnlev while (next_avail_addr < scratch_end) { 2260843e1988Sjohnlev (void) HYPERVISOR_update_va_mapping(next_avail_addr, 2261843e1988Sjohnlev 0, UVMF_INVLPG | UVMF_LOCAL); 2262843e1988Sjohnlev next_avail_addr += MMU_PAGESIZE; 2263843e1988Sjohnlev } 2264843e1988Sjohnlev 2265843e1988Sjohnlev bi->bi_xen_start_info = (uintptr_t)xen_info; 2266843e1988Sjohnlev DBG((uintptr_t)HYPERVISOR_shared_info); 2267843e1988Sjohnlev bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info; 2268843e1988Sjohnlev bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base; 2269843e1988Sjohnlev 2270843e1988Sjohnlev #else /* __xpv */ 2271843e1988Sjohnlev 2272ae115bc7Smrj bi->bi_next_paddr = next_avail_addr; 2273ae115bc7Smrj DBG(bi->bi_next_paddr); 2274ae115bc7Smrj bi->bi_next_vaddr = (uintptr_t)next_avail_addr; 2275ae115bc7Smrj DBG(bi->bi_next_vaddr); 2276d2670fc4SToomas Soome bi->bi_mb_version = multiboot_version; 2277d2670fc4SToomas Soome 2278d2670fc4SToomas Soome switch (multiboot_version) { 2279d2670fc4SToomas Soome case 1: 2280ae115bc7Smrj bi->bi_mb_info = (uintptr_t)mb_info; 2281d2670fc4SToomas Soome break; 2282d2670fc4SToomas Soome case 2: 2283d2670fc4SToomas Soome bi->bi_mb_info = (uintptr_t)mb2_info; 2284d2670fc4SToomas Soome break; 2285d2670fc4SToomas Soome default: 2286d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n", 2287d2670fc4SToomas Soome multiboot_version); 2288d2670fc4SToomas Soome break; 2289d2670fc4SToomas Soome } 2290ae115bc7Smrj bi->bi_top_page_table = (uintptr_t)top_page_table; 2291ae115bc7Smrj 2292843e1988Sjohnlev #endif /* __xpv */ 2293843e1988Sjohnlev 2294ae115bc7Smrj bi->bi_kseg_size = FOUR_MEG; 2295ae115bc7Smrj DBG(bi->bi_kseg_size); 2296ae115bc7Smrj 229715ba2a79SSherry Moore #ifndef __xpv 2298f34a7178SJoe Bonasera if (map_debug) 229919397407SSherry Moore dump_tables(); 230015ba2a79SSherry Moore #endif 230119397407SSherry Moore 2302ae115bc7Smrj DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n"); 2303ae115bc7Smrj } 2304