1ae115bc7Smrj /*
2ae115bc7Smrj * CDDL HEADER START
3ae115bc7Smrj *
4ae115bc7Smrj * The contents of this file are subject to the terms of the
5ae115bc7Smrj * Common Development and Distribution License (the "License").
6ae115bc7Smrj * You may not use this file except in compliance with the License.
7ae115bc7Smrj *
8ae115bc7Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj * See the License for the specific language governing permissions
11ae115bc7Smrj * and limitations under the License.
12ae115bc7Smrj *
13ae115bc7Smrj * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj *
19ae115bc7Smrj * CDDL HEADER END
20ae115bc7Smrj */
21ae115bc7Smrj
22ae115bc7Smrj /*
23f34a7178SJoe Bonasera * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24ae115bc7Smrj * Use is subject to license terms.
25e65d07eeSKeith Wesolowski *
260181461bSKeith M Wesolowski * Copyright 2013 Joyent, Inc. All rights reserved.
27ae115bc7Smrj */
28ae115bc7Smrj
29ae115bc7Smrj
30ae115bc7Smrj #include <sys/types.h>
31ae115bc7Smrj #include <sys/machparam.h>
32ae115bc7Smrj #include <sys/x86_archext.h>
33ae115bc7Smrj #include <sys/systm.h>
34ae115bc7Smrj #include <sys/mach_mmu.h>
35ae115bc7Smrj #include <sys/multiboot.h>
36d2670fc4SToomas Soome #include <sys/multiboot2.h>
37d2670fc4SToomas Soome #include <sys/multiboot2_impl.h>
38d2670fc4SToomas Soome #include <sys/sysmacros.h>
39e65d07eeSKeith Wesolowski #include <sys/sha1.h>
400181461bSKeith M Wesolowski #include <util/string.h>
410181461bSKeith M Wesolowski #include <util/strtolctype.h>
42ae115bc7Smrj
43843e1988Sjohnlev #if defined(__xpv)
44843e1988Sjohnlev
45843e1988Sjohnlev #include <sys/hypervisor.h>
46843e1988Sjohnlev uintptr_t xen_virt_start;
47843e1988Sjohnlev pfn_t *mfn_to_pfn_mapping;
48843e1988Sjohnlev
49843e1988Sjohnlev #else /* !__xpv */
50843e1988Sjohnlev
51ae115bc7Smrj extern multiboot_header_t mb_header;
52d2670fc4SToomas Soome extern uint32_t mb2_load_addr;
53ae115bc7Smrj extern int have_cpuid(void);
54843e1988Sjohnlev
55843e1988Sjohnlev #endif /* !__xpv */
56ae115bc7Smrj
57ae115bc7Smrj #include <sys/inttypes.h>
58ae115bc7Smrj #include <sys/bootinfo.h>
59ae115bc7Smrj #include <sys/mach_mmu.h>
60ae115bc7Smrj #include <sys/boot_console.h>
61ae115bc7Smrj
62843e1988Sjohnlev #include "dboot_asm.h"
63ae115bc7Smrj #include "dboot_printf.h"
64ae115bc7Smrj #include "dboot_xboot.h"
65ae115bc7Smrj #include "dboot_elfload.h"
66ae115bc7Smrj
67e65d07eeSKeith Wesolowski #define SHA1_ASCII_LENGTH (SHA1_DIGEST_LENGTH * 2)
68e65d07eeSKeith Wesolowski
/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or a XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 *	- a 32 bit program for the 32-bit PV hypervisor
 *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running in real mode, all such memory is accessible.
 */
86ae115bc7Smrj
87ae115bc7Smrj /*
88ae115bc7Smrj * Standard bits used in PTE (page level) and PTP (internal levels)
89ae115bc7Smrj */
90843e1988Sjohnlev x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
91843e1988Sjohnlev x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;
92ae115bc7Smrj
93ae115bc7Smrj /*
94ae115bc7Smrj * This is the target addresses (physical) where the kernel text and data
95843e1988Sjohnlev * nucleus pages will be unpacked. On the hypervisor this is actually a
96843e1988Sjohnlev * virtual address.
97ae115bc7Smrj */
98ae115bc7Smrj paddr_t ktext_phys;
99ae115bc7Smrj uint32_t ksize = 2 * FOUR_MEG; /* kernel nucleus is 8Meg */
100ae115bc7Smrj
101ae115bc7Smrj static uint64_t target_kernel_text; /* value to use for KERNEL_TEXT */
102ae115bc7Smrj
103ae115bc7Smrj /*
104ae115bc7Smrj * The stack is setup in assembler before entering startup_kernel()
105ae115bc7Smrj */
106ae115bc7Smrj char stack_space[STACK_SIZE];
107ae115bc7Smrj
108ae115bc7Smrj /*
109ae115bc7Smrj * Used to track physical memory allocation
110ae115bc7Smrj */
111ae115bc7Smrj static paddr_t next_avail_addr = 0;
112ae115bc7Smrj
113843e1988Sjohnlev #if defined(__xpv)
114843e1988Sjohnlev /*
115843e1988Sjohnlev * Additional information needed for hypervisor memory allocation.
116843e1988Sjohnlev * Only memory up to scratch_end is mapped by page tables.
117843e1988Sjohnlev * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
118843e1988Sjohnlev * to derive a pfn from a pointer, you subtract mfn_base.
119843e1988Sjohnlev */
120843e1988Sjohnlev
121843e1988Sjohnlev static paddr_t scratch_end = 0; /* we can't write all of mem here */
122843e1988Sjohnlev static paddr_t mfn_base; /* addr corresponding to mfn_list[0] */
123843e1988Sjohnlev start_info_t *xen_info;
124843e1988Sjohnlev
125843e1988Sjohnlev #else /* __xpv */
126843e1988Sjohnlev
127843e1988Sjohnlev /*
128843e1988Sjohnlev * If on the metal, then we have a multiboot loader.
129843e1988Sjohnlev */
130d2670fc4SToomas Soome uint32_t mb_magic; /* magic from boot loader */
131d2670fc4SToomas Soome uint32_t mb_addr; /* multiboot info package from loader */
132d2670fc4SToomas Soome int multiboot_version;
133ae115bc7Smrj multiboot_info_t *mb_info;
134d2670fc4SToomas Soome multiboot2_info_header_t *mb2_info;
135d2670fc4SToomas Soome multiboot_tag_mmap_t *mb2_mmap_tagp;
136d2670fc4SToomas Soome int num_entries; /* mmap entry count */
137d2670fc4SToomas Soome boolean_t num_entries_set; /* is mmap entry count set */
138d2670fc4SToomas Soome uintptr_t load_addr;
139ae115bc7Smrj
140843e1988Sjohnlev #endif /* __xpv */
141843e1988Sjohnlev
142ae115bc7Smrj /*
143ae115bc7Smrj * This contains information passed to the kernel
144ae115bc7Smrj */
145ae115bc7Smrj struct xboot_info boot_info[2]; /* extra space to fix alignement for amd64 */
146ae115bc7Smrj struct xboot_info *bi;
147ae115bc7Smrj
148ae115bc7Smrj /*
149ae115bc7Smrj * Page table and memory stuff.
150ae115bc7Smrj */
151843e1988Sjohnlev static paddr_t max_mem; /* maximum memory address */
152ae115bc7Smrj
153ae115bc7Smrj /*
154ae115bc7Smrj * Information about processor MMU
155ae115bc7Smrj */
156ae115bc7Smrj int amd64_support = 0;
157ae115bc7Smrj int largepage_support = 0;
158ae115bc7Smrj int pae_support = 0;
159ae115bc7Smrj int pge_support = 0;
160ae115bc7Smrj int NX_support = 0;
161ae115bc7Smrj
162ae115bc7Smrj /*
163ae115bc7Smrj * Low 32 bits of kernel entry address passed back to assembler.
164ae115bc7Smrj * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
165ae115bc7Smrj */
166ae115bc7Smrj uint32_t entry_addr_low;
167ae115bc7Smrj
168ae115bc7Smrj /*
169ae115bc7Smrj * Memlists for the kernel. We shouldn't need a lot of these.
170ae115bc7Smrj */
171c9464e8bSjosephb #define MAX_MEMLIST (50)
172ae115bc7Smrj struct boot_memlist memlists[MAX_MEMLIST];
173ae115bc7Smrj uint_t memlists_used = 0;
174c9464e8bSjosephb struct boot_memlist pcimemlists[MAX_MEMLIST];
175c9464e8bSjosephb uint_t pcimemlists_used = 0;
1761de082f7SVikram Hegde struct boot_memlist rsvdmemlists[MAX_MEMLIST];
1771de082f7SVikram Hegde uint_t rsvdmemlists_used = 0;
178ae115bc7Smrj
1790181461bSKeith M Wesolowski /*
1800181461bSKeith M Wesolowski * This should match what's in the bootloader. It's arbitrary, but GRUB
1810181461bSKeith M Wesolowski * in particular has limitations on how much space it can use before it
1820181461bSKeith M Wesolowski * stops working properly. This should be enough.
1830181461bSKeith M Wesolowski */
1840181461bSKeith M Wesolowski struct boot_modules modules[MAX_BOOT_MODULES];
185ae115bc7Smrj uint_t modules_used = 0;
186ae115bc7Smrj
187d2670fc4SToomas Soome #ifdef __xpv
188d2670fc4SToomas Soome /*
189d2670fc4SToomas Soome * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
190d2670fc4SToomas Soome * definition in Xen source.
191d2670fc4SToomas Soome */
192d2670fc4SToomas Soome typedef struct {
193d2670fc4SToomas Soome uint32_t base_addr_low;
194d2670fc4SToomas Soome uint32_t base_addr_high;
195d2670fc4SToomas Soome uint32_t length_low;
196d2670fc4SToomas Soome uint32_t length_high;
197d2670fc4SToomas Soome uint32_t type;
198d2670fc4SToomas Soome } mmap_t;
199d2670fc4SToomas Soome
200d2670fc4SToomas Soome /*
201d2670fc4SToomas Soome * There is 512KB of scratch area after the boot stack page.
202d2670fc4SToomas Soome * We'll use that for everything except the kernel nucleus pages which are too
203d2670fc4SToomas Soome * big to fit there and are allocated last anyway.
204d2670fc4SToomas Soome */
205d2670fc4SToomas Soome #define MAXMAPS 100
206d2670fc4SToomas Soome static mmap_t map_buffer[MAXMAPS];
207d2670fc4SToomas Soome #else
208d2670fc4SToomas Soome typedef mb_memory_map_t mmap_t;
209d2670fc4SToomas Soome #endif
210d2670fc4SToomas Soome
211ae115bc7Smrj /*
212ae115bc7Smrj * Debugging macros
213ae115bc7Smrj */
214ae115bc7Smrj uint_t prom_debug = 0;
215ae115bc7Smrj uint_t map_debug = 0;
216ae115bc7Smrj
2170181461bSKeith M Wesolowski static char noname[2] = "-";
2180181461bSKeith M Wesolowski
219ae115bc7Smrj /*
220843e1988Sjohnlev * Either hypervisor-specific or grub-specific code builds the initial
221843e1988Sjohnlev * memlists. This code does the sort/merge/link for final use.
222ae115bc7Smrj */
223ae115bc7Smrj static void
sort_physinstall(void)224ae115bc7Smrj sort_physinstall(void)
225ae115bc7Smrj {
226ae115bc7Smrj int i;
227843e1988Sjohnlev #if !defined(__xpv)
228ae115bc7Smrj int j;
229ae115bc7Smrj struct boot_memlist tmp;
230ae115bc7Smrj
231ae115bc7Smrj /*
232ae115bc7Smrj * Now sort the memlists, in case they weren't in order.
233ae115bc7Smrj * Yeah, this is a bubble sort; small, simple and easy to get right.
234ae115bc7Smrj */
235ae115bc7Smrj DBG_MSG("Sorting phys-installed list\n");
236ae115bc7Smrj for (j = memlists_used - 1; j > 0; --j) {
237ae115bc7Smrj for (i = 0; i < j; ++i) {
238ae115bc7Smrj if (memlists[i].addr < memlists[i + 1].addr)
239ae115bc7Smrj continue;
240ae115bc7Smrj tmp = memlists[i];
241ae115bc7Smrj memlists[i] = memlists[i + 1];
242ae115bc7Smrj memlists[i + 1] = tmp;
243ae115bc7Smrj }
244ae115bc7Smrj }
245ae115bc7Smrj
246ae115bc7Smrj /*
247ae115bc7Smrj * Merge any memlists that don't have holes between them.
248ae115bc7Smrj */
249ae115bc7Smrj for (i = 0; i <= memlists_used - 1; ++i) {
250ae115bc7Smrj if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
251ae115bc7Smrj continue;
252ae115bc7Smrj
253ae115bc7Smrj if (prom_debug)
254ae115bc7Smrj dboot_printf(
255ae115bc7Smrj "merging mem segs %" PRIx64 "...%" PRIx64
256ae115bc7Smrj " w/ %" PRIx64 "...%" PRIx64 "\n",
257ae115bc7Smrj memlists[i].addr,
258ae115bc7Smrj memlists[i].addr + memlists[i].size,
259ae115bc7Smrj memlists[i + 1].addr,
260ae115bc7Smrj memlists[i + 1].addr + memlists[i + 1].size);
261ae115bc7Smrj
262ae115bc7Smrj memlists[i].size += memlists[i + 1].size;
263ae115bc7Smrj for (j = i + 1; j < memlists_used - 1; ++j)
264ae115bc7Smrj memlists[j] = memlists[j + 1];
265ae115bc7Smrj --memlists_used;
266ae115bc7Smrj DBG(memlists_used);
267ae115bc7Smrj --i; /* after merging we need to reexamine, so do this */
268ae115bc7Smrj }
269843e1988Sjohnlev #endif /* __xpv */
270ae115bc7Smrj
271ae115bc7Smrj if (prom_debug) {
272ae115bc7Smrj dboot_printf("\nFinal memlists:\n");
273ae115bc7Smrj for (i = 0; i < memlists_used; ++i) {
274ae115bc7Smrj dboot_printf("\t%d: addr=%" PRIx64 " size=%"
275ae115bc7Smrj PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
276ae115bc7Smrj }
277ae115bc7Smrj }
278ae115bc7Smrj
279ae115bc7Smrj /*
280ae115bc7Smrj * link together the memlists with native size pointers
281ae115bc7Smrj */
282ae115bc7Smrj memlists[0].next = 0;
283ae115bc7Smrj memlists[0].prev = 0;
284ae115bc7Smrj for (i = 1; i < memlists_used; ++i) {
285ae115bc7Smrj memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
286ae115bc7Smrj memlists[i].next = 0;
287ae115bc7Smrj memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
288ae115bc7Smrj }
289c909a41bSRichard Lowe bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
290ae115bc7Smrj DBG(bi->bi_phys_install);
291ae115bc7Smrj }
292ae115bc7Smrj
2931de082f7SVikram Hegde /*
2941de082f7SVikram Hegde * build bios reserved memlists
2951de082f7SVikram Hegde */
2961de082f7SVikram Hegde static void
build_rsvdmemlists(void)2971de082f7SVikram Hegde build_rsvdmemlists(void)
2981de082f7SVikram Hegde {
2991de082f7SVikram Hegde int i;
3001de082f7SVikram Hegde
3011de082f7SVikram Hegde rsvdmemlists[0].next = 0;
3021de082f7SVikram Hegde rsvdmemlists[0].prev = 0;
3031de082f7SVikram Hegde for (i = 1; i < rsvdmemlists_used; ++i) {
3041de082f7SVikram Hegde rsvdmemlists[i].prev =
3051de082f7SVikram Hegde (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
3061de082f7SVikram Hegde rsvdmemlists[i].next = 0;
3071de082f7SVikram Hegde rsvdmemlists[i - 1].next =
3081de082f7SVikram Hegde (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
3091de082f7SVikram Hegde }
310c909a41bSRichard Lowe bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
3111de082f7SVikram Hegde DBG(bi->bi_rsvdmem);
3121de082f7SVikram Hegde }
3131de082f7SVikram Hegde
314843e1988Sjohnlev #if defined(__xpv)
315843e1988Sjohnlev
316843e1988Sjohnlev /*
317843e1988Sjohnlev * halt on the hypervisor after a delay to drain console output
318843e1988Sjohnlev */
319843e1988Sjohnlev void
dboot_halt(void)320843e1988Sjohnlev dboot_halt(void)
321843e1988Sjohnlev {
322843e1988Sjohnlev uint_t i = 10000;
323843e1988Sjohnlev
324843e1988Sjohnlev while (--i)
325c1374a13SSurya Prakki (void) HYPERVISOR_yield();
326c1374a13SSurya Prakki (void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
327843e1988Sjohnlev }
328843e1988Sjohnlev
329843e1988Sjohnlev /*
330843e1988Sjohnlev * From a machine address, find the corresponding pseudo-physical address.
331843e1988Sjohnlev * Pseudo-physical address are contiguous and run from mfn_base in each VM.
332843e1988Sjohnlev * Machine addresses are the real underlying hardware addresses.
333843e1988Sjohnlev * These are needed for page table entries. Note that this routine is
334843e1988Sjohnlev * poorly protected. A bad value of "ma" will cause a page fault.
335843e1988Sjohnlev */
336843e1988Sjohnlev paddr_t
ma_to_pa(maddr_t ma)337843e1988Sjohnlev ma_to_pa(maddr_t ma)
338843e1988Sjohnlev {
339843e1988Sjohnlev ulong_t pgoff = ma & MMU_PAGEOFFSET;
340843e1988Sjohnlev ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
341843e1988Sjohnlev paddr_t pa;
342843e1988Sjohnlev
343843e1988Sjohnlev if (pfn >= xen_info->nr_pages)
344843e1988Sjohnlev return (-(paddr_t)1);
345843e1988Sjohnlev pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
346843e1988Sjohnlev #ifdef DEBUG
347843e1988Sjohnlev if (ma != pa_to_ma(pa))
348843e1988Sjohnlev dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
349843e1988Sjohnlev "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
350843e1988Sjohnlev #endif
351843e1988Sjohnlev return (pa);
352843e1988Sjohnlev }
353843e1988Sjohnlev
354843e1988Sjohnlev /*
355843e1988Sjohnlev * From a pseudo-physical address, find the corresponding machine address.
356843e1988Sjohnlev */
357843e1988Sjohnlev maddr_t
pa_to_ma(paddr_t pa)358843e1988Sjohnlev pa_to_ma(paddr_t pa)
359843e1988Sjohnlev {
360843e1988Sjohnlev pfn_t pfn;
361843e1988Sjohnlev ulong_t mfn;
362843e1988Sjohnlev
363843e1988Sjohnlev pfn = mmu_btop(pa - mfn_base);
364843e1988Sjohnlev if (pa < mfn_base || pfn >= xen_info->nr_pages)
365843e1988Sjohnlev dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
366843e1988Sjohnlev mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
367843e1988Sjohnlev #ifdef DEBUG
368843e1988Sjohnlev if (mfn_to_pfn_mapping[mfn] != pfn)
369843e1988Sjohnlev dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
370843e1988Sjohnlev pfn, mfn, mfn_to_pfn_mapping[mfn]);
371843e1988Sjohnlev #endif
372843e1988Sjohnlev return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
373843e1988Sjohnlev }
374843e1988Sjohnlev
375843e1988Sjohnlev #endif /* __xpv */
376843e1988Sjohnlev
377ae115bc7Smrj x86pte_t
get_pteval(paddr_t table,uint_t index)378ae115bc7Smrj get_pteval(paddr_t table, uint_t index)
379ae115bc7Smrj {
380ae115bc7Smrj if (pae_support)
381ae115bc7Smrj return (((x86pte_t *)(uintptr_t)table)[index]);
382ae115bc7Smrj return (((x86pte32_t *)(uintptr_t)table)[index]);
383ae115bc7Smrj }
384ae115bc7Smrj
385ae115bc7Smrj /*ARGSUSED*/
386ae115bc7Smrj void
set_pteval(paddr_t table,uint_t index,uint_t level,x86pte_t pteval)387ae115bc7Smrj set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
388ae115bc7Smrj {
389843e1988Sjohnlev #ifdef __xpv
390843e1988Sjohnlev mmu_update_t t;
391843e1988Sjohnlev maddr_t mtable = pa_to_ma(table);
392843e1988Sjohnlev int retcnt;
393843e1988Sjohnlev
394843e1988Sjohnlev t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
395843e1988Sjohnlev t.val = pteval;
396843e1988Sjohnlev if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
397843e1988Sjohnlev dboot_panic("HYPERVISOR_mmu_update() failed");
398843e1988Sjohnlev #else /* __xpv */
399ae115bc7Smrj uintptr_t tab_addr = (uintptr_t)table;
400ae115bc7Smrj
401ae115bc7Smrj if (pae_support)
402ae115bc7Smrj ((x86pte_t *)tab_addr)[index] = pteval;
403ae115bc7Smrj else
404ae115bc7Smrj ((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
405ae115bc7Smrj if (level == top_level && level == 2)
406ae115bc7Smrj reload_cr3();
407843e1988Sjohnlev #endif /* __xpv */
408ae115bc7Smrj }
409ae115bc7Smrj
410ae115bc7Smrj paddr_t
make_ptable(x86pte_t * pteval,uint_t level)411ae115bc7Smrj make_ptable(x86pte_t *pteval, uint_t level)
412ae115bc7Smrj {
413ae115bc7Smrj paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
414ae115bc7Smrj
415ae115bc7Smrj if (level == top_level && level == 2)
416ae115bc7Smrj *pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
417ae115bc7Smrj else
418ae115bc7Smrj *pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
419ae115bc7Smrj
420843e1988Sjohnlev #ifdef __xpv
421843e1988Sjohnlev /* Remove write permission to the new page table. */
422843e1988Sjohnlev if (HYPERVISOR_update_va_mapping(new_table,
423843e1988Sjohnlev *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
424843e1988Sjohnlev dboot_panic("HYP_update_va_mapping error");
425843e1988Sjohnlev #endif
426843e1988Sjohnlev
427ae115bc7Smrj if (map_debug)
428ae115bc7Smrj dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
429ae115bc7Smrj PRIx64 "\n", level, (ulong_t)new_table, *pteval);
430ae115bc7Smrj return (new_table);
431ae115bc7Smrj }
432ae115bc7Smrj
433ae115bc7Smrj x86pte_t *
map_pte(paddr_t table,uint_t index)434ae115bc7Smrj map_pte(paddr_t table, uint_t index)
435ae115bc7Smrj {
436ae115bc7Smrj return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
437ae115bc7Smrj }
438ae115bc7Smrj
43919397407SSherry Moore /*
44019397407SSherry Moore * dump out the contents of page tables...
44119397407SSherry Moore */
44219397407SSherry Moore static void
dump_tables(void)44319397407SSherry Moore dump_tables(void)
44419397407SSherry Moore {
44519397407SSherry Moore uint_t save_index[4]; /* for recursion */
44619397407SSherry Moore char *save_table[4]; /* for recursion */
44719397407SSherry Moore uint_t l;
44819397407SSherry Moore uint64_t va;
44919397407SSherry Moore uint64_t pgsize;
45019397407SSherry Moore int index;
45119397407SSherry Moore int i;
45219397407SSherry Moore x86pte_t pteval;
45319397407SSherry Moore char *table;
45419397407SSherry Moore static char *tablist = "\t\t\t";
45519397407SSherry Moore char *tabs = tablist + 3 - top_level;
45619397407SSherry Moore uint_t pa, pa1;
457843e1988Sjohnlev #if !defined(__xpv)
458843e1988Sjohnlev #define maddr_t paddr_t
459843e1988Sjohnlev #endif /* !__xpv */
460843e1988Sjohnlev
46119397407SSherry Moore dboot_printf("Finished pagetables:\n");
46219397407SSherry Moore table = (char *)(uintptr_t)top_page_table;
46319397407SSherry Moore l = top_level;
46419397407SSherry Moore va = 0;
46519397407SSherry Moore for (index = 0; index < ptes_per_table; ++index) {
46619397407SSherry Moore pgsize = 1ull << shift_amt[l];
46719397407SSherry Moore if (pae_support)
46819397407SSherry Moore pteval = ((x86pte_t *)table)[index];
46919397407SSherry Moore else
47019397407SSherry Moore pteval = ((x86pte32_t *)table)[index];
47119397407SSherry Moore if (pteval == 0)
47219397407SSherry Moore goto next_entry;
47319397407SSherry Moore
47419397407SSherry Moore dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
475c1374a13SSurya Prakki tabs + l, (void *)table, index, (uint64_t)pteval, va);
47619397407SSherry Moore pa = ma_to_pa(pteval & MMU_PAGEMASK);
47719397407SSherry Moore dboot_printf(" physaddr=%x\n", pa);
47819397407SSherry Moore
47919397407SSherry Moore /*
48019397407SSherry Moore * Don't try to walk hypervisor private pagetables
48119397407SSherry Moore */
48219397407SSherry Moore if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
48319397407SSherry Moore save_table[l] = table;
48419397407SSherry Moore save_index[l] = index;
48519397407SSherry Moore --l;
48619397407SSherry Moore index = -1;
48719397407SSherry Moore table = (char *)(uintptr_t)
48819397407SSherry Moore ma_to_pa(pteval & MMU_PAGEMASK);
48919397407SSherry Moore goto recursion;
49019397407SSherry Moore }
49119397407SSherry Moore
49219397407SSherry Moore /*
49319397407SSherry Moore * shorten dump for consecutive mappings
49419397407SSherry Moore */
49519397407SSherry Moore for (i = 1; index + i < ptes_per_table; ++i) {
49619397407SSherry Moore if (pae_support)
49719397407SSherry Moore pteval = ((x86pte_t *)table)[index + i];
49819397407SSherry Moore else
49919397407SSherry Moore pteval = ((x86pte32_t *)table)[index + i];
50019397407SSherry Moore if (pteval == 0)
50119397407SSherry Moore break;
50219397407SSherry Moore pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
50319397407SSherry Moore if (pa1 != pa + i * pgsize)
50419397407SSherry Moore break;
50519397407SSherry Moore }
50619397407SSherry Moore if (i > 2) {
50719397407SSherry Moore dboot_printf("%s...\n", tabs + l);
50819397407SSherry Moore va += pgsize * (i - 2);
50919397407SSherry Moore index += i - 2;
51019397407SSherry Moore }
51119397407SSherry Moore next_entry:
51219397407SSherry Moore va += pgsize;
51319397407SSherry Moore if (l == 3 && index == 256) /* VA hole */
51419397407SSherry Moore va = 0xffff800000000000ull;
51519397407SSherry Moore recursion:
51619397407SSherry Moore ;
51719397407SSherry Moore }
51819397407SSherry Moore if (l < top_level) {
51919397407SSherry Moore ++l;
52019397407SSherry Moore index = save_index[l];
52119397407SSherry Moore table = save_table[l];
52219397407SSherry Moore goto recursion;
52319397407SSherry Moore }
52419397407SSherry Moore }
52519397407SSherry Moore
526ae115bc7Smrj /*
527843e1988Sjohnlev * Add a mapping for the machine page at the given virtual address.
528ae115bc7Smrj */
529ae115bc7Smrj static void
map_ma_at_va(maddr_t ma,native_ptr_t va,uint_t level)530843e1988Sjohnlev map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
531ae115bc7Smrj {
532ae115bc7Smrj x86pte_t *ptep;
533ae115bc7Smrj x86pte_t pteval;
534ae115bc7Smrj
535843e1988Sjohnlev pteval = ma | pte_bits;
536ae115bc7Smrj if (level > 0)
537ae115bc7Smrj pteval |= PT_PAGESIZE;
538ae115bc7Smrj if (va >= target_kernel_text && pge_support)
539ae115bc7Smrj pteval |= PT_GLOBAL;
540ae115bc7Smrj
541843e1988Sjohnlev if (map_debug && ma != va)
542843e1988Sjohnlev dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
543ae115bc7Smrj " pte=0x%" PRIx64 " l=%d\n",
544843e1988Sjohnlev (uint64_t)ma, (uint64_t)va, pteval, level);
545843e1988Sjohnlev
546843e1988Sjohnlev #if defined(__xpv)
547843e1988Sjohnlev /*
548843e1988Sjohnlev * see if we can avoid find_pte() on the hypervisor
549843e1988Sjohnlev */
550843e1988Sjohnlev if (HYPERVISOR_update_va_mapping(va, pteval,
551843e1988Sjohnlev UVMF_INVLPG | UVMF_LOCAL) == 0)
552843e1988Sjohnlev return;
553843e1988Sjohnlev #endif
554ae115bc7Smrj
555ae115bc7Smrj /*
556ae115bc7Smrj * Find the pte that will map this address. This creates any
557ae115bc7Smrj * missing intermediate level page tables
558ae115bc7Smrj */
559ae115bc7Smrj ptep = find_pte(va, NULL, level, 0);
560ae115bc7Smrj
561ae115bc7Smrj /*
562843e1988Sjohnlev * When paravirtualized, we must use hypervisor calls to modify the
563843e1988Sjohnlev * PTE, since paging is active. On real hardware we just write to
564843e1988Sjohnlev * the pagetables which aren't in use yet.
565ae115bc7Smrj */
566843e1988Sjohnlev #if defined(__xpv)
567843e1988Sjohnlev ptep = ptep; /* shut lint up */
568843e1988Sjohnlev if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
569843e1988Sjohnlev dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
570843e1988Sjohnlev " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
571843e1988Sjohnlev (uint64_t)va, level, (uint64_t)ma, pteval);
572843e1988Sjohnlev #else
573ae115bc7Smrj if (va < 1024 * 1024)
574ae115bc7Smrj pteval |= PT_NOCACHE; /* for video RAM */
575ae115bc7Smrj if (pae_support)
576ae115bc7Smrj *ptep = pteval;
577ae115bc7Smrj else
578ae115bc7Smrj *((x86pte32_t *)ptep) = (x86pte32_t)pteval;
579843e1988Sjohnlev #endif
580ae115bc7Smrj }
581ae115bc7Smrj
582ae115bc7Smrj /*
583843e1988Sjohnlev * Add a mapping for the physical page at the given virtual address.
584ae115bc7Smrj */
585ae115bc7Smrj static void
map_pa_at_va(paddr_t pa,native_ptr_t va,uint_t level)586843e1988Sjohnlev map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
587ae115bc7Smrj {
588843e1988Sjohnlev map_ma_at_va(pa_to_ma(pa), va, level);
589ae115bc7Smrj }
590ae115bc7Smrj
591ae115bc7Smrj /*
592c9464e8bSjosephb * This is called to remove start..end from the
593c9464e8bSjosephb * possible range of PCI addresses.
594c9464e8bSjosephb */
595c9464e8bSjosephb const uint64_t pci_lo_limit = 0x00100000ul;
596c9464e8bSjosephb const uint64_t pci_hi_limit = 0xfff00000ul;
597c9464e8bSjosephb static void
exclude_from_pci(uint64_t start,uint64_t end)598c9464e8bSjosephb exclude_from_pci(uint64_t start, uint64_t end)
599c9464e8bSjosephb {
600c9464e8bSjosephb int i;
601c9464e8bSjosephb int j;
602c9464e8bSjosephb struct boot_memlist *ml;
603c9464e8bSjosephb
604c9464e8bSjosephb for (i = 0; i < pcimemlists_used; ++i) {
605c9464e8bSjosephb ml = &pcimemlists[i];
606c9464e8bSjosephb
607c9464e8bSjosephb /* delete the entire range? */
608c9464e8bSjosephb if (start <= ml->addr && ml->addr + ml->size <= end) {
609c9464e8bSjosephb --pcimemlists_used;
610c9464e8bSjosephb for (j = i; j < pcimemlists_used; ++j)
611c9464e8bSjosephb pcimemlists[j] = pcimemlists[j + 1];
612c9464e8bSjosephb --i; /* to revisit the new one at this index */
613c9464e8bSjosephb }
614c9464e8bSjosephb
615c9464e8bSjosephb /* split a range? */
616c9464e8bSjosephb else if (ml->addr < start && end < ml->addr + ml->size) {
617c9464e8bSjosephb
618c9464e8bSjosephb ++pcimemlists_used;
619c9464e8bSjosephb if (pcimemlists_used > MAX_MEMLIST)
620c9464e8bSjosephb dboot_panic("too many pcimemlists");
621c9464e8bSjosephb
622c9464e8bSjosephb for (j = pcimemlists_used - 1; j > i; --j)
623c9464e8bSjosephb pcimemlists[j] = pcimemlists[j - 1];
624c9464e8bSjosephb ml->size = start - ml->addr;
625c9464e8bSjosephb
626c9464e8bSjosephb ++ml;
627c9464e8bSjosephb ml->size = (ml->addr + ml->size) - end;
628c9464e8bSjosephb ml->addr = end;
629c9464e8bSjosephb ++i; /* skip on to next one */
630c9464e8bSjosephb }
631c9464e8bSjosephb
632c9464e8bSjosephb /* cut memory off the start? */
633c9464e8bSjosephb else if (ml->addr < end && end < ml->addr + ml->size) {
634c9464e8bSjosephb ml->size -= end - ml->addr;
635c9464e8bSjosephb ml->addr = end;
636c9464e8bSjosephb }
637c9464e8bSjosephb
638c9464e8bSjosephb /* cut memory off the end? */
639c9464e8bSjosephb else if (ml->addr <= start && start < ml->addr + ml->size) {
640c9464e8bSjosephb ml->size = start - ml->addr;
641c9464e8bSjosephb }
642c9464e8bSjosephb }
643c9464e8bSjosephb }
644c9464e8bSjosephb
645c9464e8bSjosephb /*
646d2670fc4SToomas Soome * During memory allocation, find the highest address not used yet.
647843e1988Sjohnlev */
648d2670fc4SToomas Soome static void
check_higher(paddr_t a)649d2670fc4SToomas Soome check_higher(paddr_t a)
650d2670fc4SToomas Soome {
651d2670fc4SToomas Soome if (a < next_avail_addr)
652d2670fc4SToomas Soome return;
653d2670fc4SToomas Soome next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
654d2670fc4SToomas Soome DBG(next_avail_addr);
655d2670fc4SToomas Soome }
656d2670fc4SToomas Soome
657d2670fc4SToomas Soome static int
dboot_loader_mmap_entries(void)658d2670fc4SToomas Soome dboot_loader_mmap_entries(void)
659d2670fc4SToomas Soome {
660d2670fc4SToomas Soome #if !defined(__xpv)
661d2670fc4SToomas Soome if (num_entries_set == B_TRUE)
662d2670fc4SToomas Soome return (num_entries);
663d2670fc4SToomas Soome
664d2670fc4SToomas Soome switch (multiboot_version) {
665d2670fc4SToomas Soome case 1:
666d2670fc4SToomas Soome DBG(mb_info->flags);
667d2670fc4SToomas Soome if (mb_info->flags & 0x40) {
668d2670fc4SToomas Soome mb_memory_map_t *mmap;
669d2670fc4SToomas Soome
670d2670fc4SToomas Soome DBG(mb_info->mmap_addr);
671d2670fc4SToomas Soome DBG(mb_info->mmap_length);
672d2670fc4SToomas Soome check_higher(mb_info->mmap_addr + mb_info->mmap_length);
673d2670fc4SToomas Soome
674d2670fc4SToomas Soome for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
675d2670fc4SToomas Soome (uint32_t)mmap < mb_info->mmap_addr +
676d2670fc4SToomas Soome mb_info->mmap_length;
677d2670fc4SToomas Soome mmap = (mb_memory_map_t *)((uint32_t)mmap +
678d2670fc4SToomas Soome mmap->size + sizeof (mmap->size)))
679d2670fc4SToomas Soome ++num_entries;
680d2670fc4SToomas Soome
681d2670fc4SToomas Soome num_entries_set = B_TRUE;
682d2670fc4SToomas Soome }
683d2670fc4SToomas Soome break;
684d2670fc4SToomas Soome case 2:
685d2670fc4SToomas Soome num_entries_set = B_TRUE;
686d2670fc4SToomas Soome num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
687d2670fc4SToomas Soome mb2_mmap_tagp);
688d2670fc4SToomas Soome break;
689d2670fc4SToomas Soome default:
690d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
691d2670fc4SToomas Soome multiboot_version);
692d2670fc4SToomas Soome break;
693d2670fc4SToomas Soome }
694d2670fc4SToomas Soome return (num_entries);
695843e1988Sjohnlev #else
696d2670fc4SToomas Soome return (MAXMAPS);
697843e1988Sjohnlev #endif
698d2670fc4SToomas Soome }
699d2670fc4SToomas Soome
700d2670fc4SToomas Soome static uint32_t
dboot_loader_mmap_get_type(int index)701d2670fc4SToomas Soome dboot_loader_mmap_get_type(int index)
702d2670fc4SToomas Soome {
703d2670fc4SToomas Soome #if !defined(__xpv)
704d2670fc4SToomas Soome mb_memory_map_t *mp, *mpend;
705d2670fc4SToomas Soome int i;
706d2670fc4SToomas Soome
707d2670fc4SToomas Soome switch (multiboot_version) {
708d2670fc4SToomas Soome case 1:
709d2670fc4SToomas Soome mp = (mb_memory_map_t *)mb_info->mmap_addr;
710d2670fc4SToomas Soome mpend = (mb_memory_map_t *)
711d2670fc4SToomas Soome (mb_info->mmap_addr + mb_info->mmap_length);
712d2670fc4SToomas Soome
713d2670fc4SToomas Soome for (i = 0; mp < mpend && i != index; i++)
714d2670fc4SToomas Soome mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
715d2670fc4SToomas Soome sizeof (mp->size));
716d2670fc4SToomas Soome if (mp >= mpend) {
717d2670fc4SToomas Soome dboot_panic("dboot_loader_mmap_get_type(): index "
718d2670fc4SToomas Soome "out of bounds: %d\n", index);
719d2670fc4SToomas Soome }
720d2670fc4SToomas Soome return (mp->type);
721d2670fc4SToomas Soome
722d2670fc4SToomas Soome case 2:
723d2670fc4SToomas Soome return (dboot_multiboot2_mmap_get_type(mb2_info,
724d2670fc4SToomas Soome mb2_mmap_tagp, index));
725d2670fc4SToomas Soome
726d2670fc4SToomas Soome default:
727d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
728d2670fc4SToomas Soome multiboot_version);
729d2670fc4SToomas Soome break;
730d2670fc4SToomas Soome }
731d2670fc4SToomas Soome return (0);
732d2670fc4SToomas Soome #else
733d2670fc4SToomas Soome return (map_buffer[index].type);
734d2670fc4SToomas Soome #endif
735d2670fc4SToomas Soome }
736d2670fc4SToomas Soome
737d2670fc4SToomas Soome static uint64_t
dboot_loader_mmap_get_base(int index)738d2670fc4SToomas Soome dboot_loader_mmap_get_base(int index)
739d2670fc4SToomas Soome {
740d2670fc4SToomas Soome #if !defined(__xpv)
741d2670fc4SToomas Soome mb_memory_map_t *mp, *mpend;
742d2670fc4SToomas Soome int i;
743d2670fc4SToomas Soome
744d2670fc4SToomas Soome switch (multiboot_version) {
745d2670fc4SToomas Soome case 1:
746d2670fc4SToomas Soome mp = (mb_memory_map_t *)mb_info->mmap_addr;
747d2670fc4SToomas Soome mpend = (mb_memory_map_t *)
748d2670fc4SToomas Soome (mb_info->mmap_addr + mb_info->mmap_length);
749d2670fc4SToomas Soome
750d2670fc4SToomas Soome for (i = 0; mp < mpend && i != index; i++)
751d2670fc4SToomas Soome mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
752d2670fc4SToomas Soome sizeof (mp->size));
753d2670fc4SToomas Soome if (mp >= mpend) {
754d2670fc4SToomas Soome dboot_panic("dboot_loader_mmap_get_base(): index "
755d2670fc4SToomas Soome "out of bounds: %d\n", index);
756d2670fc4SToomas Soome }
757d2670fc4SToomas Soome return (((uint64_t)mp->base_addr_high << 32) +
758d2670fc4SToomas Soome (uint64_t)mp->base_addr_low);
759d2670fc4SToomas Soome
760d2670fc4SToomas Soome case 2:
761d2670fc4SToomas Soome return (dboot_multiboot2_mmap_get_base(mb2_info,
762d2670fc4SToomas Soome mb2_mmap_tagp, index));
763d2670fc4SToomas Soome
764d2670fc4SToomas Soome default:
765d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
766d2670fc4SToomas Soome multiboot_version);
767d2670fc4SToomas Soome break;
768d2670fc4SToomas Soome }
769d2670fc4SToomas Soome return (0);
770d2670fc4SToomas Soome #else
771d2670fc4SToomas Soome return (((uint64_t)map_buffer[index].base_addr_high << 32) +
772d2670fc4SToomas Soome (uint64_t)map_buffer[index].base_addr_low);
773d2670fc4SToomas Soome #endif
774d2670fc4SToomas Soome }
775d2670fc4SToomas Soome
776d2670fc4SToomas Soome static uint64_t
dboot_loader_mmap_get_length(int index)777d2670fc4SToomas Soome dboot_loader_mmap_get_length(int index)
778d2670fc4SToomas Soome {
779d2670fc4SToomas Soome #if !defined(__xpv)
780d2670fc4SToomas Soome mb_memory_map_t *mp, *mpend;
781d2670fc4SToomas Soome int i;
782d2670fc4SToomas Soome
783d2670fc4SToomas Soome switch (multiboot_version) {
784d2670fc4SToomas Soome case 1:
785d2670fc4SToomas Soome mp = (mb_memory_map_t *)mb_info->mmap_addr;
786d2670fc4SToomas Soome mpend = (mb_memory_map_t *)
787d2670fc4SToomas Soome (mb_info->mmap_addr + mb_info->mmap_length);
788d2670fc4SToomas Soome
789d2670fc4SToomas Soome for (i = 0; mp < mpend && i != index; i++)
790d2670fc4SToomas Soome mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
791d2670fc4SToomas Soome sizeof (mp->size));
792d2670fc4SToomas Soome if (mp >= mpend) {
793d2670fc4SToomas Soome dboot_panic("dboot_loader_mmap_get_length(): index "
794d2670fc4SToomas Soome "out of bounds: %d\n", index);
795d2670fc4SToomas Soome }
796d2670fc4SToomas Soome return (((uint64_t)mp->length_high << 32) +
797d2670fc4SToomas Soome (uint64_t)mp->length_low);
798d2670fc4SToomas Soome
799d2670fc4SToomas Soome case 2:
800d2670fc4SToomas Soome return (dboot_multiboot2_mmap_get_length(mb2_info,
801d2670fc4SToomas Soome mb2_mmap_tagp, index));
802d2670fc4SToomas Soome
803d2670fc4SToomas Soome default:
804d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
805d2670fc4SToomas Soome multiboot_version);
806d2670fc4SToomas Soome break;
807d2670fc4SToomas Soome }
808d2670fc4SToomas Soome return (0);
809d2670fc4SToomas Soome #else
810d2670fc4SToomas Soome return (((uint64_t)map_buffer[index].length_high << 32) +
811d2670fc4SToomas Soome (uint64_t)map_buffer[index].length_low);
812d2670fc4SToomas Soome #endif
813d2670fc4SToomas Soome }
814843e1988Sjohnlev
815843e1988Sjohnlev static void
build_pcimemlists(void)816d2670fc4SToomas Soome build_pcimemlists(void)
817843e1988Sjohnlev {
818843e1988Sjohnlev uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */
819843e1988Sjohnlev uint64_t start;
820843e1988Sjohnlev uint64_t end;
821d2670fc4SToomas Soome int i, num;
822843e1988Sjohnlev
823843e1988Sjohnlev /*
824843e1988Sjohnlev * initialize
825843e1988Sjohnlev */
826843e1988Sjohnlev pcimemlists[0].addr = pci_lo_limit;
827843e1988Sjohnlev pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
828843e1988Sjohnlev pcimemlists_used = 1;
829843e1988Sjohnlev
830d2670fc4SToomas Soome num = dboot_loader_mmap_entries();
831843e1988Sjohnlev /*
832843e1988Sjohnlev * Fill in PCI memlists.
833843e1988Sjohnlev */
834d2670fc4SToomas Soome for (i = 0; i < num; ++i) {
835d2670fc4SToomas Soome start = dboot_loader_mmap_get_base(i);
836d2670fc4SToomas Soome end = start + dboot_loader_mmap_get_length(i);
837843e1988Sjohnlev
838843e1988Sjohnlev if (prom_debug)
839843e1988Sjohnlev dboot_printf("\ttype: %d %" PRIx64 "..%"
840d2670fc4SToomas Soome PRIx64 "\n", dboot_loader_mmap_get_type(i),
841d2670fc4SToomas Soome start, end);
842843e1988Sjohnlev
843843e1988Sjohnlev /*
844843e1988Sjohnlev * page align start and end
845843e1988Sjohnlev */
846843e1988Sjohnlev start = (start + page_offset) & ~page_offset;
847843e1988Sjohnlev end &= ~page_offset;
848843e1988Sjohnlev if (end <= start)
849843e1988Sjohnlev continue;
850843e1988Sjohnlev
851843e1988Sjohnlev exclude_from_pci(start, end);
852843e1988Sjohnlev }
853843e1988Sjohnlev
854843e1988Sjohnlev /*
855843e1988Sjohnlev * Finish off the pcimemlist
856843e1988Sjohnlev */
857843e1988Sjohnlev if (prom_debug) {
858843e1988Sjohnlev for (i = 0; i < pcimemlists_used; ++i) {
859843e1988Sjohnlev dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
860843e1988Sjohnlev PRIx64 "\n", pcimemlists[i].addr,
861843e1988Sjohnlev pcimemlists[i].addr + pcimemlists[i].size);
862843e1988Sjohnlev }
863843e1988Sjohnlev }
864843e1988Sjohnlev pcimemlists[0].next = 0;
865843e1988Sjohnlev pcimemlists[0].prev = 0;
866843e1988Sjohnlev for (i = 1; i < pcimemlists_used; ++i) {
867843e1988Sjohnlev pcimemlists[i].prev =
868843e1988Sjohnlev (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
869843e1988Sjohnlev pcimemlists[i].next = 0;
870843e1988Sjohnlev pcimemlists[i - 1].next =
871843e1988Sjohnlev (native_ptr_t)(uintptr_t)(pcimemlists + i);
872843e1988Sjohnlev }
873c909a41bSRichard Lowe bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
874843e1988Sjohnlev DBG(bi->bi_pcimem);
875843e1988Sjohnlev }
876843e1988Sjohnlev
877843e1988Sjohnlev #if defined(__xpv)
878843e1988Sjohnlev /*
879843e1988Sjohnlev * Initialize memory allocator stuff from hypervisor-supplied start info.
880843e1988Sjohnlev */
881843e1988Sjohnlev static void
init_mem_alloc(void)882843e1988Sjohnlev init_mem_alloc(void)
883843e1988Sjohnlev {
884843e1988Sjohnlev int local; /* variables needed to find start region */
885843e1988Sjohnlev paddr_t scratch_start;
886843e1988Sjohnlev xen_memory_map_t map;
887843e1988Sjohnlev
888843e1988Sjohnlev DBG_MSG("Entered init_mem_alloc()\n");
889843e1988Sjohnlev
890843e1988Sjohnlev /*
891843e1988Sjohnlev * Free memory follows the stack. There's at least 512KB of scratch
892843e1988Sjohnlev * space, rounded up to at least 2Mb alignment. That should be enough
893843e1988Sjohnlev * for the page tables we'll need to build. The nucleus memory is
894843e1988Sjohnlev * allocated last and will be outside the addressible range. We'll
895843e1988Sjohnlev * switch to new page tables before we unpack the kernel
896843e1988Sjohnlev */
897843e1988Sjohnlev scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
898843e1988Sjohnlev DBG(scratch_start);
899843e1988Sjohnlev scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
900843e1988Sjohnlev DBG(scratch_end);
901843e1988Sjohnlev
902843e1988Sjohnlev /*
903843e1988Sjohnlev * For paranoia, leave some space between hypervisor data and ours.
904843e1988Sjohnlev * Use 500 instead of 512.
905843e1988Sjohnlev */
906843e1988Sjohnlev next_avail_addr = scratch_end - 500 * 1024;
907843e1988Sjohnlev DBG(next_avail_addr);
908843e1988Sjohnlev
909843e1988Sjohnlev /*
910843e1988Sjohnlev * The domain builder gives us at most 1 module
911843e1988Sjohnlev */
912843e1988Sjohnlev DBG(xen_info->mod_len);
913843e1988Sjohnlev if (xen_info->mod_len > 0) {
914843e1988Sjohnlev DBG(xen_info->mod_start);
915843e1988Sjohnlev modules[0].bm_addr = xen_info->mod_start;
916843e1988Sjohnlev modules[0].bm_size = xen_info->mod_len;
917843e1988Sjohnlev bi->bi_module_cnt = 1;
918843e1988Sjohnlev bi->bi_modules = (native_ptr_t)modules;
919843e1988Sjohnlev } else {
920843e1988Sjohnlev bi->bi_module_cnt = 0;
921843e1988Sjohnlev bi->bi_modules = NULL;
922843e1988Sjohnlev }
923843e1988Sjohnlev DBG(bi->bi_module_cnt);
924843e1988Sjohnlev DBG(bi->bi_modules);
925843e1988Sjohnlev
926843e1988Sjohnlev DBG(xen_info->mfn_list);
927843e1988Sjohnlev DBG(xen_info->nr_pages);
928843e1988Sjohnlev max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
929843e1988Sjohnlev DBG(max_mem);
930843e1988Sjohnlev
931843e1988Sjohnlev /*
932843e1988Sjohnlev * Using pseudo-physical addresses, so only 1 memlist element
933843e1988Sjohnlev */
934843e1988Sjohnlev memlists[0].addr = 0;
935843e1988Sjohnlev DBG(memlists[0].addr);
936843e1988Sjohnlev memlists[0].size = max_mem;
937843e1988Sjohnlev DBG(memlists[0].size);
938843e1988Sjohnlev memlists_used = 1;
939843e1988Sjohnlev DBG(memlists_used);
940843e1988Sjohnlev
941843e1988Sjohnlev /*
942843e1988Sjohnlev * finish building physinstall list
943843e1988Sjohnlev */
944843e1988Sjohnlev sort_physinstall();
945843e1988Sjohnlev
9461de082f7SVikram Hegde /*
9471de082f7SVikram Hegde * build bios reserved memlists
9481de082f7SVikram Hegde */
9491de082f7SVikram Hegde build_rsvdmemlists();
9501de082f7SVikram Hegde
951843e1988Sjohnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) {
952843e1988Sjohnlev /*
953843e1988Sjohnlev * build PCI Memory list
954843e1988Sjohnlev */
955843e1988Sjohnlev map.nr_entries = MAXMAPS;
956843e1988Sjohnlev /*LINTED: constant in conditional context*/
957843e1988Sjohnlev set_xen_guest_handle(map.buffer, map_buffer);
958843e1988Sjohnlev if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
959843e1988Sjohnlev dboot_panic("getting XENMEM_machine_memory_map failed");
960d2670fc4SToomas Soome build_pcimemlists();
961843e1988Sjohnlev }
962843e1988Sjohnlev }
963843e1988Sjohnlev
964843e1988Sjohnlev #else /* !__xpv */
965843e1988Sjohnlev
/*
 * Multiboot 1 console information: nothing is done here, the body is empty.
 */
static void
dboot_multiboot1_xboot_consinfo(void)
{
	/* intentionally left without statements */
}
970d2670fc4SToomas Soome
/*
 * Multiboot 2 console information: nothing is done here, the body is empty.
 */
static void
dboot_multiboot2_xboot_consinfo(void)
{
	/* intentionally left without statements */
}
975d2670fc4SToomas Soome
976d2670fc4SToomas Soome static int
dboot_multiboot_modcount(void)977d2670fc4SToomas Soome dboot_multiboot_modcount(void)
978d2670fc4SToomas Soome {
979d2670fc4SToomas Soome switch (multiboot_version) {
980d2670fc4SToomas Soome case 1:
981d2670fc4SToomas Soome return (mb_info->mods_count);
982d2670fc4SToomas Soome
983d2670fc4SToomas Soome case 2:
984d2670fc4SToomas Soome return (dboot_multiboot2_modcount(mb2_info));
985d2670fc4SToomas Soome
986d2670fc4SToomas Soome default:
987d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
988d2670fc4SToomas Soome multiboot_version);
989d2670fc4SToomas Soome break;
990d2670fc4SToomas Soome }
991d2670fc4SToomas Soome return (0);
992d2670fc4SToomas Soome }
993d2670fc4SToomas Soome
994d2670fc4SToomas Soome static uint32_t
dboot_multiboot_modstart(int index)995d2670fc4SToomas Soome dboot_multiboot_modstart(int index)
996d2670fc4SToomas Soome {
997d2670fc4SToomas Soome switch (multiboot_version) {
998d2670fc4SToomas Soome case 1:
999d2670fc4SToomas Soome return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
1000d2670fc4SToomas Soome
1001d2670fc4SToomas Soome case 2:
1002d2670fc4SToomas Soome return (dboot_multiboot2_modstart(mb2_info, index));
1003d2670fc4SToomas Soome
1004d2670fc4SToomas Soome default:
1005d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1006d2670fc4SToomas Soome multiboot_version);
1007d2670fc4SToomas Soome break;
1008d2670fc4SToomas Soome }
1009d2670fc4SToomas Soome return (0);
1010d2670fc4SToomas Soome }
1011d2670fc4SToomas Soome
1012d2670fc4SToomas Soome static uint32_t
dboot_multiboot_modend(int index)1013d2670fc4SToomas Soome dboot_multiboot_modend(int index)
1014d2670fc4SToomas Soome {
1015d2670fc4SToomas Soome switch (multiboot_version) {
1016d2670fc4SToomas Soome case 1:
1017d2670fc4SToomas Soome return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
1018d2670fc4SToomas Soome
1019d2670fc4SToomas Soome case 2:
1020d2670fc4SToomas Soome return (dboot_multiboot2_modend(mb2_info, index));
1021d2670fc4SToomas Soome
1022d2670fc4SToomas Soome default:
1023d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1024d2670fc4SToomas Soome multiboot_version);
1025d2670fc4SToomas Soome break;
1026d2670fc4SToomas Soome }
1027d2670fc4SToomas Soome return (0);
1028d2670fc4SToomas Soome }
1029d2670fc4SToomas Soome
1030d2670fc4SToomas Soome static char *
dboot_multiboot_modcmdline(int index)1031d2670fc4SToomas Soome dboot_multiboot_modcmdline(int index)
1032d2670fc4SToomas Soome {
1033d2670fc4SToomas Soome switch (multiboot_version) {
1034d2670fc4SToomas Soome case 1:
1035d2670fc4SToomas Soome return ((char *)((mb_module_t *)
1036d2670fc4SToomas Soome mb_info->mods_addr)[index].mod_name);
1037d2670fc4SToomas Soome
1038d2670fc4SToomas Soome case 2:
1039d2670fc4SToomas Soome return (dboot_multiboot2_modcmdline(mb2_info, index));
1040d2670fc4SToomas Soome
1041d2670fc4SToomas Soome default:
1042d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1043d2670fc4SToomas Soome multiboot_version);
1044d2670fc4SToomas Soome break;
1045d2670fc4SToomas Soome }
1046d2670fc4SToomas Soome return (0);
1047d2670fc4SToomas Soome }
1048d2670fc4SToomas Soome
1049*b9a86732SToomas Soome /*
1050*b9a86732SToomas Soome * Find the environment module for console setup.
1051*b9a86732SToomas Soome * Since we need the console to print early boot messages, the console is set up
1052*b9a86732SToomas Soome * before anything else and therefore we need to pick up the environment module
1053*b9a86732SToomas Soome * early too.
1054*b9a86732SToomas Soome *
1055*b9a86732SToomas Soome * Note, we just will search for and if found, will pass the env
1056*b9a86732SToomas Soome * module to console setup, the proper module list processing will happen later.
1057*b9a86732SToomas Soome */
1058*b9a86732SToomas Soome static void
dboot_find_env(void)1059*b9a86732SToomas Soome dboot_find_env(void)
1060*b9a86732SToomas Soome {
1061*b9a86732SToomas Soome int i, modcount;
1062*b9a86732SToomas Soome uint32_t mod_start, mod_end;
1063*b9a86732SToomas Soome char *cmdline;
1064*b9a86732SToomas Soome
1065*b9a86732SToomas Soome modcount = dboot_multiboot_modcount();
1066*b9a86732SToomas Soome
1067*b9a86732SToomas Soome for (i = 0; i < modcount; ++i) {
1068*b9a86732SToomas Soome cmdline = dboot_multiboot_modcmdline(i);
1069*b9a86732SToomas Soome if (cmdline == NULL)
1070*b9a86732SToomas Soome continue;
1071*b9a86732SToomas Soome
1072*b9a86732SToomas Soome if (strstr(cmdline, "type=environment") == NULL)
1073*b9a86732SToomas Soome continue;
1074*b9a86732SToomas Soome
1075*b9a86732SToomas Soome mod_start = dboot_multiboot_modstart(i);
1076*b9a86732SToomas Soome mod_end = dboot_multiboot_modend(i);
1077*b9a86732SToomas Soome modules[0].bm_addr = mod_start;
1078*b9a86732SToomas Soome modules[0].bm_size = mod_end - mod_start;
1079*b9a86732SToomas Soome modules[0].bm_name = NULL;
1080*b9a86732SToomas Soome modules[0].bm_hash = NULL;
1081*b9a86732SToomas Soome modules[0].bm_type = BMT_ENV;
1082*b9a86732SToomas Soome bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1083*b9a86732SToomas Soome bi->bi_module_cnt = 1;
1084*b9a86732SToomas Soome return;
1085*b9a86732SToomas Soome }
1086*b9a86732SToomas Soome }
1087*b9a86732SToomas Soome
1088d2670fc4SToomas Soome static boolean_t
dboot_multiboot_basicmeminfo(uint32_t * lower,uint32_t * upper)1089d2670fc4SToomas Soome dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
1090d2670fc4SToomas Soome {
1091d2670fc4SToomas Soome boolean_t rv = B_FALSE;
1092d2670fc4SToomas Soome
1093d2670fc4SToomas Soome switch (multiboot_version) {
1094d2670fc4SToomas Soome case 1:
1095d2670fc4SToomas Soome if (mb_info->flags & 0x01) {
1096d2670fc4SToomas Soome *lower = mb_info->mem_lower;
1097d2670fc4SToomas Soome *upper = mb_info->mem_upper;
1098d2670fc4SToomas Soome rv = B_TRUE;
1099d2670fc4SToomas Soome }
1100d2670fc4SToomas Soome break;
1101d2670fc4SToomas Soome
1102d2670fc4SToomas Soome case 2:
1103d2670fc4SToomas Soome return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
1104d2670fc4SToomas Soome
1105d2670fc4SToomas Soome default:
1106d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1107d2670fc4SToomas Soome multiboot_version);
1108d2670fc4SToomas Soome break;
1109d2670fc4SToomas Soome }
1110d2670fc4SToomas Soome return (rv);
1111d2670fc4SToomas Soome }
1112d2670fc4SToomas Soome
/*
 * Convert one ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F') to its
 * numeric value, 0 through 15.
 *
 * Anything else is rejected with dboot_panic().  Both ends of each range are
 * checked: testing only the lower bound would let characters such as 'g',
 * 'Z' or ':' through and produce values that do not fit in a nibble.
 */
static uint8_t
dboot_a2h(char v)
{
	if (v >= '0' && v <= '9')
		return (v - '0');
	if (v >= 'a' && v <= 'f')
		return (v - 'a' + 0xa);
	if (v >= 'A' && v <= 'F')
		return (v - 'A' + 0xa);

	dboot_panic("bad ASCII hex character %c\n", v);

	return (0);
}
1127e65d07eeSKeith Wesolowski
1128e65d07eeSKeith Wesolowski static void
digest_a2h(const char * ascii,uint8_t * digest)1129e65d07eeSKeith Wesolowski digest_a2h(const char *ascii, uint8_t *digest)
1130e65d07eeSKeith Wesolowski {
1131e65d07eeSKeith Wesolowski unsigned int i;
1132e65d07eeSKeith Wesolowski
1133e65d07eeSKeith Wesolowski for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1134e65d07eeSKeith Wesolowski digest[i] = dboot_a2h(ascii[i * 2]) << 4;
1135e65d07eeSKeith Wesolowski digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
1136e65d07eeSKeith Wesolowski }
1137e65d07eeSKeith Wesolowski }
1138e65d07eeSKeith Wesolowski
1139e65d07eeSKeith Wesolowski /*
1140e65d07eeSKeith Wesolowski * Generate a SHA-1 hash of the first len bytes of image, and compare it with
1141e65d07eeSKeith Wesolowski * the ASCII-format hash found in the 40-byte buffer at ascii. If they
1142e65d07eeSKeith Wesolowski * match, return 0, otherwise -1. This works only for images smaller than
1143e65d07eeSKeith Wesolowski * 4 GB, which should not be a problem.
1144e65d07eeSKeith Wesolowski */
1145e65d07eeSKeith Wesolowski static int
check_image_hash(uint_t midx)11460181461bSKeith M Wesolowski check_image_hash(uint_t midx)
1147e65d07eeSKeith Wesolowski {
11480181461bSKeith M Wesolowski const char *ascii;
11490181461bSKeith M Wesolowski const void *image;
11500181461bSKeith M Wesolowski size_t len;
1151e65d07eeSKeith Wesolowski SHA1_CTX ctx;
1152e65d07eeSKeith Wesolowski uint8_t digest[SHA1_DIGEST_LENGTH];
1153e65d07eeSKeith Wesolowski uint8_t baseline[SHA1_DIGEST_LENGTH];
1154e65d07eeSKeith Wesolowski unsigned int i;
1155e65d07eeSKeith Wesolowski
11560181461bSKeith M Wesolowski ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
11570181461bSKeith M Wesolowski image = (const void *)(uintptr_t)modules[midx].bm_addr;
11580181461bSKeith M Wesolowski len = (size_t)modules[midx].bm_size;
11590181461bSKeith M Wesolowski
1160e65d07eeSKeith Wesolowski digest_a2h(ascii, baseline);
1161e65d07eeSKeith Wesolowski
1162e65d07eeSKeith Wesolowski SHA1Init(&ctx);
1163e65d07eeSKeith Wesolowski SHA1Update(&ctx, image, len);
1164e65d07eeSKeith Wesolowski SHA1Final(digest, &ctx);
1165e65d07eeSKeith Wesolowski
1166e65d07eeSKeith Wesolowski for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1167e65d07eeSKeith Wesolowski if (digest[i] != baseline[i])
1168e65d07eeSKeith Wesolowski return (-1);
1169e65d07eeSKeith Wesolowski }
1170e65d07eeSKeith Wesolowski
1171e65d07eeSKeith Wesolowski return (0);
1172e65d07eeSKeith Wesolowski }
1173e65d07eeSKeith Wesolowski
11740181461bSKeith M Wesolowski static const char *
type_to_str(boot_module_type_t type)11750181461bSKeith M Wesolowski type_to_str(boot_module_type_t type)
11760181461bSKeith M Wesolowski {
11770181461bSKeith M Wesolowski switch (type) {
11780181461bSKeith M Wesolowski case BMT_ROOTFS:
11790181461bSKeith M Wesolowski return ("rootfs");
11800181461bSKeith M Wesolowski case BMT_FILE:
11810181461bSKeith M Wesolowski return ("file");
11820181461bSKeith M Wesolowski case BMT_HASH:
11830181461bSKeith M Wesolowski return ("hash");
1184*b9a86732SToomas Soome case BMT_ENV:
1185*b9a86732SToomas Soome return ("environment");
11860181461bSKeith M Wesolowski default:
11870181461bSKeith M Wesolowski return ("unknown");
11880181461bSKeith M Wesolowski }
11890181461bSKeith M Wesolowski }
11900181461bSKeith M Wesolowski
1191e65d07eeSKeith Wesolowski static void
check_images(void)1192e65d07eeSKeith Wesolowski check_images(void)
1193e65d07eeSKeith Wesolowski {
11940181461bSKeith M Wesolowski uint_t i;
1195e65d07eeSKeith Wesolowski char displayhash[SHA1_ASCII_LENGTH + 1];
11960181461bSKeith M Wesolowski
11970181461bSKeith M Wesolowski for (i = 0; i < modules_used; i++) {
11980181461bSKeith M Wesolowski if (prom_debug) {
11990181461bSKeith M Wesolowski dboot_printf("module #%d: name %s type %s "
12000181461bSKeith M Wesolowski "addr %lx size %lx\n",
12010181461bSKeith M Wesolowski i, (char *)(uintptr_t)modules[i].bm_name,
12020181461bSKeith M Wesolowski type_to_str(modules[i].bm_type),
12030181461bSKeith M Wesolowski (ulong_t)modules[i].bm_addr,
12040181461bSKeith M Wesolowski (ulong_t)modules[i].bm_size);
12050181461bSKeith M Wesolowski }
12060181461bSKeith M Wesolowski
12070181461bSKeith M Wesolowski if (modules[i].bm_type == BMT_HASH ||
12080181461bSKeith M Wesolowski modules[i].bm_hash == NULL) {
12090181461bSKeith M Wesolowski DBG_MSG("module has no hash; skipping check\n");
12100181461bSKeith M Wesolowski continue;
12110181461bSKeith M Wesolowski }
12120181461bSKeith M Wesolowski (void) memcpy(displayhash,
12130181461bSKeith M Wesolowski (void *)(uintptr_t)modules[i].bm_hash,
12140181461bSKeith M Wesolowski SHA1_ASCII_LENGTH);
12150181461bSKeith M Wesolowski displayhash[SHA1_ASCII_LENGTH] = '\0';
12160181461bSKeith M Wesolowski if (prom_debug) {
12170181461bSKeith M Wesolowski dboot_printf("checking expected hash [%s]: ",
12180181461bSKeith M Wesolowski displayhash);
12190181461bSKeith M Wesolowski }
12200181461bSKeith M Wesolowski
12210181461bSKeith M Wesolowski if (check_image_hash(i) != 0)
12220181461bSKeith M Wesolowski dboot_panic("hash mismatch!\n");
12230181461bSKeith M Wesolowski else
12240181461bSKeith M Wesolowski DBG_MSG("OK\n");
12250181461bSKeith M Wesolowski }
12260181461bSKeith M Wesolowski }
12270181461bSKeith M Wesolowski
12280181461bSKeith M Wesolowski /*
12290181461bSKeith M Wesolowski * Determine the module's starting address, size, name, and type, and fill the
12300181461bSKeith M Wesolowski * boot_modules structure. This structure is used by the bop code, except for
12310181461bSKeith M Wesolowski * hashes which are checked prior to transferring control to the kernel.
12320181461bSKeith M Wesolowski */
12330181461bSKeith M Wesolowski static void
process_module(int midx)1234d2670fc4SToomas Soome process_module(int midx)
12350181461bSKeith M Wesolowski {
1236d2670fc4SToomas Soome uint32_t mod_start = dboot_multiboot_modstart(midx);
1237d2670fc4SToomas Soome uint32_t mod_end = dboot_multiboot_modend(midx);
1238d2670fc4SToomas Soome char *cmdline = dboot_multiboot_modcmdline(midx);
12390181461bSKeith M Wesolowski char *p, *q;
12400181461bSKeith M Wesolowski
1241d2670fc4SToomas Soome check_higher(mod_end);
12420181461bSKeith M Wesolowski if (prom_debug) {
12430181461bSKeith M Wesolowski dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
1244d2670fc4SToomas Soome midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
12450181461bSKeith M Wesolowski }
12460181461bSKeith M Wesolowski
1247d2670fc4SToomas Soome if (mod_start > mod_end) {
12480181461bSKeith M Wesolowski dboot_panic("module #%d: module start address 0x%lx greater "
12490181461bSKeith M Wesolowski "than end address 0x%lx", midx,
1250d2670fc4SToomas Soome (ulong_t)mod_start, (ulong_t)mod_end);
12510181461bSKeith M Wesolowski }
1252e65d07eeSKeith Wesolowski
1253e65d07eeSKeith Wesolowski /*
1254e65d07eeSKeith Wesolowski * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1255e65d07eeSKeith Wesolowski * the address of the last valid byte in a module plus 1 as mod_end.
1256e65d07eeSKeith Wesolowski * This is of course a bug; the multiboot specification simply states
1257e65d07eeSKeith Wesolowski * that mod_start and mod_end "contain the start and end addresses of
1258e65d07eeSKeith Wesolowski * the boot module itself" which is pretty obviously not what GRUB is
1259e65d07eeSKeith Wesolowski * doing. However, fixing it requires that not only this code be
1260e65d07eeSKeith Wesolowski * changed but also that other code consuming this value and values
1261e65d07eeSKeith Wesolowski * derived from it be fixed, and that the kernel and GRUB must either
1262e65d07eeSKeith Wesolowski * both have the bug or neither. While there are a lot of combinations
1263e65d07eeSKeith Wesolowski * that will work, there are also some that won't, so for simplicity
1264e65d07eeSKeith Wesolowski * we'll just cope with the bug. That means we won't actually hash the
1265e65d07eeSKeith Wesolowski * byte at mod_end, and we will expect that mod_end for the hash file
1266e65d07eeSKeith Wesolowski * itself is one greater than some multiple of 41 (40 bytes of ASCII
12670181461bSKeith M Wesolowski * hash plus a newline for each module). We set bm_size to the true
12680181461bSKeith M Wesolowski * correct number of bytes in each module, achieving exactly this.
1269e65d07eeSKeith Wesolowski */
1270e65d07eeSKeith Wesolowski
1271d2670fc4SToomas Soome modules[midx].bm_addr = mod_start;
1272d2670fc4SToomas Soome modules[midx].bm_size = mod_end - mod_start;
1273d2670fc4SToomas Soome modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
12740181461bSKeith M Wesolowski modules[midx].bm_hash = NULL;
12750181461bSKeith M Wesolowski modules[midx].bm_type = BMT_FILE;
12760181461bSKeith M Wesolowski
1277d2670fc4SToomas Soome if (cmdline == NULL) {
12780181461bSKeith M Wesolowski modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1279e65d07eeSKeith Wesolowski return;
1280e65d07eeSKeith Wesolowski }
1281e65d07eeSKeith Wesolowski
1282d2670fc4SToomas Soome p = cmdline;
12830181461bSKeith M Wesolowski modules[midx].bm_name =
12840181461bSKeith M Wesolowski (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
12850181461bSKeith M Wesolowski
12860181461bSKeith M Wesolowski while (p != NULL) {
12870181461bSKeith M Wesolowski q = strsep(&p, " \t\f\n\r");
12880181461bSKeith M Wesolowski if (strncmp(q, "name=", 5) == 0) {
12890181461bSKeith M Wesolowski if (q[5] != '\0' && !isspace(q[5])) {
12900181461bSKeith M Wesolowski modules[midx].bm_name =
12910181461bSKeith M Wesolowski (native_ptr_t)(uintptr_t)(q + 5);
12920181461bSKeith M Wesolowski }
12930181461bSKeith M Wesolowski continue;
12940181461bSKeith M Wesolowski }
12950181461bSKeith M Wesolowski
12960181461bSKeith M Wesolowski if (strncmp(q, "type=", 5) == 0) {
12970181461bSKeith M Wesolowski if (q[5] == '\0' || isspace(q[5]))
12980181461bSKeith M Wesolowski continue;
12990181461bSKeith M Wesolowski q += 5;
13000181461bSKeith M Wesolowski if (strcmp(q, "rootfs") == 0) {
13010181461bSKeith M Wesolowski modules[midx].bm_type = BMT_ROOTFS;
13020181461bSKeith M Wesolowski } else if (strcmp(q, "hash") == 0) {
13030181461bSKeith M Wesolowski modules[midx].bm_type = BMT_HASH;
1304*b9a86732SToomas Soome } else if (strcmp(q, "environment") == 0) {
1305*b9a86732SToomas Soome modules[midx].bm_type = BMT_ENV;
13060181461bSKeith M Wesolowski } else if (strcmp(q, "file") != 0) {
13070181461bSKeith M Wesolowski dboot_printf("\tmodule #%d: unknown module "
13080181461bSKeith M Wesolowski "type '%s'; defaulting to 'file'",
13090181461bSKeith M Wesolowski midx, q);
13100181461bSKeith M Wesolowski }
13110181461bSKeith M Wesolowski continue;
13120181461bSKeith M Wesolowski }
13130181461bSKeith M Wesolowski
13140181461bSKeith M Wesolowski if (strncmp(q, "hash=", 5) == 0) {
13150181461bSKeith M Wesolowski if (q[5] != '\0' && !isspace(q[5])) {
13160181461bSKeith M Wesolowski modules[midx].bm_hash =
13170181461bSKeith M Wesolowski (native_ptr_t)(uintptr_t)(q + 5);
13180181461bSKeith M Wesolowski }
13190181461bSKeith M Wesolowski continue;
13200181461bSKeith M Wesolowski }
13210181461bSKeith M Wesolowski
13220181461bSKeith M Wesolowski dboot_printf("ignoring unknown option '%s'\n", q);
13230181461bSKeith M Wesolowski }
13240181461bSKeith M Wesolowski }
13250181461bSKeith M Wesolowski
13260181461bSKeith M Wesolowski /*
13270181461bSKeith M Wesolowski * Backward compatibility: if there are exactly one or two modules, both
13280181461bSKeith M Wesolowski * of type 'file' and neither with an embedded hash value, we have been
13290181461bSKeith M Wesolowski * given the legacy style modules. In this case we need to treat the first
13300181461bSKeith M Wesolowski * module as a rootfs and the second as a hash referencing that module.
13310181461bSKeith M Wesolowski * Otherwise, even if the configuration is invalid, we assume that the
13320181461bSKeith M Wesolowski * operator knows what he's doing or at least isn't being bitten by this
13330181461bSKeith M Wesolowski * interface change.
13340181461bSKeith M Wesolowski */
13350181461bSKeith M Wesolowski static void
fixup_modules(void)13360181461bSKeith M Wesolowski fixup_modules(void)
13370181461bSKeith M Wesolowski {
13380181461bSKeith M Wesolowski if (modules_used == 0 || modules_used > 2)
13390181461bSKeith M Wesolowski return;
13400181461bSKeith M Wesolowski
13410181461bSKeith M Wesolowski if (modules[0].bm_type != BMT_FILE ||
13420181461bSKeith M Wesolowski modules_used > 1 && modules[1].bm_type != BMT_FILE) {
13430181461bSKeith M Wesolowski return;
13440181461bSKeith M Wesolowski }
13450181461bSKeith M Wesolowski
13460181461bSKeith M Wesolowski if (modules[0].bm_hash != NULL ||
13470181461bSKeith M Wesolowski modules_used > 1 && modules[1].bm_hash != NULL) {
13480181461bSKeith M Wesolowski return;
13490181461bSKeith M Wesolowski }
13500181461bSKeith M Wesolowski
13510181461bSKeith M Wesolowski modules[0].bm_type = BMT_ROOTFS;
13520181461bSKeith M Wesolowski if (modules_used > 1) {
13530181461bSKeith M Wesolowski modules[1].bm_type = BMT_HASH;
13540181461bSKeith M Wesolowski modules[1].bm_name = modules[0].bm_name;
13550181461bSKeith M Wesolowski }
13560181461bSKeith M Wesolowski }
13570181461bSKeith M Wesolowski
13580181461bSKeith M Wesolowski /*
13590181461bSKeith M Wesolowski * For modules that do not have assigned hashes but have a separate hash module,
13600181461bSKeith M Wesolowski * find the assigned hash module and set the primary module's bm_hash to point
13610181461bSKeith M Wesolowski * to the hash data from that module. We will then ignore modules of type
13620181461bSKeith M Wesolowski * BMT_HASH from this point forward.
13630181461bSKeith M Wesolowski */
13640181461bSKeith M Wesolowski static void
assign_module_hashes(void)13650181461bSKeith M Wesolowski assign_module_hashes(void)
13660181461bSKeith M Wesolowski {
13670181461bSKeith M Wesolowski uint_t i, j;
13680181461bSKeith M Wesolowski
13690181461bSKeith M Wesolowski for (i = 0; i < modules_used; i++) {
13700181461bSKeith M Wesolowski if (modules[i].bm_type == BMT_HASH ||
13710181461bSKeith M Wesolowski modules[i].bm_hash != NULL) {
13720181461bSKeith M Wesolowski continue;
13730181461bSKeith M Wesolowski }
13740181461bSKeith M Wesolowski
13750181461bSKeith M Wesolowski for (j = 0; j < modules_used; j++) {
13760181461bSKeith M Wesolowski if (modules[j].bm_type != BMT_HASH ||
13770181461bSKeith M Wesolowski strcmp((char *)(uintptr_t)modules[j].bm_name,
13780181461bSKeith M Wesolowski (char *)(uintptr_t)modules[i].bm_name) != 0) {
13790181461bSKeith M Wesolowski continue;
13800181461bSKeith M Wesolowski }
13810181461bSKeith M Wesolowski
13820181461bSKeith M Wesolowski if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
13830181461bSKeith M Wesolowski dboot_printf("Short hash module of length "
13840181461bSKeith M Wesolowski "0x%lx bytes; ignoring\n",
13850181461bSKeith M Wesolowski (ulong_t)modules[j].bm_size);
13860181461bSKeith M Wesolowski } else {
13870181461bSKeith M Wesolowski modules[i].bm_hash = modules[j].bm_addr;
13880181461bSKeith M Wesolowski }
1389e65d07eeSKeith Wesolowski break;
1390e65d07eeSKeith Wesolowski }
1391e65d07eeSKeith Wesolowski }
1392e65d07eeSKeith Wesolowski }
1393e65d07eeSKeith Wesolowski
1394843e1988Sjohnlev /*
1395ae115bc7Smrj * Walk through the module information finding the last used address.
1396ae115bc7Smrj * The first available address will become the top level page table.
1397ae115bc7Smrj */
1398ae115bc7Smrj static void
dboot_process_modules(void)1399d2670fc4SToomas Soome dboot_process_modules(void)
1400ae115bc7Smrj {
1401d2670fc4SToomas Soome int i, modcount;
1402ae115bc7Smrj extern char _end[];
1403ae115bc7Smrj
1404d2670fc4SToomas Soome DBG_MSG("\nFinding Modules\n");
1405d2670fc4SToomas Soome modcount = dboot_multiboot_modcount();
1406d2670fc4SToomas Soome if (modcount > MAX_BOOT_MODULES) {
14075420b805SSeth Goldberg dboot_panic("Too many modules (%d) -- the maximum is %d.",
1408d2670fc4SToomas Soome modcount, MAX_BOOT_MODULES);
14095420b805SSeth Goldberg }
1410ae115bc7Smrj /*
1411ae115bc7Smrj * search the modules to find the last used address
1412ae115bc7Smrj * we'll build the module list while we're walking through here
1413ae115bc7Smrj */
1414c909a41bSRichard Lowe check_higher((paddr_t)(uintptr_t)&_end);
1415d2670fc4SToomas Soome for (i = 0; i < modcount; ++i) {
1416d2670fc4SToomas Soome process_module(i);
1417d2670fc4SToomas Soome modules_used++;
1418ae115bc7Smrj }
1419c909a41bSRichard Lowe bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1420ae115bc7Smrj DBG(bi->bi_modules);
1421d2670fc4SToomas Soome bi->bi_module_cnt = modcount;
1422ae115bc7Smrj DBG(bi->bi_module_cnt);
1423ae115bc7Smrj
14240181461bSKeith M Wesolowski fixup_modules();
14250181461bSKeith M Wesolowski assign_module_hashes();
1426e65d07eeSKeith Wesolowski check_images();
1427d2670fc4SToomas Soome }
1428d2670fc4SToomas Soome
1429d2670fc4SToomas Soome /*
1430d2670fc4SToomas Soome * We then build the phys_install memlist from the multiboot information.
1431d2670fc4SToomas Soome */
1432d2670fc4SToomas Soome static void
dboot_process_mmap(void)1433d2670fc4SToomas Soome dboot_process_mmap(void)
1434d2670fc4SToomas Soome {
1435d2670fc4SToomas Soome uint64_t start;
1436d2670fc4SToomas Soome uint64_t end;
1437d2670fc4SToomas Soome uint64_t page_offset = MMU_PAGEOFFSET; /* needs to be 64 bits */
1438d2670fc4SToomas Soome uint32_t lower, upper;
1439d2670fc4SToomas Soome int i, mmap_entries;
1440e65d07eeSKeith Wesolowski
1441ae115bc7Smrj /*
1442ae115bc7Smrj * Walk through the memory map from multiboot and build our memlist
1443ae115bc7Smrj * structures. Note these will have native format pointers.
1444ae115bc7Smrj */
1445ae115bc7Smrj DBG_MSG("\nFinding Memory Map\n");
1446d2670fc4SToomas Soome num_entries = 0;
1447d2670fc4SToomas Soome num_entries_set = B_FALSE;
1448ae115bc7Smrj max_mem = 0;
1449d2670fc4SToomas Soome if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1450d2670fc4SToomas Soome for (i = 0; i < mmap_entries; i++) {
1451d2670fc4SToomas Soome uint32_t type = dboot_loader_mmap_get_type(i);
1452d2670fc4SToomas Soome start = dboot_loader_mmap_get_base(i);
1453d2670fc4SToomas Soome end = start + dboot_loader_mmap_get_length(i);
1454ae115bc7Smrj
1455c9464e8bSjosephb if (prom_debug)
1456ae115bc7Smrj dboot_printf("\ttype: %d %" PRIx64 "..%"
1457d2670fc4SToomas Soome PRIx64 "\n", type, start, end);
1458ae115bc7Smrj
1459ae115bc7Smrj /*
1460ae115bc7Smrj * page align start and end
1461ae115bc7Smrj */
1462ae115bc7Smrj start = (start + page_offset) & ~page_offset;
1463ae115bc7Smrj end &= ~page_offset;
1464ae115bc7Smrj if (end <= start)
1465ae115bc7Smrj continue;
1466ae115bc7Smrj
1467c9464e8bSjosephb /*
1468c9464e8bSjosephb * only type 1 is usable RAM
1469c9464e8bSjosephb */
1470d2670fc4SToomas Soome switch (type) {
14711de082f7SVikram Hegde case 1:
1472ae115bc7Smrj if (end > max_mem)
1473ae115bc7Smrj max_mem = end;
1474ae115bc7Smrj memlists[memlists_used].addr = start;
1475ae115bc7Smrj memlists[memlists_used].size = end - start;
1476c9464e8bSjosephb ++memlists_used;
1477c9464e8bSjosephb if (memlists_used > MAX_MEMLIST)
1478c9464e8bSjosephb dboot_panic("too many memlists");
14791de082f7SVikram Hegde break;
14801de082f7SVikram Hegde case 2:
14811de082f7SVikram Hegde rsvdmemlists[rsvdmemlists_used].addr = start;
14821de082f7SVikram Hegde rsvdmemlists[rsvdmemlists_used].size =
14831de082f7SVikram Hegde end - start;
14841de082f7SVikram Hegde ++rsvdmemlists_used;
14851de082f7SVikram Hegde if (rsvdmemlists_used > MAX_MEMLIST)
14861de082f7SVikram Hegde dboot_panic("too many rsvdmemlists");
14871de082f7SVikram Hegde break;
14881de082f7SVikram Hegde default:
14891de082f7SVikram Hegde continue;
14901de082f7SVikram Hegde }
1491ae115bc7Smrj }
1492d2670fc4SToomas Soome build_pcimemlists();
1493d2670fc4SToomas Soome } else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1494d2670fc4SToomas Soome DBG(lower);
1495ae115bc7Smrj memlists[memlists_used].addr = 0;
1496d2670fc4SToomas Soome memlists[memlists_used].size = lower * 1024;
1497ae115bc7Smrj ++memlists_used;
1498d2670fc4SToomas Soome DBG(upper);
1499ae115bc7Smrj memlists[memlists_used].addr = 1024 * 1024;
1500d2670fc4SToomas Soome memlists[memlists_used].size = upper * 1024;
1501ae115bc7Smrj ++memlists_used;
1502843e1988Sjohnlev
1503843e1988Sjohnlev /*
1504843e1988Sjohnlev * Old platform - assume I/O space at the end of memory.
1505843e1988Sjohnlev */
1506d2670fc4SToomas Soome pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1507843e1988Sjohnlev pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1508843e1988Sjohnlev pcimemlists[0].next = 0;
1509843e1988Sjohnlev pcimemlists[0].prev = 0;
1510c909a41bSRichard Lowe bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1511843e1988Sjohnlev DBG(bi->bi_pcimem);
1512ae115bc7Smrj } else {
1513843e1988Sjohnlev dboot_panic("No memory info from boot loader!!!");
1514ae115bc7Smrj }
1515ae115bc7Smrj
1516ae115bc7Smrj /*
1517ae115bc7Smrj * finish processing the physinstall list
1518ae115bc7Smrj */
1519ae115bc7Smrj sort_physinstall();
15201de082f7SVikram Hegde
15211de082f7SVikram Hegde /*
15221de082f7SVikram Hegde * build bios reserved mem lists
15231de082f7SVikram Hegde */
15241de082f7SVikram Hegde build_rsvdmemlists();
1525c9464e8bSjosephb }
1526d2670fc4SToomas Soome
1527d2670fc4SToomas Soome /*
1528d2670fc4SToomas Soome * The highest address is used as the starting point for dboot's simple
1529d2670fc4SToomas Soome * memory allocator.
1530d2670fc4SToomas Soome *
1531d2670fc4SToomas Soome * Finding the highest address in case of Multiboot 1 protocol is
1532d2670fc4SToomas Soome * quite painful in the sense that some information provided by
1533d2670fc4SToomas Soome * the multiboot info structure points to BIOS data, and some to RAM.
1534d2670fc4SToomas Soome *
1535d2670fc4SToomas Soome * The module list was processed and checked already by dboot_process_modules(),
1536d2670fc4SToomas Soome * so we will check the command line string and the memory map.
1537d2670fc4SToomas Soome *
1538d2670fc4SToomas Soome * This list of to be checked items is based on our current knowledge of
1539d2670fc4SToomas Soome * allocations made by grub1 and will need to be reviewed if there
1540d2670fc4SToomas Soome * are updates about the information provided by Multiboot 1.
1541d2670fc4SToomas Soome *
1542d2670fc4SToomas Soome * In the case of the Multiboot 2, our life is much simpler, as the MB2
1543d2670fc4SToomas Soome * information tag list is one contiguous chunk of memory.
1544d2670fc4SToomas Soome */
1545d2670fc4SToomas Soome static paddr_t
dboot_multiboot1_highest_addr(void)1546d2670fc4SToomas Soome dboot_multiboot1_highest_addr(void)
1547d2670fc4SToomas Soome {
1548d2670fc4SToomas Soome paddr_t addr = NULL;
1549d2670fc4SToomas Soome char *cmdl = (char *)mb_info->cmdline;
1550d2670fc4SToomas Soome
1551d2670fc4SToomas Soome if (mb_info->flags & MB_INFO_CMDLINE)
1552d2670fc4SToomas Soome addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1553d2670fc4SToomas Soome
1554d2670fc4SToomas Soome if (mb_info->flags & MB_INFO_MEM_MAP)
1555d2670fc4SToomas Soome addr = MAX(addr,
1556d2670fc4SToomas Soome ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1557d2670fc4SToomas Soome return (addr);
1558d2670fc4SToomas Soome }
1559d2670fc4SToomas Soome
1560d2670fc4SToomas Soome static void
dboot_multiboot_highest_addr(void)1561d2670fc4SToomas Soome dboot_multiboot_highest_addr(void)
1562d2670fc4SToomas Soome {
1563d2670fc4SToomas Soome paddr_t addr;
1564d2670fc4SToomas Soome
1565d2670fc4SToomas Soome switch (multiboot_version) {
1566d2670fc4SToomas Soome case 1:
1567d2670fc4SToomas Soome addr = dboot_multiboot1_highest_addr();
1568d2670fc4SToomas Soome if (addr != NULL)
1569d2670fc4SToomas Soome check_higher(addr);
1570d2670fc4SToomas Soome break;
1571d2670fc4SToomas Soome case 2:
1572d2670fc4SToomas Soome addr = dboot_multiboot2_highest_addr(mb2_info);
1573d2670fc4SToomas Soome if (addr != NULL)
1574d2670fc4SToomas Soome check_higher(addr);
1575d2670fc4SToomas Soome break;
1576d2670fc4SToomas Soome default:
1577d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1578d2670fc4SToomas Soome multiboot_version);
1579d2670fc4SToomas Soome break;
1580d2670fc4SToomas Soome }
1581d2670fc4SToomas Soome }
1582d2670fc4SToomas Soome
/*
 * Walk the boot loader provided information and find the highest free
 * address.  The three steps run in a fixed order: modules, then the memory
 * map, then the loader's own information structures.
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");

	/* Module list, including the end of dboot's own image. */
	dboot_process_modules();

	/* Memory lists built from the loader's memory information. */
	dboot_process_mmap();

	/* Command line / memory map / tag list supplied by the loader. */
	dboot_multiboot_highest_addr();
}
1594d2670fc4SToomas Soome
1595d2670fc4SToomas Soome static void
dboot_multiboot_get_fwtables(void)1596d2670fc4SToomas Soome dboot_multiboot_get_fwtables(void)
1597d2670fc4SToomas Soome {
1598d2670fc4SToomas Soome multiboot_tag_new_acpi_t *nacpitagp;
1599d2670fc4SToomas Soome multiboot_tag_old_acpi_t *oacpitagp;
1600d2670fc4SToomas Soome
1601d2670fc4SToomas Soome /* no fw tables from multiboot 1 */
1602d2670fc4SToomas Soome if (multiboot_version != 2)
1603d2670fc4SToomas Soome return;
1604d2670fc4SToomas Soome
1605d2670fc4SToomas Soome nacpitagp = (multiboot_tag_new_acpi_t *)
1606d2670fc4SToomas Soome dboot_multiboot2_find_tag(mb2_info,
1607d2670fc4SToomas Soome MULTIBOOT_TAG_TYPE_ACPI_NEW);
1608d2670fc4SToomas Soome oacpitagp = (multiboot_tag_old_acpi_t *)
1609d2670fc4SToomas Soome dboot_multiboot2_find_tag(mb2_info,
1610d2670fc4SToomas Soome MULTIBOOT_TAG_TYPE_ACPI_OLD);
1611d2670fc4SToomas Soome
1612d2670fc4SToomas Soome if (nacpitagp != NULL) {
1613d2670fc4SToomas Soome bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1614d2670fc4SToomas Soome &nacpitagp->mb_rsdp[0];
1615d2670fc4SToomas Soome } else if (oacpitagp != NULL) {
1616d2670fc4SToomas Soome bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1617d2670fc4SToomas Soome &oacpitagp->mb_rsdp[0];
1618d2670fc4SToomas Soome } else {
1619d2670fc4SToomas Soome bi->bi_acpi_rsdp = NULL;
1620d2670fc4SToomas Soome }
1621d2670fc4SToomas Soome }
1622843e1988Sjohnlev #endif /* !__xpv */
1623ae115bc7Smrj
1624ae115bc7Smrj /*
1625ae115bc7Smrj * Simple memory allocator, allocates aligned physical memory.
1626ae115bc7Smrj * Note that startup_kernel() only allocates memory, never frees.
1627ae115bc7Smrj * Memory usage just grows in an upward direction.
1628ae115bc7Smrj */
1629ae115bc7Smrj static void *
do_mem_alloc(uint32_t size,uint32_t align)1630ae115bc7Smrj do_mem_alloc(uint32_t size, uint32_t align)
1631ae115bc7Smrj {
1632ae115bc7Smrj uint_t i;
1633ae115bc7Smrj uint64_t best;
1634ae115bc7Smrj uint64_t start;
1635ae115bc7Smrj uint64_t end;
1636ae115bc7Smrj
1637ae115bc7Smrj /*
1638ae115bc7Smrj * make sure size is a multiple of pagesize
1639ae115bc7Smrj */
1640ae115bc7Smrj size = RNDUP(size, MMU_PAGESIZE);
1641ae115bc7Smrj next_avail_addr = RNDUP(next_avail_addr, align);
1642ae115bc7Smrj
1643ae115bc7Smrj /*
1644843e1988Sjohnlev * XXPV fixme joe
1645843e1988Sjohnlev *
1646ae115bc7Smrj * a really large bootarchive that causes you to run out of memory
1647ae115bc7Smrj * may cause this to blow up
1648ae115bc7Smrj */
1649ae115bc7Smrj /* LINTED E_UNEXPECTED_UINT_PROMOTION */
1650ae115bc7Smrj best = (uint64_t)-size;
1651ae115bc7Smrj for (i = 0; i < memlists_used; ++i) {
1652ae115bc7Smrj start = memlists[i].addr;
1653843e1988Sjohnlev #if defined(__xpv)
1654843e1988Sjohnlev start += mfn_base;
1655843e1988Sjohnlev #endif
1656ae115bc7Smrj end = start + memlists[i].size;
1657ae115bc7Smrj
1658ae115bc7Smrj /*
1659ae115bc7Smrj * did we find the desired address?
1660ae115bc7Smrj */
1661ae115bc7Smrj if (start <= next_avail_addr && next_avail_addr + size <= end) {
1662ae115bc7Smrj best = next_avail_addr;
1663ae115bc7Smrj goto done;
1664ae115bc7Smrj }
1665ae115bc7Smrj
1666ae115bc7Smrj /*
1667ae115bc7Smrj * if not is this address the best so far?
1668ae115bc7Smrj */
1669ae115bc7Smrj if (start > next_avail_addr && start < best &&
1670ae115bc7Smrj RNDUP(start, align) + size <= end)
1671ae115bc7Smrj best = RNDUP(start, align);
1672ae115bc7Smrj }
1673ae115bc7Smrj
1674ae115bc7Smrj /*
1675ae115bc7Smrj * We didn't find exactly the address we wanted, due to going off the
1676ae115bc7Smrj * end of a memory region. Return the best found memory address.
1677ae115bc7Smrj */
1678ae115bc7Smrj done:
1679ae115bc7Smrj next_avail_addr = best + size;
1680843e1988Sjohnlev #if defined(__xpv)
1681843e1988Sjohnlev if (next_avail_addr > scratch_end)
1682843e1988Sjohnlev dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
1683843e1988Sjohnlev "0x%lx", (ulong_t)next_avail_addr,
1684843e1988Sjohnlev (ulong_t)scratch_end);
1685843e1988Sjohnlev #endif
1686ae115bc7Smrj (void) memset((void *)(uintptr_t)best, 0, size);
1687ae115bc7Smrj return ((void *)(uintptr_t)best);
1688ae115bc7Smrj }
1689ae115bc7Smrj
1690ae115bc7Smrj void *
mem_alloc(uint32_t size)1691ae115bc7Smrj mem_alloc(uint32_t size)
1692ae115bc7Smrj {
1693ae115bc7Smrj return (do_mem_alloc(size, MMU_PAGESIZE));
1694ae115bc7Smrj }
1695ae115bc7Smrj
1696ae115bc7Smrj
1697ae115bc7Smrj /*
1698ae115bc7Smrj * Build page tables to map all of memory used so far as well as the kernel.
1699ae115bc7Smrj */
1700ae115bc7Smrj static void
build_page_tables(void)1701ae115bc7Smrj build_page_tables(void)
1702ae115bc7Smrj {
1703ae115bc7Smrj uint32_t psize;
1704ae115bc7Smrj uint32_t level;
1705ae115bc7Smrj uint32_t off;
1706ae115bc7Smrj uint64_t start;
1707843e1988Sjohnlev #if !defined(__xpv)
1708843e1988Sjohnlev uint32_t i;
1709ae115bc7Smrj uint64_t end;
1710843e1988Sjohnlev #endif /* __xpv */
1711ae115bc7Smrj
1712ae115bc7Smrj /*
1713843e1988Sjohnlev * If we're on metal, we need to create the top level pagetable.
1714ae115bc7Smrj */
1715843e1988Sjohnlev #if defined(__xpv)
1716843e1988Sjohnlev top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
1717843e1988Sjohnlev #else /* __xpv */
1718ae115bc7Smrj top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1719843e1988Sjohnlev #endif /* __xpv */
1720ae115bc7Smrj DBG((uintptr_t)top_page_table);
1721ae115bc7Smrj
1722ae115bc7Smrj /*
1723ae115bc7Smrj * Determine if we'll use large mappings for kernel, then map it.
1724ae115bc7Smrj */
1725ae115bc7Smrj if (largepage_support) {
1726ae115bc7Smrj psize = lpagesize;
1727ae115bc7Smrj level = 1;
1728ae115bc7Smrj } else {
1729ae115bc7Smrj psize = MMU_PAGESIZE;
1730ae115bc7Smrj level = 0;
1731ae115bc7Smrj }
1732ae115bc7Smrj
1733ae115bc7Smrj DBG_MSG("Mapping kernel\n");
1734ae115bc7Smrj DBG(ktext_phys);
1735ae115bc7Smrj DBG(target_kernel_text);
1736ae115bc7Smrj DBG(ksize);
1737ae115bc7Smrj DBG(psize);
1738ae115bc7Smrj for (off = 0; off < ksize; off += psize)
1739ae115bc7Smrj map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
1740ae115bc7Smrj
1741ae115bc7Smrj /*
1742ae115bc7Smrj * The kernel will need a 1 page window to work with page tables
1743ae115bc7Smrj */
1744ae115bc7Smrj bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
1745ae115bc7Smrj DBG(bi->bi_pt_window);
1746ae115bc7Smrj bi->bi_pte_to_pt_window =
1747ae115bc7Smrj (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
1748ae115bc7Smrj DBG(bi->bi_pte_to_pt_window);
1749ae115bc7Smrj
1750843e1988Sjohnlev #if defined(__xpv)
1751843e1988Sjohnlev if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
1752843e1988Sjohnlev /* If this is a domU we're done. */
1753843e1988Sjohnlev DBG_MSG("\nPage tables constructed\n");
1754843e1988Sjohnlev return;
1755843e1988Sjohnlev }
1756843e1988Sjohnlev #endif /* __xpv */
1757843e1988Sjohnlev
1758ae115bc7Smrj /*
1759843e1988Sjohnlev * We need 1:1 mappings for the lower 1M of memory to access
1760843e1988Sjohnlev * BIOS tables used by a couple of drivers during boot.
1761ae115bc7Smrj *
1762843e1988Sjohnlev * The following code works because our simple memory allocator
1763843e1988Sjohnlev * only grows usage in an upwards direction.
1764ae115bc7Smrj *
1765843e1988Sjohnlev * Note that by this point in boot some mappings for low memory
1766843e1988Sjohnlev * may already exist because we've already accessed device in low
1767843e1988Sjohnlev * memory. (Specifically the video frame buffer and keyboard
1768843e1988Sjohnlev * status ports.) If we're booting on raw hardware then GRUB
1769843e1988Sjohnlev * created these mappings for us. If we're booting under a
1770843e1988Sjohnlev * hypervisor then we went ahead and remapped these devices into
1771843e1988Sjohnlev * memory allocated within dboot itself.
1772843e1988Sjohnlev */
1773843e1988Sjohnlev if (map_debug)
1774843e1988Sjohnlev dboot_printf("1:1 map pa=0..1Meg\n");
1775843e1988Sjohnlev for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
1776843e1988Sjohnlev #if defined(__xpv)
1777843e1988Sjohnlev map_ma_at_va(start, start, 0);
1778843e1988Sjohnlev #else /* __xpv */
1779843e1988Sjohnlev map_pa_at_va(start, start, 0);
1780843e1988Sjohnlev #endif /* __xpv */
1781843e1988Sjohnlev }
1782843e1988Sjohnlev
1783843e1988Sjohnlev #if !defined(__xpv)
1784ae115bc7Smrj for (i = 0; i < memlists_used; ++i) {
1785ae115bc7Smrj start = memlists[i].addr;
1786ae115bc7Smrj
1787ae115bc7Smrj end = start + memlists[i].size;
1788ae115bc7Smrj
1789ae115bc7Smrj if (map_debug)
1790ae115bc7Smrj dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
1791ae115bc7Smrj start, end);
1792ae115bc7Smrj while (start < end && start < next_avail_addr) {
1793ae115bc7Smrj map_pa_at_va(start, start, 0);
1794ae115bc7Smrj start += MMU_PAGESIZE;
1795ae115bc7Smrj }
1796ae115bc7Smrj }
1797843e1988Sjohnlev #endif /* !__xpv */
1798ae115bc7Smrj
1799ae115bc7Smrj DBG_MSG("\nPage tables constructed\n");
1800ae115bc7Smrj }
1801ae115bc7Smrj
/*
 * Panic text used when the kernel command line contains "multiboot".
 * It is written as adjacent string literals, one per output line, which
 * the compiler concatenates into a single string.
 */
#define	NO_MULTIBOOT	\
	"multiboot is no longer used to boot the Solaris Operating System.\n" \
	"The grub entry should be changed to:\n" \
	"kernel$ /platform/i86pc/kernel/$ISADIR/unix\n" \
	"module$ /platform/i86pc/$ISADIR/boot_archive\n" \
	"See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
1808ae115bc7Smrj
1809d2670fc4SToomas Soome static void
dboot_init_xboot_consinfo(void)1810d2670fc4SToomas Soome dboot_init_xboot_consinfo(void)
1811d2670fc4SToomas Soome {
1812d2670fc4SToomas Soome uintptr_t addr;
1813d2670fc4SToomas Soome /*
1814d2670fc4SToomas Soome * boot info must be 16 byte aligned for 64 bit kernel ABI
1815d2670fc4SToomas Soome */
1816d2670fc4SToomas Soome addr = (uintptr_t)boot_info;
1817d2670fc4SToomas Soome addr = (addr + 0xf) & ~0xf;
1818d2670fc4SToomas Soome bi = (struct xboot_info *)addr;
1819d2670fc4SToomas Soome
1820d2670fc4SToomas Soome #if !defined(__xpv)
1821d2670fc4SToomas Soome switch (multiboot_version) {
1822d2670fc4SToomas Soome case 1:
1823d2670fc4SToomas Soome dboot_multiboot1_xboot_consinfo();
1824d2670fc4SToomas Soome break;
1825d2670fc4SToomas Soome case 2:
1826d2670fc4SToomas Soome dboot_multiboot2_xboot_consinfo();
1827d2670fc4SToomas Soome break;
1828d2670fc4SToomas Soome default:
1829d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1830d2670fc4SToomas Soome multiboot_version);
1831d2670fc4SToomas Soome break;
1832d2670fc4SToomas Soome }
1833*b9a86732SToomas Soome /*
1834*b9a86732SToomas Soome * Lookup environment module for the console. Complete module list
1835*b9a86732SToomas Soome * will be built after console setup.
1836*b9a86732SToomas Soome */
1837*b9a86732SToomas Soome dboot_find_env();
1838d2670fc4SToomas Soome #endif
1839d2670fc4SToomas Soome }
1840d2670fc4SToomas Soome
1841d2670fc4SToomas Soome /*
1842d2670fc4SToomas Soome * Set up basic data from the boot loader.
1843d2670fc4SToomas Soome * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
1844d2670fc4SToomas Soome * 32-bit dboot code setup used to set up and start 64-bit kernel.
1845d2670fc4SToomas Soome * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
1846d2670fc4SToomas Soome * start 64-bit illumos kernel.
1847d2670fc4SToomas Soome */
1848d2670fc4SToomas Soome static void
dboot_loader_init(void)1849d2670fc4SToomas Soome dboot_loader_init(void)
1850d2670fc4SToomas Soome {
1851d2670fc4SToomas Soome #if !defined(__xpv)
1852d2670fc4SToomas Soome mb_info = NULL;
1853d2670fc4SToomas Soome mb2_info = NULL;
1854d2670fc4SToomas Soome
1855d2670fc4SToomas Soome switch (mb_magic) {
1856d2670fc4SToomas Soome case MB_BOOTLOADER_MAGIC:
1857d2670fc4SToomas Soome multiboot_version = 1;
1858d2670fc4SToomas Soome mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
1859d2670fc4SToomas Soome #if defined(_BOOT_TARGET_amd64)
1860d2670fc4SToomas Soome load_addr = mb_header.load_addr;
1861d2670fc4SToomas Soome #endif
1862d2670fc4SToomas Soome break;
1863d2670fc4SToomas Soome
1864d2670fc4SToomas Soome case MULTIBOOT2_BOOTLOADER_MAGIC:
1865d2670fc4SToomas Soome multiboot_version = 2;
1866d2670fc4SToomas Soome mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
1867d2670fc4SToomas Soome mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
1868d2670fc4SToomas Soome #if defined(_BOOT_TARGET_amd64)
1869d2670fc4SToomas Soome load_addr = mb2_load_addr;
1870d2670fc4SToomas Soome #endif
1871d2670fc4SToomas Soome break;
1872d2670fc4SToomas Soome
1873d2670fc4SToomas Soome default:
1874d2670fc4SToomas Soome dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
1875d2670fc4SToomas Soome break;
1876d2670fc4SToomas Soome }
1877d2670fc4SToomas Soome #endif /* !defined(__xpv) */
1878d2670fc4SToomas Soome }
1879d2670fc4SToomas Soome
1880d2670fc4SToomas Soome /* Extract the kernel command line from [multi]boot information. */
1881d2670fc4SToomas Soome static char *
dboot_loader_cmdline(void)1882d2670fc4SToomas Soome dboot_loader_cmdline(void)
1883d2670fc4SToomas Soome {
1884d2670fc4SToomas Soome char *line = NULL;
1885d2670fc4SToomas Soome
1886d2670fc4SToomas Soome #if defined(__xpv)
1887d2670fc4SToomas Soome line = (char *)xen_info->cmd_line;
1888d2670fc4SToomas Soome #else /* __xpv */
1889d2670fc4SToomas Soome
1890d2670fc4SToomas Soome switch (multiboot_version) {
1891d2670fc4SToomas Soome case 1:
1892d2670fc4SToomas Soome if (mb_info->flags & MB_INFO_CMDLINE)
1893d2670fc4SToomas Soome line = (char *)mb_info->cmdline;
1894d2670fc4SToomas Soome break;
1895d2670fc4SToomas Soome
1896d2670fc4SToomas Soome case 2:
1897d2670fc4SToomas Soome line = dboot_multiboot2_cmdline(mb2_info);
1898d2670fc4SToomas Soome break;
1899d2670fc4SToomas Soome
1900d2670fc4SToomas Soome default:
1901d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1902d2670fc4SToomas Soome multiboot_version);
1903d2670fc4SToomas Soome break;
1904d2670fc4SToomas Soome }
1905d2670fc4SToomas Soome
1906d2670fc4SToomas Soome #endif /* __xpv */
1907d2670fc4SToomas Soome
1908d2670fc4SToomas Soome /*
1909d2670fc4SToomas Soome * Make sure we have valid pointer so the string operations
1910d2670fc4SToomas Soome * will not crash us.
1911d2670fc4SToomas Soome */
1912d2670fc4SToomas Soome if (line == NULL)
1913d2670fc4SToomas Soome line = "";
1914d2670fc4SToomas Soome
1915d2670fc4SToomas Soome return (line);
1916d2670fc4SToomas Soome }
1917d2670fc4SToomas Soome
1918d2670fc4SToomas Soome static char *
dboot_loader_name(void)1919d2670fc4SToomas Soome dboot_loader_name(void)
1920d2670fc4SToomas Soome {
1921d2670fc4SToomas Soome #if defined(__xpv)
1922d2670fc4SToomas Soome return (NULL);
1923d2670fc4SToomas Soome #else /* __xpv */
1924d2670fc4SToomas Soome multiboot_tag_string_t *tag;
1925d2670fc4SToomas Soome
1926d2670fc4SToomas Soome switch (multiboot_version) {
1927d2670fc4SToomas Soome case 1:
1928d2670fc4SToomas Soome return ((char *)mb_info->boot_loader_name);
1929d2670fc4SToomas Soome
1930d2670fc4SToomas Soome case 2:
1931d2670fc4SToomas Soome tag = dboot_multiboot2_find_tag(mb2_info,
1932d2670fc4SToomas Soome MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
1933d2670fc4SToomas Soome return (tag->mb_string);
1934d2670fc4SToomas Soome default:
1935d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
1936d2670fc4SToomas Soome multiboot_version);
1937d2670fc4SToomas Soome break;
1938d2670fc4SToomas Soome }
1939d2670fc4SToomas Soome
1940d2670fc4SToomas Soome return (NULL);
1941d2670fc4SToomas Soome #endif /* __xpv */
1942d2670fc4SToomas Soome }
1943ae115bc7Smrj /*
1944ae115bc7Smrj * startup_kernel has a pretty simple job. It builds pagetables which reflect
1945ae115bc7Smrj * 1:1 mappings for all memory in use. It then also adds mappings for
1946ae115bc7Smrj * the kernel nucleus at virtual address of target_kernel_text using large page
1947ae115bc7Smrj * mappings. The page table pages are also accessible at 1:1 mapped
1948ae115bc7Smrj * virtual addresses.
1949ae115bc7Smrj */
1950ae115bc7Smrj /*ARGSUSED*/
1951ae115bc7Smrj void
startup_kernel(void)1952ae115bc7Smrj startup_kernel(void)
1953ae115bc7Smrj {
1954ae115bc7Smrj char *cmdline;
1955d2670fc4SToomas Soome char *bootloader;
1956843e1988Sjohnlev #if defined(__xpv)
1957843e1988Sjohnlev physdev_set_iopl_t set_iopl;
1958843e1988Sjohnlev #endif /* __xpv */
1959ae115bc7Smrj
1960d2670fc4SToomas Soome dboot_loader_init();
1961ae115bc7Smrj /*
1962ae115bc7Smrj * At this point we are executing in a 32 bit real mode.
1963ae115bc7Smrj */
1964d2670fc4SToomas Soome
1965d2670fc4SToomas Soome bootloader = dboot_loader_name();
1966d2670fc4SToomas Soome cmdline = dboot_loader_cmdline();
1967843e1988Sjohnlev
1968843e1988Sjohnlev #if defined(__xpv)
1969843e1988Sjohnlev /*
1970843e1988Sjohnlev * For dom0, before we initialize the console subsystem we'll
1971843e1988Sjohnlev * need to enable io operations, so set I/O priveldge level to 1.
1972843e1988Sjohnlev */
1973843e1988Sjohnlev if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1974843e1988Sjohnlev set_iopl.iopl = 1;
1975843e1988Sjohnlev (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1976843e1988Sjohnlev }
1977843e1988Sjohnlev #endif /* __xpv */
1978843e1988Sjohnlev
1979d2670fc4SToomas Soome dboot_init_xboot_consinfo();
1980d2670fc4SToomas Soome bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
1981*b9a86732SToomas Soome bcons_init(bi);
1982*b9a86732SToomas Soome
1983*b9a86732SToomas Soome prom_debug = (find_boot_prop("prom_debug") != NULL);
1984*b9a86732SToomas Soome map_debug = (find_boot_prop("map_debug") != NULL);
1985d2670fc4SToomas Soome
1986d2670fc4SToomas Soome #if !defined(__xpv)
1987d2670fc4SToomas Soome dboot_multiboot_get_fwtables();
1988d2670fc4SToomas Soome #endif
1989d2670fc4SToomas Soome DBG_MSG("\n\nillumos prekernel set: ");
1990ae115bc7Smrj DBG_MSG(cmdline);
1991ae115bc7Smrj DBG_MSG("\n");
1992ae115bc7Smrj
1993d2670fc4SToomas Soome if (bootloader != NULL && prom_debug) {
1994d2670fc4SToomas Soome dboot_printf("Kernel loaded by: %s\n", bootloader);
1995d2670fc4SToomas Soome #if !defined(__xpv)
1996d2670fc4SToomas Soome dboot_printf("Using multiboot %d boot protocol.\n",
1997d2670fc4SToomas Soome multiboot_version);
1998d2670fc4SToomas Soome #endif
1999d2670fc4SToomas Soome }
2000d2670fc4SToomas Soome
2001ae115bc7Smrj if (strstr(cmdline, "multiboot") != NULL) {
2002ae115bc7Smrj dboot_panic(NO_MULTIBOOT);
2003ae115bc7Smrj }
2004ae115bc7Smrj
2005ae115bc7Smrj DBG((uintptr_t)bi);
2006d2670fc4SToomas Soome #if !defined(__xpv)
2007d2670fc4SToomas Soome DBG((uintptr_t)mb_info);
2008d2670fc4SToomas Soome DBG((uintptr_t)mb2_info);
2009d2670fc4SToomas Soome if (mb2_info != NULL)
2010d2670fc4SToomas Soome DBG(mb2_info->mbi_total_size);
2011d2670fc4SToomas Soome DBG(bi->bi_acpi_rsdp);
2012d2670fc4SToomas Soome #endif
2013ae115bc7Smrj
2014ae115bc7Smrj /*
2015ae115bc7Smrj * Need correct target_kernel_text value
2016ae115bc7Smrj */
2017ae115bc7Smrj #if defined(_BOOT_TARGET_amd64)
2018ae115bc7Smrj target_kernel_text = KERNEL_TEXT_amd64;
2019843e1988Sjohnlev #elif defined(__xpv)
2020843e1988Sjohnlev target_kernel_text = KERNEL_TEXT_i386_xpv;
2021ae115bc7Smrj #else
2022ae115bc7Smrj target_kernel_text = KERNEL_TEXT_i386;
2023ae115bc7Smrj #endif
2024ae115bc7Smrj DBG(target_kernel_text);
2025ae115bc7Smrj
2026843e1988Sjohnlev #if defined(__xpv)
2027843e1988Sjohnlev
2028843e1988Sjohnlev /*
2029843e1988Sjohnlev * XXPV Derive this stuff from CPUID / what the hypervisor has enabled
2030843e1988Sjohnlev */
2031843e1988Sjohnlev
2032843e1988Sjohnlev #if defined(_BOOT_TARGET_amd64)
2033843e1988Sjohnlev /*
2034843e1988Sjohnlev * 64-bit hypervisor.
2035843e1988Sjohnlev */
2036843e1988Sjohnlev amd64_support = 1;
2037843e1988Sjohnlev pae_support = 1;
2038843e1988Sjohnlev
2039843e1988Sjohnlev #else /* _BOOT_TARGET_amd64 */
2040843e1988Sjohnlev
2041843e1988Sjohnlev /*
2042843e1988Sjohnlev * See if we are running on a PAE Hypervisor
2043843e1988Sjohnlev */
2044843e1988Sjohnlev {
2045843e1988Sjohnlev xen_capabilities_info_t caps;
2046843e1988Sjohnlev
2047843e1988Sjohnlev if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2048843e1988Sjohnlev dboot_panic("HYPERVISOR_xen_version(caps) failed");
2049843e1988Sjohnlev caps[sizeof (caps) - 1] = 0;
2050843e1988Sjohnlev if (prom_debug)
2051843e1988Sjohnlev dboot_printf("xen capabilities %s\n", caps);
2052843e1988Sjohnlev if (strstr(caps, "x86_32p") != NULL)
2053843e1988Sjohnlev pae_support = 1;
2054843e1988Sjohnlev }
2055843e1988Sjohnlev
2056843e1988Sjohnlev #endif /* _BOOT_TARGET_amd64 */
2057843e1988Sjohnlev {
2058843e1988Sjohnlev xen_platform_parameters_t p;
2059843e1988Sjohnlev
2060843e1988Sjohnlev if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2061843e1988Sjohnlev dboot_panic("HYPERVISOR_xen_version(parms) failed");
2062843e1988Sjohnlev DBG(p.virt_start);
2063843e1988Sjohnlev mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2064843e1988Sjohnlev }
2065843e1988Sjohnlev
2066843e1988Sjohnlev /*
2067843e1988Sjohnlev * The hypervisor loads stuff starting at 1Gig
2068843e1988Sjohnlev */
2069843e1988Sjohnlev mfn_base = ONE_GIG;
2070843e1988Sjohnlev DBG(mfn_base);
2071843e1988Sjohnlev
2072843e1988Sjohnlev /*
2073843e1988Sjohnlev * enable writable page table mode for the hypervisor
2074843e1988Sjohnlev */
2075843e1988Sjohnlev if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2076843e1988Sjohnlev VMASST_TYPE_writable_pagetables) < 0)
2077843e1988Sjohnlev dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2078843e1988Sjohnlev
2079843e1988Sjohnlev /*
2080843e1988Sjohnlev * check for NX support
2081843e1988Sjohnlev */
2082843e1988Sjohnlev if (pae_support) {
2083843e1988Sjohnlev uint32_t eax = 0x80000000;
2084843e1988Sjohnlev uint32_t edx = get_cpuid_edx(&eax);
2085843e1988Sjohnlev
2086843e1988Sjohnlev if (eax >= 0x80000001) {
2087843e1988Sjohnlev eax = 0x80000001;
2088843e1988Sjohnlev edx = get_cpuid_edx(&eax);
2089843e1988Sjohnlev if (edx & CPUID_AMD_EDX_NX)
2090843e1988Sjohnlev NX_support = 1;
2091843e1988Sjohnlev }
2092843e1988Sjohnlev }
2093843e1988Sjohnlev
2094843e1988Sjohnlev #if !defined(_BOOT_TARGET_amd64)
2095843e1988Sjohnlev
2096843e1988Sjohnlev /*
2097843e1988Sjohnlev * The 32-bit hypervisor uses segmentation to protect itself from
2098843e1988Sjohnlev * guests. This means when a guest attempts to install a flat 4GB
2099843e1988Sjohnlev * code or data descriptor the 32-bit hypervisor will protect itself
2100843e1988Sjohnlev * by silently shrinking the segment such that if the guest attempts
2101843e1988Sjohnlev * any access where the hypervisor lives a #gp fault is generated.
2102843e1988Sjohnlev * The problem is that some applications expect a full 4GB flat
2103843e1988Sjohnlev * segment for their current thread pointer and will use negative
2104843e1988Sjohnlev * offset segment wrap around to access data. TLS support in linux
2105843e1988Sjohnlev * brand is one example of this.
2106843e1988Sjohnlev *
2107843e1988Sjohnlev * The 32-bit hypervisor can catch the #gp fault in these cases
2108843e1988Sjohnlev * and emulate the access without passing the #gp fault to the guest
2109843e1988Sjohnlev * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2110843e1988Sjohnlev * Seems like this should have been the default.
2111843e1988Sjohnlev * Either way, we want the hypervisor -- and not Solaris -- to deal
2112843e1988Sjohnlev * with emulating these accesses.
2113843e1988Sjohnlev */
2114843e1988Sjohnlev if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2115843e1988Sjohnlev VMASST_TYPE_4gb_segments) < 0)
2116843e1988Sjohnlev dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2117843e1988Sjohnlev #endif /* !_BOOT_TARGET_amd64 */
2118843e1988Sjohnlev
2119843e1988Sjohnlev #else /* __xpv */
2120843e1988Sjohnlev
2121ae115bc7Smrj /*
2122ae115bc7Smrj * use cpuid to enable MMU features
2123ae115bc7Smrj */
2124ae115bc7Smrj if (have_cpuid()) {
2125ae115bc7Smrj uint32_t eax, edx;
2126ae115bc7Smrj
2127ae115bc7Smrj eax = 1;
2128ae115bc7Smrj edx = get_cpuid_edx(&eax);
2129ae115bc7Smrj if (edx & CPUID_INTC_EDX_PSE)
2130ae115bc7Smrj largepage_support = 1;
2131ae115bc7Smrj if (edx & CPUID_INTC_EDX_PGE)
2132ae115bc7Smrj pge_support = 1;
2133ae115bc7Smrj if (edx & CPUID_INTC_EDX_PAE)
2134ae115bc7Smrj pae_support = 1;
2135ae115bc7Smrj
2136ae115bc7Smrj eax = 0x80000000;
2137ae115bc7Smrj edx = get_cpuid_edx(&eax);
2138ae115bc7Smrj if (eax >= 0x80000001) {
2139ae115bc7Smrj eax = 0x80000001;
2140ae115bc7Smrj edx = get_cpuid_edx(&eax);
2141ae115bc7Smrj if (edx & CPUID_AMD_EDX_LM)
2142ae115bc7Smrj amd64_support = 1;
2143ae115bc7Smrj if (edx & CPUID_AMD_EDX_NX)
2144ae115bc7Smrj NX_support = 1;
2145ae115bc7Smrj }
2146ae115bc7Smrj } else {
2147ae115bc7Smrj dboot_printf("cpuid not supported\n");
2148ae115bc7Smrj }
2149843e1988Sjohnlev #endif /* __xpv */
2150843e1988Sjohnlev
2151ae115bc7Smrj
2152ae115bc7Smrj #if defined(_BOOT_TARGET_amd64)
2153ae115bc7Smrj if (amd64_support == 0)
2154843e1988Sjohnlev dboot_panic("long mode not supported, rebooting");
2155ae115bc7Smrj else if (pae_support == 0)
2156843e1988Sjohnlev dboot_panic("long mode, but no PAE; rebooting");
2157843e1988Sjohnlev #else
2158843e1988Sjohnlev /*
2159843e1988Sjohnlev * Allow the command line to over-ride use of PAE for 32 bit.
2160843e1988Sjohnlev */
2161843e1988Sjohnlev if (strstr(cmdline, "disablePAE=true") != NULL) {
2162843e1988Sjohnlev pae_support = 0;
2163843e1988Sjohnlev NX_support = 0;
2164843e1988Sjohnlev amd64_support = 0;
2165843e1988Sjohnlev }
2166ae115bc7Smrj #endif
2167ae115bc7Smrj
2168ae115bc7Smrj /*
2169843e1988Sjohnlev * initialize the simple memory allocator
2170ae115bc7Smrj */
2171ae115bc7Smrj init_mem_alloc();
2172ae115bc7Smrj
2173843e1988Sjohnlev #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2174843e1988Sjohnlev /*
2175843e1988Sjohnlev * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2176843e1988Sjohnlev */
2177843e1988Sjohnlev if (max_mem < FOUR_GIG && NX_support == 0)
2178843e1988Sjohnlev pae_support = 0;
2179843e1988Sjohnlev #endif
2180843e1988Sjohnlev
2181ae115bc7Smrj /*
2182ae115bc7Smrj * configure mmu information
2183ae115bc7Smrj */
2184843e1988Sjohnlev if (pae_support) {
2185ae115bc7Smrj shift_amt = shift_amt_pae;
2186ae115bc7Smrj ptes_per_table = 512;
2187ae115bc7Smrj pte_size = 8;
2188ae115bc7Smrj lpagesize = TWO_MEG;
2189ae115bc7Smrj #if defined(_BOOT_TARGET_amd64)
2190ae115bc7Smrj top_level = 3;
2191ae115bc7Smrj #else
2192ae115bc7Smrj top_level = 2;
2193ae115bc7Smrj #endif
2194ae115bc7Smrj } else {
2195ae115bc7Smrj pae_support = 0;
2196ae115bc7Smrj NX_support = 0;
2197ae115bc7Smrj shift_amt = shift_amt_nopae;
2198ae115bc7Smrj ptes_per_table = 1024;
2199ae115bc7Smrj pte_size = 4;
2200ae115bc7Smrj lpagesize = FOUR_MEG;
2201ae115bc7Smrj top_level = 1;
2202ae115bc7Smrj }
2203ae115bc7Smrj
2204ae115bc7Smrj DBG(pge_support);
2205ae115bc7Smrj DBG(NX_support);
2206ae115bc7Smrj DBG(largepage_support);
2207ae115bc7Smrj DBG(amd64_support);
2208ae115bc7Smrj DBG(top_level);
2209ae115bc7Smrj DBG(pte_size);
2210ae115bc7Smrj DBG(ptes_per_table);
2211ae115bc7Smrj DBG(lpagesize);
2212ae115bc7Smrj
2213843e1988Sjohnlev #if defined(__xpv)
2214843e1988Sjohnlev ktext_phys = ONE_GIG; /* from UNIX Mapfile */
2215843e1988Sjohnlev #else
2216ae115bc7Smrj ktext_phys = FOUR_MEG; /* from UNIX Mapfile */
2217843e1988Sjohnlev #endif
2218ae115bc7Smrj
2219843e1988Sjohnlev #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2220ae115bc7Smrj /*
2221ae115bc7Smrj * For grub, copy kernel bits from the ELF64 file to final place.
2222ae115bc7Smrj */
2223ae115bc7Smrj DBG_MSG("\nAllocating nucleus pages.\n");
2224ae115bc7Smrj ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2225ae115bc7Smrj if (ktext_phys == 0)
2226843e1988Sjohnlev dboot_panic("failed to allocate aligned kernel memory");
2227d2670fc4SToomas Soome DBG(load_addr);
2228d2670fc4SToomas Soome if (dboot_elfload64(load_addr) != 0)
2229843e1988Sjohnlev dboot_panic("failed to parse kernel ELF image, rebooting");
2230ae115bc7Smrj #endif
2231843e1988Sjohnlev
2232ae115bc7Smrj DBG(ktext_phys);
2233ae115bc7Smrj
2234ae115bc7Smrj /*
2235ae115bc7Smrj * Allocate page tables.
2236ae115bc7Smrj */
2237ae115bc7Smrj build_page_tables();
2238ae115bc7Smrj
2239ae115bc7Smrj /*
2240ae115bc7Smrj * return to assembly code to switch to running kernel
2241ae115bc7Smrj */
2242ae115bc7Smrj entry_addr_low = (uint32_t)target_kernel_text;
2243ae115bc7Smrj DBG(entry_addr_low);
2244ae115bc7Smrj bi->bi_use_largepage = largepage_support;
2245ae115bc7Smrj bi->bi_use_pae = pae_support;
2246ae115bc7Smrj bi->bi_use_pge = pge_support;
2247ae115bc7Smrj bi->bi_use_nx = NX_support;
2248843e1988Sjohnlev
2249843e1988Sjohnlev #if defined(__xpv)
2250843e1988Sjohnlev
2251843e1988Sjohnlev bi->bi_next_paddr = next_avail_addr - mfn_base;
2252843e1988Sjohnlev DBG(bi->bi_next_paddr);
2253843e1988Sjohnlev bi->bi_next_vaddr = (native_ptr_t)next_avail_addr;
2254843e1988Sjohnlev DBG(bi->bi_next_vaddr);
2255843e1988Sjohnlev
2256843e1988Sjohnlev /*
2257843e1988Sjohnlev * unmap unused pages in start area to make them available for DMA
2258843e1988Sjohnlev */
2259843e1988Sjohnlev while (next_avail_addr < scratch_end) {
2260843e1988Sjohnlev (void) HYPERVISOR_update_va_mapping(next_avail_addr,
2261843e1988Sjohnlev 0, UVMF_INVLPG | UVMF_LOCAL);
2262843e1988Sjohnlev next_avail_addr += MMU_PAGESIZE;
2263843e1988Sjohnlev }
2264843e1988Sjohnlev
2265843e1988Sjohnlev bi->bi_xen_start_info = (uintptr_t)xen_info;
2266843e1988Sjohnlev DBG((uintptr_t)HYPERVISOR_shared_info);
2267843e1988Sjohnlev bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2268843e1988Sjohnlev bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2269843e1988Sjohnlev
2270843e1988Sjohnlev #else /* __xpv */
2271843e1988Sjohnlev
2272ae115bc7Smrj bi->bi_next_paddr = next_avail_addr;
2273ae115bc7Smrj DBG(bi->bi_next_paddr);
2274ae115bc7Smrj bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
2275ae115bc7Smrj DBG(bi->bi_next_vaddr);
2276d2670fc4SToomas Soome bi->bi_mb_version = multiboot_version;
2277d2670fc4SToomas Soome
2278d2670fc4SToomas Soome switch (multiboot_version) {
2279d2670fc4SToomas Soome case 1:
2280ae115bc7Smrj bi->bi_mb_info = (uintptr_t)mb_info;
2281d2670fc4SToomas Soome break;
2282d2670fc4SToomas Soome case 2:
2283d2670fc4SToomas Soome bi->bi_mb_info = (uintptr_t)mb2_info;
2284d2670fc4SToomas Soome break;
2285d2670fc4SToomas Soome default:
2286d2670fc4SToomas Soome dboot_panic("Unknown multiboot version: %d\n",
2287d2670fc4SToomas Soome multiboot_version);
2288d2670fc4SToomas Soome break;
2289d2670fc4SToomas Soome }
2290ae115bc7Smrj bi->bi_top_page_table = (uintptr_t)top_page_table;
2291ae115bc7Smrj
2292843e1988Sjohnlev #endif /* __xpv */
2293843e1988Sjohnlev
2294ae115bc7Smrj bi->bi_kseg_size = FOUR_MEG;
2295ae115bc7Smrj DBG(bi->bi_kseg_size);
2296ae115bc7Smrj
229715ba2a79SSherry Moore #ifndef __xpv
2298f34a7178SJoe Bonasera if (map_debug)
229919397407SSherry Moore dump_tables();
230015ba2a79SSherry Moore #endif
230119397407SSherry Moore
2302ae115bc7Smrj DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2303ae115bc7Smrj }
2304