xref: /titanic_44/usr/src/uts/i86pc/dboot/dboot_startkern.c (revision e84622ca60e336e723b4e107b019b1ecb4542eb3)
1ae115bc7Smrj /*
2ae115bc7Smrj  * CDDL HEADER START
3ae115bc7Smrj  *
4ae115bc7Smrj  * The contents of this file are subject to the terms of the
5ae115bc7Smrj  * Common Development and Distribution License (the "License").
6ae115bc7Smrj  * You may not use this file except in compliance with the License.
7ae115bc7Smrj  *
8ae115bc7Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj  * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj  * See the License for the specific language governing permissions
11ae115bc7Smrj  * and limitations under the License.
12ae115bc7Smrj  *
13ae115bc7Smrj  * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj  * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj  *
19ae115bc7Smrj  * CDDL HEADER END
20ae115bc7Smrj  */
21ae115bc7Smrj 
22ae115bc7Smrj /*
23f34a7178SJoe Bonasera  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24ae115bc7Smrj  * Use is subject to license terms.
25e65d07eeSKeith Wesolowski  *
261d9cde1dSKeith M Wesolowski  * Copyright 2013 Joyent, Inc.  All rights reserved.
27ae115bc7Smrj  */
28ae115bc7Smrj 
29ae115bc7Smrj 
30ae115bc7Smrj #include <sys/types.h>
31ae115bc7Smrj #include <sys/machparam.h>
32ae115bc7Smrj #include <sys/x86_archext.h>
33ae115bc7Smrj #include <sys/systm.h>
34ae115bc7Smrj #include <sys/mach_mmu.h>
35ae115bc7Smrj #include <sys/multiboot.h>
36*e84622caSToomas Soome #include <sys/multiboot2.h>
37*e84622caSToomas Soome #include <sys/multiboot2_impl.h>
38*e84622caSToomas Soome #include <sys/sysmacros.h>
39e65d07eeSKeith Wesolowski #include <sys/sha1.h>
401d9cde1dSKeith M Wesolowski #include <util/string.h>
411d9cde1dSKeith M Wesolowski #include <util/strtolctype.h>
42ae115bc7Smrj 
43843e1988Sjohnlev #if defined(__xpv)
44843e1988Sjohnlev 
45843e1988Sjohnlev #include <sys/hypervisor.h>
46843e1988Sjohnlev uintptr_t xen_virt_start;
47843e1988Sjohnlev pfn_t *mfn_to_pfn_mapping;
48843e1988Sjohnlev 
49843e1988Sjohnlev #else /* !__xpv */
50843e1988Sjohnlev 
51ae115bc7Smrj extern multiboot_header_t mb_header;
52*e84622caSToomas Soome extern uint32_t mb2_load_addr;
53ae115bc7Smrj extern int have_cpuid(void);
54843e1988Sjohnlev 
55843e1988Sjohnlev #endif /* !__xpv */
56ae115bc7Smrj 
57ae115bc7Smrj #include <sys/inttypes.h>
58ae115bc7Smrj #include <sys/bootinfo.h>
59ae115bc7Smrj #include <sys/mach_mmu.h>
60ae115bc7Smrj #include <sys/boot_console.h>
61ae115bc7Smrj 
62843e1988Sjohnlev #include "dboot_asm.h"
63ae115bc7Smrj #include "dboot_printf.h"
64ae115bc7Smrj #include "dboot_xboot.h"
65ae115bc7Smrj #include "dboot_elfload.h"
66ae115bc7Smrj 
67e65d07eeSKeith Wesolowski #define	SHA1_ASCII_LENGTH	(SHA1_DIGEST_LENGTH * 2)
68e65d07eeSKeith Wesolowski 
/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or an XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 *	- a 32 bit program for the 32-bit PV hypervisor
 *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running in real mode, all such memory is accessible.
 */
86ae115bc7Smrj 
/*
 * Standard bits used in PTE (page level) and PTP (internal levels).
 * ptp_bits is OR'ed into entries that point at a lower level page table,
 * pte_bits into entries that map an actual page.
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;

/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On the hypervisor this is actually a
 * virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

/*
 * Mappings at or above this virtual address get PT_GLOBAL when the
 * processor supports global pages.
 */
static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation. check_higher() raises this to
 * the page-rounded address just past the highest address seen so far.
 */
static paddr_t next_avail_addr = 0;
112ae115bc7Smrj 
#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0;	/* we can't write all of mem here */
static paddr_t mfn_base;		/* addr corresponding to mfn_list[0] */
/* start-of-day info; nr_pages and mfn_list are read from it */
start_info_t *xen_info;

#else	/* __xpv */

/*
 * If on the metal, then we have a multiboot loader.
 */
uint32_t mb_magic;			/* magic from boot loader */
uint32_t mb_addr;			/* multiboot info package from loader */
/* 1 or 2 select the matching loader code path; any other value panics */
int multiboot_version;
multiboot_info_t *mb_info;		/* used when multiboot_version is 1 */
multiboot2_info_header_t *mb2_info;	/* used when multiboot_version is 2 */
multiboot_tag_mmap_t *mb2_mmap_tagp;	/* used when multiboot_version is 2 */
int num_entries;			/* mmap entry count */
boolean_t num_entries_set;		/* is mmap entry count set */
uintptr_t load_addr;

#endif	/* __xpv */
141843e1988Sjohnlev 
/*
 * This contains information passed to the kernel
 */
struct xboot_info boot_info[2];	/* extra space to fix alignment for amd64 */
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;			/* maximum memory address */

/*
 * Information about processor MMU
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;	/* non-zero: page table entries are x86pte_t sized */
int pge_support = 0;	/* non-zero: PT_GLOBAL may be set on kernel mappings */
int NX_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 * Each table has a companion *_used count of the valid leading entries.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

/*
 * This should match what's in the bootloader.  It's arbitrary, but GRUB
 * in particular has limitations on how much space it can use before it
 * stops working properly.  This should be enough.
 */
struct boot_modules modules[MAX_BOOT_MODULES];
uint_t modules_used = 0;
186ae115bc7Smrj 
#ifdef __xpv
/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
typedef struct {
	uint32_t	base_addr_low;
	uint32_t	base_addr_high;
	uint32_t	length_low;
	uint32_t	length_high;
	uint32_t	type;
} mmap_t;

/*
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
#define	MAXMAPS	100
static mmap_t map_buffer[MAXMAPS];
#else
/* On the metal the loader's multiboot memory map entry is used directly. */
typedef mb_memory_map_t mmap_t;
#endif

/*
 * Debugging macros
 * Non-zero prom_debug / map_debug enable the extra dboot_printf() output
 * guarded by them in this file.
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

static char noname[2] = "-";
2181d9cde1dSKeith M Wesolowski 
219ae115bc7Smrj /*
220843e1988Sjohnlev  * Either hypervisor-specific or grub-specific code builds the initial
221843e1988Sjohnlev  * memlists. This code does the sort/merge/link for final use.
222ae115bc7Smrj  */
223ae115bc7Smrj static void
sort_physinstall(void)224ae115bc7Smrj sort_physinstall(void)
225ae115bc7Smrj {
226ae115bc7Smrj 	int i;
227843e1988Sjohnlev #if !defined(__xpv)
228ae115bc7Smrj 	int j;
229ae115bc7Smrj 	struct boot_memlist tmp;
230ae115bc7Smrj 
231ae115bc7Smrj 	/*
232ae115bc7Smrj 	 * Now sort the memlists, in case they weren't in order.
233ae115bc7Smrj 	 * Yeah, this is a bubble sort; small, simple and easy to get right.
234ae115bc7Smrj 	 */
235ae115bc7Smrj 	DBG_MSG("Sorting phys-installed list\n");
236ae115bc7Smrj 	for (j = memlists_used - 1; j > 0; --j) {
237ae115bc7Smrj 		for (i = 0; i < j; ++i) {
238ae115bc7Smrj 			if (memlists[i].addr < memlists[i + 1].addr)
239ae115bc7Smrj 				continue;
240ae115bc7Smrj 			tmp = memlists[i];
241ae115bc7Smrj 			memlists[i] = memlists[i + 1];
242ae115bc7Smrj 			memlists[i + 1] = tmp;
243ae115bc7Smrj 		}
244ae115bc7Smrj 	}
245ae115bc7Smrj 
246ae115bc7Smrj 	/*
247ae115bc7Smrj 	 * Merge any memlists that don't have holes between them.
248ae115bc7Smrj 	 */
249ae115bc7Smrj 	for (i = 0; i <= memlists_used - 1; ++i) {
250ae115bc7Smrj 		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
251ae115bc7Smrj 			continue;
252ae115bc7Smrj 
253ae115bc7Smrj 		if (prom_debug)
254ae115bc7Smrj 			dboot_printf(
255ae115bc7Smrj 			    "merging mem segs %" PRIx64 "...%" PRIx64
256ae115bc7Smrj 			    " w/ %" PRIx64 "...%" PRIx64 "\n",
257ae115bc7Smrj 			    memlists[i].addr,
258ae115bc7Smrj 			    memlists[i].addr + memlists[i].size,
259ae115bc7Smrj 			    memlists[i + 1].addr,
260ae115bc7Smrj 			    memlists[i + 1].addr + memlists[i + 1].size);
261ae115bc7Smrj 
262ae115bc7Smrj 		memlists[i].size += memlists[i + 1].size;
263ae115bc7Smrj 		for (j = i + 1; j < memlists_used - 1; ++j)
264ae115bc7Smrj 			memlists[j] = memlists[j + 1];
265ae115bc7Smrj 		--memlists_used;
266ae115bc7Smrj 		DBG(memlists_used);
267ae115bc7Smrj 		--i;	/* after merging we need to reexamine, so do this */
268ae115bc7Smrj 	}
269843e1988Sjohnlev #endif	/* __xpv */
270ae115bc7Smrj 
271ae115bc7Smrj 	if (prom_debug) {
272ae115bc7Smrj 		dboot_printf("\nFinal memlists:\n");
273ae115bc7Smrj 		for (i = 0; i < memlists_used; ++i) {
274ae115bc7Smrj 			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
275ae115bc7Smrj 			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
276ae115bc7Smrj 		}
277ae115bc7Smrj 	}
278ae115bc7Smrj 
279ae115bc7Smrj 	/*
280ae115bc7Smrj 	 * link together the memlists with native size pointers
281ae115bc7Smrj 	 */
282ae115bc7Smrj 	memlists[0].next = 0;
283ae115bc7Smrj 	memlists[0].prev = 0;
284ae115bc7Smrj 	for (i = 1; i < memlists_used; ++i) {
285ae115bc7Smrj 		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
286ae115bc7Smrj 		memlists[i].next = 0;
287ae115bc7Smrj 		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
288ae115bc7Smrj 	}
289c909a41bSRichard Lowe 	bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
290ae115bc7Smrj 	DBG(bi->bi_phys_install);
291ae115bc7Smrj }
292ae115bc7Smrj 
2931de082f7SVikram Hegde /*
2941de082f7SVikram Hegde  * build bios reserved memlists
2951de082f7SVikram Hegde  */
2961de082f7SVikram Hegde static void
build_rsvdmemlists(void)2971de082f7SVikram Hegde build_rsvdmemlists(void)
2981de082f7SVikram Hegde {
2991de082f7SVikram Hegde 	int i;
3001de082f7SVikram Hegde 
3011de082f7SVikram Hegde 	rsvdmemlists[0].next = 0;
3021de082f7SVikram Hegde 	rsvdmemlists[0].prev = 0;
3031de082f7SVikram Hegde 	for (i = 1; i < rsvdmemlists_used; ++i) {
3041de082f7SVikram Hegde 		rsvdmemlists[i].prev =
3051de082f7SVikram Hegde 		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
3061de082f7SVikram Hegde 		rsvdmemlists[i].next = 0;
3071de082f7SVikram Hegde 		rsvdmemlists[i - 1].next =
3081de082f7SVikram Hegde 		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
3091de082f7SVikram Hegde 	}
310c909a41bSRichard Lowe 	bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
3111de082f7SVikram Hegde 	DBG(bi->bi_rsvdmem);
3121de082f7SVikram Hegde }
3131de082f7SVikram Hegde 
314843e1988Sjohnlev #if defined(__xpv)
315843e1988Sjohnlev 
316843e1988Sjohnlev /*
317843e1988Sjohnlev  * halt on the hypervisor after a delay to drain console output
318843e1988Sjohnlev  */
319843e1988Sjohnlev void
dboot_halt(void)320843e1988Sjohnlev dboot_halt(void)
321843e1988Sjohnlev {
322843e1988Sjohnlev 	uint_t i = 10000;
323843e1988Sjohnlev 
324843e1988Sjohnlev 	while (--i)
325c1374a13SSurya Prakki 		(void) HYPERVISOR_yield();
326c1374a13SSurya Prakki 	(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
327843e1988Sjohnlev }
328843e1988Sjohnlev 
329843e1988Sjohnlev /*
330843e1988Sjohnlev  * From a machine address, find the corresponding pseudo-physical address.
331843e1988Sjohnlev  * Pseudo-physical address are contiguous and run from mfn_base in each VM.
332843e1988Sjohnlev  * Machine addresses are the real underlying hardware addresses.
333843e1988Sjohnlev  * These are needed for page table entries. Note that this routine is
334843e1988Sjohnlev  * poorly protected. A bad value of "ma" will cause a page fault.
335843e1988Sjohnlev  */
336843e1988Sjohnlev paddr_t
ma_to_pa(maddr_t ma)337843e1988Sjohnlev ma_to_pa(maddr_t ma)
338843e1988Sjohnlev {
339843e1988Sjohnlev 	ulong_t pgoff = ma & MMU_PAGEOFFSET;
340843e1988Sjohnlev 	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
341843e1988Sjohnlev 	paddr_t pa;
342843e1988Sjohnlev 
343843e1988Sjohnlev 	if (pfn >= xen_info->nr_pages)
344843e1988Sjohnlev 		return (-(paddr_t)1);
345843e1988Sjohnlev 	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
346843e1988Sjohnlev #ifdef DEBUG
347843e1988Sjohnlev 	if (ma != pa_to_ma(pa))
348843e1988Sjohnlev 		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
349843e1988Sjohnlev 		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
350843e1988Sjohnlev #endif
351843e1988Sjohnlev 	return (pa);
352843e1988Sjohnlev }
353843e1988Sjohnlev 
354843e1988Sjohnlev /*
355843e1988Sjohnlev  * From a pseudo-physical address, find the corresponding machine address.
356843e1988Sjohnlev  */
357843e1988Sjohnlev maddr_t
pa_to_ma(paddr_t pa)358843e1988Sjohnlev pa_to_ma(paddr_t pa)
359843e1988Sjohnlev {
360843e1988Sjohnlev 	pfn_t pfn;
361843e1988Sjohnlev 	ulong_t mfn;
362843e1988Sjohnlev 
363843e1988Sjohnlev 	pfn = mmu_btop(pa - mfn_base);
364843e1988Sjohnlev 	if (pa < mfn_base || pfn >= xen_info->nr_pages)
365843e1988Sjohnlev 		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
366843e1988Sjohnlev 	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
367843e1988Sjohnlev #ifdef DEBUG
368843e1988Sjohnlev 	if (mfn_to_pfn_mapping[mfn] != pfn)
369843e1988Sjohnlev 		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
370843e1988Sjohnlev 		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
371843e1988Sjohnlev #endif
372843e1988Sjohnlev 	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
373843e1988Sjohnlev }
374843e1988Sjohnlev 
375843e1988Sjohnlev #endif	/* __xpv */
376843e1988Sjohnlev 
377ae115bc7Smrj x86pte_t
get_pteval(paddr_t table,uint_t index)378ae115bc7Smrj get_pteval(paddr_t table, uint_t index)
379ae115bc7Smrj {
380ae115bc7Smrj 	if (pae_support)
381ae115bc7Smrj 		return (((x86pte_t *)(uintptr_t)table)[index]);
382ae115bc7Smrj 	return (((x86pte32_t *)(uintptr_t)table)[index]);
383ae115bc7Smrj }
384ae115bc7Smrj 
385ae115bc7Smrj /*ARGSUSED*/
386ae115bc7Smrj void
set_pteval(paddr_t table,uint_t index,uint_t level,x86pte_t pteval)387ae115bc7Smrj set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
388ae115bc7Smrj {
389843e1988Sjohnlev #ifdef __xpv
390843e1988Sjohnlev 	mmu_update_t t;
391843e1988Sjohnlev 	maddr_t mtable = pa_to_ma(table);
392843e1988Sjohnlev 	int retcnt;
393843e1988Sjohnlev 
394843e1988Sjohnlev 	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
395843e1988Sjohnlev 	t.val = pteval;
396843e1988Sjohnlev 	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
397843e1988Sjohnlev 		dboot_panic("HYPERVISOR_mmu_update() failed");
398843e1988Sjohnlev #else /* __xpv */
399ae115bc7Smrj 	uintptr_t tab_addr = (uintptr_t)table;
400ae115bc7Smrj 
401ae115bc7Smrj 	if (pae_support)
402ae115bc7Smrj 		((x86pte_t *)tab_addr)[index] = pteval;
403ae115bc7Smrj 	else
404ae115bc7Smrj 		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
405ae115bc7Smrj 	if (level == top_level && level == 2)
406ae115bc7Smrj 		reload_cr3();
407843e1988Sjohnlev #endif /* __xpv */
408ae115bc7Smrj }
409ae115bc7Smrj 
410ae115bc7Smrj paddr_t
make_ptable(x86pte_t * pteval,uint_t level)411ae115bc7Smrj make_ptable(x86pte_t *pteval, uint_t level)
412ae115bc7Smrj {
413ae115bc7Smrj 	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
414ae115bc7Smrj 
415ae115bc7Smrj 	if (level == top_level && level == 2)
416ae115bc7Smrj 		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
417ae115bc7Smrj 	else
418ae115bc7Smrj 		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
419ae115bc7Smrj 
420843e1988Sjohnlev #ifdef __xpv
421843e1988Sjohnlev 	/* Remove write permission to the new page table. */
422843e1988Sjohnlev 	if (HYPERVISOR_update_va_mapping(new_table,
423843e1988Sjohnlev 	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
424843e1988Sjohnlev 		dboot_panic("HYP_update_va_mapping error");
425843e1988Sjohnlev #endif
426843e1988Sjohnlev 
427ae115bc7Smrj 	if (map_debug)
428ae115bc7Smrj 		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
429ae115bc7Smrj 		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
430ae115bc7Smrj 	return (new_table);
431ae115bc7Smrj }
432ae115bc7Smrj 
433ae115bc7Smrj x86pte_t *
map_pte(paddr_t table,uint_t index)434ae115bc7Smrj map_pte(paddr_t table, uint_t index)
435ae115bc7Smrj {
436ae115bc7Smrj 	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
437ae115bc7Smrj }
438ae115bc7Smrj 
/*
 * dump out the contents of page tables...
 *
 * Starting at top_page_table, every non-zero entry is printed together with
 * the virtual address tracked in "va" and the physical address obtained from
 * ma_to_pa(). Entries that refer to a lower level table are followed.
 *
 * The walk is iterative: save_table[]/save_index[] act as an explicit stack
 * indexed by level, and the "recursion" label at the bottom of the loop body
 * is the re-entry point both when descending into and when returning from a
 * lower level table.
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t	l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	/*
	 * tabs + l is the indent for level l: empty at the top level and one
	 * more tab for each level below it.
	 */
	char *tabs = tablist + 3 - top_level;
	uint_t pa, pa1;
#if !defined(__xpv)
#define	maddr_t paddr_t
#endif /* !__xpv */

	dboot_printf("Finished pagetables:\n");
	table = (char *)(uintptr_t)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		/* amount of address space covered by one entry at this level */
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, (void *)table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%x\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 *
		 * Descend when the entry refers to a lower level table, i.e.
		 * above level 1, or at level 1 without PT_PAGESIZE set.
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			/* the loop increment turns this into index 0 */
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 *
		 * Count how many following entries continue the same
		 * physically contiguous run that starts at "pa".
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			/* print "..." and skip i - 2 entries of the run */
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 256)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	/*
	 * This table is finished. If it was not the top level table, pop the
	 * saved parent position and jump back into the loop body; the loop
	 * increment then moves on to the parent's next entry.
	 */
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}
52519397407SSherry Moore 
526ae115bc7Smrj /*
527843e1988Sjohnlev  * Add a mapping for the machine page at the given virtual address.
528ae115bc7Smrj  */
529ae115bc7Smrj static void
map_ma_at_va(maddr_t ma,native_ptr_t va,uint_t level)530843e1988Sjohnlev map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
531ae115bc7Smrj {
532ae115bc7Smrj 	x86pte_t *ptep;
533ae115bc7Smrj 	x86pte_t pteval;
534ae115bc7Smrj 
535843e1988Sjohnlev 	pteval = ma | pte_bits;
536ae115bc7Smrj 	if (level > 0)
537ae115bc7Smrj 		pteval |= PT_PAGESIZE;
538ae115bc7Smrj 	if (va >= target_kernel_text && pge_support)
539ae115bc7Smrj 		pteval |= PT_GLOBAL;
540ae115bc7Smrj 
541843e1988Sjohnlev 	if (map_debug && ma != va)
542843e1988Sjohnlev 		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
543ae115bc7Smrj 		    " pte=0x%" PRIx64 " l=%d\n",
544843e1988Sjohnlev 		    (uint64_t)ma, (uint64_t)va, pteval, level);
545843e1988Sjohnlev 
546843e1988Sjohnlev #if defined(__xpv)
547843e1988Sjohnlev 	/*
548843e1988Sjohnlev 	 * see if we can avoid find_pte() on the hypervisor
549843e1988Sjohnlev 	 */
550843e1988Sjohnlev 	if (HYPERVISOR_update_va_mapping(va, pteval,
551843e1988Sjohnlev 	    UVMF_INVLPG | UVMF_LOCAL) == 0)
552843e1988Sjohnlev 		return;
553843e1988Sjohnlev #endif
554ae115bc7Smrj 
555ae115bc7Smrj 	/*
556ae115bc7Smrj 	 * Find the pte that will map this address. This creates any
557ae115bc7Smrj 	 * missing intermediate level page tables
558ae115bc7Smrj 	 */
559ae115bc7Smrj 	ptep = find_pte(va, NULL, level, 0);
560ae115bc7Smrj 
561ae115bc7Smrj 	/*
562843e1988Sjohnlev 	 * When paravirtualized, we must use hypervisor calls to modify the
563843e1988Sjohnlev 	 * PTE, since paging is active. On real hardware we just write to
564843e1988Sjohnlev 	 * the pagetables which aren't in use yet.
565ae115bc7Smrj 	 */
566843e1988Sjohnlev #if defined(__xpv)
567843e1988Sjohnlev 	ptep = ptep;	/* shut lint up */
568843e1988Sjohnlev 	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
569843e1988Sjohnlev 		dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
570843e1988Sjohnlev 		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
571843e1988Sjohnlev 		    (uint64_t)va, level, (uint64_t)ma, pteval);
572843e1988Sjohnlev #else
573ae115bc7Smrj 	if (va < 1024 * 1024)
574ae115bc7Smrj 		pteval |= PT_NOCACHE;		/* for video RAM */
575ae115bc7Smrj 	if (pae_support)
576ae115bc7Smrj 		*ptep = pteval;
577ae115bc7Smrj 	else
578ae115bc7Smrj 		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
579843e1988Sjohnlev #endif
580ae115bc7Smrj }
581ae115bc7Smrj 
582ae115bc7Smrj /*
583843e1988Sjohnlev  * Add a mapping for the physical page at the given virtual address.
584ae115bc7Smrj  */
585ae115bc7Smrj static void
map_pa_at_va(paddr_t pa,native_ptr_t va,uint_t level)586843e1988Sjohnlev map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
587ae115bc7Smrj {
588843e1988Sjohnlev 	map_ma_at_va(pa_to_ma(pa), va, level);
589ae115bc7Smrj }
590ae115bc7Smrj 
591ae115bc7Smrj /*
592c9464e8bSjosephb  * This is called to remove start..end from the
593c9464e8bSjosephb  * possible range of PCI addresses.
594c9464e8bSjosephb  */
595c9464e8bSjosephb const uint64_t pci_lo_limit = 0x00100000ul;
596c9464e8bSjosephb const uint64_t pci_hi_limit = 0xfff00000ul;
597c9464e8bSjosephb static void
exclude_from_pci(uint64_t start,uint64_t end)598c9464e8bSjosephb exclude_from_pci(uint64_t start, uint64_t end)
599c9464e8bSjosephb {
600c9464e8bSjosephb 	int i;
601c9464e8bSjosephb 	int j;
602c9464e8bSjosephb 	struct boot_memlist *ml;
603c9464e8bSjosephb 
604c9464e8bSjosephb 	for (i = 0; i < pcimemlists_used; ++i) {
605c9464e8bSjosephb 		ml = &pcimemlists[i];
606c9464e8bSjosephb 
607c9464e8bSjosephb 		/* delete the entire range? */
608c9464e8bSjosephb 		if (start <= ml->addr && ml->addr + ml->size <= end) {
609c9464e8bSjosephb 			--pcimemlists_used;
610c9464e8bSjosephb 			for (j = i; j < pcimemlists_used; ++j)
611c9464e8bSjosephb 				pcimemlists[j] = pcimemlists[j + 1];
612c9464e8bSjosephb 			--i;	/* to revisit the new one at this index */
613c9464e8bSjosephb 		}
614c9464e8bSjosephb 
615c9464e8bSjosephb 		/* split a range? */
616c9464e8bSjosephb 		else if (ml->addr < start && end < ml->addr + ml->size) {
617c9464e8bSjosephb 
618c9464e8bSjosephb 			++pcimemlists_used;
619c9464e8bSjosephb 			if (pcimemlists_used > MAX_MEMLIST)
620c9464e8bSjosephb 				dboot_panic("too many pcimemlists");
621c9464e8bSjosephb 
622c9464e8bSjosephb 			for (j = pcimemlists_used - 1; j > i; --j)
623c9464e8bSjosephb 				pcimemlists[j] = pcimemlists[j - 1];
624c9464e8bSjosephb 			ml->size = start - ml->addr;
625c9464e8bSjosephb 
626c9464e8bSjosephb 			++ml;
627c9464e8bSjosephb 			ml->size = (ml->addr + ml->size) - end;
628c9464e8bSjosephb 			ml->addr = end;
629c9464e8bSjosephb 			++i;	/* skip on to next one */
630c9464e8bSjosephb 		}
631c9464e8bSjosephb 
632c9464e8bSjosephb 		/* cut memory off the start? */
633c9464e8bSjosephb 		else if (ml->addr < end && end < ml->addr + ml->size) {
634c9464e8bSjosephb 			ml->size -= end - ml->addr;
635c9464e8bSjosephb 			ml->addr = end;
636c9464e8bSjosephb 		}
637c9464e8bSjosephb 
638c9464e8bSjosephb 		/* cut memory off the end? */
639c9464e8bSjosephb 		else if (ml->addr <= start && start < ml->addr + ml->size) {
640c9464e8bSjosephb 			ml->size = start - ml->addr;
641c9464e8bSjosephb 		}
642c9464e8bSjosephb 	}
643c9464e8bSjosephb }
644c9464e8bSjosephb 
645c9464e8bSjosephb /*
646*e84622caSToomas Soome  * During memory allocation, find the highest address not used yet.
647843e1988Sjohnlev  */
648*e84622caSToomas Soome static void
check_higher(paddr_t a)649*e84622caSToomas Soome check_higher(paddr_t a)
650*e84622caSToomas Soome {
651*e84622caSToomas Soome 	if (a < next_avail_addr)
652*e84622caSToomas Soome 		return;
653*e84622caSToomas Soome 	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
654*e84622caSToomas Soome 	DBG(next_avail_addr);
655*e84622caSToomas Soome }
656*e84622caSToomas Soome 
657*e84622caSToomas Soome static int
dboot_loader_mmap_entries(void)658*e84622caSToomas Soome dboot_loader_mmap_entries(void)
659*e84622caSToomas Soome {
660*e84622caSToomas Soome #if !defined(__xpv)
661*e84622caSToomas Soome 	if (num_entries_set == B_TRUE)
662*e84622caSToomas Soome 		return (num_entries);
663*e84622caSToomas Soome 
664*e84622caSToomas Soome 	switch (multiboot_version) {
665*e84622caSToomas Soome 	case 1:
666*e84622caSToomas Soome 		DBG(mb_info->flags);
667*e84622caSToomas Soome 		if (mb_info->flags & 0x40) {
668*e84622caSToomas Soome 			mb_memory_map_t *mmap;
669*e84622caSToomas Soome 
670*e84622caSToomas Soome 			DBG(mb_info->mmap_addr);
671*e84622caSToomas Soome 			DBG(mb_info->mmap_length);
672*e84622caSToomas Soome 			check_higher(mb_info->mmap_addr + mb_info->mmap_length);
673*e84622caSToomas Soome 
674*e84622caSToomas Soome 			for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
675*e84622caSToomas Soome 			    (uint32_t)mmap < mb_info->mmap_addr +
676*e84622caSToomas Soome 			    mb_info->mmap_length;
677*e84622caSToomas Soome 			    mmap = (mb_memory_map_t *)((uint32_t)mmap +
678*e84622caSToomas Soome 			    mmap->size + sizeof (mmap->size)))
679*e84622caSToomas Soome 				++num_entries;
680*e84622caSToomas Soome 
681*e84622caSToomas Soome 			num_entries_set = B_TRUE;
682*e84622caSToomas Soome 		}
683*e84622caSToomas Soome 		break;
684*e84622caSToomas Soome 	case 2:
685*e84622caSToomas Soome 		num_entries_set = B_TRUE;
686*e84622caSToomas Soome 		num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
687*e84622caSToomas Soome 		    mb2_mmap_tagp);
688*e84622caSToomas Soome 		break;
689*e84622caSToomas Soome 	default:
690*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
691*e84622caSToomas Soome 		    multiboot_version);
692*e84622caSToomas Soome 		break;
693*e84622caSToomas Soome 	}
694*e84622caSToomas Soome 	return (num_entries);
695843e1988Sjohnlev #else
696*e84622caSToomas Soome 	return (MAXMAPS);
697843e1988Sjohnlev #endif
698*e84622caSToomas Soome }
699*e84622caSToomas Soome 
700*e84622caSToomas Soome static uint32_t
dboot_loader_mmap_get_type(int index)701*e84622caSToomas Soome dboot_loader_mmap_get_type(int index)
702*e84622caSToomas Soome {
703*e84622caSToomas Soome #if !defined(__xpv)
704*e84622caSToomas Soome 	mb_memory_map_t *mp, *mpend;
705*e84622caSToomas Soome 	int i;
706*e84622caSToomas Soome 
707*e84622caSToomas Soome 	switch (multiboot_version) {
708*e84622caSToomas Soome 	case 1:
709*e84622caSToomas Soome 		mp = (mb_memory_map_t *)mb_info->mmap_addr;
710*e84622caSToomas Soome 		mpend = (mb_memory_map_t *)
711*e84622caSToomas Soome 		    (mb_info->mmap_addr + mb_info->mmap_length);
712*e84622caSToomas Soome 
713*e84622caSToomas Soome 		for (i = 0; mp < mpend && i != index; i++)
714*e84622caSToomas Soome 			mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
715*e84622caSToomas Soome 			    sizeof (mp->size));
716*e84622caSToomas Soome 		if (mp >= mpend) {
717*e84622caSToomas Soome 			dboot_panic("dboot_loader_mmap_get_type(): index "
718*e84622caSToomas Soome 			    "out of bounds: %d\n", index);
719*e84622caSToomas Soome 		}
720*e84622caSToomas Soome 		return (mp->type);
721*e84622caSToomas Soome 
722*e84622caSToomas Soome 	case 2:
723*e84622caSToomas Soome 		return (dboot_multiboot2_mmap_get_type(mb2_info,
724*e84622caSToomas Soome 		    mb2_mmap_tagp, index));
725*e84622caSToomas Soome 
726*e84622caSToomas Soome 	default:
727*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
728*e84622caSToomas Soome 		    multiboot_version);
729*e84622caSToomas Soome 		break;
730*e84622caSToomas Soome 	}
731*e84622caSToomas Soome 	return (0);
732*e84622caSToomas Soome #else
733*e84622caSToomas Soome 	return (map_buffer[index].type);
734*e84622caSToomas Soome #endif
735*e84622caSToomas Soome }
736*e84622caSToomas Soome 
737*e84622caSToomas Soome static uint64_t
dboot_loader_mmap_get_base(int index)738*e84622caSToomas Soome dboot_loader_mmap_get_base(int index)
739*e84622caSToomas Soome {
740*e84622caSToomas Soome #if !defined(__xpv)
741*e84622caSToomas Soome 	mb_memory_map_t *mp, *mpend;
742*e84622caSToomas Soome 	int i;
743*e84622caSToomas Soome 
744*e84622caSToomas Soome 	switch (multiboot_version) {
745*e84622caSToomas Soome 	case 1:
746*e84622caSToomas Soome 		mp = (mb_memory_map_t *)mb_info->mmap_addr;
747*e84622caSToomas Soome 		mpend = (mb_memory_map_t *)
748*e84622caSToomas Soome 		    (mb_info->mmap_addr + mb_info->mmap_length);
749*e84622caSToomas Soome 
750*e84622caSToomas Soome 		for (i = 0; mp < mpend && i != index; i++)
751*e84622caSToomas Soome 			mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
752*e84622caSToomas Soome 			    sizeof (mp->size));
753*e84622caSToomas Soome 		if (mp >= mpend) {
754*e84622caSToomas Soome 			dboot_panic("dboot_loader_mmap_get_base(): index "
755*e84622caSToomas Soome 			    "out of bounds: %d\n", index);
756*e84622caSToomas Soome 		}
757*e84622caSToomas Soome 		return (((uint64_t)mp->base_addr_high << 32) +
758*e84622caSToomas Soome 		    (uint64_t)mp->base_addr_low);
759*e84622caSToomas Soome 
760*e84622caSToomas Soome 	case 2:
761*e84622caSToomas Soome 		return (dboot_multiboot2_mmap_get_base(mb2_info,
762*e84622caSToomas Soome 		    mb2_mmap_tagp, index));
763*e84622caSToomas Soome 
764*e84622caSToomas Soome 	default:
765*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
766*e84622caSToomas Soome 		    multiboot_version);
767*e84622caSToomas Soome 		break;
768*e84622caSToomas Soome 	}
769*e84622caSToomas Soome 	return (0);
770*e84622caSToomas Soome #else
771*e84622caSToomas Soome 	return (((uint64_t)map_buffer[index].base_addr_high << 32) +
772*e84622caSToomas Soome 	    (uint64_t)map_buffer[index].base_addr_low);
773*e84622caSToomas Soome #endif
774*e84622caSToomas Soome }
775*e84622caSToomas Soome 
776*e84622caSToomas Soome static uint64_t
dboot_loader_mmap_get_length(int index)777*e84622caSToomas Soome dboot_loader_mmap_get_length(int index)
778*e84622caSToomas Soome {
779*e84622caSToomas Soome #if !defined(__xpv)
780*e84622caSToomas Soome 	mb_memory_map_t *mp, *mpend;
781*e84622caSToomas Soome 	int i;
782*e84622caSToomas Soome 
783*e84622caSToomas Soome 	switch (multiboot_version) {
784*e84622caSToomas Soome 	case 1:
785*e84622caSToomas Soome 		mp = (mb_memory_map_t *)mb_info->mmap_addr;
786*e84622caSToomas Soome 		mpend = (mb_memory_map_t *)
787*e84622caSToomas Soome 		    (mb_info->mmap_addr + mb_info->mmap_length);
788*e84622caSToomas Soome 
789*e84622caSToomas Soome 		for (i = 0; mp < mpend && i != index; i++)
790*e84622caSToomas Soome 			mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
791*e84622caSToomas Soome 			    sizeof (mp->size));
792*e84622caSToomas Soome 		if (mp >= mpend) {
793*e84622caSToomas Soome 			dboot_panic("dboot_loader_mmap_get_length(): index "
794*e84622caSToomas Soome 			    "out of bounds: %d\n", index);
795*e84622caSToomas Soome 		}
796*e84622caSToomas Soome 		return (((uint64_t)mp->length_high << 32) +
797*e84622caSToomas Soome 		    (uint64_t)mp->length_low);
798*e84622caSToomas Soome 
799*e84622caSToomas Soome 	case 2:
800*e84622caSToomas Soome 		return (dboot_multiboot2_mmap_get_length(mb2_info,
801*e84622caSToomas Soome 		    mb2_mmap_tagp, index));
802*e84622caSToomas Soome 
803*e84622caSToomas Soome 	default:
804*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
805*e84622caSToomas Soome 		    multiboot_version);
806*e84622caSToomas Soome 		break;
807*e84622caSToomas Soome 	}
808*e84622caSToomas Soome 	return (0);
809*e84622caSToomas Soome #else
810*e84622caSToomas Soome 	return (((uint64_t)map_buffer[index].length_high << 32) +
811*e84622caSToomas Soome 	    (uint64_t)map_buffer[index].length_low);
812*e84622caSToomas Soome #endif
813*e84622caSToomas Soome }
814843e1988Sjohnlev 
815843e1988Sjohnlev static void
build_pcimemlists(void)816*e84622caSToomas Soome build_pcimemlists(void)
817843e1988Sjohnlev {
818843e1988Sjohnlev 	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
819843e1988Sjohnlev 	uint64_t start;
820843e1988Sjohnlev 	uint64_t end;
821*e84622caSToomas Soome 	int i, num;
822843e1988Sjohnlev 
823843e1988Sjohnlev 	/*
824843e1988Sjohnlev 	 * initialize
825843e1988Sjohnlev 	 */
826843e1988Sjohnlev 	pcimemlists[0].addr = pci_lo_limit;
827843e1988Sjohnlev 	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
828843e1988Sjohnlev 	pcimemlists_used = 1;
829843e1988Sjohnlev 
830*e84622caSToomas Soome 	num = dboot_loader_mmap_entries();
831843e1988Sjohnlev 	/*
832843e1988Sjohnlev 	 * Fill in PCI memlists.
833843e1988Sjohnlev 	 */
834*e84622caSToomas Soome 	for (i = 0; i < num; ++i) {
835*e84622caSToomas Soome 		start = dboot_loader_mmap_get_base(i);
836*e84622caSToomas Soome 		end = start + dboot_loader_mmap_get_length(i);
837843e1988Sjohnlev 
838843e1988Sjohnlev 		if (prom_debug)
839843e1988Sjohnlev 			dboot_printf("\ttype: %d %" PRIx64 "..%"
840*e84622caSToomas Soome 			    PRIx64 "\n", dboot_loader_mmap_get_type(i),
841*e84622caSToomas Soome 			    start, end);
842843e1988Sjohnlev 
843843e1988Sjohnlev 		/*
844843e1988Sjohnlev 		 * page align start and end
845843e1988Sjohnlev 		 */
846843e1988Sjohnlev 		start = (start + page_offset) & ~page_offset;
847843e1988Sjohnlev 		end &= ~page_offset;
848843e1988Sjohnlev 		if (end <= start)
849843e1988Sjohnlev 			continue;
850843e1988Sjohnlev 
851843e1988Sjohnlev 		exclude_from_pci(start, end);
852843e1988Sjohnlev 	}
853843e1988Sjohnlev 
854843e1988Sjohnlev 	/*
855843e1988Sjohnlev 	 * Finish off the pcimemlist
856843e1988Sjohnlev 	 */
857843e1988Sjohnlev 	if (prom_debug) {
858843e1988Sjohnlev 		for (i = 0; i < pcimemlists_used; ++i) {
859843e1988Sjohnlev 			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
860843e1988Sjohnlev 			    PRIx64 "\n", pcimemlists[i].addr,
861843e1988Sjohnlev 			    pcimemlists[i].addr + pcimemlists[i].size);
862843e1988Sjohnlev 		}
863843e1988Sjohnlev 	}
864843e1988Sjohnlev 	pcimemlists[0].next = 0;
865843e1988Sjohnlev 	pcimemlists[0].prev = 0;
866843e1988Sjohnlev 	for (i = 1; i < pcimemlists_used; ++i) {
867843e1988Sjohnlev 		pcimemlists[i].prev =
868843e1988Sjohnlev 		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
869843e1988Sjohnlev 		pcimemlists[i].next = 0;
870843e1988Sjohnlev 		pcimemlists[i - 1].next =
871843e1988Sjohnlev 		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
872843e1988Sjohnlev 	}
873c909a41bSRichard Lowe 	bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
874843e1988Sjohnlev 	DBG(bi->bi_pcimem);
875843e1988Sjohnlev }
876843e1988Sjohnlev 
877843e1988Sjohnlev #if defined(__xpv)
878843e1988Sjohnlev /*
879843e1988Sjohnlev  * Initialize memory allocator stuff from hypervisor-supplied start info.
880843e1988Sjohnlev  */
881843e1988Sjohnlev static void
init_mem_alloc(void)882843e1988Sjohnlev init_mem_alloc(void)
883843e1988Sjohnlev {
884843e1988Sjohnlev 	int	local;	/* variables needed to find start region */
885843e1988Sjohnlev 	paddr_t	scratch_start;
886843e1988Sjohnlev 	xen_memory_map_t map;
887843e1988Sjohnlev 
888843e1988Sjohnlev 	DBG_MSG("Entered init_mem_alloc()\n");
889843e1988Sjohnlev 
890843e1988Sjohnlev 	/*
891843e1988Sjohnlev 	 * Free memory follows the stack. There's at least 512KB of scratch
892843e1988Sjohnlev 	 * space, rounded up to at least 2Mb alignment.  That should be enough
893843e1988Sjohnlev 	 * for the page tables we'll need to build.  The nucleus memory is
894843e1988Sjohnlev 	 * allocated last and will be outside the addressible range.  We'll
895843e1988Sjohnlev 	 * switch to new page tables before we unpack the kernel
896843e1988Sjohnlev 	 */
897843e1988Sjohnlev 	scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
898843e1988Sjohnlev 	DBG(scratch_start);
899843e1988Sjohnlev 	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
900843e1988Sjohnlev 	DBG(scratch_end);
901843e1988Sjohnlev 
902843e1988Sjohnlev 	/*
903843e1988Sjohnlev 	 * For paranoia, leave some space between hypervisor data and ours.
904843e1988Sjohnlev 	 * Use 500 instead of 512.
905843e1988Sjohnlev 	 */
906843e1988Sjohnlev 	next_avail_addr = scratch_end - 500 * 1024;
907843e1988Sjohnlev 	DBG(next_avail_addr);
908843e1988Sjohnlev 
909843e1988Sjohnlev 	/*
910843e1988Sjohnlev 	 * The domain builder gives us at most 1 module
911843e1988Sjohnlev 	 */
912843e1988Sjohnlev 	DBG(xen_info->mod_len);
913843e1988Sjohnlev 	if (xen_info->mod_len > 0) {
914843e1988Sjohnlev 		DBG(xen_info->mod_start);
915843e1988Sjohnlev 		modules[0].bm_addr = xen_info->mod_start;
916843e1988Sjohnlev 		modules[0].bm_size = xen_info->mod_len;
917843e1988Sjohnlev 		bi->bi_module_cnt = 1;
918843e1988Sjohnlev 		bi->bi_modules = (native_ptr_t)modules;
919843e1988Sjohnlev 	} else {
920843e1988Sjohnlev 		bi->bi_module_cnt = 0;
921843e1988Sjohnlev 		bi->bi_modules = NULL;
922843e1988Sjohnlev 	}
923843e1988Sjohnlev 	DBG(bi->bi_module_cnt);
924843e1988Sjohnlev 	DBG(bi->bi_modules);
925843e1988Sjohnlev 
926843e1988Sjohnlev 	DBG(xen_info->mfn_list);
927843e1988Sjohnlev 	DBG(xen_info->nr_pages);
928843e1988Sjohnlev 	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
929843e1988Sjohnlev 	DBG(max_mem);
930843e1988Sjohnlev 
931843e1988Sjohnlev 	/*
932843e1988Sjohnlev 	 * Using pseudo-physical addresses, so only 1 memlist element
933843e1988Sjohnlev 	 */
934843e1988Sjohnlev 	memlists[0].addr = 0;
935843e1988Sjohnlev 	DBG(memlists[0].addr);
936843e1988Sjohnlev 	memlists[0].size = max_mem;
937843e1988Sjohnlev 	DBG(memlists[0].size);
938843e1988Sjohnlev 	memlists_used = 1;
939843e1988Sjohnlev 	DBG(memlists_used);
940843e1988Sjohnlev 
941843e1988Sjohnlev 	/*
942843e1988Sjohnlev 	 * finish building physinstall list
943843e1988Sjohnlev 	 */
944843e1988Sjohnlev 	sort_physinstall();
945843e1988Sjohnlev 
9461de082f7SVikram Hegde 	/*
9471de082f7SVikram Hegde 	 * build bios reserved memlists
9481de082f7SVikram Hegde 	 */
9491de082f7SVikram Hegde 	build_rsvdmemlists();
9501de082f7SVikram Hegde 
951843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
952843e1988Sjohnlev 		/*
953843e1988Sjohnlev 		 * build PCI Memory list
954843e1988Sjohnlev 		 */
955843e1988Sjohnlev 		map.nr_entries = MAXMAPS;
956843e1988Sjohnlev 		/*LINTED: constant in conditional context*/
957843e1988Sjohnlev 		set_xen_guest_handle(map.buffer, map_buffer);
958843e1988Sjohnlev 		if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
959843e1988Sjohnlev 			dboot_panic("getting XENMEM_machine_memory_map failed");
960*e84622caSToomas Soome 		build_pcimemlists();
961843e1988Sjohnlev 	}
962843e1988Sjohnlev }
963843e1988Sjohnlev 
964843e1988Sjohnlev #else	/* !__xpv */
965843e1988Sjohnlev 
/*
 * Multiboot 1 console information hook.  Intentionally empty: this is only
 * a stub in this version.
 */
static void
dboot_multiboot1_xboot_consinfo(void)
{
}
971*e84622caSToomas Soome 
/*
 * Multiboot 2 console information hook.  Intentionally empty: this is only
 * a stub in this version.
 */
static void
dboot_multiboot2_xboot_consinfo(void)
{
}
977*e84622caSToomas Soome 
978*e84622caSToomas Soome static int
dboot_multiboot_modcount(void)979*e84622caSToomas Soome dboot_multiboot_modcount(void)
980*e84622caSToomas Soome {
981*e84622caSToomas Soome 	switch (multiboot_version) {
982*e84622caSToomas Soome 	case 1:
983*e84622caSToomas Soome 		return (mb_info->mods_count);
984*e84622caSToomas Soome 
985*e84622caSToomas Soome 	case 2:
986*e84622caSToomas Soome 		return (dboot_multiboot2_modcount(mb2_info));
987*e84622caSToomas Soome 
988*e84622caSToomas Soome 	default:
989*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
990*e84622caSToomas Soome 		    multiboot_version);
991*e84622caSToomas Soome 		break;
992*e84622caSToomas Soome 	}
993*e84622caSToomas Soome 	return (0);
994*e84622caSToomas Soome }
995*e84622caSToomas Soome 
996*e84622caSToomas Soome static uint32_t
dboot_multiboot_modstart(int index)997*e84622caSToomas Soome dboot_multiboot_modstart(int index)
998*e84622caSToomas Soome {
999*e84622caSToomas Soome 	switch (multiboot_version) {
1000*e84622caSToomas Soome 	case 1:
1001*e84622caSToomas Soome 		return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
1002*e84622caSToomas Soome 
1003*e84622caSToomas Soome 	case 2:
1004*e84622caSToomas Soome 		return (dboot_multiboot2_modstart(mb2_info, index));
1005*e84622caSToomas Soome 
1006*e84622caSToomas Soome 	default:
1007*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1008*e84622caSToomas Soome 		    multiboot_version);
1009*e84622caSToomas Soome 		break;
1010*e84622caSToomas Soome 	}
1011*e84622caSToomas Soome 	return (0);
1012*e84622caSToomas Soome }
1013*e84622caSToomas Soome 
1014*e84622caSToomas Soome static uint32_t
dboot_multiboot_modend(int index)1015*e84622caSToomas Soome dboot_multiboot_modend(int index)
1016*e84622caSToomas Soome {
1017*e84622caSToomas Soome 	switch (multiboot_version) {
1018*e84622caSToomas Soome 	case 1:
1019*e84622caSToomas Soome 		return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
1020*e84622caSToomas Soome 
1021*e84622caSToomas Soome 	case 2:
1022*e84622caSToomas Soome 		return (dboot_multiboot2_modend(mb2_info, index));
1023*e84622caSToomas Soome 
1024*e84622caSToomas Soome 	default:
1025*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1026*e84622caSToomas Soome 		    multiboot_version);
1027*e84622caSToomas Soome 		break;
1028*e84622caSToomas Soome 	}
1029*e84622caSToomas Soome 	return (0);
1030*e84622caSToomas Soome }
1031*e84622caSToomas Soome 
1032*e84622caSToomas Soome static char *
dboot_multiboot_modcmdline(int index)1033*e84622caSToomas Soome dboot_multiboot_modcmdline(int index)
1034*e84622caSToomas Soome {
1035*e84622caSToomas Soome 	switch (multiboot_version) {
1036*e84622caSToomas Soome 	case 1:
1037*e84622caSToomas Soome 		return ((char *)((mb_module_t *)
1038*e84622caSToomas Soome 		    mb_info->mods_addr)[index].mod_name);
1039*e84622caSToomas Soome 
1040*e84622caSToomas Soome 	case 2:
1041*e84622caSToomas Soome 		return (dboot_multiboot2_modcmdline(mb2_info, index));
1042*e84622caSToomas Soome 
1043*e84622caSToomas Soome 	default:
1044*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1045*e84622caSToomas Soome 		    multiboot_version);
1046*e84622caSToomas Soome 		break;
1047*e84622caSToomas Soome 	}
1048*e84622caSToomas Soome 	return (0);
1049*e84622caSToomas Soome }
1050*e84622caSToomas Soome 
1051*e84622caSToomas Soome static boolean_t
dboot_multiboot_basicmeminfo(uint32_t * lower,uint32_t * upper)1052*e84622caSToomas Soome dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
1053*e84622caSToomas Soome {
1054*e84622caSToomas Soome 	boolean_t rv = B_FALSE;
1055*e84622caSToomas Soome 
1056*e84622caSToomas Soome 	switch (multiboot_version) {
1057*e84622caSToomas Soome 	case 1:
1058*e84622caSToomas Soome 		if (mb_info->flags & 0x01) {
1059*e84622caSToomas Soome 			*lower = mb_info->mem_lower;
1060*e84622caSToomas Soome 			*upper = mb_info->mem_upper;
1061*e84622caSToomas Soome 			rv = B_TRUE;
1062*e84622caSToomas Soome 		}
1063*e84622caSToomas Soome 		break;
1064*e84622caSToomas Soome 
1065*e84622caSToomas Soome 	case 2:
1066*e84622caSToomas Soome 		return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
1067*e84622caSToomas Soome 
1068*e84622caSToomas Soome 	default:
1069*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1070*e84622caSToomas Soome 		    multiboot_version);
1071*e84622caSToomas Soome 		break;
1072*e84622caSToomas Soome 	}
1073*e84622caSToomas Soome 	return (rv);
1074*e84622caSToomas Soome }
1075*e84622caSToomas Soome 
1076e65d07eeSKeith Wesolowski static uint8_t
dboot_a2h(char v)1077e65d07eeSKeith Wesolowski dboot_a2h(char v)
1078e65d07eeSKeith Wesolowski {
1079e65d07eeSKeith Wesolowski 	if (v >= 'a')
1080e65d07eeSKeith Wesolowski 		return (v - 'a' + 0xa);
1081e65d07eeSKeith Wesolowski 	else if (v >= 'A')
1082e65d07eeSKeith Wesolowski 		return (v - 'A' + 0xa);
1083e65d07eeSKeith Wesolowski 	else if (v >= '0')
1084e65d07eeSKeith Wesolowski 		return (v - '0');
1085e65d07eeSKeith Wesolowski 	else
1086e65d07eeSKeith Wesolowski 		dboot_panic("bad ASCII hex character %c\n", v);
1087e65d07eeSKeith Wesolowski 
1088e65d07eeSKeith Wesolowski 	return (0);
1089e65d07eeSKeith Wesolowski }
1090e65d07eeSKeith Wesolowski 
1091e65d07eeSKeith Wesolowski static void
digest_a2h(const char * ascii,uint8_t * digest)1092e65d07eeSKeith Wesolowski digest_a2h(const char *ascii, uint8_t *digest)
1093e65d07eeSKeith Wesolowski {
1094e65d07eeSKeith Wesolowski 	unsigned int i;
1095e65d07eeSKeith Wesolowski 
1096e65d07eeSKeith Wesolowski 	for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1097e65d07eeSKeith Wesolowski 		digest[i] = dboot_a2h(ascii[i * 2]) << 4;
1098e65d07eeSKeith Wesolowski 		digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
1099e65d07eeSKeith Wesolowski 	}
1100e65d07eeSKeith Wesolowski }
1101e65d07eeSKeith Wesolowski 
1102e65d07eeSKeith Wesolowski /*
1103e65d07eeSKeith Wesolowski  * Generate a SHA-1 hash of the first len bytes of image, and compare it with
1104e65d07eeSKeith Wesolowski  * the ASCII-format hash found in the 40-byte buffer at ascii.  If they
1105e65d07eeSKeith Wesolowski  * match, return 0, otherwise -1.  This works only for images smaller than
1106e65d07eeSKeith Wesolowski  * 4 GB, which should not be a problem.
1107e65d07eeSKeith Wesolowski  */
1108e65d07eeSKeith Wesolowski static int
check_image_hash(uint_t midx)11091d9cde1dSKeith M Wesolowski check_image_hash(uint_t midx)
1110e65d07eeSKeith Wesolowski {
11111d9cde1dSKeith M Wesolowski 	const char *ascii;
11121d9cde1dSKeith M Wesolowski 	const void *image;
11131d9cde1dSKeith M Wesolowski 	size_t len;
1114e65d07eeSKeith Wesolowski 	SHA1_CTX ctx;
1115e65d07eeSKeith Wesolowski 	uint8_t digest[SHA1_DIGEST_LENGTH];
1116e65d07eeSKeith Wesolowski 	uint8_t baseline[SHA1_DIGEST_LENGTH];
1117e65d07eeSKeith Wesolowski 	unsigned int i;
1118e65d07eeSKeith Wesolowski 
11191d9cde1dSKeith M Wesolowski 	ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
11201d9cde1dSKeith M Wesolowski 	image = (const void *)(uintptr_t)modules[midx].bm_addr;
11211d9cde1dSKeith M Wesolowski 	len = (size_t)modules[midx].bm_size;
11221d9cde1dSKeith M Wesolowski 
1123e65d07eeSKeith Wesolowski 	digest_a2h(ascii, baseline);
1124e65d07eeSKeith Wesolowski 
1125e65d07eeSKeith Wesolowski 	SHA1Init(&ctx);
1126e65d07eeSKeith Wesolowski 	SHA1Update(&ctx, image, len);
1127e65d07eeSKeith Wesolowski 	SHA1Final(digest, &ctx);
1128e65d07eeSKeith Wesolowski 
1129e65d07eeSKeith Wesolowski 	for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1130e65d07eeSKeith Wesolowski 		if (digest[i] != baseline[i])
1131e65d07eeSKeith Wesolowski 			return (-1);
1132e65d07eeSKeith Wesolowski 	}
1133e65d07eeSKeith Wesolowski 
1134e65d07eeSKeith Wesolowski 	return (0);
1135e65d07eeSKeith Wesolowski }
1136e65d07eeSKeith Wesolowski 
11371d9cde1dSKeith M Wesolowski static const char *
type_to_str(boot_module_type_t type)11381d9cde1dSKeith M Wesolowski type_to_str(boot_module_type_t type)
11391d9cde1dSKeith M Wesolowski {
11401d9cde1dSKeith M Wesolowski 	switch (type) {
11411d9cde1dSKeith M Wesolowski 	case BMT_ROOTFS:
11421d9cde1dSKeith M Wesolowski 		return ("rootfs");
11431d9cde1dSKeith M Wesolowski 	case BMT_FILE:
11441d9cde1dSKeith M Wesolowski 		return ("file");
11451d9cde1dSKeith M Wesolowski 	case BMT_HASH:
11461d9cde1dSKeith M Wesolowski 		return ("hash");
11471d9cde1dSKeith M Wesolowski 	default:
11481d9cde1dSKeith M Wesolowski 		return ("unknown");
11491d9cde1dSKeith M Wesolowski 	}
11501d9cde1dSKeith M Wesolowski }
11511d9cde1dSKeith M Wesolowski 
1152e65d07eeSKeith Wesolowski static void
check_images(void)1153e65d07eeSKeith Wesolowski check_images(void)
1154e65d07eeSKeith Wesolowski {
11551d9cde1dSKeith M Wesolowski 	uint_t i;
1156e65d07eeSKeith Wesolowski 	char displayhash[SHA1_ASCII_LENGTH + 1];
11571d9cde1dSKeith M Wesolowski 
11581d9cde1dSKeith M Wesolowski 	for (i = 0; i < modules_used; i++) {
11591d9cde1dSKeith M Wesolowski 		if (prom_debug) {
11601d9cde1dSKeith M Wesolowski 			dboot_printf("module #%d: name %s type %s "
11611d9cde1dSKeith M Wesolowski 			    "addr %lx size %lx\n",
11621d9cde1dSKeith M Wesolowski 			    i, (char *)(uintptr_t)modules[i].bm_name,
11631d9cde1dSKeith M Wesolowski 			    type_to_str(modules[i].bm_type),
11641d9cde1dSKeith M Wesolowski 			    (ulong_t)modules[i].bm_addr,
11651d9cde1dSKeith M Wesolowski 			    (ulong_t)modules[i].bm_size);
11661d9cde1dSKeith M Wesolowski 		}
11671d9cde1dSKeith M Wesolowski 
11681d9cde1dSKeith M Wesolowski 		if (modules[i].bm_type == BMT_HASH ||
11691d9cde1dSKeith M Wesolowski 		    modules[i].bm_hash == NULL) {
11701d9cde1dSKeith M Wesolowski 			DBG_MSG("module has no hash; skipping check\n");
11711d9cde1dSKeith M Wesolowski 			continue;
11721d9cde1dSKeith M Wesolowski 		}
11731d9cde1dSKeith M Wesolowski 		(void) memcpy(displayhash,
11741d9cde1dSKeith M Wesolowski 		    (void *)(uintptr_t)modules[i].bm_hash,
11751d9cde1dSKeith M Wesolowski 		    SHA1_ASCII_LENGTH);
11761d9cde1dSKeith M Wesolowski 		displayhash[SHA1_ASCII_LENGTH] = '\0';
11771d9cde1dSKeith M Wesolowski 		if (prom_debug) {
11781d9cde1dSKeith M Wesolowski 			dboot_printf("checking expected hash [%s]: ",
11791d9cde1dSKeith M Wesolowski 			    displayhash);
11801d9cde1dSKeith M Wesolowski 		}
11811d9cde1dSKeith M Wesolowski 
11821d9cde1dSKeith M Wesolowski 		if (check_image_hash(i) != 0)
11831d9cde1dSKeith M Wesolowski 			dboot_panic("hash mismatch!\n");
11841d9cde1dSKeith M Wesolowski 		else
11851d9cde1dSKeith M Wesolowski 			DBG_MSG("OK\n");
11861d9cde1dSKeith M Wesolowski 	}
11871d9cde1dSKeith M Wesolowski }
11881d9cde1dSKeith M Wesolowski 
11891d9cde1dSKeith M Wesolowski /*
11901d9cde1dSKeith M Wesolowski  * Determine the module's starting address, size, name, and type, and fill the
11911d9cde1dSKeith M Wesolowski  * boot_modules structure.  This structure is used by the bop code, except for
11921d9cde1dSKeith M Wesolowski  * hashes which are checked prior to transferring control to the kernel.
11931d9cde1dSKeith M Wesolowski  */
11941d9cde1dSKeith M Wesolowski static void
process_module(int midx)1195*e84622caSToomas Soome process_module(int midx)
11961d9cde1dSKeith M Wesolowski {
1197*e84622caSToomas Soome 	uint32_t mod_start = dboot_multiboot_modstart(midx);
1198*e84622caSToomas Soome 	uint32_t mod_end = dboot_multiboot_modend(midx);
1199*e84622caSToomas Soome 	char *cmdline = dboot_multiboot_modcmdline(midx);
12001d9cde1dSKeith M Wesolowski 	char *p, *q;
12011d9cde1dSKeith M Wesolowski 
1202*e84622caSToomas Soome 	check_higher(mod_end);
12031d9cde1dSKeith M Wesolowski 	if (prom_debug) {
12041d9cde1dSKeith M Wesolowski 		dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
1205*e84622caSToomas Soome 		    midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
12061d9cde1dSKeith M Wesolowski 	}
12071d9cde1dSKeith M Wesolowski 
1208*e84622caSToomas Soome 	if (mod_start > mod_end) {
12091d9cde1dSKeith M Wesolowski 		dboot_panic("module #%d: module start address 0x%lx greater "
12101d9cde1dSKeith M Wesolowski 		    "than end address 0x%lx", midx,
1211*e84622caSToomas Soome 		    (ulong_t)mod_start, (ulong_t)mod_end);
12121d9cde1dSKeith M Wesolowski 	}
1213e65d07eeSKeith Wesolowski 
1214e65d07eeSKeith Wesolowski 	/*
1215e65d07eeSKeith Wesolowski 	 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1216e65d07eeSKeith Wesolowski 	 * the address of the last valid byte in a module plus 1 as mod_end.
1217e65d07eeSKeith Wesolowski 	 * This is of course a bug; the multiboot specification simply states
1218e65d07eeSKeith Wesolowski 	 * that mod_start and mod_end "contain the start and end addresses of
1219e65d07eeSKeith Wesolowski 	 * the boot module itself" which is pretty obviously not what GRUB is
1220e65d07eeSKeith Wesolowski 	 * doing.  However, fixing it requires that not only this code be
1221e65d07eeSKeith Wesolowski 	 * changed but also that other code consuming this value and values
1222e65d07eeSKeith Wesolowski 	 * derived from it be fixed, and that the kernel and GRUB must either
1223e65d07eeSKeith Wesolowski 	 * both have the bug or neither.  While there are a lot of combinations
1224e65d07eeSKeith Wesolowski 	 * that will work, there are also some that won't, so for simplicity
1225e65d07eeSKeith Wesolowski 	 * we'll just cope with the bug.  That means we won't actually hash the
1226e65d07eeSKeith Wesolowski 	 * byte at mod_end, and we will expect that mod_end for the hash file
1227e65d07eeSKeith Wesolowski 	 * itself is one greater than some multiple of 41 (40 bytes of ASCII
12281d9cde1dSKeith M Wesolowski 	 * hash plus a newline for each module).  We set bm_size to the true
12291d9cde1dSKeith M Wesolowski 	 * correct number of bytes in each module, achieving exactly this.
1230e65d07eeSKeith Wesolowski 	 */
1231e65d07eeSKeith Wesolowski 
1232*e84622caSToomas Soome 	modules[midx].bm_addr = mod_start;
1233*e84622caSToomas Soome 	modules[midx].bm_size = mod_end - mod_start;
1234*e84622caSToomas Soome 	modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
12351d9cde1dSKeith M Wesolowski 	modules[midx].bm_hash = NULL;
12361d9cde1dSKeith M Wesolowski 	modules[midx].bm_type = BMT_FILE;
12371d9cde1dSKeith M Wesolowski 
1238*e84622caSToomas Soome 	if (cmdline == NULL) {
12391d9cde1dSKeith M Wesolowski 		modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1240e65d07eeSKeith Wesolowski 		return;
1241e65d07eeSKeith Wesolowski 	}
1242e65d07eeSKeith Wesolowski 
1243*e84622caSToomas Soome 	p = cmdline;
12441d9cde1dSKeith M Wesolowski 	modules[midx].bm_name =
12451d9cde1dSKeith M Wesolowski 	    (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
12461d9cde1dSKeith M Wesolowski 
12471d9cde1dSKeith M Wesolowski 	while (p != NULL) {
12481d9cde1dSKeith M Wesolowski 		q = strsep(&p, " \t\f\n\r");
12491d9cde1dSKeith M Wesolowski 		if (strncmp(q, "name=", 5) == 0) {
12501d9cde1dSKeith M Wesolowski 			if (q[5] != '\0' && !isspace(q[5])) {
12511d9cde1dSKeith M Wesolowski 				modules[midx].bm_name =
12521d9cde1dSKeith M Wesolowski 				    (native_ptr_t)(uintptr_t)(q + 5);
12531d9cde1dSKeith M Wesolowski 			}
12541d9cde1dSKeith M Wesolowski 			continue;
12551d9cde1dSKeith M Wesolowski 		}
12561d9cde1dSKeith M Wesolowski 
12571d9cde1dSKeith M Wesolowski 		if (strncmp(q, "type=", 5) == 0) {
12581d9cde1dSKeith M Wesolowski 			if (q[5] == '\0' || isspace(q[5]))
12591d9cde1dSKeith M Wesolowski 				continue;
12601d9cde1dSKeith M Wesolowski 			q += 5;
12611d9cde1dSKeith M Wesolowski 			if (strcmp(q, "rootfs") == 0) {
12621d9cde1dSKeith M Wesolowski 				modules[midx].bm_type = BMT_ROOTFS;
12631d9cde1dSKeith M Wesolowski 			} else if (strcmp(q, "hash") == 0) {
12641d9cde1dSKeith M Wesolowski 				modules[midx].bm_type = BMT_HASH;
12651d9cde1dSKeith M Wesolowski 			} else if (strcmp(q, "file") != 0) {
12661d9cde1dSKeith M Wesolowski 				dboot_printf("\tmodule #%d: unknown module "
12671d9cde1dSKeith M Wesolowski 				    "type '%s'; defaulting to 'file'",
12681d9cde1dSKeith M Wesolowski 				    midx, q);
12691d9cde1dSKeith M Wesolowski 			}
12701d9cde1dSKeith M Wesolowski 			continue;
12711d9cde1dSKeith M Wesolowski 		}
12721d9cde1dSKeith M Wesolowski 
12731d9cde1dSKeith M Wesolowski 		if (strncmp(q, "hash=", 5) == 0) {
12741d9cde1dSKeith M Wesolowski 			if (q[5] != '\0' && !isspace(q[5])) {
12751d9cde1dSKeith M Wesolowski 				modules[midx].bm_hash =
12761d9cde1dSKeith M Wesolowski 				    (native_ptr_t)(uintptr_t)(q + 5);
12771d9cde1dSKeith M Wesolowski 			}
12781d9cde1dSKeith M Wesolowski 			continue;
12791d9cde1dSKeith M Wesolowski 		}
12801d9cde1dSKeith M Wesolowski 
12811d9cde1dSKeith M Wesolowski 		dboot_printf("ignoring unknown option '%s'\n", q);
12821d9cde1dSKeith M Wesolowski 	}
12831d9cde1dSKeith M Wesolowski }
12841d9cde1dSKeith M Wesolowski 
12851d9cde1dSKeith M Wesolowski /*
12861d9cde1dSKeith M Wesolowski  * Backward compatibility: if there are exactly one or two modules, both
12871d9cde1dSKeith M Wesolowski  * of type 'file' and neither with an embedded hash value, we have been
12881d9cde1dSKeith M Wesolowski  * given the legacy style modules.  In this case we need to treat the first
12891d9cde1dSKeith M Wesolowski  * module as a rootfs and the second as a hash referencing that module.
12901d9cde1dSKeith M Wesolowski  * Otherwise, even if the configuration is invalid, we assume that the
12911d9cde1dSKeith M Wesolowski  * operator knows what he's doing or at least isn't being bitten by this
12921d9cde1dSKeith M Wesolowski  * interface change.
12931d9cde1dSKeith M Wesolowski  */
12941d9cde1dSKeith M Wesolowski static void
fixup_modules(void)12951d9cde1dSKeith M Wesolowski fixup_modules(void)
12961d9cde1dSKeith M Wesolowski {
12971d9cde1dSKeith M Wesolowski 	if (modules_used == 0 || modules_used > 2)
12981d9cde1dSKeith M Wesolowski 		return;
12991d9cde1dSKeith M Wesolowski 
13001d9cde1dSKeith M Wesolowski 	if (modules[0].bm_type != BMT_FILE ||
13011d9cde1dSKeith M Wesolowski 	    modules_used > 1 && modules[1].bm_type != BMT_FILE) {
13021d9cde1dSKeith M Wesolowski 		return;
13031d9cde1dSKeith M Wesolowski 	}
13041d9cde1dSKeith M Wesolowski 
13051d9cde1dSKeith M Wesolowski 	if (modules[0].bm_hash != NULL ||
13061d9cde1dSKeith M Wesolowski 	    modules_used > 1 && modules[1].bm_hash != NULL) {
13071d9cde1dSKeith M Wesolowski 		return;
13081d9cde1dSKeith M Wesolowski 	}
13091d9cde1dSKeith M Wesolowski 
13101d9cde1dSKeith M Wesolowski 	modules[0].bm_type = BMT_ROOTFS;
13111d9cde1dSKeith M Wesolowski 	if (modules_used > 1) {
13121d9cde1dSKeith M Wesolowski 		modules[1].bm_type = BMT_HASH;
13131d9cde1dSKeith M Wesolowski 		modules[1].bm_name = modules[0].bm_name;
13141d9cde1dSKeith M Wesolowski 	}
13151d9cde1dSKeith M Wesolowski }
13161d9cde1dSKeith M Wesolowski 
13171d9cde1dSKeith M Wesolowski /*
13181d9cde1dSKeith M Wesolowski  * For modules that do not have assigned hashes but have a separate hash module,
13191d9cde1dSKeith M Wesolowski  * find the assigned hash module and set the primary module's bm_hash to point
13201d9cde1dSKeith M Wesolowski  * to the hash data from that module.  We will then ignore modules of type
13211d9cde1dSKeith M Wesolowski  * BMT_HASH from this point forward.
13221d9cde1dSKeith M Wesolowski  */
13231d9cde1dSKeith M Wesolowski static void
assign_module_hashes(void)13241d9cde1dSKeith M Wesolowski assign_module_hashes(void)
13251d9cde1dSKeith M Wesolowski {
13261d9cde1dSKeith M Wesolowski 	uint_t i, j;
13271d9cde1dSKeith M Wesolowski 
13281d9cde1dSKeith M Wesolowski 	for (i = 0; i < modules_used; i++) {
13291d9cde1dSKeith M Wesolowski 		if (modules[i].bm_type == BMT_HASH ||
13301d9cde1dSKeith M Wesolowski 		    modules[i].bm_hash != NULL) {
13311d9cde1dSKeith M Wesolowski 			continue;
13321d9cde1dSKeith M Wesolowski 		}
13331d9cde1dSKeith M Wesolowski 
13341d9cde1dSKeith M Wesolowski 		for (j = 0; j < modules_used; j++) {
13351d9cde1dSKeith M Wesolowski 			if (modules[j].bm_type != BMT_HASH ||
13361d9cde1dSKeith M Wesolowski 			    strcmp((char *)(uintptr_t)modules[j].bm_name,
13371d9cde1dSKeith M Wesolowski 			    (char *)(uintptr_t)modules[i].bm_name) != 0) {
13381d9cde1dSKeith M Wesolowski 				continue;
13391d9cde1dSKeith M Wesolowski 			}
13401d9cde1dSKeith M Wesolowski 
13411d9cde1dSKeith M Wesolowski 			if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
13421d9cde1dSKeith M Wesolowski 				dboot_printf("Short hash module of length "
13431d9cde1dSKeith M Wesolowski 				    "0x%lx bytes; ignoring\n",
13441d9cde1dSKeith M Wesolowski 				    (ulong_t)modules[j].bm_size);
13451d9cde1dSKeith M Wesolowski 			} else {
13461d9cde1dSKeith M Wesolowski 				modules[i].bm_hash = modules[j].bm_addr;
13471d9cde1dSKeith M Wesolowski 			}
1348e65d07eeSKeith Wesolowski 			break;
1349e65d07eeSKeith Wesolowski 		}
1350e65d07eeSKeith Wesolowski 	}
1351e65d07eeSKeith Wesolowski }
1352e65d07eeSKeith Wesolowski 
1353843e1988Sjohnlev /*
1354ae115bc7Smrj  * Walk through the module information finding the last used address.
1355ae115bc7Smrj  * The first available address will become the top level page table.
1356ae115bc7Smrj  */
1357ae115bc7Smrj static void
dboot_process_modules(void)1358*e84622caSToomas Soome dboot_process_modules(void)
1359ae115bc7Smrj {
1360*e84622caSToomas Soome 	int i, modcount;
1361ae115bc7Smrj 	extern char _end[];
1362ae115bc7Smrj 
1363*e84622caSToomas Soome 	DBG_MSG("\nFinding Modules\n");
1364*e84622caSToomas Soome 	modcount = dboot_multiboot_modcount();
1365*e84622caSToomas Soome 	if (modcount > MAX_BOOT_MODULES) {
13665420b805SSeth Goldberg 		dboot_panic("Too many modules (%d) -- the maximum is %d.",
1367*e84622caSToomas Soome 		    modcount, MAX_BOOT_MODULES);
13685420b805SSeth Goldberg 	}
1369ae115bc7Smrj 	/*
1370ae115bc7Smrj 	 * search the modules to find the last used address
1371ae115bc7Smrj 	 * we'll build the module list while we're walking through here
1372ae115bc7Smrj 	 */
1373c909a41bSRichard Lowe 	check_higher((paddr_t)(uintptr_t)&_end);
1374*e84622caSToomas Soome 	for (i = 0; i < modcount; ++i) {
1375*e84622caSToomas Soome 		process_module(i);
1376*e84622caSToomas Soome 		modules_used++;
1377ae115bc7Smrj 	}
1378c909a41bSRichard Lowe 	bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1379ae115bc7Smrj 	DBG(bi->bi_modules);
1380*e84622caSToomas Soome 	bi->bi_module_cnt = modcount;
1381ae115bc7Smrj 	DBG(bi->bi_module_cnt);
1382ae115bc7Smrj 
13831d9cde1dSKeith M Wesolowski 	fixup_modules();
13841d9cde1dSKeith M Wesolowski 	assign_module_hashes();
1385e65d07eeSKeith Wesolowski 	check_images();
1386*e84622caSToomas Soome }
1387*e84622caSToomas Soome 
1388*e84622caSToomas Soome /*
1389*e84622caSToomas Soome  * We then build the phys_install memlist from the multiboot information.
1390*e84622caSToomas Soome  */
1391*e84622caSToomas Soome static void
dboot_process_mmap(void)1392*e84622caSToomas Soome dboot_process_mmap(void)
1393*e84622caSToomas Soome {
1394*e84622caSToomas Soome 	uint64_t start;
1395*e84622caSToomas Soome 	uint64_t end;
1396*e84622caSToomas Soome 	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
1397*e84622caSToomas Soome 	uint32_t lower, upper;
1398*e84622caSToomas Soome 	int i, mmap_entries;
1399e65d07eeSKeith Wesolowski 
1400ae115bc7Smrj 	/*
1401ae115bc7Smrj 	 * Walk through the memory map from multiboot and build our memlist
1402ae115bc7Smrj 	 * structures. Note these will have native format pointers.
1403ae115bc7Smrj 	 */
1404ae115bc7Smrj 	DBG_MSG("\nFinding Memory Map\n");
1405*e84622caSToomas Soome 	num_entries = 0;
1406*e84622caSToomas Soome 	num_entries_set = B_FALSE;
1407ae115bc7Smrj 	max_mem = 0;
1408*e84622caSToomas Soome 	if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1409*e84622caSToomas Soome 		for (i = 0; i < mmap_entries; i++) {
1410*e84622caSToomas Soome 			uint32_t type = dboot_loader_mmap_get_type(i);
1411*e84622caSToomas Soome 			start = dboot_loader_mmap_get_base(i);
1412*e84622caSToomas Soome 			end = start + dboot_loader_mmap_get_length(i);
1413ae115bc7Smrj 
1414c9464e8bSjosephb 			if (prom_debug)
1415ae115bc7Smrj 				dboot_printf("\ttype: %d %" PRIx64 "..%"
1416*e84622caSToomas Soome 				    PRIx64 "\n", type, start, end);
1417ae115bc7Smrj 
1418ae115bc7Smrj 			/*
1419ae115bc7Smrj 			 * page align start and end
1420ae115bc7Smrj 			 */
1421ae115bc7Smrj 			start = (start + page_offset) & ~page_offset;
1422ae115bc7Smrj 			end &= ~page_offset;
1423ae115bc7Smrj 			if (end <= start)
1424ae115bc7Smrj 				continue;
1425ae115bc7Smrj 
1426c9464e8bSjosephb 			/*
1427c9464e8bSjosephb 			 * only type 1 is usable RAM
1428c9464e8bSjosephb 			 */
1429*e84622caSToomas Soome 			switch (type) {
14301de082f7SVikram Hegde 			case 1:
1431ae115bc7Smrj 				if (end > max_mem)
1432ae115bc7Smrj 					max_mem = end;
1433ae115bc7Smrj 				memlists[memlists_used].addr = start;
1434ae115bc7Smrj 				memlists[memlists_used].size = end - start;
1435c9464e8bSjosephb 				++memlists_used;
1436c9464e8bSjosephb 				if (memlists_used > MAX_MEMLIST)
1437c9464e8bSjosephb 					dboot_panic("too many memlists");
14381de082f7SVikram Hegde 				break;
14391de082f7SVikram Hegde 			case 2:
14401de082f7SVikram Hegde 				rsvdmemlists[rsvdmemlists_used].addr = start;
14411de082f7SVikram Hegde 				rsvdmemlists[rsvdmemlists_used].size =
14421de082f7SVikram Hegde 				    end - start;
14431de082f7SVikram Hegde 				++rsvdmemlists_used;
14441de082f7SVikram Hegde 				if (rsvdmemlists_used > MAX_MEMLIST)
14451de082f7SVikram Hegde 					dboot_panic("too many rsvdmemlists");
14461de082f7SVikram Hegde 				break;
14471de082f7SVikram Hegde 			default:
14481de082f7SVikram Hegde 				continue;
14491de082f7SVikram Hegde 			}
1450ae115bc7Smrj 		}
1451*e84622caSToomas Soome 		build_pcimemlists();
1452*e84622caSToomas Soome 	} else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1453*e84622caSToomas Soome 		DBG(lower);
1454ae115bc7Smrj 		memlists[memlists_used].addr = 0;
1455*e84622caSToomas Soome 		memlists[memlists_used].size = lower * 1024;
1456ae115bc7Smrj 		++memlists_used;
1457*e84622caSToomas Soome 		DBG(upper);
1458ae115bc7Smrj 		memlists[memlists_used].addr = 1024 * 1024;
1459*e84622caSToomas Soome 		memlists[memlists_used].size = upper * 1024;
1460ae115bc7Smrj 		++memlists_used;
1461843e1988Sjohnlev 
1462843e1988Sjohnlev 		/*
1463843e1988Sjohnlev 		 * Old platform - assume I/O space at the end of memory.
1464843e1988Sjohnlev 		 */
1465*e84622caSToomas Soome 		pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1466843e1988Sjohnlev 		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1467843e1988Sjohnlev 		pcimemlists[0].next = 0;
1468843e1988Sjohnlev 		pcimemlists[0].prev = 0;
1469c909a41bSRichard Lowe 		bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1470843e1988Sjohnlev 		DBG(bi->bi_pcimem);
1471ae115bc7Smrj 	} else {
1472843e1988Sjohnlev 		dboot_panic("No memory info from boot loader!!!");
1473ae115bc7Smrj 	}
1474ae115bc7Smrj 
1475ae115bc7Smrj 	/*
1476ae115bc7Smrj 	 * finish processing the physinstall list
1477ae115bc7Smrj 	 */
1478ae115bc7Smrj 	sort_physinstall();
14791de082f7SVikram Hegde 
14801de082f7SVikram Hegde 	/*
14811de082f7SVikram Hegde 	 * build bios reserved mem lists
14821de082f7SVikram Hegde 	 */
14831de082f7SVikram Hegde 	build_rsvdmemlists();
1484c9464e8bSjosephb }
1485*e84622caSToomas Soome 
1486*e84622caSToomas Soome /*
1487*e84622caSToomas Soome  * The highest address is used as the starting point for dboot's simple
1488*e84622caSToomas Soome  * memory allocator.
1489*e84622caSToomas Soome  *
1490*e84622caSToomas Soome  * Finding the highest address in case of Multiboot 1 protocol is
1491*e84622caSToomas Soome  * quite painful in the sense that some information provided by
1492*e84622caSToomas Soome  * the multiboot info structure points to BIOS data, and some to RAM.
1493*e84622caSToomas Soome  *
1494*e84622caSToomas Soome  * The module list was processed and checked already by dboot_process_modules(),
1495*e84622caSToomas Soome  * so we will check the command line string and the memory map.
1496*e84622caSToomas Soome  *
1497*e84622caSToomas Soome  * This list of to be checked items is based on our current knowledge of
1498*e84622caSToomas Soome  * allocations made by grub1 and will need to be reviewed if there
1499*e84622caSToomas Soome  * are updates about the information provided by Multiboot 1.
1500*e84622caSToomas Soome  *
1501*e84622caSToomas Soome  * In the case of the Multiboot 2, our life is much simpler, as the MB2
1502*e84622caSToomas Soome  * information tag list is one contiguous chunk of memory.
1503*e84622caSToomas Soome  */
1504*e84622caSToomas Soome static paddr_t
dboot_multiboot1_highest_addr(void)1505*e84622caSToomas Soome dboot_multiboot1_highest_addr(void)
1506*e84622caSToomas Soome {
1507*e84622caSToomas Soome 	paddr_t addr = NULL;
1508*e84622caSToomas Soome 	char *cmdl = (char *)mb_info->cmdline;
1509*e84622caSToomas Soome 
1510*e84622caSToomas Soome 	if (mb_info->flags & MB_INFO_CMDLINE)
1511*e84622caSToomas Soome 		addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1512*e84622caSToomas Soome 
1513*e84622caSToomas Soome 	if (mb_info->flags & MB_INFO_MEM_MAP)
1514*e84622caSToomas Soome 		addr = MAX(addr,
1515*e84622caSToomas Soome 		    ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1516*e84622caSToomas Soome 	return (addr);
1517*e84622caSToomas Soome }
1518*e84622caSToomas Soome 
1519*e84622caSToomas Soome static void
dboot_multiboot_highest_addr(void)1520*e84622caSToomas Soome dboot_multiboot_highest_addr(void)
1521*e84622caSToomas Soome {
1522*e84622caSToomas Soome 	paddr_t addr;
1523*e84622caSToomas Soome 
1524*e84622caSToomas Soome 	switch (multiboot_version) {
1525*e84622caSToomas Soome 	case 1:
1526*e84622caSToomas Soome 		addr = dboot_multiboot1_highest_addr();
1527*e84622caSToomas Soome 		if (addr != NULL)
1528*e84622caSToomas Soome 			check_higher(addr);
1529*e84622caSToomas Soome 		break;
1530*e84622caSToomas Soome 	case 2:
1531*e84622caSToomas Soome 		addr = dboot_multiboot2_highest_addr(mb2_info);
1532*e84622caSToomas Soome 		if (addr != NULL)
1533*e84622caSToomas Soome 			check_higher(addr);
1534*e84622caSToomas Soome 		break;
1535*e84622caSToomas Soome 	default:
1536*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1537*e84622caSToomas Soome 		    multiboot_version);
1538*e84622caSToomas Soome 		break;
1539*e84622caSToomas Soome 	}
1540*e84622caSToomas Soome }
1541*e84622caSToomas Soome 
/*
 * Prepare dboot's memory allocator by walking everything the boot loader
 * gave us: the modules first, then the memory map, and finally the
 * remaining loader data that may raise the highest used address.
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");

	dboot_process_modules();
	dboot_process_mmap();
	dboot_multiboot_highest_addr();
}
1553*e84622caSToomas Soome 
1554*e84622caSToomas Soome static void
dboot_multiboot_get_fwtables(void)1555*e84622caSToomas Soome dboot_multiboot_get_fwtables(void)
1556*e84622caSToomas Soome {
1557*e84622caSToomas Soome 	multiboot_tag_new_acpi_t *nacpitagp;
1558*e84622caSToomas Soome 	multiboot_tag_old_acpi_t *oacpitagp;
1559*e84622caSToomas Soome 
1560*e84622caSToomas Soome 	/* no fw tables from multiboot 1 */
1561*e84622caSToomas Soome 	if (multiboot_version != 2)
1562*e84622caSToomas Soome 		return;
1563*e84622caSToomas Soome 
1564*e84622caSToomas Soome 	nacpitagp = (multiboot_tag_new_acpi_t *)
1565*e84622caSToomas Soome 	    dboot_multiboot2_find_tag(mb2_info,
1566*e84622caSToomas Soome 	    MULTIBOOT_TAG_TYPE_ACPI_NEW);
1567*e84622caSToomas Soome 	oacpitagp = (multiboot_tag_old_acpi_t *)
1568*e84622caSToomas Soome 	    dboot_multiboot2_find_tag(mb2_info,
1569*e84622caSToomas Soome 	    MULTIBOOT_TAG_TYPE_ACPI_OLD);
1570*e84622caSToomas Soome 
1571*e84622caSToomas Soome 	if (nacpitagp != NULL) {
1572*e84622caSToomas Soome 		bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1573*e84622caSToomas Soome 		    &nacpitagp->mb_rsdp[0];
1574*e84622caSToomas Soome 	} else if (oacpitagp != NULL) {
1575*e84622caSToomas Soome 		bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1576*e84622caSToomas Soome 		    &oacpitagp->mb_rsdp[0];
1577*e84622caSToomas Soome 	} else {
1578*e84622caSToomas Soome 		bi->bi_acpi_rsdp = NULL;
1579*e84622caSToomas Soome 	}
1580*e84622caSToomas Soome }
1581843e1988Sjohnlev #endif /* !__xpv */
1582ae115bc7Smrj 
1583ae115bc7Smrj /*
1584ae115bc7Smrj  * Simple memory allocator, allocates aligned physical memory.
1585ae115bc7Smrj  * Note that startup_kernel() only allocates memory, never frees.
1586ae115bc7Smrj  * Memory usage just grows in an upward direction.
1587ae115bc7Smrj  */
1588ae115bc7Smrj static void *
do_mem_alloc(uint32_t size,uint32_t align)1589ae115bc7Smrj do_mem_alloc(uint32_t size, uint32_t align)
1590ae115bc7Smrj {
1591ae115bc7Smrj 	uint_t i;
1592ae115bc7Smrj 	uint64_t best;
1593ae115bc7Smrj 	uint64_t start;
1594ae115bc7Smrj 	uint64_t end;
1595ae115bc7Smrj 
1596ae115bc7Smrj 	/*
1597ae115bc7Smrj 	 * make sure size is a multiple of pagesize
1598ae115bc7Smrj 	 */
1599ae115bc7Smrj 	size = RNDUP(size, MMU_PAGESIZE);
1600ae115bc7Smrj 	next_avail_addr = RNDUP(next_avail_addr, align);
1601ae115bc7Smrj 
1602ae115bc7Smrj 	/*
1603843e1988Sjohnlev 	 * XXPV fixme joe
1604843e1988Sjohnlev 	 *
1605ae115bc7Smrj 	 * a really large bootarchive that causes you to run out of memory
1606ae115bc7Smrj 	 * may cause this to blow up
1607ae115bc7Smrj 	 */
1608ae115bc7Smrj 	/* LINTED E_UNEXPECTED_UINT_PROMOTION */
1609ae115bc7Smrj 	best = (uint64_t)-size;
1610ae115bc7Smrj 	for (i = 0; i < memlists_used; ++i) {
1611ae115bc7Smrj 		start = memlists[i].addr;
1612843e1988Sjohnlev #if defined(__xpv)
1613843e1988Sjohnlev 		start += mfn_base;
1614843e1988Sjohnlev #endif
1615ae115bc7Smrj 		end = start + memlists[i].size;
1616ae115bc7Smrj 
1617ae115bc7Smrj 		/*
1618ae115bc7Smrj 		 * did we find the desired address?
1619ae115bc7Smrj 		 */
1620ae115bc7Smrj 		if (start <= next_avail_addr && next_avail_addr + size <= end) {
1621ae115bc7Smrj 			best = next_avail_addr;
1622ae115bc7Smrj 			goto done;
1623ae115bc7Smrj 		}
1624ae115bc7Smrj 
1625ae115bc7Smrj 		/*
1626ae115bc7Smrj 		 * if not is this address the best so far?
1627ae115bc7Smrj 		 */
1628ae115bc7Smrj 		if (start > next_avail_addr && start < best &&
1629ae115bc7Smrj 		    RNDUP(start, align) + size <= end)
1630ae115bc7Smrj 			best = RNDUP(start, align);
1631ae115bc7Smrj 	}
1632ae115bc7Smrj 
1633ae115bc7Smrj 	/*
1634ae115bc7Smrj 	 * We didn't find exactly the address we wanted, due to going off the
1635ae115bc7Smrj 	 * end of a memory region. Return the best found memory address.
1636ae115bc7Smrj 	 */
1637ae115bc7Smrj done:
1638ae115bc7Smrj 	next_avail_addr = best + size;
1639843e1988Sjohnlev #if defined(__xpv)
1640843e1988Sjohnlev 	if (next_avail_addr > scratch_end)
1641843e1988Sjohnlev 		dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
1642843e1988Sjohnlev 		    "0x%lx", (ulong_t)next_avail_addr,
1643843e1988Sjohnlev 		    (ulong_t)scratch_end);
1644843e1988Sjohnlev #endif
1645ae115bc7Smrj 	(void) memset((void *)(uintptr_t)best, 0, size);
1646ae115bc7Smrj 	return ((void *)(uintptr_t)best);
1647ae115bc7Smrj }
1648ae115bc7Smrj 
1649ae115bc7Smrj void *
mem_alloc(uint32_t size)1650ae115bc7Smrj mem_alloc(uint32_t size)
1651ae115bc7Smrj {
1652ae115bc7Smrj 	return (do_mem_alloc(size, MMU_PAGESIZE));
1653ae115bc7Smrj }
1654ae115bc7Smrj 
1655ae115bc7Smrj 
1656ae115bc7Smrj /*
1657ae115bc7Smrj  * Build page tables to map all of memory used so far as well as the kernel.
1658ae115bc7Smrj  */
1659ae115bc7Smrj static void
build_page_tables(void)1660ae115bc7Smrj build_page_tables(void)
1661ae115bc7Smrj {
1662ae115bc7Smrj 	uint32_t psize;
1663ae115bc7Smrj 	uint32_t level;
1664ae115bc7Smrj 	uint32_t off;
1665ae115bc7Smrj 	uint64_t start;
1666843e1988Sjohnlev #if !defined(__xpv)
1667843e1988Sjohnlev 	uint32_t i;
1668ae115bc7Smrj 	uint64_t end;
1669843e1988Sjohnlev #endif	/* __xpv */
1670ae115bc7Smrj 
1671ae115bc7Smrj 	/*
1672843e1988Sjohnlev 	 * If we're on metal, we need to create the top level pagetable.
1673ae115bc7Smrj 	 */
1674843e1988Sjohnlev #if defined(__xpv)
1675843e1988Sjohnlev 	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
1676843e1988Sjohnlev #else /* __xpv */
1677ae115bc7Smrj 	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1678843e1988Sjohnlev #endif /* __xpv */
1679ae115bc7Smrj 	DBG((uintptr_t)top_page_table);
1680ae115bc7Smrj 
1681ae115bc7Smrj 	/*
1682ae115bc7Smrj 	 * Determine if we'll use large mappings for kernel, then map it.
1683ae115bc7Smrj 	 */
1684ae115bc7Smrj 	if (largepage_support) {
1685ae115bc7Smrj 		psize = lpagesize;
1686ae115bc7Smrj 		level = 1;
1687ae115bc7Smrj 	} else {
1688ae115bc7Smrj 		psize = MMU_PAGESIZE;
1689ae115bc7Smrj 		level = 0;
1690ae115bc7Smrj 	}
1691ae115bc7Smrj 
1692ae115bc7Smrj 	DBG_MSG("Mapping kernel\n");
1693ae115bc7Smrj 	DBG(ktext_phys);
1694ae115bc7Smrj 	DBG(target_kernel_text);
1695ae115bc7Smrj 	DBG(ksize);
1696ae115bc7Smrj 	DBG(psize);
1697ae115bc7Smrj 	for (off = 0; off < ksize; off += psize)
1698ae115bc7Smrj 		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
1699ae115bc7Smrj 
1700ae115bc7Smrj 	/*
1701ae115bc7Smrj 	 * The kernel will need a 1 page window to work with page tables
1702ae115bc7Smrj 	 */
1703ae115bc7Smrj 	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
1704ae115bc7Smrj 	DBG(bi->bi_pt_window);
1705ae115bc7Smrj 	bi->bi_pte_to_pt_window =
1706ae115bc7Smrj 	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
1707ae115bc7Smrj 	DBG(bi->bi_pte_to_pt_window);
1708ae115bc7Smrj 
1709843e1988Sjohnlev #if defined(__xpv)
1710843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
1711843e1988Sjohnlev 		/* If this is a domU we're done. */
1712843e1988Sjohnlev 		DBG_MSG("\nPage tables constructed\n");
1713843e1988Sjohnlev 		return;
1714843e1988Sjohnlev 	}
1715843e1988Sjohnlev #endif /* __xpv */
1716843e1988Sjohnlev 
1717ae115bc7Smrj 	/*
1718843e1988Sjohnlev 	 * We need 1:1 mappings for the lower 1M of memory to access
1719843e1988Sjohnlev 	 * BIOS tables used by a couple of drivers during boot.
1720ae115bc7Smrj 	 *
1721843e1988Sjohnlev 	 * The following code works because our simple memory allocator
1722843e1988Sjohnlev 	 * only grows usage in an upwards direction.
1723ae115bc7Smrj 	 *
1724843e1988Sjohnlev 	 * Note that by this point in boot some mappings for low memory
1725843e1988Sjohnlev 	 * may already exist because we've already accessed device in low
1726843e1988Sjohnlev 	 * memory.  (Specifically the video frame buffer and keyboard
1727843e1988Sjohnlev 	 * status ports.)  If we're booting on raw hardware then GRUB
1728843e1988Sjohnlev 	 * created these mappings for us.  If we're booting under a
1729843e1988Sjohnlev 	 * hypervisor then we went ahead and remapped these devices into
1730843e1988Sjohnlev 	 * memory allocated within dboot itself.
1731843e1988Sjohnlev 	 */
1732843e1988Sjohnlev 	if (map_debug)
1733843e1988Sjohnlev 		dboot_printf("1:1 map pa=0..1Meg\n");
1734843e1988Sjohnlev 	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
1735843e1988Sjohnlev #if defined(__xpv)
1736843e1988Sjohnlev 		map_ma_at_va(start, start, 0);
1737843e1988Sjohnlev #else /* __xpv */
1738843e1988Sjohnlev 		map_pa_at_va(start, start, 0);
1739843e1988Sjohnlev #endif /* __xpv */
1740843e1988Sjohnlev 	}
1741843e1988Sjohnlev 
1742843e1988Sjohnlev #if !defined(__xpv)
1743ae115bc7Smrj 	for (i = 0; i < memlists_used; ++i) {
1744ae115bc7Smrj 		start = memlists[i].addr;
1745ae115bc7Smrj 
1746ae115bc7Smrj 		end = start + memlists[i].size;
1747ae115bc7Smrj 
1748ae115bc7Smrj 		if (map_debug)
1749ae115bc7Smrj 			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
1750ae115bc7Smrj 			    start, end);
1751ae115bc7Smrj 		while (start < end && start < next_avail_addr) {
1752ae115bc7Smrj 			map_pa_at_va(start, start, 0);
1753ae115bc7Smrj 			start += MMU_PAGESIZE;
1754ae115bc7Smrj 		}
1755ae115bc7Smrj 	}
1756843e1988Sjohnlev #endif /* !__xpv */
1757ae115bc7Smrj 
1758ae115bc7Smrj 	DBG_MSG("\nPage tables constructed\n");
1759ae115bc7Smrj }
1760ae115bc7Smrj 
/*
 * Panic message shown when the kernel command line still references the
 * obsolete "multiboot" program.
 */
#define	NO_MULTIBOOT	\
	"multiboot is no longer used to boot the Solaris Operating System.\n" \
	"The grub entry should be changed to:\n" \
	"kernel$ /platform/i86pc/kernel/$ISADIR/unix\n" \
	"module$ /platform/i86pc/$ISADIR/boot_archive\n" \
	"See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
1767ae115bc7Smrj 
1768*e84622caSToomas Soome static void
dboot_init_xboot_consinfo(void)1769*e84622caSToomas Soome dboot_init_xboot_consinfo(void)
1770*e84622caSToomas Soome {
1771*e84622caSToomas Soome 	uintptr_t addr;
1772*e84622caSToomas Soome 	/*
1773*e84622caSToomas Soome 	 * boot info must be 16 byte aligned for 64 bit kernel ABI
1774*e84622caSToomas Soome 	 */
1775*e84622caSToomas Soome 	addr = (uintptr_t)boot_info;
1776*e84622caSToomas Soome 	addr = (addr + 0xf) & ~0xf;
1777*e84622caSToomas Soome 	bi = (struct xboot_info *)addr;
1778*e84622caSToomas Soome 
1779*e84622caSToomas Soome #if !defined(__xpv)
1780*e84622caSToomas Soome 	switch (multiboot_version) {
1781*e84622caSToomas Soome 	case 1:
1782*e84622caSToomas Soome 		dboot_multiboot1_xboot_consinfo();
1783*e84622caSToomas Soome 		break;
1784*e84622caSToomas Soome 	case 2:
1785*e84622caSToomas Soome 		dboot_multiboot2_xboot_consinfo();
1786*e84622caSToomas Soome 		break;
1787*e84622caSToomas Soome 	default:
1788*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1789*e84622caSToomas Soome 		    multiboot_version);
1790*e84622caSToomas Soome 		break;
1791*e84622caSToomas Soome 	}
1792*e84622caSToomas Soome #endif
1793*e84622caSToomas Soome }
1794*e84622caSToomas Soome 
1795*e84622caSToomas Soome /*
1796*e84622caSToomas Soome  * Set up basic data from the boot loader.
1797*e84622caSToomas Soome  * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
1798*e84622caSToomas Soome  * 32-bit dboot code setup used to set up and start 64-bit kernel.
1799*e84622caSToomas Soome  * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
1800*e84622caSToomas Soome  * start 64-bit illumos kernel.
1801*e84622caSToomas Soome  */
1802*e84622caSToomas Soome static void
dboot_loader_init(void)1803*e84622caSToomas Soome dboot_loader_init(void)
1804*e84622caSToomas Soome {
1805*e84622caSToomas Soome #if !defined(__xpv)
1806*e84622caSToomas Soome 	mb_info = NULL;
1807*e84622caSToomas Soome 	mb2_info = NULL;
1808*e84622caSToomas Soome 
1809*e84622caSToomas Soome 	switch (mb_magic) {
1810*e84622caSToomas Soome 	case MB_BOOTLOADER_MAGIC:
1811*e84622caSToomas Soome 		multiboot_version = 1;
1812*e84622caSToomas Soome 		mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
1813*e84622caSToomas Soome #if defined(_BOOT_TARGET_amd64)
1814*e84622caSToomas Soome 		load_addr = mb_header.load_addr;
1815*e84622caSToomas Soome #endif
1816*e84622caSToomas Soome 		break;
1817*e84622caSToomas Soome 
1818*e84622caSToomas Soome 	case MULTIBOOT2_BOOTLOADER_MAGIC:
1819*e84622caSToomas Soome 		multiboot_version = 2;
1820*e84622caSToomas Soome 		mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
1821*e84622caSToomas Soome 		mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
1822*e84622caSToomas Soome #if defined(_BOOT_TARGET_amd64)
1823*e84622caSToomas Soome 		load_addr = mb2_load_addr;
1824*e84622caSToomas Soome #endif
1825*e84622caSToomas Soome 		break;
1826*e84622caSToomas Soome 
1827*e84622caSToomas Soome 	default:
1828*e84622caSToomas Soome 		dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
1829*e84622caSToomas Soome 		break;
1830*e84622caSToomas Soome 	}
1831*e84622caSToomas Soome #endif	/* !defined(__xpv) */
1832*e84622caSToomas Soome }
1833*e84622caSToomas Soome 
1834*e84622caSToomas Soome /* Extract the kernel command line from [multi]boot information. */
1835*e84622caSToomas Soome static char *
dboot_loader_cmdline(void)1836*e84622caSToomas Soome dboot_loader_cmdline(void)
1837*e84622caSToomas Soome {
1838*e84622caSToomas Soome 	char *line = NULL;
1839*e84622caSToomas Soome 
1840*e84622caSToomas Soome #if defined(__xpv)
1841*e84622caSToomas Soome 	line = (char *)xen_info->cmd_line;
1842*e84622caSToomas Soome #else /* __xpv */
1843*e84622caSToomas Soome 
1844*e84622caSToomas Soome 	switch (multiboot_version) {
1845*e84622caSToomas Soome 	case 1:
1846*e84622caSToomas Soome 		if (mb_info->flags & MB_INFO_CMDLINE)
1847*e84622caSToomas Soome 			line = (char *)mb_info->cmdline;
1848*e84622caSToomas Soome 		break;
1849*e84622caSToomas Soome 
1850*e84622caSToomas Soome 	case 2:
1851*e84622caSToomas Soome 		line = dboot_multiboot2_cmdline(mb2_info);
1852*e84622caSToomas Soome 		break;
1853*e84622caSToomas Soome 
1854*e84622caSToomas Soome 	default:
1855*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1856*e84622caSToomas Soome 		    multiboot_version);
1857*e84622caSToomas Soome 		break;
1858*e84622caSToomas Soome 	}
1859*e84622caSToomas Soome 
1860*e84622caSToomas Soome #endif /* __xpv */
1861*e84622caSToomas Soome 
1862*e84622caSToomas Soome 	/*
1863*e84622caSToomas Soome 	 * Make sure we have valid pointer so the string operations
1864*e84622caSToomas Soome 	 * will not crash us.
1865*e84622caSToomas Soome 	 */
1866*e84622caSToomas Soome 	if (line == NULL)
1867*e84622caSToomas Soome 		line = "";
1868*e84622caSToomas Soome 
1869*e84622caSToomas Soome 	return (line);
1870*e84622caSToomas Soome }
1871*e84622caSToomas Soome 
1872*e84622caSToomas Soome static char *
dboot_loader_name(void)1873*e84622caSToomas Soome dboot_loader_name(void)
1874*e84622caSToomas Soome {
1875*e84622caSToomas Soome #if defined(__xpv)
1876*e84622caSToomas Soome 	return (NULL);
1877*e84622caSToomas Soome #else /* __xpv */
1878*e84622caSToomas Soome 	multiboot_tag_string_t *tag;
1879*e84622caSToomas Soome 
1880*e84622caSToomas Soome 	switch (multiboot_version) {
1881*e84622caSToomas Soome 	case 1:
1882*e84622caSToomas Soome 		return ((char *)mb_info->boot_loader_name);
1883*e84622caSToomas Soome 
1884*e84622caSToomas Soome 	case 2:
1885*e84622caSToomas Soome 		tag = dboot_multiboot2_find_tag(mb2_info,
1886*e84622caSToomas Soome 		    MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
1887*e84622caSToomas Soome 		return (tag->mb_string);
1888*e84622caSToomas Soome 	default:
1889*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
1890*e84622caSToomas Soome 		    multiboot_version);
1891*e84622caSToomas Soome 		break;
1892*e84622caSToomas Soome 	}
1893*e84622caSToomas Soome 
1894*e84622caSToomas Soome 	return (NULL);
1895*e84622caSToomas Soome #endif /* __xpv */
1896*e84622caSToomas Soome }
1897ae115bc7Smrj /*
1898ae115bc7Smrj  * startup_kernel has a pretty simple job. It builds pagetables which reflect
1899ae115bc7Smrj  * 1:1 mappings for all memory in use. It then also adds mappings for
1900ae115bc7Smrj  * the kernel nucleus at virtual address of target_kernel_text using large page
1901ae115bc7Smrj  * mappings. The page table pages are also accessible at 1:1 mapped
1902ae115bc7Smrj  * virtual addresses.
1903ae115bc7Smrj  */
1904ae115bc7Smrj /*ARGSUSED*/
1905ae115bc7Smrj void
startup_kernel(void)1906ae115bc7Smrj startup_kernel(void)
1907ae115bc7Smrj {
1908ae115bc7Smrj 	char *cmdline;
1909*e84622caSToomas Soome 	char *bootloader;
1910843e1988Sjohnlev #if defined(__xpv)
1911843e1988Sjohnlev 	physdev_set_iopl_t set_iopl;
1912843e1988Sjohnlev #endif /* __xpv */
1913ae115bc7Smrj 
1914*e84622caSToomas Soome 	dboot_loader_init();
1915ae115bc7Smrj 	/*
1916ae115bc7Smrj 	 * At this point we are executing in a 32 bit real mode.
1917ae115bc7Smrj 	 */
1918*e84622caSToomas Soome 
1919*e84622caSToomas Soome 	bootloader = dboot_loader_name();
1920*e84622caSToomas Soome 	cmdline = dboot_loader_cmdline();
1921843e1988Sjohnlev 
1922ae115bc7Smrj 	prom_debug = (strstr(cmdline, "prom_debug") != NULL);
1923ae115bc7Smrj 	map_debug = (strstr(cmdline, "map_debug") != NULL);
1924843e1988Sjohnlev 
1925843e1988Sjohnlev #if defined(__xpv)
1926843e1988Sjohnlev 	/*
1927843e1988Sjohnlev 	 * For dom0, before we initialize the console subsystem we'll
1928843e1988Sjohnlev 	 * need to enable io operations, so set I/O priveldge level to 1.
1929843e1988Sjohnlev 	 */
1930843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
1931843e1988Sjohnlev 		set_iopl.iopl = 1;
1932843e1988Sjohnlev 		(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
1933843e1988Sjohnlev 	}
1934843e1988Sjohnlev #endif /* __xpv */
1935843e1988Sjohnlev 
1936*e84622caSToomas Soome 	dboot_init_xboot_consinfo();
1937*e84622caSToomas Soome 	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
1938*e84622caSToomas Soome 
1939*e84622caSToomas Soome #if !defined(__xpv)
1940*e84622caSToomas Soome 	dboot_multiboot_get_fwtables();
1941*e84622caSToomas Soome #endif
1942ae115bc7Smrj 	bcons_init(cmdline);
1943*e84622caSToomas Soome 	DBG_MSG("\n\nillumos prekernel set: ");
1944ae115bc7Smrj 	DBG_MSG(cmdline);
1945ae115bc7Smrj 	DBG_MSG("\n");
1946ae115bc7Smrj 
1947*e84622caSToomas Soome 	if (bootloader != NULL && prom_debug) {
1948*e84622caSToomas Soome 		dboot_printf("Kernel loaded by: %s\n", bootloader);
1949*e84622caSToomas Soome #if !defined(__xpv)
1950*e84622caSToomas Soome 		dboot_printf("Using multiboot %d boot protocol.\n",
1951*e84622caSToomas Soome 		    multiboot_version);
1952*e84622caSToomas Soome #endif
1953*e84622caSToomas Soome 	}
1954*e84622caSToomas Soome 
1955ae115bc7Smrj 	if (strstr(cmdline, "multiboot") != NULL) {
1956ae115bc7Smrj 		dboot_panic(NO_MULTIBOOT);
1957ae115bc7Smrj 	}
1958ae115bc7Smrj 
1959ae115bc7Smrj 	DBG((uintptr_t)bi);
1960*e84622caSToomas Soome #if !defined(__xpv)
1961*e84622caSToomas Soome 	DBG((uintptr_t)mb_info);
1962*e84622caSToomas Soome 	DBG((uintptr_t)mb2_info);
1963*e84622caSToomas Soome 	if (mb2_info != NULL)
1964*e84622caSToomas Soome 		DBG(mb2_info->mbi_total_size);
1965*e84622caSToomas Soome 	DBG(bi->bi_acpi_rsdp);
1966*e84622caSToomas Soome #endif
1967ae115bc7Smrj 
1968ae115bc7Smrj 	/*
1969ae115bc7Smrj 	 * Need correct target_kernel_text value
1970ae115bc7Smrj 	 */
1971ae115bc7Smrj #if defined(_BOOT_TARGET_amd64)
1972ae115bc7Smrj 	target_kernel_text = KERNEL_TEXT_amd64;
1973843e1988Sjohnlev #elif defined(__xpv)
1974843e1988Sjohnlev 	target_kernel_text = KERNEL_TEXT_i386_xpv;
1975ae115bc7Smrj #else
1976ae115bc7Smrj 	target_kernel_text = KERNEL_TEXT_i386;
1977ae115bc7Smrj #endif
1978ae115bc7Smrj 	DBG(target_kernel_text);
1979ae115bc7Smrj 
1980843e1988Sjohnlev #if defined(__xpv)
1981843e1988Sjohnlev 
1982843e1988Sjohnlev 	/*
1983843e1988Sjohnlev 	 * XXPV	Derive this stuff from CPUID / what the hypervisor has enabled
1984843e1988Sjohnlev 	 */
1985843e1988Sjohnlev 
1986843e1988Sjohnlev #if defined(_BOOT_TARGET_amd64)
1987843e1988Sjohnlev 	/*
1988843e1988Sjohnlev 	 * 64-bit hypervisor.
1989843e1988Sjohnlev 	 */
1990843e1988Sjohnlev 	amd64_support = 1;
1991843e1988Sjohnlev 	pae_support = 1;
1992843e1988Sjohnlev 
1993843e1988Sjohnlev #else	/* _BOOT_TARGET_amd64 */
1994843e1988Sjohnlev 
1995843e1988Sjohnlev 	/*
1996843e1988Sjohnlev 	 * See if we are running on a PAE Hypervisor
1997843e1988Sjohnlev 	 */
1998843e1988Sjohnlev 	{
1999843e1988Sjohnlev 		xen_capabilities_info_t caps;
2000843e1988Sjohnlev 
2001843e1988Sjohnlev 		if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2002843e1988Sjohnlev 			dboot_panic("HYPERVISOR_xen_version(caps) failed");
2003843e1988Sjohnlev 		caps[sizeof (caps) - 1] = 0;
2004843e1988Sjohnlev 		if (prom_debug)
2005843e1988Sjohnlev 			dboot_printf("xen capabilities %s\n", caps);
2006843e1988Sjohnlev 		if (strstr(caps, "x86_32p") != NULL)
2007843e1988Sjohnlev 			pae_support = 1;
2008843e1988Sjohnlev 	}
2009843e1988Sjohnlev 
2010843e1988Sjohnlev #endif	/* _BOOT_TARGET_amd64 */
2011843e1988Sjohnlev 	{
2012843e1988Sjohnlev 		xen_platform_parameters_t p;
2013843e1988Sjohnlev 
2014843e1988Sjohnlev 		if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2015843e1988Sjohnlev 			dboot_panic("HYPERVISOR_xen_version(parms) failed");
2016843e1988Sjohnlev 		DBG(p.virt_start);
2017843e1988Sjohnlev 		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2018843e1988Sjohnlev 	}
2019843e1988Sjohnlev 
2020843e1988Sjohnlev 	/*
2021843e1988Sjohnlev 	 * The hypervisor loads stuff starting at 1Gig
2022843e1988Sjohnlev 	 */
2023843e1988Sjohnlev 	mfn_base = ONE_GIG;
2024843e1988Sjohnlev 	DBG(mfn_base);
2025843e1988Sjohnlev 
2026843e1988Sjohnlev 	/*
2027843e1988Sjohnlev 	 * enable writable page table mode for the hypervisor
2028843e1988Sjohnlev 	 */
2029843e1988Sjohnlev 	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2030843e1988Sjohnlev 	    VMASST_TYPE_writable_pagetables) < 0)
2031843e1988Sjohnlev 		dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2032843e1988Sjohnlev 
2033843e1988Sjohnlev 	/*
2034843e1988Sjohnlev 	 * check for NX support
2035843e1988Sjohnlev 	 */
2036843e1988Sjohnlev 	if (pae_support) {
2037843e1988Sjohnlev 		uint32_t eax = 0x80000000;
2038843e1988Sjohnlev 		uint32_t edx = get_cpuid_edx(&eax);
2039843e1988Sjohnlev 
2040843e1988Sjohnlev 		if (eax >= 0x80000001) {
2041843e1988Sjohnlev 			eax = 0x80000001;
2042843e1988Sjohnlev 			edx = get_cpuid_edx(&eax);
2043843e1988Sjohnlev 			if (edx & CPUID_AMD_EDX_NX)
2044843e1988Sjohnlev 				NX_support = 1;
2045843e1988Sjohnlev 		}
2046843e1988Sjohnlev 	}
2047843e1988Sjohnlev 
2048843e1988Sjohnlev #if !defined(_BOOT_TARGET_amd64)
2049843e1988Sjohnlev 
2050843e1988Sjohnlev 	/*
2051843e1988Sjohnlev 	 * The 32-bit hypervisor uses segmentation to protect itself from
2052843e1988Sjohnlev 	 * guests. This means when a guest attempts to install a flat 4GB
2053843e1988Sjohnlev 	 * code or data descriptor the 32-bit hypervisor will protect itself
2054843e1988Sjohnlev 	 * by silently shrinking the segment such that if the guest attempts
2055843e1988Sjohnlev 	 * any access where the hypervisor lives a #gp fault is generated.
2056843e1988Sjohnlev 	 * The problem is that some applications expect a full 4GB flat
2057843e1988Sjohnlev 	 * segment for their current thread pointer and will use negative
2058843e1988Sjohnlev 	 * offset segment wrap around to access data. TLS support in linux
2059843e1988Sjohnlev 	 * brand is one example of this.
2060843e1988Sjohnlev 	 *
2061843e1988Sjohnlev 	 * The 32-bit hypervisor can catch the #gp fault in these cases
2062843e1988Sjohnlev 	 * and emulate the access without passing the #gp fault to the guest
2063843e1988Sjohnlev 	 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2064843e1988Sjohnlev 	 * Seems like this should have been the default.
2065843e1988Sjohnlev 	 * Either way, we want the hypervisor -- and not Solaris -- to deal
2066843e1988Sjohnlev 	 * with emulating these accesses.
2067843e1988Sjohnlev 	 */
2068843e1988Sjohnlev 	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2069843e1988Sjohnlev 	    VMASST_TYPE_4gb_segments) < 0)
2070843e1988Sjohnlev 		dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2071843e1988Sjohnlev #endif	/* !_BOOT_TARGET_amd64 */
2072843e1988Sjohnlev 
2073843e1988Sjohnlev #else	/* __xpv */
2074843e1988Sjohnlev 
2075ae115bc7Smrj 	/*
2076ae115bc7Smrj 	 * use cpuid to enable MMU features
2077ae115bc7Smrj 	 */
2078ae115bc7Smrj 	if (have_cpuid()) {
2079ae115bc7Smrj 		uint32_t eax, edx;
2080ae115bc7Smrj 
2081ae115bc7Smrj 		eax = 1;
2082ae115bc7Smrj 		edx = get_cpuid_edx(&eax);
2083ae115bc7Smrj 		if (edx & CPUID_INTC_EDX_PSE)
2084ae115bc7Smrj 			largepage_support = 1;
2085ae115bc7Smrj 		if (edx & CPUID_INTC_EDX_PGE)
2086ae115bc7Smrj 			pge_support = 1;
2087ae115bc7Smrj 		if (edx & CPUID_INTC_EDX_PAE)
2088ae115bc7Smrj 			pae_support = 1;
2089ae115bc7Smrj 
2090ae115bc7Smrj 		eax = 0x80000000;
2091ae115bc7Smrj 		edx = get_cpuid_edx(&eax);
2092ae115bc7Smrj 		if (eax >= 0x80000001) {
2093ae115bc7Smrj 			eax = 0x80000001;
2094ae115bc7Smrj 			edx = get_cpuid_edx(&eax);
2095ae115bc7Smrj 			if (edx & CPUID_AMD_EDX_LM)
2096ae115bc7Smrj 				amd64_support = 1;
2097ae115bc7Smrj 			if (edx & CPUID_AMD_EDX_NX)
2098ae115bc7Smrj 				NX_support = 1;
2099ae115bc7Smrj 		}
2100ae115bc7Smrj 	} else {
2101ae115bc7Smrj 		dboot_printf("cpuid not supported\n");
2102ae115bc7Smrj 	}
2103843e1988Sjohnlev #endif /* __xpv */
2104843e1988Sjohnlev 
2105ae115bc7Smrj 
2106ae115bc7Smrj #if defined(_BOOT_TARGET_amd64)
2107ae115bc7Smrj 	if (amd64_support == 0)
2108843e1988Sjohnlev 		dboot_panic("long mode not supported, rebooting");
2109ae115bc7Smrj 	else if (pae_support == 0)
2110843e1988Sjohnlev 		dboot_panic("long mode, but no PAE; rebooting");
2111843e1988Sjohnlev #else
2112843e1988Sjohnlev 	/*
2113843e1988Sjohnlev 	 * Allow the command line to over-ride use of PAE for 32 bit.
2114843e1988Sjohnlev 	 */
2115843e1988Sjohnlev 	if (strstr(cmdline, "disablePAE=true") != NULL) {
2116843e1988Sjohnlev 		pae_support = 0;
2117843e1988Sjohnlev 		NX_support = 0;
2118843e1988Sjohnlev 		amd64_support = 0;
2119843e1988Sjohnlev 	}
2120ae115bc7Smrj #endif
2121ae115bc7Smrj 
2122ae115bc7Smrj 	/*
2123843e1988Sjohnlev 	 * initialize the simple memory allocator
2124ae115bc7Smrj 	 */
2125ae115bc7Smrj 	init_mem_alloc();
2126ae115bc7Smrj 
2127843e1988Sjohnlev #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2128843e1988Sjohnlev 	/*
2129843e1988Sjohnlev 	 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2130843e1988Sjohnlev 	 */
2131843e1988Sjohnlev 	if (max_mem < FOUR_GIG && NX_support == 0)
2132843e1988Sjohnlev 		pae_support = 0;
2133843e1988Sjohnlev #endif
2134843e1988Sjohnlev 
2135ae115bc7Smrj 	/*
2136ae115bc7Smrj 	 * configure mmu information
2137ae115bc7Smrj 	 */
2138843e1988Sjohnlev 	if (pae_support) {
2139ae115bc7Smrj 		shift_amt = shift_amt_pae;
2140ae115bc7Smrj 		ptes_per_table = 512;
2141ae115bc7Smrj 		pte_size = 8;
2142ae115bc7Smrj 		lpagesize = TWO_MEG;
2143ae115bc7Smrj #if defined(_BOOT_TARGET_amd64)
2144ae115bc7Smrj 		top_level = 3;
2145ae115bc7Smrj #else
2146ae115bc7Smrj 		top_level = 2;
2147ae115bc7Smrj #endif
2148ae115bc7Smrj 	} else {
2149ae115bc7Smrj 		pae_support = 0;
2150ae115bc7Smrj 		NX_support = 0;
2151ae115bc7Smrj 		shift_amt = shift_amt_nopae;
2152ae115bc7Smrj 		ptes_per_table = 1024;
2153ae115bc7Smrj 		pte_size = 4;
2154ae115bc7Smrj 		lpagesize = FOUR_MEG;
2155ae115bc7Smrj 		top_level = 1;
2156ae115bc7Smrj 	}
2157ae115bc7Smrj 
2158ae115bc7Smrj 	DBG(pge_support);
2159ae115bc7Smrj 	DBG(NX_support);
2160ae115bc7Smrj 	DBG(largepage_support);
2161ae115bc7Smrj 	DBG(amd64_support);
2162ae115bc7Smrj 	DBG(top_level);
2163ae115bc7Smrj 	DBG(pte_size);
2164ae115bc7Smrj 	DBG(ptes_per_table);
2165ae115bc7Smrj 	DBG(lpagesize);
2166ae115bc7Smrj 
2167843e1988Sjohnlev #if defined(__xpv)
2168843e1988Sjohnlev 	ktext_phys = ONE_GIG;		/* from UNIX Mapfile */
2169843e1988Sjohnlev #else
2170ae115bc7Smrj 	ktext_phys = FOUR_MEG;		/* from UNIX Mapfile */
2171843e1988Sjohnlev #endif
2172ae115bc7Smrj 
2173843e1988Sjohnlev #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2174ae115bc7Smrj 	/*
2175ae115bc7Smrj 	 * For grub, copy kernel bits from the ELF64 file to final place.
2176ae115bc7Smrj 	 */
2177ae115bc7Smrj 	DBG_MSG("\nAllocating nucleus pages.\n");
2178ae115bc7Smrj 	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2179ae115bc7Smrj 	if (ktext_phys == 0)
2180843e1988Sjohnlev 		dboot_panic("failed to allocate aligned kernel memory");
2181*e84622caSToomas Soome 	DBG(load_addr);
2182*e84622caSToomas Soome 	if (dboot_elfload64(load_addr) != 0)
2183843e1988Sjohnlev 		dboot_panic("failed to parse kernel ELF image, rebooting");
2184ae115bc7Smrj #endif
2185843e1988Sjohnlev 
2186ae115bc7Smrj 	DBG(ktext_phys);
2187ae115bc7Smrj 
2188ae115bc7Smrj 	/*
2189ae115bc7Smrj 	 * Allocate page tables.
2190ae115bc7Smrj 	 */
2191ae115bc7Smrj 	build_page_tables();
2192ae115bc7Smrj 
2193ae115bc7Smrj 	/*
2194ae115bc7Smrj 	 * return to assembly code to switch to running kernel
2195ae115bc7Smrj 	 */
2196ae115bc7Smrj 	entry_addr_low = (uint32_t)target_kernel_text;
2197ae115bc7Smrj 	DBG(entry_addr_low);
2198ae115bc7Smrj 	bi->bi_use_largepage = largepage_support;
2199ae115bc7Smrj 	bi->bi_use_pae = pae_support;
2200ae115bc7Smrj 	bi->bi_use_pge = pge_support;
2201ae115bc7Smrj 	bi->bi_use_nx = NX_support;
2202843e1988Sjohnlev 
2203843e1988Sjohnlev #if defined(__xpv)
2204843e1988Sjohnlev 
2205843e1988Sjohnlev 	bi->bi_next_paddr = next_avail_addr - mfn_base;
2206843e1988Sjohnlev 	DBG(bi->bi_next_paddr);
2207843e1988Sjohnlev 	bi->bi_next_vaddr = (native_ptr_t)next_avail_addr;
2208843e1988Sjohnlev 	DBG(bi->bi_next_vaddr);
2209843e1988Sjohnlev 
2210843e1988Sjohnlev 	/*
2211843e1988Sjohnlev 	 * unmap unused pages in start area to make them available for DMA
2212843e1988Sjohnlev 	 */
2213843e1988Sjohnlev 	while (next_avail_addr < scratch_end) {
2214843e1988Sjohnlev 		(void) HYPERVISOR_update_va_mapping(next_avail_addr,
2215843e1988Sjohnlev 		    0, UVMF_INVLPG | UVMF_LOCAL);
2216843e1988Sjohnlev 		next_avail_addr += MMU_PAGESIZE;
2217843e1988Sjohnlev 	}
2218843e1988Sjohnlev 
2219843e1988Sjohnlev 	bi->bi_xen_start_info = (uintptr_t)xen_info;
2220843e1988Sjohnlev 	DBG((uintptr_t)HYPERVISOR_shared_info);
2221843e1988Sjohnlev 	bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2222843e1988Sjohnlev 	bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2223843e1988Sjohnlev 
2224843e1988Sjohnlev #else /* __xpv */
2225843e1988Sjohnlev 
2226ae115bc7Smrj 	bi->bi_next_paddr = next_avail_addr;
2227ae115bc7Smrj 	DBG(bi->bi_next_paddr);
2228ae115bc7Smrj 	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
2229ae115bc7Smrj 	DBG(bi->bi_next_vaddr);
2230*e84622caSToomas Soome 	bi->bi_mb_version = multiboot_version;
2231*e84622caSToomas Soome 
2232*e84622caSToomas Soome 	switch (multiboot_version) {
2233*e84622caSToomas Soome 	case 1:
2234ae115bc7Smrj 		bi->bi_mb_info = (uintptr_t)mb_info;
2235*e84622caSToomas Soome 		break;
2236*e84622caSToomas Soome 	case 2:
2237*e84622caSToomas Soome 		bi->bi_mb_info = (uintptr_t)mb2_info;
2238*e84622caSToomas Soome 		break;
2239*e84622caSToomas Soome 	default:
2240*e84622caSToomas Soome 		dboot_panic("Unknown multiboot version: %d\n",
2241*e84622caSToomas Soome 		    multiboot_version);
2242*e84622caSToomas Soome 		break;
2243*e84622caSToomas Soome 	}
2244ae115bc7Smrj 	bi->bi_top_page_table = (uintptr_t)top_page_table;
2245ae115bc7Smrj 
2246843e1988Sjohnlev #endif /* __xpv */
2247843e1988Sjohnlev 
2248ae115bc7Smrj 	bi->bi_kseg_size = FOUR_MEG;
2249ae115bc7Smrj 	DBG(bi->bi_kseg_size);
2250ae115bc7Smrj 
225115ba2a79SSherry Moore #ifndef __xpv
2252f34a7178SJoe Bonasera 	if (map_debug)
225319397407SSherry Moore 		dump_tables();
225415ba2a79SSherry Moore #endif
225519397407SSherry Moore 
2256ae115bc7Smrj 	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2257ae115bc7Smrj }
2258