xref: /illumos-gate/usr/src/uts/i86pc/dboot/dboot_startkern.c (revision b1e2e3fb17324e9ddf43db264a0c64da7756d9e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright 2013 Joyent, Inc.  All rights reserved.
27  */
28 
29 
30 #include <sys/types.h>
31 #include <sys/machparam.h>
32 #include <sys/x86_archext.h>
33 #include <sys/systm.h>
34 #include <sys/mach_mmu.h>
35 #include <sys/multiboot.h>
36 #include <sys/multiboot2.h>
37 #include <sys/multiboot2_impl.h>
38 #include <sys/sysmacros.h>
39 #include <sys/framebuffer.h>
40 #include <sys/sha1.h>
41 #include <util/string.h>
42 #include <util/strtolctype.h>
43 #include <sys/efi.h>
44 
45 /*
46  * Compile time debug knob. We do not have any early mechanism to control it
47  * as the boot is the earliest mechanism we have, and we do not want to have
48  * it being switched on by default.
49  */
50 int dboot_debug = 0;
51 
52 #if defined(__xpv)
53 
54 #include <sys/hypervisor.h>
55 uintptr_t xen_virt_start;
56 pfn_t *mfn_to_pfn_mapping;
57 
58 #else /* !__xpv */
59 
60 extern multiboot_header_t mb_header;
61 extern uint32_t mb2_load_addr;
62 extern int have_cpuid(void);
63 
64 #endif /* !__xpv */
65 
66 #include <sys/inttypes.h>
67 #include <sys/bootinfo.h>
68 #include <sys/mach_mmu.h>
69 #include <sys/boot_console.h>
70 
71 #include "dboot_asm.h"
72 #include "dboot_printf.h"
73 #include "dboot_xboot.h"
74 #include "dboot_elfload.h"
75 
76 #define	SHA1_ASCII_LENGTH	(SHA1_DIGEST_LENGTH * 2)
77 
78 /*
79  * This file contains code that runs to transition us from either a multiboot
80  * compliant loader (32 bit non-paging) or a XPV domain loader to
81  * regular kernel execution. Its task is to setup the kernel memory image
82  * and page tables.
83  *
84  * The code executes as:
85  *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
86  *	- a 32 bit program for the 32-bit PV hypervisor
87  *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
88  *
89  * Under the PV hypervisor, we must create mappings for any memory beyond the
90  * initial start of day allocation (such as the kernel itself).
91  *
92  * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
93  * Since we are running in real mode, all such memory is accessible.
94  */
95 
96 /*
97  * Standard bits used in PTE (page level) and PTP (internal levels)
98  */
99 x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
100 x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;
101 
102 /*
103  * This is the target address (physical) where the kernel text and data
104  * nucleus pages will be unpacked. On the hypervisor this is actually a
105  * virtual address.
106  */
107 paddr_t ktext_phys;
108 uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */
109 
110 static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */
111 
112 /*
113  * The stack is setup in assembler before entering startup_kernel()
114  */
115 char stack_space[STACK_SIZE];
116 
117 /*
118  * Used to track physical memory allocation
119  */
120 static paddr_t next_avail_addr = 0;
121 
122 #if defined(__xpv)
123 /*
124  * Additional information needed for hypervisor memory allocation.
125  * Only memory up to scratch_end is mapped by page tables.
126  * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
127  * to derive a pfn from a pointer, you subtract mfn_base.
128  */
129 
130 static paddr_t scratch_end = 0;	/* we can't write all of mem here */
131 static paddr_t mfn_base;		/* addr corresponding to mfn_list[0] */
132 start_info_t *xen_info;
133 
134 #else	/* __xpv */
135 
136 /*
137  * If on the metal, then we have a multiboot loader.
138  */
139 uint32_t mb_magic;			/* magic from boot loader */
140 uint32_t mb_addr;			/* multiboot info package from loader */
141 int multiboot_version;
142 multiboot_info_t *mb_info;
143 multiboot2_info_header_t *mb2_info;
144 multiboot_tag_mmap_t *mb2_mmap_tagp;
145 int num_entries;			/* mmap entry count */
146 boolean_t num_entries_set;		/* is mmap entry count set */
147 uintptr_t load_addr;
148 static boot_framebuffer_t framebuffer __aligned(16);
149 static boot_framebuffer_t *fb;
150 
151 /* can not be automatic variables because of alignment */
152 static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID;
153 static efi_guid_t smbios = SMBIOS_TABLE_GUID;
154 static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID;
155 static efi_guid_t acpi1 = ACPI_10_TABLE_GUID;
156 #endif	/* __xpv */
157 
158 /*
159  * This contains information passed to the kernel
160  */
161 struct xboot_info boot_info __aligned(16);
162 struct xboot_info *bi;
163 
164 /*
165  * Page table and memory stuff.
166  */
167 static paddr_t max_mem;			/* maximum memory address */
168 
169 /*
170  * Information about processor MMU
171  */
172 int amd64_support = 0;
173 int largepage_support = 0;
174 int pae_support = 0;
175 int pge_support = 0;
176 int NX_support = 0;
177 int PAT_support = 0;
178 
179 /*
180  * Low 32 bits of kernel entry address passed back to assembler.
181  * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
182  */
183 uint32_t entry_addr_low;
184 
185 /*
186  * Memlists for the kernel. We shouldn't need a lot of these.
187  */
188 #define	MAX_MEMLIST (50)
189 struct boot_memlist memlists[MAX_MEMLIST];
190 uint_t memlists_used = 0;
191 struct boot_memlist pcimemlists[MAX_MEMLIST];
192 uint_t pcimemlists_used = 0;
193 struct boot_memlist rsvdmemlists[MAX_MEMLIST];
194 uint_t rsvdmemlists_used = 0;
195 
196 /*
197  * This should match what's in the bootloader.  It's arbitrary, but GRUB
198  * in particular has limitations on how much space it can use before it
199  * stops working properly.  This should be enough.
200  */
201 struct boot_modules modules[MAX_BOOT_MODULES];
202 uint_t modules_used = 0;
203 
204 #ifdef __xpv
205 /*
206  * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
207  * definition in Xen source.
208  */
209 typedef struct {
210 	uint32_t	base_addr_low;
211 	uint32_t	base_addr_high;
212 	uint32_t	length_low;
213 	uint32_t	length_high;
214 	uint32_t	type;
215 } mmap_t;
216 
217 /*
218  * There is 512KB of scratch area after the boot stack page.
219  * We'll use that for everything except the kernel nucleus pages which are too
220  * big to fit there and are allocated last anyway.
221  */
222 #define	MAXMAPS	100
223 static mmap_t map_buffer[MAXMAPS];
224 #else
225 typedef mb_memory_map_t mmap_t;
226 #endif
227 
228 /*
229  * Debugging macros
230  */
231 uint_t prom_debug = 0;
232 uint_t map_debug = 0;
233 
234 static char noname[2] = "-";
235 
236 /*
237  * Either hypervisor-specific or grub-specific code builds the initial
238  * memlists. This code does the sort/merge/link for final use.
239  */
240 static void
241 sort_physinstall(void)
242 {
243 	int i;
244 #if !defined(__xpv)
245 	int j;
246 	struct boot_memlist tmp;
247 
248 	/*
249 	 * Now sort the memlists, in case they weren't in order.
250 	 * Yeah, this is a bubble sort; small, simple and easy to get right.
251 	 */
252 	DBG_MSG("Sorting phys-installed list\n");
253 	for (j = memlists_used - 1; j > 0; --j) {
254 		for (i = 0; i < j; ++i) {
255 			if (memlists[i].addr < memlists[i + 1].addr)
256 				continue;
257 			tmp = memlists[i];
258 			memlists[i] = memlists[i + 1];
259 			memlists[i + 1] = tmp;
260 		}
261 	}
262 
263 	/*
264 	 * Merge any memlists that don't have holes between them.
265 	 */
266 	for (i = 0; i <= memlists_used - 1; ++i) {
267 		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
268 			continue;
269 
270 		if (prom_debug)
271 			dboot_printf(
272 			    "merging mem segs %" PRIx64 "...%" PRIx64
273 			    " w/ %" PRIx64 "...%" PRIx64 "\n",
274 			    memlists[i].addr,
275 			    memlists[i].addr + memlists[i].size,
276 			    memlists[i + 1].addr,
277 			    memlists[i + 1].addr + memlists[i + 1].size);
278 
279 		memlists[i].size += memlists[i + 1].size;
280 		for (j = i + 1; j < memlists_used - 1; ++j)
281 			memlists[j] = memlists[j + 1];
282 		--memlists_used;
283 		DBG(memlists_used);
284 		--i;	/* after merging we need to reexamine, so do this */
285 	}
286 #endif	/* __xpv */
287 
288 	if (prom_debug) {
289 		dboot_printf("\nFinal memlists:\n");
290 		for (i = 0; i < memlists_used; ++i) {
291 			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
292 			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
293 		}
294 	}
295 
296 	/*
297 	 * link together the memlists with native size pointers
298 	 */
299 	memlists[0].next = 0;
300 	memlists[0].prev = 0;
301 	for (i = 1; i < memlists_used; ++i) {
302 		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
303 		memlists[i].next = 0;
304 		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
305 	}
306 	bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
307 	DBG(bi->bi_phys_install);
308 }
309 
310 /*
311  * build bios reserved memlists
312  */
313 static void
314 build_rsvdmemlists(void)
315 {
316 	int i;
317 
318 	rsvdmemlists[0].next = 0;
319 	rsvdmemlists[0].prev = 0;
320 	for (i = 1; i < rsvdmemlists_used; ++i) {
321 		rsvdmemlists[i].prev =
322 		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
323 		rsvdmemlists[i].next = 0;
324 		rsvdmemlists[i - 1].next =
325 		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
326 	}
327 	bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
328 	DBG(bi->bi_rsvdmem);
329 }
330 
331 #if defined(__xpv)
332 
333 /*
334  * halt on the hypervisor after a delay to drain console output
335  */
336 void
337 dboot_halt(void)
338 {
339 	uint_t i = 10000;
340 
341 	while (--i)
342 		(void) HYPERVISOR_yield();
343 	(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
344 }
345 
346 /*
347  * From a machine address, find the corresponding pseudo-physical address.
348  * Pseudo-physical address are contiguous and run from mfn_base in each VM.
349  * Machine addresses are the real underlying hardware addresses.
350  * These are needed for page table entries. Note that this routine is
351  * poorly protected. A bad value of "ma" will cause a page fault.
352  */
353 paddr_t
354 ma_to_pa(maddr_t ma)
355 {
356 	ulong_t pgoff = ma & MMU_PAGEOFFSET;
357 	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
358 	paddr_t pa;
359 
360 	if (pfn >= xen_info->nr_pages)
361 		return (-(paddr_t)1);
362 	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
363 #ifdef DEBUG
364 	if (ma != pa_to_ma(pa))
365 		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
366 		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
367 #endif
368 	return (pa);
369 }
370 
371 /*
372  * From a pseudo-physical address, find the corresponding machine address.
373  */
374 maddr_t
375 pa_to_ma(paddr_t pa)
376 {
377 	pfn_t pfn;
378 	ulong_t mfn;
379 
380 	pfn = mmu_btop(pa - mfn_base);
381 	if (pa < mfn_base || pfn >= xen_info->nr_pages)
382 		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
383 	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
384 #ifdef DEBUG
385 	if (mfn_to_pfn_mapping[mfn] != pfn)
386 		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
387 		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
388 #endif
389 	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
390 }
391 
392 #endif	/* __xpv */
393 
394 x86pte_t
395 get_pteval(paddr_t table, uint_t index)
396 {
397 	if (pae_support)
398 		return (((x86pte_t *)(uintptr_t)table)[index]);
399 	return (((x86pte32_t *)(uintptr_t)table)[index]);
400 }
401 
402 /*ARGSUSED*/
403 void
404 set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
405 {
406 #ifdef __xpv
407 	mmu_update_t t;
408 	maddr_t mtable = pa_to_ma(table);
409 	int retcnt;
410 
411 	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
412 	t.val = pteval;
413 	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
414 		dboot_panic("HYPERVISOR_mmu_update() failed");
415 #else /* __xpv */
416 	uintptr_t tab_addr = (uintptr_t)table;
417 
418 	if (pae_support)
419 		((x86pte_t *)tab_addr)[index] = pteval;
420 	else
421 		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
422 	if (level == top_level && level == 2)
423 		reload_cr3();
424 #endif /* __xpv */
425 }
426 
427 paddr_t
428 make_ptable(x86pte_t *pteval, uint_t level)
429 {
430 	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
431 
432 	if (level == top_level && level == 2)
433 		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
434 	else
435 		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;
436 
437 #ifdef __xpv
438 	/* Remove write permission to the new page table. */
439 	if (HYPERVISOR_update_va_mapping(new_table,
440 	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
441 		dboot_panic("HYP_update_va_mapping error");
442 #endif
443 
444 	if (map_debug)
445 		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
446 		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
447 	return (new_table);
448 }
449 
450 x86pte_t *
451 map_pte(paddr_t table, uint_t index)
452 {
453 	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
454 }
455 
456 /*
457  * dump out the contents of page tables...
458  */
459 static void
460 dump_tables(void)
461 {
462 	uint_t save_index[4];	/* for recursion */
463 	char *save_table[4];	/* for recursion */
464 	uint_t	l;
465 	uint64_t va;
466 	uint64_t pgsize;
467 	int index;
468 	int i;
469 	x86pte_t pteval;
470 	char *table;
471 	static char *tablist = "\t\t\t";
472 	char *tabs = tablist + 3 - top_level;
473 	uint_t pa, pa1;
474 #if !defined(__xpv)
475 #define	maddr_t paddr_t
476 #endif /* !__xpv */
477 
478 	dboot_printf("Finished pagetables:\n");
479 	table = (char *)(uintptr_t)top_page_table;
480 	l = top_level;
481 	va = 0;
482 	for (index = 0; index < ptes_per_table; ++index) {
483 		pgsize = 1ull << shift_amt[l];
484 		if (pae_support)
485 			pteval = ((x86pte_t *)table)[index];
486 		else
487 			pteval = ((x86pte32_t *)table)[index];
488 		if (pteval == 0)
489 			goto next_entry;
490 
491 		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
492 		    tabs + l, (void *)table, index, (uint64_t)pteval, va);
493 		pa = ma_to_pa(pteval & MMU_PAGEMASK);
494 		dboot_printf(" physaddr=%x\n", pa);
495 
496 		/*
497 		 * Don't try to walk hypervisor private pagetables
498 		 */
499 		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
500 			save_table[l] = table;
501 			save_index[l] = index;
502 			--l;
503 			index = -1;
504 			table = (char *)(uintptr_t)
505 			    ma_to_pa(pteval & MMU_PAGEMASK);
506 			goto recursion;
507 		}
508 
509 		/*
510 		 * shorten dump for consecutive mappings
511 		 */
512 		for (i = 1; index + i < ptes_per_table; ++i) {
513 			if (pae_support)
514 				pteval = ((x86pte_t *)table)[index + i];
515 			else
516 				pteval = ((x86pte32_t *)table)[index + i];
517 			if (pteval == 0)
518 				break;
519 			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
520 			if (pa1 != pa + i * pgsize)
521 				break;
522 		}
523 		if (i > 2) {
524 			dboot_printf("%s...\n", tabs + l);
525 			va += pgsize * (i - 2);
526 			index += i - 2;
527 		}
528 next_entry:
529 		va += pgsize;
530 		if (l == 3 && index == 256)	/* VA hole */
531 			va = 0xffff800000000000ull;
532 recursion:
533 		;
534 	}
535 	if (l < top_level) {
536 		++l;
537 		index = save_index[l];
538 		table = save_table[l];
539 		goto recursion;
540 	}
541 }
542 
543 /*
544  * Add a mapping for the machine page at the given virtual address.
545  */
546 static void
547 map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
548 {
549 	x86pte_t *ptep;
550 	x86pte_t pteval;
551 
552 	pteval = ma | pte_bits;
553 	if (level > 0)
554 		pteval |= PT_PAGESIZE;
555 	if (va >= target_kernel_text && pge_support)
556 		pteval |= PT_GLOBAL;
557 
558 	if (map_debug && ma != va)
559 		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
560 		    " pte=0x%" PRIx64 " l=%d\n",
561 		    (uint64_t)ma, (uint64_t)va, pteval, level);
562 
563 #if defined(__xpv)
564 	/*
565 	 * see if we can avoid find_pte() on the hypervisor
566 	 */
567 	if (HYPERVISOR_update_va_mapping(va, pteval,
568 	    UVMF_INVLPG | UVMF_LOCAL) == 0)
569 		return;
570 #endif
571 
572 	/*
573 	 * Find the pte that will map this address. This creates any
574 	 * missing intermediate level page tables
575 	 */
576 	ptep = find_pte(va, NULL, level, 0);
577 
578 	/*
579 	 * When paravirtualized, we must use hypervisor calls to modify the
580 	 * PTE, since paging is active. On real hardware we just write to
581 	 * the pagetables which aren't in use yet.
582 	 */
583 #if defined(__xpv)
584 	ptep = ptep;	/* shut lint up */
585 	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
586 		dboot_panic("mmu_update failed-map_pa_at_va va=0x%" PRIx64
587 		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
588 		    (uint64_t)va, level, (uint64_t)ma, pteval);
589 #else
590 	if (va < 1024 * 1024)
591 		pteval |= PT_NOCACHE;		/* for video RAM */
592 	if (pae_support)
593 		*ptep = pteval;
594 	else
595 		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
596 #endif
597 }
598 
599 /*
600  * Add a mapping for the physical page at the given virtual address.
601  */
602 static void
603 map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
604 {
605 	map_ma_at_va(pa_to_ma(pa), va, level);
606 }
607 
608 /*
609  * This is called to remove start..end from the
610  * possible range of PCI addresses.
611  */
612 const uint64_t pci_lo_limit = 0x00100000ul;
613 const uint64_t pci_hi_limit = 0xfff00000ul;
614 static void
615 exclude_from_pci(uint64_t start, uint64_t end)
616 {
617 	int i;
618 	int j;
619 	struct boot_memlist *ml;
620 
621 	for (i = 0; i < pcimemlists_used; ++i) {
622 		ml = &pcimemlists[i];
623 
624 		/* delete the entire range? */
625 		if (start <= ml->addr && ml->addr + ml->size <= end) {
626 			--pcimemlists_used;
627 			for (j = i; j < pcimemlists_used; ++j)
628 				pcimemlists[j] = pcimemlists[j + 1];
629 			--i;	/* to revisit the new one at this index */
630 		}
631 
632 		/* split a range? */
633 		else if (ml->addr < start && end < ml->addr + ml->size) {
634 
635 			++pcimemlists_used;
636 			if (pcimemlists_used > MAX_MEMLIST)
637 				dboot_panic("too many pcimemlists");
638 
639 			for (j = pcimemlists_used - 1; j > i; --j)
640 				pcimemlists[j] = pcimemlists[j - 1];
641 			ml->size = start - ml->addr;
642 
643 			++ml;
644 			ml->size = (ml->addr + ml->size) - end;
645 			ml->addr = end;
646 			++i;	/* skip on to next one */
647 		}
648 
649 		/* cut memory off the start? */
650 		else if (ml->addr < end && end < ml->addr + ml->size) {
651 			ml->size -= end - ml->addr;
652 			ml->addr = end;
653 		}
654 
655 		/* cut memory off the end? */
656 		else if (ml->addr <= start && start < ml->addr + ml->size) {
657 			ml->size = start - ml->addr;
658 		}
659 	}
660 }
661 
662 /*
663  * During memory allocation, find the highest address not used yet.
664  */
665 static void
666 check_higher(paddr_t a)
667 {
668 	if (a < next_avail_addr)
669 		return;
670 	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
671 	DBG(next_avail_addr);
672 }
673 
674 static int
675 dboot_loader_mmap_entries(void)
676 {
677 #if !defined(__xpv)
678 	if (num_entries_set == B_TRUE)
679 		return (num_entries);
680 
681 	switch (multiboot_version) {
682 	case 1:
683 		DBG(mb_info->flags);
684 		if (mb_info->flags & 0x40) {
685 			mb_memory_map_t *mmap;
686 
687 			DBG(mb_info->mmap_addr);
688 			DBG(mb_info->mmap_length);
689 			check_higher(mb_info->mmap_addr + mb_info->mmap_length);
690 
691 			for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
692 			    (uint32_t)mmap < mb_info->mmap_addr +
693 			    mb_info->mmap_length;
694 			    mmap = (mb_memory_map_t *)((uint32_t)mmap +
695 			    mmap->size + sizeof (mmap->size)))
696 				++num_entries;
697 
698 			num_entries_set = B_TRUE;
699 		}
700 		break;
701 	case 2:
702 		num_entries_set = B_TRUE;
703 		num_entries = dboot_multiboot2_mmap_nentries(mb2_info,
704 		    mb2_mmap_tagp);
705 		break;
706 	default:
707 		dboot_panic("Unknown multiboot version: %d\n",
708 		    multiboot_version);
709 		break;
710 	}
711 	return (num_entries);
712 #else
713 	return (MAXMAPS);
714 #endif
715 }
716 
717 static uint32_t
718 dboot_loader_mmap_get_type(int index)
719 {
720 #if !defined(__xpv)
721 	mb_memory_map_t *mp, *mpend;
722 	int i;
723 
724 	switch (multiboot_version) {
725 	case 1:
726 		mp = (mb_memory_map_t *)mb_info->mmap_addr;
727 		mpend = (mb_memory_map_t *)
728 		    (mb_info->mmap_addr + mb_info->mmap_length);
729 
730 		for (i = 0; mp < mpend && i != index; i++)
731 			mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
732 			    sizeof (mp->size));
733 		if (mp >= mpend) {
734 			dboot_panic("dboot_loader_mmap_get_type(): index "
735 			    "out of bounds: %d\n", index);
736 		}
737 		return (mp->type);
738 
739 	case 2:
740 		return (dboot_multiboot2_mmap_get_type(mb2_info,
741 		    mb2_mmap_tagp, index));
742 
743 	default:
744 		dboot_panic("Unknown multiboot version: %d\n",
745 		    multiboot_version);
746 		break;
747 	}
748 	return (0);
749 #else
750 	return (map_buffer[index].type);
751 #endif
752 }
753 
754 static uint64_t
755 dboot_loader_mmap_get_base(int index)
756 {
757 #if !defined(__xpv)
758 	mb_memory_map_t *mp, *mpend;
759 	int i;
760 
761 	switch (multiboot_version) {
762 	case 1:
763 		mp = (mb_memory_map_t *)mb_info->mmap_addr;
764 		mpend = (mb_memory_map_t *)
765 		    (mb_info->mmap_addr + mb_info->mmap_length);
766 
767 		for (i = 0; mp < mpend && i != index; i++)
768 			mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
769 			    sizeof (mp->size));
770 		if (mp >= mpend) {
771 			dboot_panic("dboot_loader_mmap_get_base(): index "
772 			    "out of bounds: %d\n", index);
773 		}
774 		return (((uint64_t)mp->base_addr_high << 32) +
775 		    (uint64_t)mp->base_addr_low);
776 
777 	case 2:
778 		return (dboot_multiboot2_mmap_get_base(mb2_info,
779 		    mb2_mmap_tagp, index));
780 
781 	default:
782 		dboot_panic("Unknown multiboot version: %d\n",
783 		    multiboot_version);
784 		break;
785 	}
786 	return (0);
787 #else
788 	return (((uint64_t)map_buffer[index].base_addr_high << 32) +
789 	    (uint64_t)map_buffer[index].base_addr_low);
790 #endif
791 }
792 
793 static uint64_t
794 dboot_loader_mmap_get_length(int index)
795 {
796 #if !defined(__xpv)
797 	mb_memory_map_t *mp, *mpend;
798 	int i;
799 
800 	switch (multiboot_version) {
801 	case 1:
802 		mp = (mb_memory_map_t *)mb_info->mmap_addr;
803 		mpend = (mb_memory_map_t *)
804 		    (mb_info->mmap_addr + mb_info->mmap_length);
805 
806 		for (i = 0; mp < mpend && i != index; i++)
807 			mp = (mb_memory_map_t *)((uint32_t)mp + mp->size +
808 			    sizeof (mp->size));
809 		if (mp >= mpend) {
810 			dboot_panic("dboot_loader_mmap_get_length(): index "
811 			    "out of bounds: %d\n", index);
812 		}
813 		return (((uint64_t)mp->length_high << 32) +
814 		    (uint64_t)mp->length_low);
815 
816 	case 2:
817 		return (dboot_multiboot2_mmap_get_length(mb2_info,
818 		    mb2_mmap_tagp, index));
819 
820 	default:
821 		dboot_panic("Unknown multiboot version: %d\n",
822 		    multiboot_version);
823 		break;
824 	}
825 	return (0);
826 #else
827 	return (((uint64_t)map_buffer[index].length_high << 32) +
828 	    (uint64_t)map_buffer[index].length_low);
829 #endif
830 }
831 
832 static void
833 build_pcimemlists(void)
834 {
835 	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
836 	uint64_t start;
837 	uint64_t end;
838 	int i, num;
839 
840 	/*
841 	 * initialize
842 	 */
843 	pcimemlists[0].addr = pci_lo_limit;
844 	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
845 	pcimemlists_used = 1;
846 
847 	num = dboot_loader_mmap_entries();
848 	/*
849 	 * Fill in PCI memlists.
850 	 */
851 	for (i = 0; i < num; ++i) {
852 		start = dboot_loader_mmap_get_base(i);
853 		end = start + dboot_loader_mmap_get_length(i);
854 
855 		if (prom_debug)
856 			dboot_printf("\ttype: %d %" PRIx64 "..%"
857 			    PRIx64 "\n", dboot_loader_mmap_get_type(i),
858 			    start, end);
859 
860 		/*
861 		 * page align start and end
862 		 */
863 		start = (start + page_offset) & ~page_offset;
864 		end &= ~page_offset;
865 		if (end <= start)
866 			continue;
867 
868 		exclude_from_pci(start, end);
869 	}
870 
871 	/*
872 	 * Finish off the pcimemlist
873 	 */
874 	if (prom_debug) {
875 		for (i = 0; i < pcimemlists_used; ++i) {
876 			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
877 			    PRIx64 "\n", pcimemlists[i].addr,
878 			    pcimemlists[i].addr + pcimemlists[i].size);
879 		}
880 	}
881 	pcimemlists[0].next = 0;
882 	pcimemlists[0].prev = 0;
883 	for (i = 1; i < pcimemlists_used; ++i) {
884 		pcimemlists[i].prev =
885 		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
886 		pcimemlists[i].next = 0;
887 		pcimemlists[i - 1].next =
888 		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
889 	}
890 	bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
891 	DBG(bi->bi_pcimem);
892 }
893 
894 #if defined(__xpv)
895 /*
896  * Initialize memory allocator stuff from hypervisor-supplied start info.
897  */
898 static void
899 init_mem_alloc(void)
900 {
901 	int	local;	/* variables needed to find start region */
902 	paddr_t	scratch_start;
903 	xen_memory_map_t map;
904 
905 	DBG_MSG("Entered init_mem_alloc()\n");
906 
907 	/*
908 	 * Free memory follows the stack. There's at least 512KB of scratch
909 	 * space, rounded up to at least 2Mb alignment.  That should be enough
910 	 * for the page tables we'll need to build.  The nucleus memory is
911 	 * allocated last and will be outside the addressible range.  We'll
912 	 * switch to new page tables before we unpack the kernel
913 	 */
914 	scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
915 	DBG(scratch_start);
916 	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
917 	DBG(scratch_end);
918 
919 	/*
920 	 * For paranoia, leave some space between hypervisor data and ours.
921 	 * Use 500 instead of 512.
922 	 */
923 	next_avail_addr = scratch_end - 500 * 1024;
924 	DBG(next_avail_addr);
925 
926 	/*
927 	 * The domain builder gives us at most 1 module
928 	 */
929 	DBG(xen_info->mod_len);
930 	if (xen_info->mod_len > 0) {
931 		DBG(xen_info->mod_start);
932 		modules[0].bm_addr =
933 		    (native_ptr_t)(uintptr_t)xen_info->mod_start;
934 		modules[0].bm_size = xen_info->mod_len;
935 		bi->bi_module_cnt = 1;
936 		bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
937 	} else {
938 		bi->bi_module_cnt = 0;
939 		bi->bi_modules = (native_ptr_t)(uintptr_t)NULL;
940 	}
941 	DBG(bi->bi_module_cnt);
942 	DBG(bi->bi_modules);
943 
944 	DBG(xen_info->mfn_list);
945 	DBG(xen_info->nr_pages);
946 	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
947 	DBG(max_mem);
948 
949 	/*
950 	 * Using pseudo-physical addresses, so only 1 memlist element
951 	 */
952 	memlists[0].addr = 0;
953 	DBG(memlists[0].addr);
954 	memlists[0].size = max_mem;
955 	DBG(memlists[0].size);
956 	memlists_used = 1;
957 	DBG(memlists_used);
958 
959 	/*
960 	 * finish building physinstall list
961 	 */
962 	sort_physinstall();
963 
964 	/*
965 	 * build bios reserved memlists
966 	 */
967 	build_rsvdmemlists();
968 
969 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
970 		/*
971 		 * build PCI Memory list
972 		 */
973 		map.nr_entries = MAXMAPS;
974 		/*LINTED: constant in conditional context*/
975 		set_xen_guest_handle(map.buffer, map_buffer);
976 		if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
977 			dboot_panic("getting XENMEM_machine_memory_map failed");
978 		build_pcimemlists();
979 	}
980 }
981 
982 #else	/* !__xpv */
983 
984 static void
985 dboot_multiboot1_xboot_consinfo(void)
986 {
987 	fb->framebuffer = 0;
988 }
989 
990 static void
991 dboot_multiboot2_xboot_consinfo(void)
992 {
993 	multiboot_tag_framebuffer_t *fbtag;
994 	fbtag = dboot_multiboot2_find_tag(mb2_info,
995 	    MULTIBOOT_TAG_TYPE_FRAMEBUFFER);
996 	fb->framebuffer = (uint64_t)(uintptr_t)fbtag;
997 }
998 
999 static int
1000 dboot_multiboot_modcount(void)
1001 {
1002 	switch (multiboot_version) {
1003 	case 1:
1004 		return (mb_info->mods_count);
1005 
1006 	case 2:
1007 		return (dboot_multiboot2_modcount(mb2_info));
1008 
1009 	default:
1010 		dboot_panic("Unknown multiboot version: %d\n",
1011 		    multiboot_version);
1012 		break;
1013 	}
1014 	return (0);
1015 }
1016 
1017 static uint32_t
1018 dboot_multiboot_modstart(int index)
1019 {
1020 	switch (multiboot_version) {
1021 	case 1:
1022 		return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);
1023 
1024 	case 2:
1025 		return (dboot_multiboot2_modstart(mb2_info, index));
1026 
1027 	default:
1028 		dboot_panic("Unknown multiboot version: %d\n",
1029 		    multiboot_version);
1030 		break;
1031 	}
1032 	return (0);
1033 }
1034 
1035 static uint32_t
1036 dboot_multiboot_modend(int index)
1037 {
1038 	switch (multiboot_version) {
1039 	case 1:
1040 		return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);
1041 
1042 	case 2:
1043 		return (dboot_multiboot2_modend(mb2_info, index));
1044 
1045 	default:
1046 		dboot_panic("Unknown multiboot version: %d\n",
1047 		    multiboot_version);
1048 		break;
1049 	}
1050 	return (0);
1051 }
1052 
1053 static char *
1054 dboot_multiboot_modcmdline(int index)
1055 {
1056 	switch (multiboot_version) {
1057 	case 1:
1058 		return ((char *)((mb_module_t *)
1059 		    mb_info->mods_addr)[index].mod_name);
1060 
1061 	case 2:
1062 		return (dboot_multiboot2_modcmdline(mb2_info, index));
1063 
1064 	default:
1065 		dboot_panic("Unknown multiboot version: %d\n",
1066 		    multiboot_version);
1067 		break;
1068 	}
1069 	return (0);
1070 }
1071 
1072 /*
1073  * Find the modules used by console setup.
1074  * Since we need the console to print early boot messages, the console is set up
1075  * before anything else and therefore we need to pick up the needed modules.
1076  *
1077  * Note, we just will search for and if found, will pass the modules
1078  * to console setup, the proper module list processing will happen later.
1079  * Currently used modules are boot environment and console font.
1080  */
1081 static void
1082 dboot_find_console_modules(void)
1083 {
1084 	int i, modcount;
1085 	uint32_t mod_start, mod_end;
1086 	char *cmdline;
1087 
1088 	modcount = dboot_multiboot_modcount();
1089 	bi->bi_module_cnt = 0;
1090 	for (i = 0; i < modcount; ++i) {
1091 		cmdline = dboot_multiboot_modcmdline(i);
1092 		if (cmdline == NULL)
1093 			continue;
1094 
1095 		if (strstr(cmdline, "type=console-font") != NULL)
1096 			modules[bi->bi_module_cnt].bm_type = BMT_FONT;
1097 		else if (strstr(cmdline, "type=environment") != NULL)
1098 			modules[bi->bi_module_cnt].bm_type = BMT_ENV;
1099 		else
1100 			continue;
1101 
1102 		mod_start = dboot_multiboot_modstart(i);
1103 		mod_end = dboot_multiboot_modend(i);
1104 		modules[bi->bi_module_cnt].bm_addr =
1105 		    (native_ptr_t)(uintptr_t)mod_start;
1106 		modules[bi->bi_module_cnt].bm_size = mod_end - mod_start;
1107 		modules[bi->bi_module_cnt].bm_name =
1108 		    (native_ptr_t)(uintptr_t)NULL;
1109 		modules[bi->bi_module_cnt].bm_hash =
1110 		    (native_ptr_t)(uintptr_t)NULL;
1111 		bi->bi_module_cnt++;
1112 	}
1113 	if (bi->bi_module_cnt != 0)
1114 		bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1115 }
1116 
1117 static boolean_t
1118 dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
1119 {
1120 	boolean_t rv = B_FALSE;
1121 
1122 	switch (multiboot_version) {
1123 	case 1:
1124 		if (mb_info->flags & 0x01) {
1125 			*lower = mb_info->mem_lower;
1126 			*upper = mb_info->mem_upper;
1127 			rv = B_TRUE;
1128 		}
1129 		break;
1130 
1131 	case 2:
1132 		return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));
1133 
1134 	default:
1135 		dboot_panic("Unknown multiboot version: %d\n",
1136 		    multiboot_version);
1137 		break;
1138 	}
1139 	return (rv);
1140 }
1141 
/*
 * Convert a single ASCII hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F')
 * to its numeric value (0-15).  Any other character is rejected via
 * dboot_panic(); only checking the lower bound of each range would let
 * characters such as 'g' or ':' through and silently produce a wrong value.
 */
static uint8_t
dboot_a2h(char v)
{
	if (v >= '0' && v <= '9')
		return (v - '0');
	if (v >= 'a' && v <= 'f')
		return (v - 'a' + 0xa);
	if (v >= 'A' && v <= 'F')
		return (v - 'A' + 0xa);

	dboot_panic("bad ASCII hex character %c\n", v);

	return (0);
}
1156 
1157 static void
1158 digest_a2h(const char *ascii, uint8_t *digest)
1159 {
1160 	unsigned int i;
1161 
1162 	for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1163 		digest[i] = dboot_a2h(ascii[i * 2]) << 4;
1164 		digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
1165 	}
1166 }
1167 
1168 /*
1169  * Generate a SHA-1 hash of the first len bytes of image, and compare it with
1170  * the ASCII-format hash found in the 40-byte buffer at ascii.  If they
1171  * match, return 0, otherwise -1.  This works only for images smaller than
1172  * 4 GB, which should not be a problem.
1173  */
1174 static int
1175 check_image_hash(uint_t midx)
1176 {
1177 	const char *ascii;
1178 	const void *image;
1179 	size_t len;
1180 	SHA1_CTX ctx;
1181 	uint8_t digest[SHA1_DIGEST_LENGTH];
1182 	uint8_t baseline[SHA1_DIGEST_LENGTH];
1183 	unsigned int i;
1184 
1185 	ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
1186 	image = (const void *)(uintptr_t)modules[midx].bm_addr;
1187 	len = (size_t)modules[midx].bm_size;
1188 
1189 	digest_a2h(ascii, baseline);
1190 
1191 	SHA1Init(&ctx);
1192 	SHA1Update(&ctx, image, len);
1193 	SHA1Final(digest, &ctx);
1194 
1195 	for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
1196 		if (digest[i] != baseline[i])
1197 			return (-1);
1198 	}
1199 
1200 	return (0);
1201 }
1202 
1203 static const char *
1204 type_to_str(boot_module_type_t type)
1205 {
1206 	switch (type) {
1207 	case BMT_ROOTFS:
1208 		return ("rootfs");
1209 	case BMT_FILE:
1210 		return ("file");
1211 	case BMT_HASH:
1212 		return ("hash");
1213 	case BMT_ENV:
1214 		return ("environment");
1215 	case BMT_FONT:
1216 		return ("console-font");
1217 	default:
1218 		return ("unknown");
1219 	}
1220 }
1221 
1222 static void
1223 check_images(void)
1224 {
1225 	uint_t i;
1226 	char displayhash[SHA1_ASCII_LENGTH + 1];
1227 
1228 	for (i = 0; i < modules_used; i++) {
1229 		if (prom_debug) {
1230 			dboot_printf("module #%d: name %s type %s "
1231 			    "addr %lx size %lx\n",
1232 			    i, (char *)(uintptr_t)modules[i].bm_name,
1233 			    type_to_str(modules[i].bm_type),
1234 			    (ulong_t)modules[i].bm_addr,
1235 			    (ulong_t)modules[i].bm_size);
1236 		}
1237 
1238 		if (modules[i].bm_type == BMT_HASH ||
1239 		    modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) {
1240 			DBG_MSG("module has no hash; skipping check\n");
1241 			continue;
1242 		}
1243 		(void) memcpy(displayhash,
1244 		    (void *)(uintptr_t)modules[i].bm_hash,
1245 		    SHA1_ASCII_LENGTH);
1246 		displayhash[SHA1_ASCII_LENGTH] = '\0';
1247 		if (prom_debug) {
1248 			dboot_printf("checking expected hash [%s]: ",
1249 			    displayhash);
1250 		}
1251 
1252 		if (check_image_hash(i) != 0)
1253 			dboot_panic("hash mismatch!\n");
1254 		else
1255 			DBG_MSG("OK\n");
1256 	}
1257 }
1258 
1259 /*
1260  * Determine the module's starting address, size, name, and type, and fill the
1261  * boot_modules structure.  This structure is used by the bop code, except for
1262  * hashes which are checked prior to transferring control to the kernel.
1263  */
1264 static void
1265 process_module(int midx)
1266 {
1267 	uint32_t mod_start = dboot_multiboot_modstart(midx);
1268 	uint32_t mod_end = dboot_multiboot_modend(midx);
1269 	char *cmdline = dboot_multiboot_modcmdline(midx);
1270 	char *p, *q;
1271 
1272 	check_higher(mod_end);
1273 	if (prom_debug) {
1274 		dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
1275 		    midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
1276 	}
1277 
1278 	if (mod_start > mod_end) {
1279 		dboot_panic("module #%d: module start address 0x%lx greater "
1280 		    "than end address 0x%lx", midx,
1281 		    (ulong_t)mod_start, (ulong_t)mod_end);
1282 	}
1283 
1284 	/*
1285 	 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
1286 	 * the address of the last valid byte in a module plus 1 as mod_end.
1287 	 * This is of course a bug; the multiboot specification simply states
1288 	 * that mod_start and mod_end "contain the start and end addresses of
1289 	 * the boot module itself" which is pretty obviously not what GRUB is
1290 	 * doing.  However, fixing it requires that not only this code be
1291 	 * changed but also that other code consuming this value and values
1292 	 * derived from it be fixed, and that the kernel and GRUB must either
1293 	 * both have the bug or neither.  While there are a lot of combinations
1294 	 * that will work, there are also some that won't, so for simplicity
1295 	 * we'll just cope with the bug.  That means we won't actually hash the
1296 	 * byte at mod_end, and we will expect that mod_end for the hash file
1297 	 * itself is one greater than some multiple of 41 (40 bytes of ASCII
1298 	 * hash plus a newline for each module).  We set bm_size to the true
1299 	 * correct number of bytes in each module, achieving exactly this.
1300 	 */
1301 
1302 	modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start;
1303 	modules[midx].bm_size = mod_end - mod_start;
1304 	modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
1305 	modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL;
1306 	modules[midx].bm_type = BMT_FILE;
1307 
1308 	if (cmdline == NULL) {
1309 		modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
1310 		return;
1311 	}
1312 
1313 	p = cmdline;
1314 	modules[midx].bm_name =
1315 	    (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");
1316 
1317 	while (p != NULL) {
1318 		q = strsep(&p, " \t\f\n\r");
1319 		if (strncmp(q, "name=", 5) == 0) {
1320 			if (q[5] != '\0' && !isspace(q[5])) {
1321 				modules[midx].bm_name =
1322 				    (native_ptr_t)(uintptr_t)(q + 5);
1323 			}
1324 			continue;
1325 		}
1326 
1327 		if (strncmp(q, "type=", 5) == 0) {
1328 			if (q[5] == '\0' || isspace(q[5]))
1329 				continue;
1330 			q += 5;
1331 			if (strcmp(q, "rootfs") == 0) {
1332 				modules[midx].bm_type = BMT_ROOTFS;
1333 			} else if (strcmp(q, "hash") == 0) {
1334 				modules[midx].bm_type = BMT_HASH;
1335 			} else if (strcmp(q, "environment") == 0) {
1336 				modules[midx].bm_type = BMT_ENV;
1337 			} else if (strcmp(q, "console-font") == 0) {
1338 				modules[midx].bm_type = BMT_FONT;
1339 			} else if (strcmp(q, "file") != 0) {
1340 				dboot_printf("\tmodule #%d: unknown module "
1341 				    "type '%s'; defaulting to 'file'\n",
1342 				    midx, q);
1343 			}
1344 			continue;
1345 		}
1346 
1347 		if (strncmp(q, "hash=", 5) == 0) {
1348 			if (q[5] != '\0' && !isspace(q[5])) {
1349 				modules[midx].bm_hash =
1350 				    (native_ptr_t)(uintptr_t)(q + 5);
1351 			}
1352 			continue;
1353 		}
1354 
1355 		dboot_printf("ignoring unknown option '%s'\n", q);
1356 	}
1357 }
1358 
1359 /*
1360  * Backward compatibility: if there are exactly one or two modules, both
1361  * of type 'file' and neither with an embedded hash value, we have been
1362  * given the legacy style modules.  In this case we need to treat the first
1363  * module as a rootfs and the second as a hash referencing that module.
1364  * Otherwise, even if the configuration is invalid, we assume that the
1365  * operator knows what he's doing or at least isn't being bitten by this
1366  * interface change.
1367  */
1368 static void
1369 fixup_modules(void)
1370 {
1371 	if (modules_used == 0 || modules_used > 2)
1372 		return;
1373 
1374 	if (modules[0].bm_type != BMT_FILE ||
1375 	    modules_used > 1 && modules[1].bm_type != BMT_FILE) {
1376 		return;
1377 	}
1378 
1379 	if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL ||
1380 	    modules_used > 1 &&
1381 	    modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1382 		return;
1383 	}
1384 
1385 	modules[0].bm_type = BMT_ROOTFS;
1386 	if (modules_used > 1) {
1387 		modules[1].bm_type = BMT_HASH;
1388 		modules[1].bm_name = modules[0].bm_name;
1389 	}
1390 }
1391 
1392 /*
1393  * For modules that do not have assigned hashes but have a separate hash module,
1394  * find the assigned hash module and set the primary module's bm_hash to point
1395  * to the hash data from that module.  We will then ignore modules of type
1396  * BMT_HASH from this point forward.
1397  */
1398 static void
1399 assign_module_hashes(void)
1400 {
1401 	uint_t i, j;
1402 
1403 	for (i = 0; i < modules_used; i++) {
1404 		if (modules[i].bm_type == BMT_HASH ||
1405 		    modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
1406 			continue;
1407 		}
1408 
1409 		for (j = 0; j < modules_used; j++) {
1410 			if (modules[j].bm_type != BMT_HASH ||
1411 			    strcmp((char *)(uintptr_t)modules[j].bm_name,
1412 			    (char *)(uintptr_t)modules[i].bm_name) != 0) {
1413 				continue;
1414 			}
1415 
1416 			if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
1417 				dboot_printf("Short hash module of length "
1418 				    "0x%lx bytes; ignoring\n",
1419 				    (ulong_t)modules[j].bm_size);
1420 			} else {
1421 				modules[i].bm_hash = modules[j].bm_addr;
1422 			}
1423 			break;
1424 		}
1425 	}
1426 }
1427 
1428 /*
1429  * Walk through the module information finding the last used address.
1430  * The first available address will become the top level page table.
1431  */
1432 static void
1433 dboot_process_modules(void)
1434 {
1435 	int i, modcount;
1436 	extern char _end[];
1437 
1438 	DBG_MSG("\nFinding Modules\n");
1439 	modcount = dboot_multiboot_modcount();
1440 	if (modcount > MAX_BOOT_MODULES) {
1441 		dboot_panic("Too many modules (%d) -- the maximum is %d.",
1442 		    modcount, MAX_BOOT_MODULES);
1443 	}
1444 	/*
1445 	 * search the modules to find the last used address
1446 	 * we'll build the module list while we're walking through here
1447 	 */
1448 	check_higher((paddr_t)(uintptr_t)&_end);
1449 	for (i = 0; i < modcount; ++i) {
1450 		process_module(i);
1451 		modules_used++;
1452 	}
1453 	bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
1454 	DBG(bi->bi_modules);
1455 	bi->bi_module_cnt = modcount;
1456 	DBG(bi->bi_module_cnt);
1457 
1458 	fixup_modules();
1459 	assign_module_hashes();
1460 	check_images();
1461 }
1462 
1463 /*
1464  * We then build the phys_install memlist from the multiboot information.
1465  */
1466 static void
1467 dboot_process_mmap(void)
1468 {
1469 	uint64_t start;
1470 	uint64_t end;
1471 	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
1472 	uint32_t lower, upper;
1473 	int i, mmap_entries;
1474 
1475 	/*
1476 	 * Walk through the memory map from multiboot and build our memlist
1477 	 * structures. Note these will have native format pointers.
1478 	 */
1479 	DBG_MSG("\nFinding Memory Map\n");
1480 	num_entries = 0;
1481 	num_entries_set = B_FALSE;
1482 	max_mem = 0;
1483 	if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
1484 		for (i = 0; i < mmap_entries; i++) {
1485 			uint32_t type = dboot_loader_mmap_get_type(i);
1486 			start = dboot_loader_mmap_get_base(i);
1487 			end = start + dboot_loader_mmap_get_length(i);
1488 
1489 			if (prom_debug)
1490 				dboot_printf("\ttype: %d %" PRIx64 "..%"
1491 				    PRIx64 "\n", type, start, end);
1492 
1493 			/*
1494 			 * page align start and end
1495 			 */
1496 			start = (start + page_offset) & ~page_offset;
1497 			end &= ~page_offset;
1498 			if (end <= start)
1499 				continue;
1500 
1501 			/*
1502 			 * only type 1 is usable RAM
1503 			 */
1504 			switch (type) {
1505 			case 1:
1506 				if (end > max_mem)
1507 					max_mem = end;
1508 				memlists[memlists_used].addr = start;
1509 				memlists[memlists_used].size = end - start;
1510 				++memlists_used;
1511 				if (memlists_used > MAX_MEMLIST)
1512 					dboot_panic("too many memlists");
1513 				break;
1514 			case 2:
1515 				rsvdmemlists[rsvdmemlists_used].addr = start;
1516 				rsvdmemlists[rsvdmemlists_used].size =
1517 				    end - start;
1518 				++rsvdmemlists_used;
1519 				if (rsvdmemlists_used > MAX_MEMLIST)
1520 					dboot_panic("too many rsvdmemlists");
1521 				break;
1522 			default:
1523 				continue;
1524 			}
1525 		}
1526 		build_pcimemlists();
1527 	} else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
1528 		DBG(lower);
1529 		memlists[memlists_used].addr = 0;
1530 		memlists[memlists_used].size = lower * 1024;
1531 		++memlists_used;
1532 		DBG(upper);
1533 		memlists[memlists_used].addr = 1024 * 1024;
1534 		memlists[memlists_used].size = upper * 1024;
1535 		++memlists_used;
1536 
1537 		/*
1538 		 * Old platform - assume I/O space at the end of memory.
1539 		 */
1540 		pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
1541 		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
1542 		pcimemlists[0].next = 0;
1543 		pcimemlists[0].prev = 0;
1544 		bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
1545 		DBG(bi->bi_pcimem);
1546 	} else {
1547 		dboot_panic("No memory info from boot loader!!!");
1548 	}
1549 
1550 	/*
1551 	 * finish processing the physinstall list
1552 	 */
1553 	sort_physinstall();
1554 
1555 	/*
1556 	 * build bios reserved mem lists
1557 	 */
1558 	build_rsvdmemlists();
1559 }
1560 
1561 /*
1562  * The highest address is used as the starting point for dboot's simple
1563  * memory allocator.
1564  *
1565  * Finding the highest address in case of Multiboot 1 protocol is
1566  * quite painful in the sense that some information provided by
1567  * the multiboot info structure points to BIOS data, and some to RAM.
1568  *
1569  * The module list was processed and checked already by dboot_process_modules(),
1570  * so we will check the command line string and the memory map.
1571  *
1572  * This list of to be checked items is based on our current knowledge of
1573  * allocations made by grub1 and will need to be reviewed if there
1574  * are updates about the information provided by Multiboot 1.
1575  *
1576  * In the case of the Multiboot 2, our life is much simpler, as the MB2
1577  * information tag list is one contiguous chunk of memory.
1578  */
1579 static paddr_t
1580 dboot_multiboot1_highest_addr(void)
1581 {
1582 	paddr_t addr = (paddr_t)(uintptr_t)NULL;
1583 	char *cmdl = (char *)mb_info->cmdline;
1584 
1585 	if (mb_info->flags & MB_INFO_CMDLINE)
1586 		addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));
1587 
1588 	if (mb_info->flags & MB_INFO_MEM_MAP)
1589 		addr = MAX(addr,
1590 		    ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
1591 	return (addr);
1592 }
1593 
1594 static void
1595 dboot_multiboot_highest_addr(void)
1596 {
1597 	paddr_t addr;
1598 
1599 	switch (multiboot_version) {
1600 	case 1:
1601 		addr = dboot_multiboot1_highest_addr();
1602 		if (addr != (paddr_t)(uintptr_t)NULL)
1603 			check_higher(addr);
1604 		break;
1605 	case 2:
1606 		addr = dboot_multiboot2_highest_addr(mb2_info);
1607 		if (addr != (paddr_t)(uintptr_t)NULL)
1608 			check_higher(addr);
1609 		break;
1610 	default:
1611 		dboot_panic("Unknown multiboot version: %d\n",
1612 		    multiboot_version);
1613 		break;
1614 	}
1615 }
1616 
/*
 * Walk the boot loader provided information and find the highest free address.
 * The modules are processed first, then the memory map, and finally the
 * remaining boot loader data is checked for the highest used address.
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");

	dboot_process_modules();
	dboot_process_mmap();
	dboot_multiboot_highest_addr();
}
1628 
1629 static int
1630 dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2)
1631 {
1632 	int i;
1633 
1634 	if (g1->time_low != g2->time_low)
1635 		return (0);
1636 	if (g1->time_mid != g2->time_mid)
1637 		return (0);
1638 	if (g1->time_hi_and_version != g2->time_hi_and_version)
1639 		return (0);
1640 	if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved)
1641 		return (0);
1642 	if (g1->clock_seq_low != g2->clock_seq_low)
1643 		return (0);
1644 
1645 	for (i = 0; i < 6; i++) {
1646 		if (g1->node_addr[i] != g2->node_addr[i])
1647 			return (0);
1648 	}
1649 	return (1);
1650 }
1651 
1652 static void
1653 process_efi32(EFI_SYSTEM_TABLE32 *efi)
1654 {
1655 	uint32_t entries;
1656 	EFI_CONFIGURATION_TABLE32 *config;
1657 	int i;
1658 
1659 	entries = efi->NumberOfTableEntries;
1660 	config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1661 	    efi->ConfigurationTable;
1662 
1663 	for (i = 0; i < entries; i++) {
1664 		if (dboot_same_guids(&config[i].VendorGuid, &smbios3)) {
1665 			bi->bi_smbios = (native_ptr_t)(uintptr_t)
1666 			    config[i].VendorTable;
1667 		}
1668 		if (bi->bi_smbios == 0 &&
1669 		    dboot_same_guids(&config[i].VendorGuid, &smbios)) {
1670 			bi->bi_smbios = (native_ptr_t)(uintptr_t)
1671 			    config[i].VendorTable;
1672 		}
1673 		if (dboot_same_guids(&config[i].VendorGuid, &acpi2)) {
1674 			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1675 			    config[i].VendorTable;
1676 		}
1677 		if (bi->bi_acpi_rsdp == 0 &&
1678 		    dboot_same_guids(&config[i].VendorGuid, &acpi1)) {
1679 			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1680 			    config[i].VendorTable;
1681 		}
1682 	}
1683 }
1684 
1685 static void
1686 process_efi64(EFI_SYSTEM_TABLE64 *efi)
1687 {
1688 	uint64_t entries;
1689 	EFI_CONFIGURATION_TABLE64 *config;
1690 	int i;
1691 
1692 	entries = efi->NumberOfTableEntries;
1693 	config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1694 	    efi->ConfigurationTable;
1695 
1696 	for (i = 0; i < entries; i++) {
1697 		if (dboot_same_guids(&config[i].VendorGuid, &smbios3)) {
1698 			bi->bi_smbios = (native_ptr_t)(uintptr_t)
1699 			    config[i].VendorTable;
1700 		}
1701 		if (bi->bi_smbios == 0 &&
1702 		    dboot_same_guids(&config[i].VendorGuid, &smbios)) {
1703 			bi->bi_smbios = (native_ptr_t)(uintptr_t)
1704 			    config[i].VendorTable;
1705 		}
1706 		/* Prefer acpi v2+ over v1. */
1707 		if (dboot_same_guids(&config[i].VendorGuid, &acpi2)) {
1708 			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1709 			    config[i].VendorTable;
1710 		}
1711 		if (bi->bi_acpi_rsdp == 0 &&
1712 		    dboot_same_guids(&config[i].VendorGuid, &acpi1)) {
1713 			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1714 			    config[i].VendorTable;
1715 		}
1716 	}
1717 }
1718 
1719 static void
1720 dboot_multiboot_get_fwtables(void)
1721 {
1722 	multiboot_tag_new_acpi_t *nacpitagp;
1723 	multiboot_tag_old_acpi_t *oacpitagp;
1724 	multiboot_tag_efi64_t *efi64tagp = NULL;
1725 	multiboot_tag_efi32_t *efi32tagp = NULL;
1726 
1727 	/* no fw tables from multiboot 1 */
1728 	if (multiboot_version != 2)
1729 		return;
1730 
1731 	efi64tagp = (multiboot_tag_efi64_t *)
1732 	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64);
1733 	if (efi64tagp != NULL) {
1734 		bi->bi_uefi_arch = XBI_UEFI_ARCH_64;
1735 		bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1736 		    efi64tagp->mb_pointer;
1737 		process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
1738 		    efi64tagp->mb_pointer);
1739 	} else {
1740 		efi32tagp = (multiboot_tag_efi32_t *)
1741 		    dboot_multiboot2_find_tag(mb2_info,
1742 		    MULTIBOOT_TAG_TYPE_EFI32);
1743 		if (efi32tagp != NULL) {
1744 			bi->bi_uefi_arch = XBI_UEFI_ARCH_32;
1745 			bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
1746 			    efi32tagp->mb_pointer;
1747 			process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
1748 			    efi32tagp->mb_pointer);
1749 		}
1750 	}
1751 
1752 	/*
1753 	 * The ACPI RSDP can be found by scanning the BIOS memory areas or
1754 	 * from the EFI system table. The boot loader may pass in the address
1755 	 * it found the ACPI tables at.
1756 	 */
1757 	nacpitagp = (multiboot_tag_new_acpi_t *)
1758 	    dboot_multiboot2_find_tag(mb2_info,
1759 	    MULTIBOOT_TAG_TYPE_ACPI_NEW);
1760 	oacpitagp = (multiboot_tag_old_acpi_t *)
1761 	    dboot_multiboot2_find_tag(mb2_info,
1762 	    MULTIBOOT_TAG_TYPE_ACPI_OLD);
1763 
1764 	if (nacpitagp != NULL) {
1765 		bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1766 		    &nacpitagp->mb_rsdp[0];
1767 	} else if (oacpitagp != NULL) {
1768 		bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
1769 		    &oacpitagp->mb_rsdp[0];
1770 	}
1771 }
1772 
/*
 * Print an EFI version number followed by a newline.  The major part is
 * printed first; the minor part is printed as its value divided by 10,
 * followed by ".<remainder>" only when the remainder is non-zero.
 */
static void
dboot_print_efi_version(uint32_t ver)
{
	int minor = EFI_REV_MINOR(ver);
	int upper = minor / 10;
	int lower = minor % 10;

	dboot_printf("%d.", EFI_REV_MAJOR(ver));

	if (lower == 0)
		dboot_printf("%d\n", upper);
	else
		dboot_printf("%d.%d\n", upper, lower);
}
1788 
1789 static void
1790 print_efi32(EFI_SYSTEM_TABLE32 *efi)
1791 {
1792 	uint16_t *data;
1793 	EFI_CONFIGURATION_TABLE32 *conf;
1794 	int i;
1795 
1796 	dboot_printf("EFI32 signature: %llx\n",
1797 	    (unsigned long long)efi->Hdr.Signature);
1798 	dboot_printf("EFI system version: ");
1799 	dboot_print_efi_version(efi->Hdr.Revision);
1800 	dboot_printf("EFI system vendor: ");
1801 	data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1802 	for (i = 0; data[i] != 0; i++)
1803 		dboot_printf("%c", (char)data[i]);
1804 	dboot_printf("\nEFI firmware revision: ");
1805 	dboot_print_efi_version(efi->FirmwareRevision);
1806 	dboot_printf("EFI system table number of entries: %d\n",
1807 	    efi->NumberOfTableEntries);
1808 	conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1809 	    efi->ConfigurationTable;
1810 	for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1811 		dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1812 		    conf[i].VendorGuid.time_low,
1813 		    conf[i].VendorGuid.time_mid,
1814 		    conf[i].VendorGuid.time_hi_and_version,
1815 		    conf[i].VendorGuid.clock_seq_hi_and_reserved,
1816 		    conf[i].VendorGuid.clock_seq_low);
1817 		dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1818 		    conf[i].VendorGuid.node_addr[0],
1819 		    conf[i].VendorGuid.node_addr[1],
1820 		    conf[i].VendorGuid.node_addr[2],
1821 		    conf[i].VendorGuid.node_addr[3],
1822 		    conf[i].VendorGuid.node_addr[4],
1823 		    conf[i].VendorGuid.node_addr[5]);
1824 	}
1825 }
1826 
1827 static void
1828 print_efi64(EFI_SYSTEM_TABLE64 *efi)
1829 {
1830 	uint16_t *data;
1831 	EFI_CONFIGURATION_TABLE64 *conf;
1832 	int i;
1833 
1834 	dboot_printf("EFI64 signature: %llx\n",
1835 	    (unsigned long long)efi->Hdr.Signature);
1836 	dboot_printf("EFI system version: ");
1837 	dboot_print_efi_version(efi->Hdr.Revision);
1838 	dboot_printf("EFI system vendor: ");
1839 	data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1840 	for (i = 0; data[i] != 0; i++)
1841 		dboot_printf("%c", (char)data[i]);
1842 	dboot_printf("\nEFI firmware revision: ");
1843 	dboot_print_efi_version(efi->FirmwareRevision);
1844 	dboot_printf("EFI system table number of entries: %lld\n",
1845 	    efi->NumberOfTableEntries);
1846 	conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1847 	    efi->ConfigurationTable;
1848 	for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1849 		dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1850 		    conf[i].VendorGuid.time_low,
1851 		    conf[i].VendorGuid.time_mid,
1852 		    conf[i].VendorGuid.time_hi_and_version,
1853 		    conf[i].VendorGuid.clock_seq_hi_and_reserved,
1854 		    conf[i].VendorGuid.clock_seq_low);
1855 		dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1856 		    conf[i].VendorGuid.node_addr[0],
1857 		    conf[i].VendorGuid.node_addr[1],
1858 		    conf[i].VendorGuid.node_addr[2],
1859 		    conf[i].VendorGuid.node_addr[3],
1860 		    conf[i].VendorGuid.node_addr[4],
1861 		    conf[i].VendorGuid.node_addr[5]);
1862 	}
1863 }
1864 #endif /* !__xpv */
1865 
1866 /*
1867  * Simple memory allocator, allocates aligned physical memory.
1868  * Note that startup_kernel() only allocates memory, never frees.
1869  * Memory usage just grows in an upward direction.
1870  */
1871 static void *
1872 do_mem_alloc(uint32_t size, uint32_t align)
1873 {
1874 	uint_t i;
1875 	uint64_t best;
1876 	uint64_t start;
1877 	uint64_t end;
1878 
1879 	/*
1880 	 * make sure size is a multiple of pagesize
1881 	 */
1882 	size = RNDUP(size, MMU_PAGESIZE);
1883 	next_avail_addr = RNDUP(next_avail_addr, align);
1884 
1885 	/*
1886 	 * XXPV fixme joe
1887 	 *
1888 	 * a really large bootarchive that causes you to run out of memory
1889 	 * may cause this to blow up
1890 	 */
1891 	/* LINTED E_UNEXPECTED_UINT_PROMOTION */
1892 	best = (uint64_t)-size;
1893 	for (i = 0; i < memlists_used; ++i) {
1894 		start = memlists[i].addr;
1895 #if defined(__xpv)
1896 		start += mfn_base;
1897 #endif
1898 		end = start + memlists[i].size;
1899 
1900 		/*
1901 		 * did we find the desired address?
1902 		 */
1903 		if (start <= next_avail_addr && next_avail_addr + size <= end) {
1904 			best = next_avail_addr;
1905 			goto done;
1906 		}
1907 
1908 		/*
1909 		 * if not is this address the best so far?
1910 		 */
1911 		if (start > next_avail_addr && start < best &&
1912 		    RNDUP(start, align) + size <= end)
1913 			best = RNDUP(start, align);
1914 	}
1915 
1916 	/*
1917 	 * We didn't find exactly the address we wanted, due to going off the
1918 	 * end of a memory region. Return the best found memory address.
1919 	 */
1920 done:
1921 	next_avail_addr = best + size;
1922 #if defined(__xpv)
1923 	if (next_avail_addr > scratch_end)
1924 		dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
1925 		    "0x%lx", (ulong_t)next_avail_addr,
1926 		    (ulong_t)scratch_end);
1927 #endif
1928 	(void) memset((void *)(uintptr_t)best, 0, size);
1929 	return ((void *)(uintptr_t)best);
1930 }
1931 
1932 void *
1933 mem_alloc(uint32_t size)
1934 {
1935 	return (do_mem_alloc(size, MMU_PAGESIZE));
1936 }
1937 
1938 
1939 /*
1940  * Build page tables to map all of memory used so far as well as the kernel.
1941  */
1942 static void
1943 build_page_tables(void)
1944 {
1945 	uint32_t psize;
1946 	uint32_t level;
1947 	uint32_t off;
1948 	uint64_t start;
1949 #if !defined(__xpv)
1950 	uint32_t i;
1951 	uint64_t end;
1952 #endif	/* __xpv */
1953 
1954 	/*
1955 	 * If we're on metal, we need to create the top level pagetable.
1956 	 */
1957 #if defined(__xpv)
1958 	top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
1959 #else /* __xpv */
1960 	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1961 #endif /* __xpv */
1962 	DBG((uintptr_t)top_page_table);
1963 
1964 	/*
1965 	 * Determine if we'll use large mappings for kernel, then map it.
1966 	 */
1967 	if (largepage_support) {
1968 		psize = lpagesize;
1969 		level = 1;
1970 	} else {
1971 		psize = MMU_PAGESIZE;
1972 		level = 0;
1973 	}
1974 
1975 	DBG_MSG("Mapping kernel\n");
1976 	DBG(ktext_phys);
1977 	DBG(target_kernel_text);
1978 	DBG(ksize);
1979 	DBG(psize);
1980 	for (off = 0; off < ksize; off += psize)
1981 		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
1982 
1983 	/*
1984 	 * The kernel will need a 1 page window to work with page tables
1985 	 */
1986 	bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
1987 	DBG(bi->bi_pt_window);
1988 	bi->bi_pte_to_pt_window =
1989 	    (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
1990 	DBG(bi->bi_pte_to_pt_window);
1991 
1992 #if defined(__xpv)
1993 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
1994 		/* If this is a domU we're done. */
1995 		DBG_MSG("\nPage tables constructed\n");
1996 		return;
1997 	}
1998 #endif /* __xpv */
1999 
2000 	/*
2001 	 * We need 1:1 mappings for the lower 1M of memory to access
2002 	 * BIOS tables used by a couple of drivers during boot.
2003 	 *
2004 	 * The following code works because our simple memory allocator
2005 	 * only grows usage in an upwards direction.
2006 	 *
2007 	 * Note that by this point in boot some mappings for low memory
2008 	 * may already exist because we've already accessed device in low
2009 	 * memory.  (Specifically the video frame buffer and keyboard
2010 	 * status ports.)  If we're booting on raw hardware then GRUB
2011 	 * created these mappings for us.  If we're booting under a
2012 	 * hypervisor then we went ahead and remapped these devices into
2013 	 * memory allocated within dboot itself.
2014 	 */
2015 	if (map_debug)
2016 		dboot_printf("1:1 map pa=0..1Meg\n");
2017 	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
2018 #if defined(__xpv)
2019 		map_ma_at_va(start, start, 0);
2020 #else /* __xpv */
2021 		map_pa_at_va(start, start, 0);
2022 #endif /* __xpv */
2023 	}
2024 
2025 #if !defined(__xpv)
2026 
2027 	for (i = 0; i < memlists_used; ++i) {
2028 		start = memlists[i].addr;
2029 		end = start + memlists[i].size;
2030 
2031 		if (map_debug)
2032 			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2033 			    start, end);
2034 		while (start < end && start < next_avail_addr) {
2035 			map_pa_at_va(start, start, 0);
2036 			start += MMU_PAGESIZE;
2037 		}
2038 		if (start >= next_avail_addr)
2039 			break;
2040 	}
2041 
2042 	/*
2043 	 * Map framebuffer memory as PT_NOCACHE as this is memory from a
2044 	 * device and therefore must not be cached.
2045 	 */
2046 	if (bi->bi_framebuffer != NULL && fb->framebuffer != 0) {
2047 		multiboot_tag_framebuffer_t *fb_tagp;
2048 		fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
2049 		    fb->framebuffer;
2050 
2051 		start = fb_tagp->framebuffer_common.framebuffer_addr;
2052 		end = start + fb_tagp->framebuffer_common.framebuffer_height *
2053 		    fb_tagp->framebuffer_common.framebuffer_pitch;
2054 
2055 		if (map_debug)
2056 			dboot_printf("FB 1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2057 			    start, end);
2058 		pte_bits |= PT_NOCACHE;
2059 		if (PAT_support != 0)
2060 			pte_bits |= PT_PAT_4K;
2061 
2062 		while (start < end) {
2063 			map_pa_at_va(start, start, 0);
2064 			start += MMU_PAGESIZE;
2065 		}
2066 		pte_bits &= ~PT_NOCACHE;
2067 		if (PAT_support != 0)
2068 			pte_bits &= ~PT_PAT_4K;
2069 	}
2070 #endif /* !__xpv */
2071 
2072 	DBG_MSG("\nPage tables constructed\n");
2073 }
2074 
/*
 * Message text explaining that multiboot is no longer used to boot the
 * system, and showing the grub entry that should be used instead.
 */
#define	NO_MULTIBOOT	\
	"multiboot is no longer used to boot the Solaris Operating System.\n" \
	"The grub entry should be changed to:\n" \
	"kernel$ /platform/i86pc/kernel/$ISADIR/unix\n" \
	"module$ /platform/i86pc/$ISADIR/boot_archive\n" \
	"See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
2081 
2082 static void
2083 dboot_init_xboot_consinfo(void)
2084 {
2085 	bi = &boot_info;
2086 
2087 #if !defined(__xpv)
2088 	fb = &framebuffer;
2089 	bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;
2090 
2091 	switch (multiboot_version) {
2092 	case 1:
2093 		dboot_multiboot1_xboot_consinfo();
2094 		break;
2095 	case 2:
2096 		dboot_multiboot2_xboot_consinfo();
2097 		break;
2098 	default:
2099 		dboot_panic("Unknown multiboot version: %d\n",
2100 		    multiboot_version);
2101 		break;
2102 	}
2103 	dboot_find_console_modules();
2104 #endif
2105 }
2106 
2107 /*
2108  * Set up basic data from the boot loader.
2109  * The load_addr is part of AOUT kludge setup in dboot_grub.s, to support
2110  * 32-bit dboot code setup used to set up and start 64-bit kernel.
2111  * AOUT kludge does allow 32-bit boot loader, such as grub1, to load and
2112  * start 64-bit illumos kernel.
2113  */
2114 static void
2115 dboot_loader_init(void)
2116 {
2117 #if !defined(__xpv)
2118 	mb_info = NULL;
2119 	mb2_info = NULL;
2120 
2121 	switch (mb_magic) {
2122 	case MB_BOOTLOADER_MAGIC:
2123 		multiboot_version = 1;
2124 		mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
2125 #if defined(_BOOT_TARGET_amd64)
2126 		load_addr = mb_header.load_addr;
2127 #endif
2128 		break;
2129 
2130 	case MULTIBOOT2_BOOTLOADER_MAGIC:
2131 		multiboot_version = 2;
2132 		mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
2133 		mb2_mmap_tagp = dboot_multiboot2_get_mmap_tagp(mb2_info);
2134 #if defined(_BOOT_TARGET_amd64)
2135 		load_addr = mb2_load_addr;
2136 #endif
2137 		break;
2138 
2139 	default:
2140 		dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
2141 		break;
2142 	}
2143 #endif	/* !defined(__xpv) */
2144 }
2145 
2146 /* Extract the kernel command line from [multi]boot information. */
2147 static char *
2148 dboot_loader_cmdline(void)
2149 {
2150 	char *line = NULL;
2151 
2152 #if defined(__xpv)
2153 	line = (char *)xen_info->cmd_line;
2154 #else /* __xpv */
2155 
2156 	switch (multiboot_version) {
2157 	case 1:
2158 		if (mb_info->flags & MB_INFO_CMDLINE)
2159 			line = (char *)mb_info->cmdline;
2160 		break;
2161 
2162 	case 2:
2163 		line = dboot_multiboot2_cmdline(mb2_info);
2164 		break;
2165 
2166 	default:
2167 		dboot_panic("Unknown multiboot version: %d\n",
2168 		    multiboot_version);
2169 		break;
2170 	}
2171 
2172 #endif /* __xpv */
2173 
2174 	/*
2175 	 * Make sure we have valid pointer so the string operations
2176 	 * will not crash us.
2177 	 */
2178 	if (line == NULL)
2179 		line = "";
2180 
2181 	return (line);
2182 }
2183 
2184 static char *
2185 dboot_loader_name(void)
2186 {
2187 #if defined(__xpv)
2188 	return (NULL);
2189 #else /* __xpv */
2190 	multiboot_tag_string_t *tag;
2191 
2192 	switch (multiboot_version) {
2193 	case 1:
2194 		return ((char *)mb_info->boot_loader_name);
2195 
2196 	case 2:
2197 		tag = dboot_multiboot2_find_tag(mb2_info,
2198 		    MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
2199 		return (tag->mb_string);
2200 	default:
2201 		dboot_panic("Unknown multiboot version: %d\n",
2202 		    multiboot_version);
2203 		break;
2204 	}
2205 
2206 	return (NULL);
2207 #endif /* __xpv */
2208 }
2209 
2210 /*
2211  * startup_kernel has a pretty simple job. It builds pagetables which reflect
2212  * 1:1 mappings for all memory in use. It then also adds mappings for
2213  * the kernel nucleus at virtual address of target_kernel_text using large page
2214  * mappings. The page table pages are also accessible at 1:1 mapped
2215  * virtual addresses.
2216  */
2217 /*ARGSUSED*/
2218 void
2219 startup_kernel(void)
2220 {
2221 	char *cmdline;
2222 	char *bootloader;
2223 #if defined(__xpv)
2224 	physdev_set_iopl_t set_iopl;
2225 #endif /* __xpv */
2226 
2227 	if (dboot_debug == 1)
2228 		bcons_init(NULL);	/* Set very early console to ttya. */
2229 	dboot_loader_init();
2230 	/*
2231 	 * At this point we are executing in a 32 bit real mode.
2232 	 */
2233 
2234 	bootloader = dboot_loader_name();
2235 	cmdline = dboot_loader_cmdline();
2236 
2237 #if defined(__xpv)
2238 	/*
2239 	 * For dom0, before we initialize the console subsystem we'll
2240 	 * need to enable io operations, so set I/O priveldge level to 1.
2241 	 */
2242 	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2243 		set_iopl.iopl = 1;
2244 		(void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
2245 	}
2246 #endif /* __xpv */
2247 
2248 	dboot_init_xboot_consinfo();
2249 	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
2250 	bcons_init(bi);		/* Now we can set the real console. */
2251 
2252 	prom_debug = (find_boot_prop("prom_debug") != NULL);
2253 	map_debug = (find_boot_prop("map_debug") != NULL);
2254 
2255 #if !defined(__xpv)
2256 	dboot_multiboot_get_fwtables();
2257 #endif
2258 	DBG_MSG("\n\nillumos prekernel set: ");
2259 	DBG_MSG(cmdline);
2260 	DBG_MSG("\n");
2261 
2262 	if (bootloader != NULL && prom_debug) {
2263 		dboot_printf("Kernel loaded by: %s\n", bootloader);
2264 #if !defined(__xpv)
2265 		dboot_printf("Using multiboot %d boot protocol.\n",
2266 		    multiboot_version);
2267 #endif
2268 	}
2269 
2270 	if (strstr(cmdline, "multiboot") != NULL) {
2271 		dboot_panic(NO_MULTIBOOT);
2272 	}
2273 
2274 	DBG((uintptr_t)bi);
2275 #if !defined(__xpv)
2276 	DBG((uintptr_t)mb_info);
2277 	DBG((uintptr_t)mb2_info);
2278 	if (mb2_info != NULL)
2279 		DBG(mb2_info->mbi_total_size);
2280 	DBG(bi->bi_acpi_rsdp);
2281 	DBG(bi->bi_smbios);
2282 	DBG(bi->bi_uefi_arch);
2283 	DBG(bi->bi_uefi_systab);
2284 
2285 	if (bi->bi_uefi_systab && prom_debug) {
2286 		if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) {
2287 			print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
2288 			    bi->bi_uefi_systab);
2289 		} else {
2290 			print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
2291 			    bi->bi_uefi_systab);
2292 		}
2293 	}
2294 #endif
2295 
2296 	/*
2297 	 * Need correct target_kernel_text value
2298 	 */
2299 #if defined(_BOOT_TARGET_amd64)
2300 	target_kernel_text = KERNEL_TEXT_amd64;
2301 #elif defined(__xpv)
2302 	target_kernel_text = KERNEL_TEXT_i386_xpv;
2303 #else
2304 	target_kernel_text = KERNEL_TEXT_i386;
2305 #endif
2306 	DBG(target_kernel_text);
2307 
2308 #if defined(__xpv)
2309 
2310 	/*
2311 	 * XXPV	Derive this stuff from CPUID / what the hypervisor has enabled
2312 	 */
2313 
2314 #if defined(_BOOT_TARGET_amd64)
2315 	/*
2316 	 * 64-bit hypervisor.
2317 	 */
2318 	amd64_support = 1;
2319 	pae_support = 1;
2320 
2321 #else	/* _BOOT_TARGET_amd64 */
2322 
2323 	/*
2324 	 * See if we are running on a PAE Hypervisor
2325 	 */
2326 	{
2327 		xen_capabilities_info_t caps;
2328 
2329 		if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2330 			dboot_panic("HYPERVISOR_xen_version(caps) failed");
2331 		caps[sizeof (caps) - 1] = 0;
2332 		if (prom_debug)
2333 			dboot_printf("xen capabilities %s\n", caps);
2334 		if (strstr(caps, "x86_32p") != NULL)
2335 			pae_support = 1;
2336 	}
2337 
2338 #endif	/* _BOOT_TARGET_amd64 */
2339 	{
2340 		xen_platform_parameters_t p;
2341 
2342 		if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2343 			dboot_panic("HYPERVISOR_xen_version(parms) failed");
2344 		DBG(p.virt_start);
2345 		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2346 	}
2347 
2348 	/*
2349 	 * The hypervisor loads stuff starting at 1Gig
2350 	 */
2351 	mfn_base = ONE_GIG;
2352 	DBG(mfn_base);
2353 
2354 	/*
2355 	 * enable writable page table mode for the hypervisor
2356 	 */
2357 	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2358 	    VMASST_TYPE_writable_pagetables) < 0)
2359 		dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2360 
2361 	/*
2362 	 * check for NX support
2363 	 */
2364 	if (pae_support) {
2365 		uint32_t eax = 0x80000000;
2366 		uint32_t edx = get_cpuid_edx(&eax);
2367 
2368 		if (eax >= 0x80000001) {
2369 			eax = 0x80000001;
2370 			edx = get_cpuid_edx(&eax);
2371 			if (edx & CPUID_AMD_EDX_NX)
2372 				NX_support = 1;
2373 		}
2374 	}
2375 
2376 	/*
2377 	 * check for PAT support
2378 	 */
2379 	{
2380 		uint32_t eax = 1;
2381 		uint32_t edx = get_cpuid_edx(&eax);
2382 
2383 		if (edx & CPUID_INTC_EDX_PAT)
2384 			PAT_support = 1;
2385 	}
2386 #if !defined(_BOOT_TARGET_amd64)
2387 
2388 	/*
2389 	 * The 32-bit hypervisor uses segmentation to protect itself from
2390 	 * guests. This means when a guest attempts to install a flat 4GB
2391 	 * code or data descriptor the 32-bit hypervisor will protect itself
2392 	 * by silently shrinking the segment such that if the guest attempts
2393 	 * any access where the hypervisor lives a #gp fault is generated.
2394 	 * The problem is that some applications expect a full 4GB flat
2395 	 * segment for their current thread pointer and will use negative
2396 	 * offset segment wrap around to access data. TLS support in linux
2397 	 * brand is one example of this.
2398 	 *
2399 	 * The 32-bit hypervisor can catch the #gp fault in these cases
2400 	 * and emulate the access without passing the #gp fault to the guest
2401 	 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2402 	 * Seems like this should have been the default.
2403 	 * Either way, we want the hypervisor -- and not Solaris -- to deal
2404 	 * to deal with emulating these accesses.
2405 	 */
2406 	if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2407 	    VMASST_TYPE_4gb_segments) < 0)
2408 		dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2409 #endif	/* !_BOOT_TARGET_amd64 */
2410 
2411 #else	/* __xpv */
2412 
2413 	/*
2414 	 * use cpuid to enable MMU features
2415 	 */
2416 	if (have_cpuid()) {
2417 		uint32_t eax, edx;
2418 
2419 		eax = 1;
2420 		edx = get_cpuid_edx(&eax);
2421 		if (edx & CPUID_INTC_EDX_PSE)
2422 			largepage_support = 1;
2423 		if (edx & CPUID_INTC_EDX_PGE)
2424 			pge_support = 1;
2425 		if (edx & CPUID_INTC_EDX_PAE)
2426 			pae_support = 1;
2427 		if (edx & CPUID_INTC_EDX_PAT)
2428 			PAT_support = 1;
2429 
2430 		eax = 0x80000000;
2431 		edx = get_cpuid_edx(&eax);
2432 		if (eax >= 0x80000001) {
2433 			eax = 0x80000001;
2434 			edx = get_cpuid_edx(&eax);
2435 			if (edx & CPUID_AMD_EDX_LM)
2436 				amd64_support = 1;
2437 			if (edx & CPUID_AMD_EDX_NX)
2438 				NX_support = 1;
2439 		}
2440 	} else {
2441 		dboot_printf("cpuid not supported\n");
2442 	}
2443 #endif /* __xpv */
2444 
2445 
2446 #if defined(_BOOT_TARGET_amd64)
2447 	if (amd64_support == 0)
2448 		dboot_panic("long mode not supported, rebooting");
2449 	else if (pae_support == 0)
2450 		dboot_panic("long mode, but no PAE; rebooting");
2451 #else
2452 	/*
2453 	 * Allow the command line to over-ride use of PAE for 32 bit.
2454 	 */
2455 	if (strstr(cmdline, "disablePAE=true") != NULL) {
2456 		pae_support = 0;
2457 		NX_support = 0;
2458 		amd64_support = 0;
2459 	}
2460 #endif
2461 
2462 	/*
2463 	 * initialize the simple memory allocator
2464 	 */
2465 	init_mem_alloc();
2466 
2467 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2468 	/*
2469 	 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2470 	 */
2471 	if (max_mem < FOUR_GIG && NX_support == 0)
2472 		pae_support = 0;
2473 #endif
2474 
2475 	/*
2476 	 * configure mmu information
2477 	 */
2478 	if (pae_support) {
2479 		shift_amt = shift_amt_pae;
2480 		ptes_per_table = 512;
2481 		pte_size = 8;
2482 		lpagesize = TWO_MEG;
2483 #if defined(_BOOT_TARGET_amd64)
2484 		top_level = 3;
2485 #else
2486 		top_level = 2;
2487 #endif
2488 	} else {
2489 		pae_support = 0;
2490 		NX_support = 0;
2491 		shift_amt = shift_amt_nopae;
2492 		ptes_per_table = 1024;
2493 		pte_size = 4;
2494 		lpagesize = FOUR_MEG;
2495 		top_level = 1;
2496 	}
2497 
2498 	DBG(PAT_support);
2499 	DBG(pge_support);
2500 	DBG(NX_support);
2501 	DBG(largepage_support);
2502 	DBG(amd64_support);
2503 	DBG(top_level);
2504 	DBG(pte_size);
2505 	DBG(ptes_per_table);
2506 	DBG(lpagesize);
2507 
2508 #if defined(__xpv)
2509 	ktext_phys = ONE_GIG;		/* from UNIX Mapfile */
2510 #else
2511 	ktext_phys = FOUR_MEG;		/* from UNIX Mapfile */
2512 #endif
2513 
2514 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2515 	/*
2516 	 * For grub, copy kernel bits from the ELF64 file to final place.
2517 	 */
2518 	DBG_MSG("\nAllocating nucleus pages.\n");
2519 	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2520 
2521 	if (ktext_phys == 0)
2522 		dboot_panic("failed to allocate aligned kernel memory");
2523 	DBG(load_addr);
2524 	if (dboot_elfload64(load_addr) != 0)
2525 		dboot_panic("failed to parse kernel ELF image, rebooting");
2526 #endif
2527 
2528 	DBG(ktext_phys);
2529 
2530 	/*
2531 	 * Allocate page tables.
2532 	 */
2533 	build_page_tables();
2534 
2535 	/*
2536 	 * return to assembly code to switch to running kernel
2537 	 */
2538 	entry_addr_low = (uint32_t)target_kernel_text;
2539 	DBG(entry_addr_low);
2540 	bi->bi_use_largepage = largepage_support;
2541 	bi->bi_use_pae = pae_support;
2542 	bi->bi_use_pge = pge_support;
2543 	bi->bi_use_nx = NX_support;
2544 
2545 #if defined(__xpv)
2546 
2547 	bi->bi_next_paddr = next_avail_addr - mfn_base;
2548 	DBG(bi->bi_next_paddr);
2549 	bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2550 	DBG(bi->bi_next_vaddr);
2551 
2552 	/*
2553 	 * unmap unused pages in start area to make them available for DMA
2554 	 */
2555 	while (next_avail_addr < scratch_end) {
2556 		(void) HYPERVISOR_update_va_mapping(next_avail_addr,
2557 		    0, UVMF_INVLPG | UVMF_LOCAL);
2558 		next_avail_addr += MMU_PAGESIZE;
2559 	}
2560 
2561 	bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info;
2562 	DBG((uintptr_t)HYPERVISOR_shared_info);
2563 	bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2564 	bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2565 
2566 #else /* __xpv */
2567 
2568 	bi->bi_next_paddr = next_avail_addr;
2569 	DBG(bi->bi_next_paddr);
2570 	bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2571 	DBG(bi->bi_next_vaddr);
2572 	bi->bi_mb_version = multiboot_version;
2573 
2574 	switch (multiboot_version) {
2575 	case 1:
2576 		bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info;
2577 		break;
2578 	case 2:
2579 		bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info;
2580 		break;
2581 	default:
2582 		dboot_panic("Unknown multiboot version: %d\n",
2583 		    multiboot_version);
2584 		break;
2585 	}
2586 	bi->bi_top_page_table = (uintptr_t)top_page_table;
2587 
2588 #endif /* __xpv */
2589 
2590 	bi->bi_kseg_size = FOUR_MEG;
2591 	DBG(bi->bi_kseg_size);
2592 
2593 #ifndef __xpv
2594 	if (map_debug)
2595 		dump_tables();
2596 #endif
2597 
2598 	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2599 
2600 #ifndef __xpv
2601 	/* Update boot info with FB data */
2602 	fb->cursor.origin.x = fb_info.cursor.origin.x;
2603 	fb->cursor.origin.y = fb_info.cursor.origin.y;
2604 	fb->cursor.pos.x = fb_info.cursor.pos.x;
2605 	fb->cursor.pos.y = fb_info.cursor.pos.y;
2606 	fb->cursor.visible = fb_info.cursor.visible;
2607 #endif
2608 }
2609