/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright 2020 Joyent, Inc.
 */


#include <sys/types.h>
#include <sys/machparam.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/mach_mmu.h>
#include <sys/multiboot.h>
#include <sys/multiboot2.h>
#include <sys/multiboot2_impl.h>
#include <sys/sysmacros.h>
#include <sys/framebuffer.h>
#include <sys/sha1.h>
#include <util/string.h>
#include <util/strtolctype.h>
#include <sys/efi.h>

/*
 * Compile-time debug knob. We have no mechanism to control this at run
 * time, as boot is the earliest code we run, and we do not want it
 * switched on by default.
 */
int dboot_debug = 0;

#if defined(__xpv)

#include <sys/hypervisor.h>
uintptr_t xen_virt_start;
pfn_t *mfn_to_pfn_mapping;

#else /* !__xpv */

extern multiboot_header_t mb_header;
extern uint32_t mb2_load_addr;
extern int have_cpuid(void);

#endif /* !__xpv */

#include <sys/inttypes.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#include <sys/boot_console.h>

#include "dboot_asm.h"
#include "dboot_printf.h"
#include "dboot_xboot.h"
#include "dboot_elfload.h"

#define	SHA1_ASCII_LENGTH	(SHA1_DIGEST_LENGTH * 2)

/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or an XPV domain loader to
 * regular kernel execution. Its task is to set up the kernel memory image
 * and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 *	- a 32 bit program for the 32-bit PV hypervisor
 *	- a 64 bit program for the 64-bit PV hypervisor (at least for now)
 *
 * Under the PV hypervisor, we must create mappings for any memory beyond the
 * initial start of day allocation (such as the kernel itself).
 *
 * When on the metal, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running without paging enabled, all such memory is accessible.
 */

/*
 * Standard bits used in PTE (page level) and PTP (internal levels)
 */
x86pte_t ptp_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_REF | PT_WRITABLE | PT_MOD | PT_NOCONSIST;

/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On the hypervisor this is actually a
 * virtual address.
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel().
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation
 */
static paddr_t next_avail_addr = 0;

#if defined(__xpv)
/*
 * Additional information needed for hypervisor memory allocation.
 * Only memory up to scratch_end is mapped by page tables.
 * mfn_base is the start of the hypervisor virtual image. It's ONE_GIG, so
 * to derive a pfn from a pointer, you subtract mfn_base.
 */

static paddr_t scratch_end = 0;	/* we can't write all of mem here */
static paddr_t mfn_base;	/* addr corresponding to mfn_list[0] */
start_info_t *xen_info;

#else	/* __xpv */

/*
 * If on the metal, then we have a multiboot loader.
 */
uint32_t mb_magic;		/* magic from boot loader */
uint32_t mb_addr;		/* multiboot info package from loader */
int multiboot_version;
multiboot_info_t *mb_info;
multiboot2_info_header_t *mb2_info;
int num_entries;		/* mmap entry count */
boolean_t num_entries_set;	/* is mmap entry count set */
uintptr_t load_addr;
static boot_framebuffer_t framebuffer __aligned(16);
static boot_framebuffer_t *fb;

/* cannot be automatic variables because of alignment */
static efi_guid_t smbios3 = SMBIOS3_TABLE_GUID;
static efi_guid_t smbios = SMBIOS_TABLE_GUID;
static efi_guid_t acpi2 = EFI_ACPI_TABLE_GUID;
static efi_guid_t acpi1 = ACPI_10_TABLE_GUID;
#endif	/* __xpv */

/*
 * This contains information passed to the kernel
 */
struct xboot_info boot_info __aligned(16);
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 */
static paddr_t max_mem;			/* maximum memory address */

/*
 * Information about processor MMU
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;
int PAT_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;
struct boot_memlist rsvdmemlists[MAX_MEMLIST];
uint_t rsvdmemlists_used = 0;

/*
 * This should match what's in the bootloader. It's arbitrary, but GRUB
 * in particular has limitations on how much space it can use before it
 * stops working properly. This should be enough.
 */
struct boot_modules modules[MAX_BOOT_MODULES];
uint_t modules_used = 0;

#ifdef __xpv
/*
 * Xen strips the size field out of the mb_memory_map_t, see struct e820entry
 * definition in Xen source.
 */
typedef struct {
	uint32_t base_addr_low;
	uint32_t base_addr_high;
	uint32_t length_low;
	uint32_t length_high;
	uint32_t type;
} mmap_t;

/*
 * There is 512KB of scratch area after the boot stack page.
 * We'll use that for everything except the kernel nucleus pages which are too
 * big to fit there and are allocated last anyway.
 */
#define	MAXMAPS	100
static mmap_t map_buffer[MAXMAPS];
#else
typedef mb_memory_map_t mmap_t;
#endif

/*
 * Debugging macros
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

static char noname[2] = "-";

/*
 * Either hypervisor-specific or grub-specific code builds the initial
 * memlists. This code does the sort/merge/link for final use.
 */
static void
sort_physinstall(void)
{
	int i;
#if !defined(__xpv)
	int j;
	struct boot_memlist tmp;

	/*
	 * Now sort the memlists, in case they weren't in order.
	 * Yeah, this is a bubble sort; small, simple and easy to get right.
	 */
	DBG_MSG("Sorting phys-installed list\n");
	for (j = memlists_used - 1; j > 0; --j) {
		for (i = 0; i < j; ++i) {
			if (memlists[i].addr < memlists[i + 1].addr)
				continue;
			tmp = memlists[i];
			memlists[i] = memlists[i + 1];
			memlists[i + 1] = tmp;
		}
	}

	/*
	 * Merge any memlists that don't have holes between them.
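	 * For example, [0x1000, 0x2000) followed immediately by
	 * [0x2000, 0x3000) collapses into a single [0x1000, 0x3000) entry.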
	 */
	for (i = 0; i <= memlists_used - 1; ++i) {
		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
			continue;

		if (prom_debug)
			dboot_printf(
			    "merging mem segs %" PRIx64 "...%" PRIx64
			    " w/ %" PRIx64 "...%" PRIx64 "\n",
			    memlists[i].addr,
			    memlists[i].addr + memlists[i].size,
			    memlists[i + 1].addr,
			    memlists[i + 1].addr + memlists[i + 1].size);

		memlists[i].size += memlists[i + 1].size;
		for (j = i + 1; j < memlists_used - 1; ++j)
			memlists[j] = memlists[j + 1];
		--memlists_used;
		DBG(memlists_used);
		--i;	/* after merging we need to reexamine, so do this */
	}
#endif	/* __xpv */

	if (prom_debug) {
		dboot_printf("\nFinal memlists:\n");
		for (i = 0; i < memlists_used; ++i) {
			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
		}
	}

	/*
	 * link together the memlists with native size pointers
	 */
	memlists[0].next = 0;
	memlists[0].prev = 0;
	for (i = 1; i < memlists_used; ++i) {
		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
		memlists[i].next = 0;
		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
	}
	bi->bi_phys_install = (native_ptr_t)(uintptr_t)memlists;
	DBG(bi->bi_phys_install);
}

/*
 * build bios reserved memlists
 */
static void
build_rsvdmemlists(void)
{
	int i;

	rsvdmemlists[0].next = 0;
	rsvdmemlists[0].prev = 0;
	for (i = 1; i < rsvdmemlists_used; ++i) {
		rsvdmemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i - 1);
		rsvdmemlists[i].next = 0;
		rsvdmemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(rsvdmemlists + i);
	}
	bi->bi_rsvdmem = (native_ptr_t)(uintptr_t)rsvdmemlists;
	DBG(bi->bi_rsvdmem);
}

#if defined(__xpv)

/*
 * halt on the hypervisor after a delay to drain console output
 */
__NORETURN void
dboot_halt(void)
{
	uint_t i = 10000;

	while (--i)
		(void) HYPERVISOR_yield();
	(void) HYPERVISOR_shutdown(SHUTDOWN_poweroff);
	/* never reached */
	for (;;)
		;
}

/*
 * From a machine address, find the corresponding pseudo-physical address.
 * Pseudo-physical addresses are contiguous and run from mfn_base in each VM.
 * Machine addresses are the real underlying hardware addresses.
 * These are needed for page table entries. Note that this routine is
 * poorly protected. A bad value of "ma" will cause a page fault.
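 *
 * The translation below is pa = mfn_base + mmu_ptob(pfn) + pgoff, where
 * pfn comes from mfn_to_pfn_mapping[mmu_btop(ma)].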
 */
paddr_t
ma_to_pa(maddr_t ma)
{
	ulong_t pgoff = ma & MMU_PAGEOFFSET;
	ulong_t pfn = mfn_to_pfn_mapping[mmu_btop(ma)];
	paddr_t pa;

	if (pfn >= xen_info->nr_pages)
		return (-(paddr_t)1);
	pa = mfn_base + mmu_ptob((paddr_t)pfn) + pgoff;
#ifdef DEBUG
	if (ma != pa_to_ma(pa))
		dboot_printf("ma_to_pa(%" PRIx64 ") got %" PRIx64 ", "
		    "pa_to_ma() says %" PRIx64 "\n", ma, pa, pa_to_ma(pa));
#endif
	return (pa);
}

/*
 * From a pseudo-physical address, find the corresponding machine address.
 */
maddr_t
pa_to_ma(paddr_t pa)
{
	pfn_t pfn;
	ulong_t mfn;

	pfn = mmu_btop(pa - mfn_base);
	if (pa < mfn_base || pfn >= xen_info->nr_pages)
		dboot_panic("pa_to_ma(): illegal address 0x%lx", (ulong_t)pa);
	mfn = ((ulong_t *)xen_info->mfn_list)[pfn];
#ifdef DEBUG
	if (mfn_to_pfn_mapping[mfn] != pfn)
		dboot_printf("pa_to_ma(pfn=%lx) got %lx ma_to_pa() says %lx\n",
		    pfn, mfn, mfn_to_pfn_mapping[mfn]);
#endif
	return (mfn_to_ma(mfn) | (pa & MMU_PAGEOFFSET));
}

#endif	/* __xpv */

x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	if (pae_support)
		return (((x86pte_t *)(uintptr_t)table)[index]);
	return (((x86pte32_t *)(uintptr_t)table)[index]);
}

/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
#ifdef __xpv
	mmu_update_t t;
	maddr_t mtable = pa_to_ma(table);
	int retcnt;

	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
	t.val = pteval;
	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
		dboot_panic("HYPERVISOR_mmu_update() failed");
#else /* __xpv */
	uintptr_t tab_addr = (uintptr_t)table;

	if (pae_support)
		((x86pte_t *)tab_addr)[index] = pteval;
	else
		((x86pte32_t *)tab_addr)[index] = (x86pte32_t)pteval;
	if (level == top_level && level == 2)
		reload_cr3();
#endif /* __xpv */
}

paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);

	if (level == top_level && level == 2)
		*pteval = pa_to_ma((uintptr_t)new_table) | PT_VALID;
	else
		*pteval = pa_to_ma((uintptr_t)new_table) | ptp_bits;

#ifdef __xpv
	/* Remove write permission to the new page table. */
	if (HYPERVISOR_update_va_mapping(new_table,
	    *pteval & ~(x86pte_t)PT_WRITABLE, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("HYP_update_va_mapping error");
#endif

	if (map_debug)
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	return (new_table);
}

x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	return ((x86pte_t *)(uintptr_t)(table + index * pte_size));
}

/*
 * dump out the contents of page tables...
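 *
 * The tree is walked iteratively: save_table/save_index act as an explicit
 * stack, so the "recursion" is done with gotos instead of function calls.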
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	uint_t pa, pa1;
#if !defined(__xpv)
#define	maddr_t paddr_t
#endif /* !__xpv */

	dboot_printf("Finished pagetables:\n");
	table = (char *)(uintptr_t)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %p[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, (void *)table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%x\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 255)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}

/*
 * Add a mapping for the machine page at the given virtual address.
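 * A level greater than zero requests a large page mapping, in which case
 * PT_PAGESIZE is set in the resulting PTE.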
 */
static void
map_ma_at_va(maddr_t ma, native_ptr_t va, uint_t level)
{
	x86pte_t *ptep;
	x86pte_t pteval;

	pteval = ma | pte_bits;
	if (level > 0)
		pteval |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		pteval |= PT_GLOBAL;

	if (map_debug && ma != va)
		dboot_printf("mapping ma=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)ma, (uint64_t)va, pteval, level);

#if defined(__xpv)
	/*
	 * see if we can avoid find_pte() on the hypervisor
	 */
	if (HYPERVISOR_update_va_mapping(va, pteval,
	    UVMF_INVLPG | UVMF_LOCAL) == 0)
		return;
#endif

	/*
	 * Find the pte that will map this address. This creates any
	 * missing intermediate level page tables
	 */
	ptep = find_pte(va, NULL, level, 0);

	/*
	 * When paravirtualized, we must use hypervisor calls to modify the
	 * PTE, since paging is active. On real hardware we just write to
	 * the pagetables which aren't in use yet.
	 */
#if defined(__xpv)
	ptep = ptep;	/* shut lint up */
	if (HYPERVISOR_update_va_mapping(va, pteval, UVMF_INVLPG | UVMF_LOCAL))
		dboot_panic("mmu_update failed-map_ma_at_va va=0x%" PRIx64
		    " l=%d ma=0x%" PRIx64 ", pte=0x%" PRIx64 "",
		    (uint64_t)va, level, (uint64_t)ma, pteval);
#else
	if (va < 1024 * 1024)
		pteval |= PT_NOCACHE;		/* for video RAM */
	if (pae_support)
		*ptep = pteval;
	else
		*((x86pte32_t *)ptep) = (x86pte32_t)pteval;
#endif
}

/*
 * Add a mapping for the physical page at the given virtual address.
 */
static void
map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
{
	map_ma_at_va(pa_to_ma(pa), va, level);
}

/*
 * This is called to remove start..end from the
 * possible range of PCI addresses.
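 * For example, removing a range that lies strictly inside an existing
 * entry splits that entry in two: [addr, start) and [end, addr + size).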
 */
const uint64_t pci_lo_limit = 0x00100000ul;
const uint64_t pci_hi_limit = 0xfff00000ul;
static void
exclude_from_pci(uint64_t start, uint64_t end)
{
	int i;
	int j;
	struct boot_memlist *ml;

	for (i = 0; i < pcimemlists_used; ++i) {
		ml = &pcimemlists[i];

		/* delete the entire range? */
		if (start <= ml->addr && ml->addr + ml->size <= end) {
			--pcimemlists_used;
			for (j = i; j < pcimemlists_used; ++j)
				pcimemlists[j] = pcimemlists[j + 1];
			--i;	/* to revisit the new one at this index */
		}

		/* split a range? */
		else if (ml->addr < start && end < ml->addr + ml->size) {

			++pcimemlists_used;
			if (pcimemlists_used > MAX_MEMLIST)
				dboot_panic("too many pcimemlists");

			for (j = pcimemlists_used - 1; j > i; --j)
				pcimemlists[j] = pcimemlists[j - 1];
			ml->size = start - ml->addr;

			++ml;
			ml->size = (ml->addr + ml->size) - end;
			ml->addr = end;
			++i;	/* skip on to next one */
		}

		/* cut memory off the start? */
		else if (ml->addr < end && end < ml->addr + ml->size) {
			ml->size -= end - ml->addr;
			ml->addr = end;
		}

		/* cut memory off the end? */
		else if (ml->addr <= start && start < ml->addr + ml->size) {
			ml->size = start - ml->addr;
		}
	}
}

/*
 * During memory allocation, find the highest address not used yet.
 */
static void
check_higher(paddr_t a)
{
	if (a < next_avail_addr)
		return;
	next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
	DBG(next_avail_addr);
}

static int
dboot_loader_mmap_entries(void)
{
#if !defined(__xpv)
	if (num_entries_set == B_TRUE)
		return (num_entries);

	switch (multiboot_version) {
	case 1:
		DBG(mb_info->flags);
		if (mb_info->flags & 0x40) {
			mb_memory_map_t *mmap;
			caddr32_t mmap_addr;

			DBG(mb_info->mmap_addr);
			DBG(mb_info->mmap_length);
			check_higher(mb_info->mmap_addr + mb_info->mmap_length);

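			/*
			 * Each map entry's "size" field counts only the bytes
			 * that follow it, so the next entry lives size +
			 * sizeof (size) bytes past the current one.
			 */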
			for (mmap_addr = mb_info->mmap_addr;
			    mmap_addr < mb_info->mmap_addr +
			    mb_info->mmap_length;
			    mmap_addr += mmap->size + sizeof (mmap->size)) {
				mmap = (mb_memory_map_t *)(uintptr_t)mmap_addr;
				++num_entries;
			}

			num_entries_set = B_TRUE;
		}
		break;
	case 2:
		num_entries = dboot_multiboot2_efi_mmap_nentries(mb2_info);
		if (num_entries == 0)
			num_entries = dboot_multiboot2_mmap_nentries(mb2_info);
		if (num_entries == 0)
			dboot_panic("No memory map?\n");
		num_entries_set = B_TRUE;
		break;
	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (num_entries);
#else
	return (MAXMAPS);
#endif
}

#if !defined(__xpv)
static uint32_t
dboot_efi_to_smap_type(int index, uint32_t type)
{
	uint64_t addr;

	/*
	 * ACPI 6.1 says that lower memory should be reported as normal
	 * memory, so we enforce the type of page 0 even though VMware
	 * maps it as ACPI-reclaimable.
	 */
	if (dboot_multiboot2_efi_mmap_get_base(mb2_info, index, &addr)) {
		if (addr == 0)
			return (1);
	}

	/*
	 * Translate UEFI memory types to SMAP types.
	 * See "ACPI Specification Release 6.5 Errata A"
	 * Table 15-6 (page 785), UEFI Memory Types and mapping to ACPI
	 * address range types.
	 */

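	/*
	 * The SMAP values returned below are: 1 usable RAM
	 * (AddressRangeMemory), 2 reserved (AddressRangeReserved),
	 * 3 ACPI reclaimable (AddressRangeACPI) and 4 ACPI NVS
	 * (AddressRangeNVS).
	 */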
	switch (type) {
	case EfiLoaderCode:
	case EfiLoaderData:
	case EfiBootServicesCode:
	case EfiBootServicesData:
	case EfiConventionalMemory:
		return (1);
	case EfiReservedMemoryType:
	case EfiRuntimeServicesCode:
	case EfiRuntimeServicesData:
	case EfiMemoryMappedIO:
	case EfiMemoryMappedIOPortSpace:
	case EfiPalCode:
	case EfiUnusableMemory:
		return (2);
	case EfiACPIReclaimMemory:
		return (3);
	case EfiACPIMemoryNVS:
		return (4);
	}

	return (2);
}
#endif

static uint32_t
dboot_loader_mmap_get_type(int index)
{
#if !defined(__xpv)
	mb_memory_map_t *mp, *mpend;
	uint32_t type;
	int i;

	switch (multiboot_version) {
	case 1:
		mp = (mb_memory_map_t *)(uintptr_t)mb_info->mmap_addr;
		mpend = (mb_memory_map_t *)(uintptr_t)
		    (mb_info->mmap_addr + mb_info->mmap_length);

		for (i = 0; mp < mpend && i != index; i++)
			mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
			    sizeof (mp->size));
		if (mp >= mpend) {
			dboot_panic("dboot_loader_mmap_get_type(): index "
			    "out of bounds: %d\n", index);
		}
		return (mp->type);

	case 2:
		if (dboot_multiboot2_efi_mmap_get_type(mb2_info, index, &type))
			return (dboot_efi_to_smap_type(index, type));

		if (dboot_multiboot2_mmap_get_type(mb2_info, index, &type))
			return (type);

		dboot_panic("Cannot get memory type for %d\n", index);

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
#else
	return (map_buffer[index].type);
#endif
}

static uint64_t
dboot_loader_mmap_get_base(int index)
{
#if !defined(__xpv)
	mb_memory_map_t *mp, *mpend;
	uint64_t base;
	int i;

	switch (multiboot_version) {
	case 1:
		mp = (mb_memory_map_t *)mb_info->mmap_addr;
		mpend = (mb_memory_map_t *)
		    (mb_info->mmap_addr + mb_info->mmap_length);

		for (i = 0; mp < mpend && i != index; i++)
			mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
			    sizeof (mp->size));
		if (mp >= mpend) {
			dboot_panic("dboot_loader_mmap_get_base(): index "
			    "out of bounds: %d\n", index);
		}
		return (((uint64_t)mp->base_addr_high << 32) +
		    (uint64_t)mp->base_addr_low);

	case 2:
		if (dboot_multiboot2_efi_mmap_get_base(mb2_info, index, &base))
			return (base);

		if (dboot_multiboot2_mmap_get_base(mb2_info, index, &base))
			return (base);

		dboot_panic("Cannot get memory address for %d\n", index);

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
#else
	return (((uint64_t)map_buffer[index].base_addr_high << 32) +
	    (uint64_t)map_buffer[index].base_addr_low);
#endif
}

static uint64_t
dboot_loader_mmap_get_length(int index)
{
#if !defined(__xpv)
	mb_memory_map_t *mp, *mpend;
	uint64_t length;
	int i;

	switch (multiboot_version) {
	case 1:
		mp = (mb_memory_map_t *)mb_info->mmap_addr;
		mpend = (mb_memory_map_t *)
		    (mb_info->mmap_addr + mb_info->mmap_length);

		for (i = 0; mp < mpend && i != index; i++)
			mp = (mb_memory_map_t *)((uintptr_t)mp + mp->size +
			    sizeof (mp->size));
		if (mp >= mpend) {
			dboot_panic("dboot_loader_mmap_get_length(): index "
			    "out of bounds: %d\n", index);
		}
		return (((uint64_t)mp->length_high << 32) +
		    (uint64_t)mp->length_low);

	case 2:
		if (dboot_multiboot2_efi_mmap_get_length(mb2_info,
		    index, &length))
			return (length);

		if (dboot_multiboot2_mmap_get_length(mb2_info,
		    index, &length))
			return (length);

		dboot_panic("Cannot get memory length for %d\n", index);

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
#else
	return (((uint64_t)map_buffer[index].length_high << 32) +
	    (uint64_t)map_buffer[index].length_low);
#endif
}

static void
build_pcimemlists(void)
{
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint64_t start;
	uint64_t end;
	int i, num;

	if (prom_debug)
		dboot_printf("building pcimemlists:\n");
	/*
	 * initialize
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	num = dboot_loader_mmap_entries();
	/*
	 * Fill in PCI memlists.
	 */
	for (i = 0; i < num; ++i) {
		start = dboot_loader_mmap_get_base(i);
		end = start + dboot_loader_mmap_get_length(i);

		if (prom_debug)
			dboot_printf("\ttype: %d %" PRIx64 "..%"
			    PRIx64 "\n", dboot_loader_mmap_get_type(i),
			    start, end);

		/*
		 * page align start and end
		 */
		start = (start + page_offset) & ~page_offset;
		end &= ~page_offset;
		if (end <= start)
			continue;

		exclude_from_pci(start, end);
	}

	/*
	 * Finish off the pcimemlist
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
			    PRIx64 "\n", pcimemlists[i].addr,
			    pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}

#if defined(__xpv)
/*
 * Initialize memory allocator stuff from hypervisor-supplied start info.
 */
static void
init_mem_alloc(void)
{
	int local;	/* variables needed to find start region */
	paddr_t scratch_start;
	xen_memory_map_t map;

	DBG_MSG("Entered init_mem_alloc()\n");

	/*
	 * Free memory follows the stack. There's at least 512KB of scratch
	 * space, rounded up to at least 2MB alignment. That should be enough
	 * for the page tables we'll need to build. The nucleus memory is
	 * allocated last and will be outside the addressable range. We'll
	 * switch to new page tables before we unpack the kernel.
	 */
	scratch_start = RNDUP((paddr_t)(uintptr_t)&local, MMU_PAGESIZE);
	DBG(scratch_start);
	scratch_end = RNDUP((paddr_t)scratch_start + 512 * 1024, TWO_MEG);
	DBG(scratch_end);

	/*
	 * For paranoia, leave some space between hypervisor data and ours.
	 * Use 500 instead of 512.
	 */
	next_avail_addr = scratch_end - 500 * 1024;
	DBG(next_avail_addr);

	/*
	 * The domain builder gives us at most 1 module
	 */
	DBG(xen_info->mod_len);
	if (xen_info->mod_len > 0) {
		DBG(xen_info->mod_start);
		modules[0].bm_addr =
		    (native_ptr_t)(uintptr_t)xen_info->mod_start;
		modules[0].bm_size = xen_info->mod_len;
		bi->bi_module_cnt = 1;
		bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
	} else {
		bi->bi_module_cnt = 0;
		bi->bi_modules = (native_ptr_t)(uintptr_t)NULL;
	}
	DBG(bi->bi_module_cnt);
	DBG(bi->bi_modules);

	DBG(xen_info->mfn_list);
	DBG(xen_info->nr_pages);
	max_mem = (paddr_t)xen_info->nr_pages << MMU_PAGESHIFT;
	DBG(max_mem);

	/*
	 * Using pseudo-physical addresses, so only 1 memlist element
	 */
	memlists[0].addr = 0;
	DBG(memlists[0].addr);
	memlists[0].size = max_mem;
	DBG(memlists[0].size);
	memlists_used = 1;
	DBG(memlists_used);

	/*
	 * finish building physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved memlists
	 */
	build_rsvdmemlists();

	if (DOMAIN_IS_INITDOMAIN(xen_info)) {
		/*
		 * build PCI Memory list
		 */
		map.nr_entries = MAXMAPS;
		/*LINTED: constant in conditional context*/
		set_xen_guest_handle(map.buffer, map_buffer);
		if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &map) != 0)
			dboot_panic("getting XENMEM_machine_memory_map failed");
		build_pcimemlists();
	}
}

#else /* !__xpv */

static void
dboot_multiboot1_xboot_consinfo(void)
{
	fb->framebuffer = 0;
}

static void
dboot_multiboot2_xboot_consinfo(void)
{
	multiboot_tag_framebuffer_t *fbtag;
	fbtag = dboot_multiboot2_find_tag(mb2_info,
	    MULTIBOOT_TAG_TYPE_FRAMEBUFFER);
	fb->framebuffer = (uint64_t)(uintptr_t)fbtag;
}

static int
dboot_multiboot_modcount(void)
{
	switch (multiboot_version) {
	case 1:
		return (mb_info->mods_count);

	case 2:
		return (dboot_multiboot2_modcount(mb2_info));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

static uint32_t
dboot_multiboot_modstart(int index)
{
	switch (multiboot_version) {
	case 1:
		return (((mb_module_t *)mb_info->mods_addr)[index].mod_start);

	case 2:
		return (dboot_multiboot2_modstart(mb2_info, index));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

static uint32_t
dboot_multiboot_modend(int index)
{
	switch (multiboot_version) {
	case 1:
		return (((mb_module_t *)mb_info->mods_addr)[index].mod_end);

	case 2:
		return (dboot_multiboot2_modend(mb2_info, index));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

static char *
dboot_multiboot_modcmdline(int index)
{
	switch (multiboot_version) {
	case 1:
		return ((char *)((mb_module_t *)
		    mb_info->mods_addr)[index].mod_name);

	case 2:
		return (dboot_multiboot2_modcmdline(mb2_info, index));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (0);
}

/*
 * Find the modules used by console setup.
 * Since we need the console to print early boot messages, the console is set
 * up before anything else, and therefore we need to pick up the needed
 * modules here.
 *
 * Note that we only search for the modules and, if found, pass them to
 * console setup; the proper module list processing will happen later.
 * The modules currently used are the boot environment and the console font.
 */
static void
dboot_find_console_modules(void)
{
	int i, modcount;
	uint32_t mod_start, mod_end;
	char *cmdline;

	modcount = dboot_multiboot_modcount();
	bi->bi_module_cnt = 0;
	for (i = 0; i < modcount; ++i) {
		cmdline = dboot_multiboot_modcmdline(i);
		if (cmdline == NULL)
			continue;

		if (strstr(cmdline, "type=console-font") != NULL)
			modules[bi->bi_module_cnt].bm_type = BMT_FONT;
		else if (strstr(cmdline, "type=environment") != NULL)
			modules[bi->bi_module_cnt].bm_type = BMT_ENV;
		else
			continue;

		mod_start = dboot_multiboot_modstart(i);
		mod_end = dboot_multiboot_modend(i);
		modules[bi->bi_module_cnt].bm_addr =
		    (native_ptr_t)(uintptr_t)mod_start;
		modules[bi->bi_module_cnt].bm_size = mod_end - mod_start;
		modules[bi->bi_module_cnt].bm_name =
		    (native_ptr_t)(uintptr_t)NULL;
		modules[bi->bi_module_cnt].bm_hash =
		    (native_ptr_t)(uintptr_t)NULL;
		bi->bi_module_cnt++;
	}
	if (bi->bi_module_cnt != 0)
		bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
}

static boolean_t
dboot_multiboot_basicmeminfo(uint32_t *lower, uint32_t *upper)
{
	boolean_t rv = B_FALSE;

	switch (multiboot_version) {
	case 1:
		if (mb_info->flags & 0x01) {
			*lower = mb_info->mem_lower;
			*upper = mb_info->mem_upper;
			rv = B_TRUE;
		}
		break;

	case 2:
		return (dboot_multiboot2_basicmeminfo(mb2_info, lower, upper));

	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
	return (rv);
}

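/*
 * Convert a single ASCII hex digit to its value, e.g. '7' -> 7 and
 * 'b' or 'B' -> 0xb; anything below '0' triggers a panic.
 */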
static uint8_t
dboot_a2h(char v)
{
	if (v >= 'a')
		return (v - 'a' + 0xa);
	else if (v >= 'A')
		return (v - 'A' + 0xa);
	else if (v >= '0')
		return (v - '0');
	else
		dboot_panic("bad ASCII hex character %c\n", v);

	return (0);
}

static void
digest_a2h(const char *ascii, uint8_t *digest)
{
	unsigned int i;

	for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
		digest[i] = dboot_a2h(ascii[i * 2]) << 4;
		digest[i] |= dboot_a2h(ascii[i * 2 + 1]);
	}
}

/*
 * Generate a SHA-1 hash of module midx's image, and compare it with the
 * ASCII-format hash found in that module's 40-byte bm_hash buffer. If they
 * match, return 0, otherwise -1. This works only for images smaller than
 * 4 GB, which should not be a problem.
 */
static int
check_image_hash(uint_t midx)
{
	const char *ascii;
	const void *image;
	size_t len;
	SHA1_CTX ctx;
	uint8_t digest[SHA1_DIGEST_LENGTH];
	uint8_t baseline[SHA1_DIGEST_LENGTH];
	unsigned int i;

	ascii = (const char *)(uintptr_t)modules[midx].bm_hash;
	image = (const void *)(uintptr_t)modules[midx].bm_addr;
	len = (size_t)modules[midx].bm_size;

	digest_a2h(ascii, baseline);

	SHA1Init(&ctx);
	SHA1Update(&ctx, image, len);
	SHA1Final(digest, &ctx);

	for (i = 0; i < SHA1_DIGEST_LENGTH; i++) {
		if (digest[i] != baseline[i])
			return (-1);
	}

	return (0);
}

static const char *
type_to_str(boot_module_type_t type)
{
	switch (type) {
	case BMT_ROOTFS:
		return ("rootfs");
	case BMT_FILE:
		return ("file");
	case BMT_HASH:
		return ("hash");
	case BMT_ENV:
		return ("environment");
	case BMT_FONT:
		return ("console-font");
	default:
		return ("unknown");
	}
}

static void
check_images(void)
{
	uint_t i;
	char displayhash[SHA1_ASCII_LENGTH + 1];

	for (i = 0; i < modules_used; i++) {
		if (prom_debug) {
			dboot_printf("module #%d: name %s type %s "
			    "addr %lx size %lx\n",
			    i, (char *)(uintptr_t)modules[i].bm_name,
			    type_to_str(modules[i].bm_type),
			    (ulong_t)modules[i].bm_addr,
			    (ulong_t)modules[i].bm_size);
		}

		if (modules[i].bm_type == BMT_HASH ||
		    modules[i].bm_hash == (native_ptr_t)(uintptr_t)NULL) {
			DBG_MSG("module has no hash; skipping check\n");
			continue;
		}
		(void) memcpy(displayhash,
		    (void *)(uintptr_t)modules[i].bm_hash,
		    SHA1_ASCII_LENGTH);
		displayhash[SHA1_ASCII_LENGTH] = '\0';
		if (prom_debug) {
			dboot_printf("checking expected hash [%s]: ",
			    displayhash);
		}

		if (check_image_hash(i) != 0)
			dboot_panic("hash mismatch!\n");
		else
			DBG_MSG("OK\n");
	}
}

/*
 * Determine the module's starting address, size, name, and type, and fill the
 * boot_modules structure. This structure is used by the bop code, except for
 * hashes which are checked prior to transferring control to the kernel.
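 *
 * A module command line might look like this (hypothetical example):
 *	/platform/i86pc/amd64/boot_archive type=rootfs name=ramdisk
 * The first token is taken as the name unless a name= option overrides it,
 * and the optional type= and hash= options are parsed below.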
 */
static void
process_module(int midx)
{
	uint32_t mod_start = dboot_multiboot_modstart(midx);
	uint32_t mod_end = dboot_multiboot_modend(midx);
	char *cmdline = dboot_multiboot_modcmdline(midx);
	char *p, *q;

	check_higher(mod_end);
	if (prom_debug) {
		dboot_printf("\tmodule #%d: '%s' at 0x%lx, end 0x%lx\n",
		    midx, cmdline, (ulong_t)mod_start, (ulong_t)mod_end);
	}

	if (mod_start > mod_end) {
		dboot_panic("module #%d: module start address 0x%lx greater "
		    "than end address 0x%lx", midx,
		    (ulong_t)mod_start, (ulong_t)mod_end);
	}

	/*
	 * A brief note on lengths and sizes: GRUB, for reasons unknown, passes
	 * the address of the last valid byte in a module plus 1 as mod_end.
	 * This is of course a bug; the multiboot specification simply states
	 * that mod_start and mod_end "contain the start and end addresses of
	 * the boot module itself" which is pretty obviously not what GRUB is
	 * doing. However, fixing it requires that not only this code be
	 * changed but also that other code consuming this value and values
	 * derived from it be fixed, and that the kernel and GRUB must either
	 * both have the bug or neither. While there are a lot of combinations
	 * that will work, there are also some that won't, so for simplicity
	 * we'll just cope with the bug. That means we won't actually hash the
	 * byte at mod_end, and we will expect that mod_end for the hash file
	 * itself is one greater than some multiple of 41 (40 bytes of ASCII
	 * hash plus a newline for each module). We set bm_size to the true
	 * correct number of bytes in each module, achieving exactly this.
	 */

	modules[midx].bm_addr = (native_ptr_t)(uintptr_t)mod_start;
	modules[midx].bm_size = mod_end - mod_start;
	modules[midx].bm_name = (native_ptr_t)(uintptr_t)cmdline;
	modules[midx].bm_hash = (native_ptr_t)(uintptr_t)NULL;
	modules[midx].bm_type = BMT_FILE;

	if (cmdline == NULL) {
		modules[midx].bm_name = (native_ptr_t)(uintptr_t)noname;
		return;
	}

	p = cmdline;
	modules[midx].bm_name =
	    (native_ptr_t)(uintptr_t)strsep(&p, " \t\f\n\r");

	while (p != NULL) {
		q = strsep(&p, " \t\f\n\r");
		if (strncmp(q, "name=", 5) == 0) {
			if (q[5] != '\0' && !isspace(q[5])) {
				modules[midx].bm_name =
				    (native_ptr_t)(uintptr_t)(q + 5);
			}
			continue;
		}

		if (strncmp(q, "type=", 5) == 0) {
			if (q[5] == '\0' || isspace(q[5]))
				continue;
			q += 5;
			if (strcmp(q, "rootfs") == 0) {
				modules[midx].bm_type = BMT_ROOTFS;
			} else if (strcmp(q, "hash") == 0) {
				modules[midx].bm_type = BMT_HASH;
			} else if (strcmp(q, "environment") == 0) {
				modules[midx].bm_type = BMT_ENV;
			} else if (strcmp(q, "console-font") == 0) {
				modules[midx].bm_type = BMT_FONT;
			} else if (strcmp(q, "file") != 0) {
				dboot_printf("\tmodule #%d: unknown module "
				    "type '%s'; defaulting to 'file'\n",
				    midx, q);
			}
			continue;
		}

		if (strncmp(q, "hash=", 5) == 0) {
			if (q[5] != '\0' && !isspace(q[5])) {
				modules[midx].bm_hash =
				    (native_ptr_t)(uintptr_t)(q + 5);
			}
			continue;
		}

		dboot_printf("ignoring unknown option '%s'\n", q);
	}
}

/*
 * Backward compatibility: if there are exactly one or two modules, both
 * of type 'file' and neither with an embedded hash value, we have been
 * given the legacy style modules. In this case we need to treat the first
 * module as a rootfs and the second as a hash referencing that module.
 * Otherwise, even if the configuration is invalid, we assume that the
 * operator knows what he's doing or at least isn't being bitten by this
 * interface change.
 */
static void
fixup_modules(void)
{
	if (modules_used == 0 || modules_used > 2)
		return;

	if (modules[0].bm_type != BMT_FILE ||
	    (modules_used > 1 && modules[1].bm_type != BMT_FILE)) {
		return;
	}

	if (modules[0].bm_hash != (native_ptr_t)(uintptr_t)NULL ||
	    (modules_used > 1 &&
	    modules[1].bm_hash != (native_ptr_t)(uintptr_t)NULL)) {
		return;
	}

	modules[0].bm_type = BMT_ROOTFS;
	if (modules_used > 1) {
		modules[1].bm_type = BMT_HASH;
		modules[1].bm_name = modules[0].bm_name;
	}
}

/*
 * For modules that do not have assigned hashes but have a separate hash module,
 * find the assigned hash module and set the primary module's bm_hash to point
 * to the hash data from that module. We will then ignore modules of type
 * BMT_HASH from this point forward.
 */
static void
assign_module_hashes(void)
{
	uint_t i, j;

	for (i = 0; i < modules_used; i++) {
		if (modules[i].bm_type == BMT_HASH ||
		    modules[i].bm_hash != (native_ptr_t)(uintptr_t)NULL) {
			continue;
		}

		for (j = 0; j < modules_used; j++) {
			if (modules[j].bm_type != BMT_HASH ||
			    strcmp((char *)(uintptr_t)modules[j].bm_name,
			    (char *)(uintptr_t)modules[i].bm_name) != 0) {
				continue;
			}

			if (modules[j].bm_size < SHA1_ASCII_LENGTH) {
				dboot_printf("Short hash module of length "
				    "0x%lx bytes; ignoring\n",
				    (ulong_t)modules[j].bm_size);
			} else {
				modules[i].bm_hash = modules[j].bm_addr;
			}
			break;
		}
	}
}

/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 */
static void
dboot_process_modules(void)
{
	int i, modcount;
	extern char _end[];

	DBG_MSG("\nFinding Modules\n");
	modcount = dboot_multiboot_modcount();
	if (modcount > MAX_BOOT_MODULES) {
		dboot_panic("Too many modules (%d) -- the maximum is %d.",
		    modcount, MAX_BOOT_MODULES);
	}
	/*
	 * search the modules to find the last used address
	 * we'll build the module list while we're walking through here
	 */
	check_higher((paddr_t)(uintptr_t)&_end);
	for (i = 0; i < modcount; ++i) {
		process_module(i);
		modules_used++;
	}
	bi->bi_modules = (native_ptr_t)(uintptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = modcount;
	DBG(bi->bi_module_cnt);

	fixup_modules();
	assign_module_hashes();
	check_images();
}

/*
 * We then build the phys_install memlist from the multiboot information.
 */
static void
dboot_process_mmap(void)
{
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	uint32_t lower, upper, type, t;
	int i, mmap_entries;

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	num_entries = 0;
	num_entries_set = B_FALSE;
	max_mem = 0;
	t = 0;
	if ((mmap_entries = dboot_loader_mmap_entries()) > 0) {
		struct boot_memlist *mlist;
		uint_t *indexp;

		for (i = 0; i < mmap_entries; i++) {
			start = dboot_loader_mmap_get_base(i);
			end = start + dboot_loader_mmap_get_length(i);
			type = dboot_loader_mmap_get_type(i);

			if (prom_debug)
				dboot_printf("\ttype: %u %" PRIx64 "..%"
				    PRIx64 "\n", type, start, end);

			/*
			 * page align start and end
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			/*
			 * only type 1 is usable RAM
			 */
			switch (type) {
			case 1:
				if (end > max_mem)
					max_mem = end;
				mlist = memlists;
				indexp = &memlists_used;
				break;
			case 2:
				mlist = rsvdmemlists;
				indexp = &rsvdmemlists_used;
				break;
			default:
				continue;
			}

			if (memlists_used > MAX_MEMLIST)
				dboot_panic("too many memlists");
			if (rsvdmemlists_used > MAX_MEMLIST)
				dboot_panic("too many rsvdmemlists");

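			/*
			 * Extend the previous entry when this range has the
			 * same type and starts exactly where the previous
			 * one ended.
			 */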
			if (mlist[*indexp].size != 0 &&
			    type == t &&
			    (mlist[*indexp].addr +
			    mlist[*indexp].size) == start) {
				mlist[*indexp].size =
				    end - mlist[*indexp].addr;
				continue;
			}
			/* do we need new entry? */
			if (mlist[*indexp].size != 0) {
				*indexp = *indexp + 1;
				if (*indexp > MAX_MEMLIST)
					continue;
			}

			t = type;
			mlist[*indexp].addr = start;
			mlist[*indexp].size = end - start;
		}

		if (memlists[memlists_used].size != 0) {
			memlists_used++;
		}
		if (rsvdmemlists[rsvdmemlists_used].size != 0) {
			rsvdmemlists_used++;
		}

		if (prom_debug) {
			for (i = 0; i < memlists_used; i++) {
				dboot_printf("memlists[%u] %"
				    PRIx64 "..%" PRIx64 "\n",
				    i,
				    memlists[i].addr,
				    memlists[i].size);
			}
			for (i = 0; i < rsvdmemlists_used; i++) {
				dboot_printf("rsvdmemlists[%u] %"
				    PRIx64 "..%" PRIx64 "\n",
				    i,
				    rsvdmemlists[i].addr,
				    rsvdmemlists[i].size);
			}
		}

		build_pcimemlists();
	} else if (dboot_multiboot_basicmeminfo(&lower, &upper)) {
		DBG(lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = lower * 1024;
		++memlists_used;
		DBG(upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = upper * 1024;
		++memlists_used;

		/*
		 * Old platform - assume I/O space at the end of memory.
		 */
		pcimemlists[0].addr = (upper * 1024) + (1024 * 1024);
		pcimemlists[0].size = pci_hi_limit - pcimemlists[0].addr;
		pcimemlists[0].next = 0;
		pcimemlists[0].prev = 0;
		bi->bi_pcimem = (native_ptr_t)(uintptr_t)pcimemlists;
		DBG(bi->bi_pcimem);
	} else {
		dboot_panic("No memory info from boot loader!!!");
	}

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * build bios reserved mem lists
	 */
	build_rsvdmemlists();
}

/*
 * The highest address is used as the starting point for dboot's simple
 * memory allocator.
 *
 * Finding the highest address in the case of the Multiboot 1 protocol is
 * quite painful, in the sense that some information provided by
 * the multiboot info structure points to BIOS data, and some to RAM.
 *
 * The module list was already processed and checked by
 * dboot_process_modules(), so we will check the command line string and
 * the memory map.
 *
 * This list of items to be checked is based on our current knowledge of
 * allocations made by grub1 and will need to be reviewed if there
 * are updates about the information provided by Multiboot 1.
 *
 * In the case of Multiboot 2, our life is much simpler, as the MB2
 * information tag list is one contiguous chunk of memory.
 */
static paddr_t
dboot_multiboot1_highest_addr(void)
{
	paddr_t addr = (paddr_t)(uintptr_t)NULL;
	char *cmdl = (char *)mb_info->cmdline;

	if (mb_info->flags & MB_INFO_CMDLINE)
		addr = ((paddr_t)((uintptr_t)cmdl + strlen(cmdl) + 1));

	if (mb_info->flags & MB_INFO_MEM_MAP)
		addr = MAX(addr,
		    ((paddr_t)(mb_info->mmap_addr + mb_info->mmap_length)));
	return (addr);
}

static void
dboot_multiboot_highest_addr(void)
{
	paddr_t addr;

	switch (multiboot_version) {
	case 1:
		addr = dboot_multiboot1_highest_addr();
		if (addr != (paddr_t)(uintptr_t)NULL)
			check_higher(addr);
		break;
	case 2:
		addr = dboot_multiboot2_highest_addr(mb2_info);
		if (addr != (paddr_t)(uintptr_t)NULL)
			check_higher(addr);
		break;
	default:
		dboot_panic("Unknown multiboot version: %d\n",
		    multiboot_version);
		break;
	}
}

/*
 * Walk the boot loader provided information and find the highest free address.
 */
static void
init_mem_alloc(void)
{
	DBG_MSG("Entered init_mem_alloc()\n");
	dboot_process_modules();
	dboot_process_mmap();
	dboot_multiboot_highest_addr();
}

static int
dboot_same_guids(efi_guid_t *g1, efi_guid_t *g2)
{
	int i;

	if (g1->time_low != g2->time_low)
		return (0);
	if (g1->time_mid != g2->time_mid)
		return (0);
	if (g1->time_hi_and_version != g2->time_hi_and_version)
		return (0);
	if (g1->clock_seq_hi_and_reserved != g2->clock_seq_hi_and_reserved)
		return (0);
	if (g1->clock_seq_low != g2->clock_seq_low)
		return (0);

	for (i = 0; i < 6; i++) {
		if (g1->node_addr[i] != g2->node_addr[i])
			return (0);
	}
	return (1);
}

static void
process_efi32(EFI_SYSTEM_TABLE32 *efi)
{
	uint32_t entries;
	EFI_CONFIGURATION_TABLE32 *config;
	efi_guid_t VendorGuid;
	int i;

	entries = efi->NumberOfTableEntries;
	config = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
	    efi->ConfigurationTable;

	for (i = 0; i < entries; i++) {
		(void) memcpy(&VendorGuid, &config[i].VendorGuid,
		    sizeof (VendorGuid));
		if (dboot_same_guids(&VendorGuid, &smbios3)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_smbios == 0 &&
		    dboot_same_guids(&VendorGuid, &smbios)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (dboot_same_guids(&VendorGuid, &acpi2)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_acpi_rsdp == 0 &&
		    dboot_same_guids(&VendorGuid, &acpi1)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
	}
}

static void
process_efi64(EFI_SYSTEM_TABLE64 *efi)
{
	uint64_t entries;
	EFI_CONFIGURATION_TABLE64 *config;
	efi_guid_t VendorGuid;
	int i;

	entries = efi->NumberOfTableEntries;
	config = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
	    efi->ConfigurationTable;

	for (i = 0; i < entries; i++) {
		(void) memcpy(&VendorGuid, &config[i].VendorGuid,
		    sizeof (VendorGuid));
		if (dboot_same_guids(&VendorGuid, &smbios3)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_smbios == 0 &&
		    dboot_same_guids(&VendorGuid, &smbios)) {
			bi->bi_smbios = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		/* Prefer acpi v2+ over v1. */
		if (dboot_same_guids(&VendorGuid, &acpi2)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
		if (bi->bi_acpi_rsdp == 0 &&
		    dboot_same_guids(&VendorGuid, &acpi1)) {
			bi->bi_acpi_rsdp = (native_ptr_t)(uintptr_t)
			    config[i].VendorTable;
		}
	}
}

static void
dboot_multiboot_get_fwtables(void)
{
	multiboot_tag_new_acpi_t *nacpitagp;
	multiboot_tag_old_acpi_t *oacpitagp;
	multiboot_tag_efi64_t *efi64tagp = NULL;
	multiboot_tag_efi32_t *efi32tagp = NULL;

	/* no fw tables from multiboot 1 */
	if (multiboot_version != 2)
		return;

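	/*
	 * Prefer the 64-bit EFI system table tag; fall back to the 32-bit
	 * tag only when no 64-bit one is present.
	 */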
	efi64tagp = (multiboot_tag_efi64_t *)
	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_EFI64);
	if (efi64tagp != NULL) {
		bi->bi_uefi_arch = XBI_UEFI_ARCH_64;
		bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
		    efi64tagp->mb_pointer;
		process_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
		    efi64tagp->mb_pointer);
	} else {
		efi32tagp = (multiboot_tag_efi32_t *)
		    dboot_multiboot2_find_tag(mb2_info,
		    MULTIBOOT_TAG_TYPE_EFI32);
		if (efi32tagp != NULL) {
			bi->bi_uefi_arch = XBI_UEFI_ARCH_32;
			bi->bi_uefi_systab = (native_ptr_t)(uintptr_t)
			    efi32tagp->mb_pointer;
			process_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
			    efi32tagp->mb_pointer);
		}
	}

	/*
	 * The multiboot2 info contains a copy of the RSDP; stash a pointer to
	 * it (see find_rsdp() in fakebop).
	 */
	nacpitagp = (multiboot_tag_new_acpi_t *)
	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_NEW);
	oacpitagp = (multiboot_tag_old_acpi_t *)
	    dboot_multiboot2_find_tag(mb2_info, MULTIBOOT_TAG_TYPE_ACPI_OLD);

	if (nacpitagp != NULL) {
		bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
		    &nacpitagp->mb_rsdp[0];
	} else if (oacpitagp != NULL) {
		bi->bi_acpi_rsdp_copy = (native_ptr_t)(uintptr_t)
		    &oacpitagp->mb_rsdp[0];
	}
}

1899 /* print out EFI version string with newline */
1900 static void
dboot_print_efi_version(uint32_t ver)1901 dboot_print_efi_version(uint32_t ver)
1902 {
1903 int rev;
1904
1905 dboot_printf("%d.", EFI_REV_MAJOR(ver));
1906
1907 rev = EFI_REV_MINOR(ver);
1908 if ((rev % 10) != 0) {
1909 dboot_printf("%d.%d\n", rev / 10, rev % 10);
1910 } else {
1911 dboot_printf("%d\n", rev / 10);
1912 }
1913 }
1914
1915 static void
print_efi32(EFI_SYSTEM_TABLE32 * efi)1916 print_efi32(EFI_SYSTEM_TABLE32 *efi)
1917 {
1918 uint16_t *data;
1919 EFI_CONFIGURATION_TABLE32 *conf;
1920 int i;
1921
1922 dboot_printf("EFI32 signature: %llx\n",
1923 (unsigned long long)efi->Hdr.Signature);
1924 dboot_printf("EFI system version: ");
1925 dboot_print_efi_version(efi->Hdr.Revision);
1926 dboot_printf("EFI system vendor: ");
1927 data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1928 for (i = 0; data[i] != 0; i++)
1929 dboot_printf("%c", (char)data[i]);
1930 dboot_printf("\nEFI firmware revision: ");
1931 dboot_print_efi_version(efi->FirmwareRevision);
1932 dboot_printf("EFI system table number of entries: %d\n",
1933 efi->NumberOfTableEntries);
1934 conf = (EFI_CONFIGURATION_TABLE32 *)(uintptr_t)
1935 efi->ConfigurationTable;
1936 for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1937 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1938 conf[i].VendorGuid.time_low,
1939 conf[i].VendorGuid.time_mid,
1940 conf[i].VendorGuid.time_hi_and_version,
1941 conf[i].VendorGuid.clock_seq_hi_and_reserved,
1942 conf[i].VendorGuid.clock_seq_low);
1943 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1944 conf[i].VendorGuid.node_addr[0],
1945 conf[i].VendorGuid.node_addr[1],
1946 conf[i].VendorGuid.node_addr[2],
1947 conf[i].VendorGuid.node_addr[3],
1948 conf[i].VendorGuid.node_addr[4],
1949 conf[i].VendorGuid.node_addr[5]);
1950 }
1951 }
1952
1953 static void
print_efi64(EFI_SYSTEM_TABLE64 * efi)1954 print_efi64(EFI_SYSTEM_TABLE64 *efi)
1955 {
1956 uint16_t *data;
1957 EFI_CONFIGURATION_TABLE64 *conf;
1958 int i;
1959
1960 dboot_printf("EFI64 signature: %llx\n",
1961 (unsigned long long)efi->Hdr.Signature);
1962 dboot_printf("EFI system version: ");
1963 dboot_print_efi_version(efi->Hdr.Revision);
1964 dboot_printf("EFI system vendor: ");
1965 data = (uint16_t *)(uintptr_t)efi->FirmwareVendor;
1966 for (i = 0; data[i] != 0; i++)
1967 dboot_printf("%c", (char)data[i]);
1968 dboot_printf("\nEFI firmware revision: ");
1969 dboot_print_efi_version(efi->FirmwareRevision);
1970 dboot_printf("EFI system table number of entries: %" PRIu64 "\n",
1971 efi->NumberOfTableEntries);
1972 conf = (EFI_CONFIGURATION_TABLE64 *)(uintptr_t)
1973 efi->ConfigurationTable;
1974 for (i = 0; i < (int)efi->NumberOfTableEntries; i++) {
1975 dboot_printf("%d: 0x%x 0x%x 0x%x 0x%x 0x%x", i,
1976 conf[i].VendorGuid.time_low,
1977 conf[i].VendorGuid.time_mid,
1978 conf[i].VendorGuid.time_hi_and_version,
1979 conf[i].VendorGuid.clock_seq_hi_and_reserved,
1980 conf[i].VendorGuid.clock_seq_low);
1981 dboot_printf(" 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
1982 conf[i].VendorGuid.node_addr[0],
1983 conf[i].VendorGuid.node_addr[1],
1984 conf[i].VendorGuid.node_addr[2],
1985 conf[i].VendorGuid.node_addr[3],
1986 conf[i].VendorGuid.node_addr[4],
1987 conf[i].VendorGuid.node_addr[5]);
1988 }
1989 }
1990 #endif /* !__xpv */
1991
1992 /*
1993 * Simple memory allocator, allocates aligned physical memory.
1994 * Note that startup_kernel() only allocates memory, never frees.
1995 * Memory usage just grows in an upward direction.
1996 */
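/*
 * A minimal usage sketch (illustrative only; actual addresses depend on
 * next_avail_addr and the memlists):
 *
 *	void *pt = mem_alloc(MMU_PAGESIZE);			page-aligned, zeroed
 *	void *big = do_mem_alloc(3 * MMU_PAGESIZE, FOUR_MEG);	4M-aligned
 *
 * Each call rounds next_avail_addr up to the requested alignment and then
 * advances it past the page-rounded size, so allocations never overlap.
 */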
1997 static void *
1998 do_mem_alloc(uint32_t size, uint32_t align)
1999 {
2000 uint_t i;
2001 uint64_t best;
2002 uint64_t start;
2003 uint64_t end;
2004
2005 /*
2006 * make sure size is a multiple of pagesize
2007 */
2008 size = RNDUP(size, MMU_PAGESIZE);
2009 next_avail_addr = RNDUP(next_avail_addr, align);
2010
2011 /*
2012 * XXPV fixme joe
2013 *
2014 	 * a really large boot archive that exhausts available memory
2015 	 * may cause this to blow up
2016 */
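	/*
	 * Seed "best" with an impossibly high sentinel: (uint64_t)-size wraps
	 * to 2^64 - size, so any region found by the loop below that can fit
	 * the allocation will compare lower and replace it.
	 */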
2017 /* LINTED E_UNEXPECTED_UINT_PROMOTION */
2018 best = (uint64_t)-size;
2019 for (i = 0; i < memlists_used; ++i) {
2020 start = memlists[i].addr;
2021 #if defined(__xpv)
2022 start += mfn_base;
2023 #endif
2024 end = start + memlists[i].size;
2025
2026 /*
2027 * did we find the desired address?
2028 */
2029 if (start <= next_avail_addr && next_avail_addr + size <= end) {
2030 best = next_avail_addr;
2031 goto done;
2032 }
2033
2034 /*
2035 	 * if not, is this address the best so far?
2036 */
2037 if (start > next_avail_addr && start < best &&
2038 RNDUP(start, align) + size <= end)
2039 best = RNDUP(start, align);
2040 }
2041
2042 /*
2043 	 * We didn't find exactly the address we wanted because we ran off
2044 	 * the end of a memory region; return the best address we found.
2045 */
2046 done:
2047 next_avail_addr = best + size;
2048 #if defined(__xpv)
2049 if (next_avail_addr > scratch_end)
2050 dboot_panic("Out of mem next_avail: 0x%lx, scratch_end: "
2051 "0x%lx", (ulong_t)next_avail_addr,
2052 (ulong_t)scratch_end);
2053 #endif
2054 (void) memset((void *)(uintptr_t)best, 0, size);
2055 return ((void *)(uintptr_t)best);
2056 }
2057
2058 void *
2059 mem_alloc(uint32_t size)
2060 {
2061 return (do_mem_alloc(size, MMU_PAGESIZE));
2062 }
2063
2064
2065 /*
2066 * Build page tables to map all of memory used so far as well as the kernel.
2067 */
2068 static void
2069 build_page_tables(void)
2070 {
2071 uint32_t psize;
2072 uint32_t level;
2073 uint32_t off;
2074 uint64_t start;
2075 #if !defined(__xpv)
2076 uint32_t i;
2077 uint64_t end;
2078 #endif /* __xpv */
2079
2080 /*
2081 * If we're on metal, we need to create the top level pagetable.
2082 */
2083 #if defined(__xpv)
2084 top_page_table = (paddr_t)(uintptr_t)xen_info->pt_base;
2085 #else /* __xpv */
2086 top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
2087 #endif /* __xpv */
2088 DBG((uintptr_t)top_page_table);
2089
2090 /*
2091 	 * Determine if we'll use large mappings for the kernel, then map it.
2092 */
2093 if (largepage_support) {
2094 psize = lpagesize;
2095 level = 1;
2096 } else {
2097 psize = MMU_PAGESIZE;
2098 level = 0;
2099 }
2100
2101 DBG_MSG("Mapping kernel\n");
2102 DBG(ktext_phys);
2103 DBG(target_kernel_text);
2104 DBG(ksize);
2105 DBG(psize);
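	/*
	 * For example, with 2 MB large pages the default 8 MB nucleus takes
	 * four iterations of this loop; with 4 KB pages it takes 2048.
	 */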
2106 for (off = 0; off < ksize; off += psize)
2107 map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);
2108
2109 /*
2110 	 * The kernel will need a one-page window to work with page tables.
2111 */
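	/*
	 * Record both the window's virtual address and the address of the
	 * PTE that maps it; the kernel can later aim the window at any page
	 * table page by rewriting that single PTE.
	 */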
2112 bi->bi_pt_window = (native_ptr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
2113 DBG(bi->bi_pt_window);
2114 bi->bi_pte_to_pt_window =
2115 (native_ptr_t)(uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
2116 DBG(bi->bi_pte_to_pt_window);
2117
2118 #if defined(__xpv)
2119 if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
2120 /* If this is a domU we're done. */
2121 DBG_MSG("\nPage tables constructed\n");
2122 return;
2123 }
2124 #endif /* __xpv */
2125
2126 /*
2127 * We need 1:1 mappings for the lower 1M of memory to access
2128 * BIOS tables used by a couple of drivers during boot.
2129 *
2130 * The following code works because our simple memory allocator
2131 * only grows usage in an upwards direction.
2132 *
2133 * Note that by this point in boot some mappings for low memory
2134 	 * may already exist because we've already accessed devices in low
2135 	 * memory. (Specifically the video frame buffer and keyboard
2136 * status ports.) If we're booting on raw hardware then GRUB
2137 * created these mappings for us. If we're booting under a
2138 * hypervisor then we went ahead and remapped these devices into
2139 * memory allocated within dboot itself.
2140 */
2141 if (map_debug)
2142 dboot_printf("1:1 map pa=0..1Meg\n");
2143 for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE) {
2144 #if defined(__xpv)
2145 map_ma_at_va(start, start, 0);
2146 #else /* __xpv */
2147 map_pa_at_va(start, start, 0);
2148 #endif /* __xpv */
2149 }
2150
2151 #if !defined(__xpv)
2152
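	/*
	 * Identity-map each memlist only up to next_avail_addr, which covers
	 * everything dboot has allocated so far; the early break below
	 * assumes the memlists are sorted by ascending address.
	 */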
2153 for (i = 0; i < memlists_used; ++i) {
2154 start = memlists[i].addr;
2155 end = start + memlists[i].size;
2156
2157 if (map_debug)
2158 dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2159 start, end);
2160 while (start < end && start < next_avail_addr) {
2161 map_pa_at_va(start, start, 0);
2162 start += MMU_PAGESIZE;
2163 }
2164 if (start >= next_avail_addr)
2165 break;
2166 }
2167
2168 /*
2169 * Map framebuffer memory as PT_NOCACHE as this is memory from a
2170 * device and therefore must not be cached.
2171 */
2172 if (fb != NULL && fb->framebuffer != 0) {
2173 multiboot_tag_framebuffer_t *fb_tagp;
2174 fb_tagp = (multiboot_tag_framebuffer_t *)(uintptr_t)
2175 fb->framebuffer;
2176
2177 start = fb_tagp->framebuffer_common.framebuffer_addr;
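		/*
		 * The mapped size is height * pitch bytes; the pitch (bytes
		 * per scan line) already folds in pixel depth and any row
		 * padding.
		 */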
2178 end = start + fb_tagp->framebuffer_common.framebuffer_height *
2179 fb_tagp->framebuffer_common.framebuffer_pitch;
2180
2181 if (map_debug)
2182 dboot_printf("FB 1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
2183 start, end);
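		/*
		 * map_pa_at_va() picks up the global pte_bits, so turn the
		 * uncacheable attributes on just for this loop and restore
		 * them once the framebuffer is mapped.
		 */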
2184 pte_bits |= PT_NOCACHE;
2185 if (PAT_support != 0)
2186 pte_bits |= PT_PAT_4K;
2187
2188 while (start < end) {
2189 map_pa_at_va(start, start, 0);
2190 start += MMU_PAGESIZE;
2191 }
2192 pte_bits &= ~PT_NOCACHE;
2193 if (PAT_support != 0)
2194 pte_bits &= ~PT_PAT_4K;
2195 }
2196 #endif /* !__xpv */
2197
2198 DBG_MSG("\nPage tables constructed\n");
2199 }
2200
2201 #define NO_MULTIBOOT \
2202 "multiboot is no longer used to boot the Solaris Operating System.\n\
2203 The grub entry should be changed to:\n\
2204 kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
2205 module$ /platform/i86pc/$ISADIR/boot_archive\n\
2206 See http://illumos.org/msg/SUNOS-8000-AK for details.\n"
2207
2208 static void
2209 dboot_init_xboot_consinfo(void)
2210 {
2211 bi = &boot_info;
2212
2213 #if !defined(__xpv)
2214 fb = &framebuffer;
2215 bi->bi_framebuffer = (native_ptr_t)(uintptr_t)fb;
2216
2217 switch (multiboot_version) {
2218 case 1:
2219 dboot_multiboot1_xboot_consinfo();
2220 break;
2221 case 2:
2222 dboot_multiboot2_xboot_consinfo();
2223 break;
2224 default:
2225 dboot_panic("Unknown multiboot version: %d\n",
2226 multiboot_version);
2227 break;
2228 }
2229 dboot_find_console_modules();
2230 #endif
2231 }
2232
2233 /*
2234 * Set up basic data from the boot loader.
2235  * load_addr is part of the a.out kludge set up in dboot_grub.s; it lets
2236  * the 32-bit dboot code set up and start the 64-bit kernel. The a.out
2237  * kludge allows a 32-bit boot loader, such as grub1, to load and start
2238  * the 64-bit illumos kernel.
2239 */
2240 static void
2241 dboot_loader_init(void)
2242 {
2243 #if !defined(__xpv)
2244 mb_info = NULL;
2245 mb2_info = NULL;
2246
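	/*
	 * mb_magic and mb_addr come from the boot loader in %eax and %ebx
	 * (per the Multiboot specifications) and are stashed by the assembly
	 * entry code before it calls into C.
	 */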
2247 switch (mb_magic) {
2248 case MB_BOOTLOADER_MAGIC:
2249 multiboot_version = 1;
2250 mb_info = (multiboot_info_t *)(uintptr_t)mb_addr;
2251 #if defined(_BOOT_TARGET_amd64)
2252 load_addr = mb_header.load_addr;
2253 #endif
2254 break;
2255
2256 case MULTIBOOT2_BOOTLOADER_MAGIC:
2257 multiboot_version = 2;
2258 mb2_info = (multiboot2_info_header_t *)(uintptr_t)mb_addr;
2259 #if defined(_BOOT_TARGET_amd64)
2260 load_addr = mb2_load_addr;
2261 #endif
2262 break;
2263
2264 default:
2265 dboot_panic("Unknown bootloader magic: 0x%x\n", mb_magic);
2266 break;
2267 }
2268 #endif /* !defined(__xpv) */
2269 }
2270
2271 /* Extract the kernel command line from [multi]boot information. */
2272 static char *
2273 dboot_loader_cmdline(void)
2274 {
2275 char *line = NULL;
2276
2277 #if defined(__xpv)
2278 line = (char *)xen_info->cmd_line;
2279 #else /* __xpv */
2280
2281 switch (multiboot_version) {
2282 case 1:
2283 if (mb_info->flags & MB_INFO_CMDLINE)
2284 line = (char *)mb_info->cmdline;
2285 break;
2286
2287 case 2:
2288 line = dboot_multiboot2_cmdline(mb2_info);
2289 break;
2290
2291 default:
2292 dboot_panic("Unknown multiboot version: %d\n",
2293 multiboot_version);
2294 break;
2295 }
2296
2297 #endif /* __xpv */
2298
2299 /*
2300 	 * Make sure we have a valid pointer so the string operations
2301 * will not crash us.
2302 */
2303 if (line == NULL)
2304 line = "";
2305
2306 return (line);
2307 }
2308
2309 static char *
2310 dboot_loader_name(void)
2311 {
2312 #if defined(__xpv)
2313 return (NULL);
2314 #else /* __xpv */
2315 multiboot_tag_string_t *tag;
2316
2317 switch (multiboot_version) {
2318 case 1:
2319 return ((char *)(uintptr_t)mb_info->boot_loader_name);
2320
2321 case 2:
2322 		tag = dboot_multiboot2_find_tag(mb2_info,
2323 		    MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME);
		/* The tag is optional, so guard against a loader omitting it. */
		if (tag == NULL)
			return (NULL);
2324 		return (tag->mb_string);
2325 default:
2326 dboot_panic("Unknown multiboot version: %d\n",
2327 multiboot_version);
2328 break;
2329 }
2330
2331 return (NULL);
2332 #endif /* __xpv */
2333 }
2334
2335 /*
2336  * startup_kernel has a pretty simple job. It builds pagetables that reflect
2337  * 1:1 mappings for all memory in use, and adds mappings for the kernel
2338  * nucleus at the virtual address target_kernel_text, using large page
2339  * mappings when supported. The page table pages are also accessible at
2340  * their 1:1 mapped virtual addresses.
2341 */
2342 /*ARGSUSED*/
2343 void
2344 startup_kernel(void)
2345 {
2346 char *cmdline;
2347 char *bootloader;
2348 #if defined(__xpv)
2349 physdev_set_iopl_t set_iopl;
2350 #endif /* __xpv */
2351
2352 if (dboot_debug == 1)
2353 bcons_init(NULL); /* Set very early console to ttya. */
2354 dboot_loader_init();
2355 /*
2356 	 * At this point we are executing in 32-bit protected mode.
2357 */
2358
2359 bootloader = dboot_loader_name();
2360 cmdline = dboot_loader_cmdline();
2361
2362 #if defined(__xpv)
2363 /*
2364 	 * For dom0, before we initialize the console subsystem we
2365 	 * need to enable I/O operations, so set the I/O privilege level to 1.
2366 */
2367 if (DOMAIN_IS_INITDOMAIN(xen_info)) {
2368 set_iopl.iopl = 1;
2369 (void) HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
2370 }
2371 #endif /* __xpv */
2372
2373 dboot_init_xboot_consinfo();
2374 bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;
2375 bcons_init(bi); /* Now we can set the real console. */
2376
2377 prom_debug = (find_boot_prop("prom_debug") != NULL);
2378 map_debug = (find_boot_prop("map_debug") != NULL);
2379
2380 #if !defined(__xpv)
2381 dboot_multiboot_get_fwtables();
2382 #endif
2383 DBG_MSG("\n\nillumos prekernel set: ");
2384 DBG_MSG(cmdline);
2385 DBG_MSG("\n");
2386
2387 if (bootloader != NULL && prom_debug) {
2388 dboot_printf("Kernel loaded by: %s\n", bootloader);
2389 #if !defined(__xpv)
2390 dboot_printf("Using multiboot %d boot protocol.\n",
2391 multiboot_version);
2392 #endif
2393 }
2394
2395 if (strstr(cmdline, "multiboot") != NULL) {
2396 dboot_panic(NO_MULTIBOOT);
2397 }
2398
2399 DBG((uintptr_t)bi);
2400 #if !defined(__xpv)
2401 DBG((uintptr_t)mb_info);
2402 DBG((uintptr_t)mb2_info);
2403 if (mb2_info != NULL)
2404 DBG(mb2_info->mbi_total_size);
2405 DBG(bi->bi_acpi_rsdp);
2406 DBG(bi->bi_acpi_rsdp_copy);
2407 DBG(bi->bi_smbios);
2408 DBG(bi->bi_uefi_arch);
2409 DBG(bi->bi_uefi_systab);
2410
2411 if (bi->bi_uefi_systab && prom_debug) {
2412 if (bi->bi_uefi_arch == XBI_UEFI_ARCH_64) {
2413 print_efi64((EFI_SYSTEM_TABLE64 *)(uintptr_t)
2414 bi->bi_uefi_systab);
2415 } else {
2416 print_efi32((EFI_SYSTEM_TABLE32 *)(uintptr_t)
2417 bi->bi_uefi_systab);
2418 }
2419 }
2420 #endif
2421
2422 /*
2423 * Need correct target_kernel_text value
2424 */
2425 target_kernel_text = KERNEL_TEXT;
2426 DBG(target_kernel_text);
2427
2428 #if defined(__xpv)
2429
2430 /*
2431 * XXPV Derive this stuff from CPUID / what the hypervisor has enabled
2432 */
2433
2434 #if defined(_BOOT_TARGET_amd64)
2435 /*
2436 * 64-bit hypervisor.
2437 */
2438 amd64_support = 1;
2439 pae_support = 1;
2440
2441 #else /* _BOOT_TARGET_amd64 */
2442
2443 /*
2444 * See if we are running on a PAE Hypervisor
2445 */
2446 {
2447 xen_capabilities_info_t caps;
2448
2449 if (HYPERVISOR_xen_version(XENVER_capabilities, &caps) != 0)
2450 dboot_panic("HYPERVISOR_xen_version(caps) failed");
2451 caps[sizeof (caps) - 1] = 0;
2452 if (prom_debug)
2453 dboot_printf("xen capabilities %s\n", caps);
2454 if (strstr(caps, "x86_32p") != NULL)
2455 pae_support = 1;
2456 }
2457
2458 #endif /* _BOOT_TARGET_amd64 */
2459 {
2460 xen_platform_parameters_t p;
2461
2462 if (HYPERVISOR_xen_version(XENVER_platform_parameters, &p) != 0)
2463 dboot_panic("HYPERVISOR_xen_version(parms) failed");
2464 DBG(p.virt_start);
2465 mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
2466 }
2467
2468 /*
2469 * The hypervisor loads stuff starting at 1Gig
2470 */
2471 mfn_base = ONE_GIG;
2472 DBG(mfn_base);
2473
2474 /*
2475 * enable writable page table mode for the hypervisor
2476 */
2477 if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2478 VMASST_TYPE_writable_pagetables) < 0)
2479 dboot_panic("HYPERVISOR_vm_assist(writable_pagetables) failed");
2480
2481 /*
2482 * check for NX support
2483 */
2484 if (pae_support) {
2485 uint32_t eax = 0x80000000;
2486 uint32_t edx = get_cpuid_edx(&eax);
2487
2488 if (eax >= 0x80000001) {
2489 eax = 0x80000001;
2490 edx = get_cpuid_edx(&eax);
2491 if (edx & CPUID_AMD_EDX_NX)
2492 NX_support = 1;
2493 }
2494 }
2495
2496 /*
2497 * check for PAT support
2498 */
2499 {
2500 uint32_t eax = 1;
2501 uint32_t edx = get_cpuid_edx(&eax);
2502
2503 if (edx & CPUID_INTC_EDX_PAT)
2504 PAT_support = 1;
2505 }
2506 #if !defined(_BOOT_TARGET_amd64)
2507
2508 /*
2509 * The 32-bit hypervisor uses segmentation to protect itself from
2510 * guests. This means when a guest attempts to install a flat 4GB
2511 * code or data descriptor the 32-bit hypervisor will protect itself
2512 * by silently shrinking the segment such that if the guest attempts
2513 	 * any access where the hypervisor lives, a #gp fault is generated.
2514 * The problem is that some applications expect a full 4GB flat
2515 * segment for their current thread pointer and will use negative
2516 * offset segment wrap around to access data. TLS support in linux
2517 * brand is one example of this.
2518 *
2519 * The 32-bit hypervisor can catch the #gp fault in these cases
2520 * and emulate the access without passing the #gp fault to the guest
2521 * but only if VMASST_TYPE_4gb_segments is explicitly turned on.
2522 * Seems like this should have been the default.
2523 	 * Either way, we want the hypervisor -- and not Solaris -- to deal
2524 	 * with emulating these accesses.
2525 */
2526 if (HYPERVISOR_vm_assist(VMASST_CMD_enable,
2527 VMASST_TYPE_4gb_segments) < 0)
2528 dboot_panic("HYPERVISOR_vm_assist(4gb_segments) failed");
2529 #endif /* !_BOOT_TARGET_amd64 */
2530
2531 #else /* __xpv */
2532
2533 /*
2534 * use cpuid to enable MMU features
2535 */
2536 if (have_cpuid()) {
2537 uint32_t eax, edx;
2538
2539 eax = 1;
2540 edx = get_cpuid_edx(&eax);
2541 if (edx & CPUID_INTC_EDX_PSE)
2542 largepage_support = 1;
2543 if (edx & CPUID_INTC_EDX_PGE)
2544 pge_support = 1;
2545 if (edx & CPUID_INTC_EDX_PAE)
2546 pae_support = 1;
2547 if (edx & CPUID_INTC_EDX_PAT)
2548 PAT_support = 1;
2549
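		/*
		 * Leaf 0x80000000 reports the highest extended CPUID leaf in
		 * %eax; only if leaf 0x80000001 exists can its %edx be probed
		 * for long mode (LM) and no-execute (NX) support.
		 */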
2550 eax = 0x80000000;
2551 edx = get_cpuid_edx(&eax);
2552 if (eax >= 0x80000001) {
2553 eax = 0x80000001;
2554 edx = get_cpuid_edx(&eax);
2555 if (edx & CPUID_AMD_EDX_LM)
2556 amd64_support = 1;
2557 if (edx & CPUID_AMD_EDX_NX)
2558 NX_support = 1;
2559 }
2560 } else {
2561 dboot_printf("cpuid not supported\n");
2562 }
2563 #endif /* __xpv */
2564
2565
2566 #if defined(_BOOT_TARGET_amd64)
2567 if (amd64_support == 0)
2568 dboot_panic("long mode not supported, rebooting");
2569 else if (pae_support == 0)
2570 dboot_panic("long mode, but no PAE; rebooting");
2571 #else
2572 /*
2573 	 * Allow the command line to override the use of PAE for 32-bit.
2574 */
2575 if (strstr(cmdline, "disablePAE=true") != NULL) {
2576 pae_support = 0;
2577 NX_support = 0;
2578 amd64_support = 0;
2579 }
2580 #endif
2581
2582 /*
2583 * initialize the simple memory allocator
2584 */
2585 init_mem_alloc();
2586
2587 #if !defined(__xpv) && !defined(_BOOT_TARGET_amd64)
2588 /*
2589 * disable PAE on 32 bit h/w w/o NX and < 4Gig of memory
2590 */
2591 if (max_mem < FOUR_GIG && NX_support == 0)
2592 pae_support = 0;
2593 #endif
2594
2595 /*
2596 * configure mmu information
2597 */
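	/*
	 * With PAE, each 4K page table holds 512 8-byte entries and a large
	 * page is 2 MB (top_level 3 gives four levels for amd64, 2 gives
	 * three for 32-bit PAE). Without PAE, a table holds 1024 4-byte
	 * entries, a large page is 4 MB, and there are two levels.
	 */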
2598 if (pae_support) {
2599 shift_amt = shift_amt_pae;
2600 ptes_per_table = 512;
2601 pte_size = 8;
2602 lpagesize = TWO_MEG;
2603 #if defined(_BOOT_TARGET_amd64)
2604 top_level = 3;
2605 #else
2606 top_level = 2;
2607 #endif
2608 } else {
2609 pae_support = 0;
2610 NX_support = 0;
2611 shift_amt = shift_amt_nopae;
2612 ptes_per_table = 1024;
2613 pte_size = 4;
2614 lpagesize = FOUR_MEG;
2615 top_level = 1;
2616 }
2617
2618 DBG(PAT_support);
2619 DBG(pge_support);
2620 DBG(NX_support);
2621 DBG(largepage_support);
2622 DBG(amd64_support);
2623 DBG(top_level);
2624 DBG(pte_size);
2625 DBG(ptes_per_table);
2626 DBG(lpagesize);
2627
2628 #if defined(__xpv)
2629 ktext_phys = ONE_GIG; /* from UNIX Mapfile */
2630 #else
2631 ktext_phys = FOUR_MEG; /* from UNIX Mapfile */
2632 #endif
2633
2634 #if !defined(__xpv) && defined(_BOOT_TARGET_amd64)
2635 /*
2636 	 * For grub, copy the kernel bits from the ELF64 file to their final place.
2637 */
2638 DBG_MSG("\nAllocating nucleus pages.\n");
2639 ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
2640
2641 if (ktext_phys == 0)
2642 dboot_panic("failed to allocate aligned kernel memory");
2643 DBG(load_addr);
2644 if (dboot_elfload64(load_addr) != 0)
2645 dboot_panic("failed to parse kernel ELF image, rebooting");
2646 #endif
2647
2648 DBG(ktext_phys);
2649
2650 /*
2651 	 * Build the page tables.
2652 */
2653 build_page_tables();
2654
2655 /*
2656 * return to assembly code to switch to running kernel
2657 */
2658 entry_addr_low = (uint32_t)target_kernel_text;
2659 DBG(entry_addr_low);
2660 bi->bi_use_largepage = largepage_support;
2661 bi->bi_use_pae = pae_support;
2662 bi->bi_use_pge = pge_support;
2663 bi->bi_use_nx = NX_support;
2664
2665 #if defined(__xpv)
2666
2667 bi->bi_next_paddr = next_avail_addr - mfn_base;
2668 DBG(bi->bi_next_paddr);
2669 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2670 DBG(bi->bi_next_vaddr);
2671
2672 /*
2673 * unmap unused pages in start area to make them available for DMA
2674 */
2675 while (next_avail_addr < scratch_end) {
2676 (void) HYPERVISOR_update_va_mapping(next_avail_addr,
2677 0, UVMF_INVLPG | UVMF_LOCAL);
2678 next_avail_addr += MMU_PAGESIZE;
2679 }
2680
2681 bi->bi_xen_start_info = (native_ptr_t)(uintptr_t)xen_info;
2682 DBG((uintptr_t)HYPERVISOR_shared_info);
2683 bi->bi_shared_info = (native_ptr_t)HYPERVISOR_shared_info;
2684 bi->bi_top_page_table = (uintptr_t)top_page_table - mfn_base;
2685
2686 #else /* __xpv */
2687
2688 bi->bi_next_paddr = next_avail_addr;
2689 DBG(bi->bi_next_paddr);
2690 bi->bi_next_vaddr = (native_ptr_t)(uintptr_t)next_avail_addr;
2691 DBG(bi->bi_next_vaddr);
2692 bi->bi_mb_version = multiboot_version;
2693
2694 switch (multiboot_version) {
2695 case 1:
2696 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb_info;
2697 break;
2698 case 2:
2699 bi->bi_mb_info = (native_ptr_t)(uintptr_t)mb2_info;
2700 break;
2701 default:
2702 dboot_panic("Unknown multiboot version: %d\n",
2703 multiboot_version);
2704 break;
2705 }
2706 bi->bi_top_page_table = (uintptr_t)top_page_table;
2707
2708 #endif /* __xpv */
2709
2710 bi->bi_kseg_size = FOUR_MEG;
2711 DBG(bi->bi_kseg_size);
2712
2713 #ifndef __xpv
2714 if (map_debug)
2715 dump_tables();
2716 #endif
2717
2718 DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
2719
2720 #ifndef __xpv
2721 /* Update boot info with FB data */
2722 fb->cursor.origin.x = fb_info.cursor.origin.x;
2723 fb->cursor.origin.y = fb_info.cursor.origin.y;
2724 fb->cursor.pos.x = fb_info.cursor.pos.x;
2725 fb->cursor.pos.y = fb_info.cursor.pos.y;
2726 fb->cursor.visible = fb_info.cursor.visible;
2727 #endif
2728 }
2729