/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/machparam.h>
#include <sys/x86_archext.h>
#include <sys/systm.h>
#include <sys/mach_mmu.h>

#include <sys/multiboot.h>

extern multiboot_header_t mb_header;
extern int have_cpuid(void);
extern uint32_t get_cpuid_edx(uint32_t *eax);

#include <sys/inttypes.h>
#include <sys/bootinfo.h>
#include <sys/mach_mmu.h>
#include <sys/boot_console.h>

#include "dboot_printf.h"
#include "dboot_xboot.h"
#include "dboot_elfload.h"

/*
 * This file contains code that runs to transition us from either a multiboot
 * compliant loader (32 bit non-paging) or Xen domain loader to regular kernel
 * execution. Its task is to setup the kernel memory image and page tables.
 *
 * The code executes as:
 *	- 32 bits under GRUB (for 32 or 64 bit Solaris)
 * 	- 32 bit program for Xen 32 bit
 *	- 64 bit program for Xen 64 bit (at least that's my assumption for now)
 *
 * Under Xen, we must create mappings for any memory beyond the initial
 * start of day allocation (such as the kernel itself).
 *
 * When not under Xen, the mapping between maddr_t and paddr_t is 1:1.
 * Since we are running with paging disabled, all such memory is accessible.
 */

/*
 * Default flag bits for page table entries. ptp_bits is OR'ed into entries
 * that point at a newly created lower level page table (see make_ptable()),
 * pte_bits into entries that map a page (see map_pa_at_va()).
 */
x86pte_t ptp_bits = PT_VALID | PT_WRITABLE | PT_REF | PT_USER;
x86pte_t pte_bits = PT_VALID | PT_WRITABLE | PT_REF | PT_MOD | PT_NOCONSIST;

/*
 * This is the target address (physical) where the kernel text and data
 * nucleus pages will be unpacked. On Xen this is actually a virtual address.
 * ksize is the number of bytes mapped for the nucleus by build_page_tables().
 */
paddr_t ktext_phys;
uint32_t ksize = 2 * FOUR_MEG;	/* kernel nucleus is 8Meg */

static uint64_t target_kernel_text;	/* value to use for KERNEL_TEXT */

/*
 * The stack is set up in assembler before entering startup_kernel()
 */
char stack_space[STACK_SIZE];

/*
 * Used to track physical memory allocation: check_higher() and
 * do_mem_alloc() only ever move this upwards.
 */
static paddr_t next_avail_addr = 0;

/*
 * Multiboot information from the boot loader. It is not assigned anywhere
 * in this file -- presumably the assembler entry code sets it; TODO confirm.
 */
multiboot_info_t *mb_info;

/*
 * This contains information passed to the kernel. boot_info has two
 * elements so that startup_kernel() can round its address up to a 16 byte
 * boundary; bi is the resulting aligned pointer.
 */
struct xboot_info boot_info[2];	/* extra space to fix alignment for amd64 */
struct xboot_info *bi;

/*
 * Page table and memory stuff.
 * max_mem is the highest end address of usable RAM found while walking the
 * multiboot memory map in init_mem_alloc().
 */
static uint64_t max_mem;			/* maximum memory address */

/*
 * Information about processor MMU, filled in from cpuid by startup_kernel().
 */
int amd64_support = 0;
int largepage_support = 0;
int pae_support = 0;
int pge_support = 0;
int NX_support = 0;

/*
 * Low 32 bits of kernel entry address passed back to assembler.
 * When running a 64 bit kernel, the high 32 bits are 0xffffffff.
 */
uint32_t entry_addr_low;

/*
 * Memlists for the kernel. We shouldn't need a lot of these.
 * memlists[] describes installed physical memory, pcimemlists[] the
 * address ranges still considered available for PCI.
 */
#define	MAX_MEMLIST (50)
struct boot_memlist memlists[MAX_MEMLIST];
uint_t memlists_used = 0;
struct boot_memlist pcimemlists[MAX_MEMLIST];
uint_t pcimemlists_used = 0;

/*
 * Boot modules handed to us by the boot loader, recorded by
 * init_mem_alloc().
 */
#define	MAX_MODULES (10)
struct boot_modules modules[MAX_MODULES];
uint_t modules_used = 0;

/*
 * Debugging flags. startup_kernel() turns them on when the matching word
 * ("prom_debug" / "map_debug") appears in the boot command line.
 */
uint_t prom_debug = 0;
uint_t map_debug = 0;

/*
 * The Xen/Grub specific code builds the initial memlists. This code does
 * sort/merge/link for final use.
 *
 * It works in place on memlists[0 .. memlists_used - 1]:
 *	1. sort the entries by ascending address,
 *	2. merge entries that are exactly adjacent (no hole between them),
 *	3. chain the survivors together through their prev/next fields,
 * and finally publishes the list through bi->bi_phys_install.
 */
static void
sort_physinstall(void)
{
	int i;
	int j;
	struct boot_memlist tmp;

	/*
	 * Now sort the memlists, in case they weren't in order.
	 * Yeah, this is a bubble sort; small, simple and easy to get right.
	 */
	DBG_MSG("Sorting phys-installed list\n");
	for (j = memlists_used - 1; j > 0; --j) {
		for (i = 0; i < j; ++i) {
			if (memlists[i].addr < memlists[i + 1].addr)
				continue;
			tmp = memlists[i];
			memlists[i] = memlists[i + 1];
			memlists[i + 1] = tmp;
		}
	}

	/*
	 * Merge any memlists that don't have holes between them.
	 *
	 * Entry i is compared with entry i + 1, so only look at entries that
	 * actually have a successor. Stopping at the last used entry keeps
	 * us from reading an unused slot (or running off the end of the
	 * array when it is full) and also copes with an empty list.
	 */
	for (i = 0; i + 1 < memlists_used; ++i) {
		if (memlists[i].addr + memlists[i].size != memlists[i + 1].addr)
			continue;

		if (prom_debug)
			dboot_printf(
			    "merging mem segs %" PRIx64 "...%" PRIx64
			    " w/ %" PRIx64 "...%" PRIx64 "\n",
			    memlists[i].addr,
			    memlists[i].addr + memlists[i].size,
			    memlists[i + 1].addr,
			    memlists[i + 1].addr + memlists[i + 1].size);

		/* absorb entry i + 1 into entry i and close the gap */
		memlists[i].size += memlists[i + 1].size;
		for (j = i + 1; j < memlists_used - 1; ++j)
			memlists[j] = memlists[j + 1];
		--memlists_used;
		DBG(memlists_used);
		--i;	/* after merging we need to reexamine, so do this */
	}

	if (prom_debug) {
		dboot_printf("\nFinal memlists:\n");
		for (i = 0; i < memlists_used; ++i) {
			dboot_printf("\t%d: addr=%" PRIx64 " size=%"
			    PRIx64 "\n", i, memlists[i].addr, memlists[i].size);
		}
	}

	/*
	 * link together the memlists with native size pointers
	 */
	memlists[0].next = 0;
	memlists[0].prev = 0;
	for (i = 1; i < memlists_used; ++i) {
		memlists[i].prev = (native_ptr_t)(uintptr_t)(memlists + i - 1);
		memlists[i].next = 0;
		memlists[i - 1].next = (native_ptr_t)(uintptr_t)(memlists + i);
	}
	bi->bi_phys_install = (native_ptr_t)memlists;
	DBG(bi->bi_phys_install);
}

/*
 * Return entry 'index' of the page table located at 'table'. The table is
 * read as an array of x86pte_t when PAE is in use and as an array of
 * x86pte32_t otherwise.
 */
x86pte_t
get_pteval(paddr_t table, uint_t index)
{
	uintptr_t tab_addr = (uintptr_t)table;

	if (!pae_support)
		return (((x86pte32_t *)tab_addr)[index]);

	return (((x86pte_t *)tab_addr)[index]);
}

/*
 * Store 'pteval' into entry 'index' of the page table located at 'table',
 * using the entry width that matches the paging mode. When the entry
 * belongs to the top level table and that top level is level 2,
 * reload_cr3() is called after the store.
 */
/*ARGSUSED*/
void
set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
{
	if (pae_support) {
		x86pte_t *entries = (x86pte_t *)(uintptr_t)table;

		entries[index] = pteval;
	} else {
		x86pte32_t *entries = (x86pte32_t *)(uintptr_t)table;

		entries[index] = (x86pte32_t)pteval;
	}

	if (level == 2 && top_level == 2)
		reload_cr3();
}

/*
 * Allocate one page for a new page table and build the entry that should
 * point at it.
 *
 * pteval - out: value to store in the parent table for the new table
 * level  - level of the table that will hold that entry
 *
 * Returns the address of the new (zeroed by the allocator) table. An entry
 * in a level 2 top level table only gets PT_VALID; every other entry gets
 * the standard ptp_bits.
 */
paddr_t
make_ptable(x86pte_t *pteval, uint_t level)
{
	paddr_t new_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
	x86pte_t flags;

	if (level == 2 && top_level == 2)
		flags = PT_VALID;
	else
		flags = ptp_bits;
	*pteval = pa_to_ma((uintptr_t)new_table) | flags;

	if (map_debug) {
		dboot_printf("new page table lvl=%d paddr=0x%lx ptp=0x%"
		    PRIx64 "\n", level, (ulong_t)new_table, *pteval);
	}
	return (new_table);
}

/*
 * Return a pointer to entry 'index' of the page table at 'table'. Entries
 * are pte_size bytes apart, so this works for both entry widths; the caller
 * has to access the entry with the width that matches the paging mode.
 */
x86pte_t *
map_pte(paddr_t table, uint_t index)
{
	uintptr_t entry_addr = (uintptr_t)(table + index * pte_size);

	return ((x86pte_t *)entry_addr);
}

#if 0	/* useful if debugging */
/*
 * dump out the contents of page tables...
 */
static void
dump_tables(void)
{
	uint_t save_index[4];	/* for recursion */
	char *save_table[4];	/* for recursion */
	uint_t	l;
	uint64_t va;
	uint64_t pgsize;
	int index;
	int i;
	x86pte_t pteval;
	char *table;
	static char *tablist = "\t\t\t";
	char *tabs = tablist + 3 - top_level;
	uint_t pa, pa1;

	dboot_printf("Finished pagetables:\n");
	table = (char *)top_page_table;
	l = top_level;
	va = 0;
	for (index = 0; index < ptes_per_table; ++index) {
		pgsize = 1ull << shift_amt[l];
		if (pae_support)
			pteval = ((x86pte_t *)table)[index];
		else
			pteval = ((x86pte32_t *)table)[index];
		if (pteval == 0)
			goto next_entry;

		dboot_printf("%s %lx[0x%x] = %" PRIx64 ", va=%" PRIx64,
		    tabs + l, table, index, (uint64_t)pteval, va);
		pa = ma_to_pa(pteval & MMU_PAGEMASK);
		dboot_printf(" physaddr=%" PRIx64 "\n", pa);

		/*
		 * Don't try to walk hypervisor private pagetables
		 */
		if ((l > 1 || (l == 1 && (pteval & PT_PAGESIZE) == 0))) {
			save_table[l] = table;
			save_index[l] = index;
			--l;
			index = -1;
			table = (char *)(uintptr_t)
			    ma_to_pa(pteval & MMU_PAGEMASK);
			goto recursion;
		}

		/*
		 * shorten dump for consecutive mappings
		 */
		for (i = 1; index + i < ptes_per_table; ++i) {
			if (pae_support)
				pteval = ((x86pte_t *)table)[index + i];
			else
				pteval = ((x86pte32_t *)table)[index + i];
			if (pteval == 0)
				break;
			pa1 = ma_to_pa(pteval & MMU_PAGEMASK);
			if (pa1 != pa + i * pgsize)
				break;
		}
		if (i > 2) {
			dboot_printf("%s...\n", tabs + l);
			va += pgsize * (i - 2);
			index += i - 2;
		}
next_entry:
		va += pgsize;
		if (l == 3 && index == 256)	/* VA hole */
			va = 0xffff800000000000ull;
recursion:
		;
	}
	if (l < top_level) {
		++l;
		index = save_index[l];
		table = save_table[l];
		goto recursion;
	}
}
#endif

/*
 * Enter a mapping for physical address 'pa' at virtual address 'va'.
 *
 * level selects the mapping size: 0 is a regular page, anything above 0 is
 * a large page (PT_PAGESIZE is set). Mappings at or above
 * target_kernel_text are marked global when the processor supports it, and
 * anything below 1 Meg is mapped uncached.
 */
static void
map_pa_at_va(paddr_t pa, native_ptr_t va, uint_t level)
{
	x86pte_t entry = pa_to_ma(pa) | pte_bits;
	x86pte_t *slot;

	if (level > 0)
		entry |= PT_PAGESIZE;
	if (va >= target_kernel_text && pge_support)
		entry |= PT_GLOBAL;

	/* only non-identity mappings are worth reporting */
	if (map_debug && pa != va) {
		dboot_printf("mapping pa=0x%" PRIx64 " va=0x%" PRIx64
		    " pte=0x%" PRIx64 " l=%d\n",
		    (uint64_t)pa, (uint64_t)va, entry, level);
	}

	/*
	 * Locate the entry that maps va at the requested level; find_pte()
	 * creates whatever intermediate page tables are still missing.
	 */
	slot = find_pte(va, NULL, level, 0);

	if (va < 1024 * 1024)
		entry |= PT_NOCACHE;		/* for video RAM */

	/*
	 * The page tables are not in use yet, so the entry is simply written
	 * in place, with the width that matches the paging mode.
	 */
	if (!pae_support) {
		*((x86pte32_t *)slot) = (x86pte32_t)entry;
		return;
	}
	*slot = entry;
}

/*
 * Note that address 'a' is in use: if it lies at or beyond the current
 * allocation point, move next_avail_addr up to the first page boundary
 * after it.
 */
static void
check_higher(paddr_t a)
{
	if (a >= next_avail_addr) {
		next_avail_addr = RNDUP(a + 1, MMU_PAGESIZE);
		DBG(next_avail_addr);
	}
}

/*
 * Remove the range start..end from the set of addresses that PCI may use
 * (pcimemlists[]). pci_lo_limit and pci_hi_limit bound the range PCI is
 * initially assumed to own.
 *
 * Each list entry is compared against the excluded range and is either
 * dropped, split in two, trimmed at the front or trimmed at the back.
 */
const uint64_t pci_lo_limit = 0x00100000ul;
const uint64_t pci_hi_limit = 0xfff00000ul;
static void
exclude_from_pci(uint64_t start, uint64_t end)
{
	int i;
	int j;
	struct boot_memlist *ml;
	uint64_t ml_end;

	for (i = 0; i < pcimemlists_used; ++i) {
		ml = &pcimemlists[i];
		ml_end = ml->addr + ml->size;

		/* entry lies completely inside the excluded range: drop it */
		if (start <= ml->addr && ml_end <= end) {
			--pcimemlists_used;
			for (j = i; j < pcimemlists_used; ++j)
				pcimemlists[j] = pcimemlists[j + 1];
			--i;	/* the next entry now lives at this index */
			continue;
		}

		/*
		 * excluded range lies strictly inside the entry: duplicate
		 * the entry, then keep the part below start in the first
		 * copy and the part above end in the second
		 */
		if (ml->addr < start && end < ml_end) {
			++pcimemlists_used;
			if (pcimemlists_used > MAX_MEMLIST)
				dboot_panic("too many pcimemlists");

			for (j = pcimemlists_used - 1; j > i; --j)
				pcimemlists[j] = pcimemlists[j - 1];
			ml->size = start - ml->addr;

			ml[1].size = ml_end - end;
			ml[1].addr = end;
			++i;	/* the upper half needs no further checking */
			continue;
		}

		/* excluded range covers the front of the entry */
		if (ml->addr < end && end < ml_end) {
			ml->size -= end - ml->addr;
			ml->addr = end;
			continue;
		}

		/* excluded range covers the tail of the entry */
		if (ml->addr <= start && start < ml_end)
			ml->size = start - ml->addr;
	}
}

/*
 * Walk through the module information finding the last used address.
 * The first available address will become the top level page table.
 *
 * We then build the phys_install memlist from the multiboot information.
 */
static void
init_mem_alloc(void)
{
	mb_memory_map_t *mmap;
	mb_module_t *mod;
	uint64_t start;
	uint64_t end;
	uint64_t page_offset = MMU_PAGEOFFSET;	/* needs to be 64 bits */
	extern char _end[];
	int i;

	DBG_MSG("Entered init_mem_alloc()\n");
	DBG((uintptr_t)mb_info);

	/*
	 * search the modules to find the last used address
	 * we'll build the module list while we're walking through here
	 */
	DBG_MSG("\nFinding Modules\n");
	check_higher((paddr_t)&_end);
	for (mod = (mb_module_t *)(mb_info->mods_addr), i = 0;
	    i < mb_info->mods_count;
	    ++mod, ++i) {
		if (prom_debug) {
			dboot_printf("\tmodule #%d: %s at: 0x%lx, len 0x%lx\n",
			    i, (char *)(mod->mod_name),
			    (ulong_t)mod->mod_start, (ulong_t)mod->mod_end);
		}
		modules[i].bm_addr = mod->mod_start;
		modules[i].bm_size = mod->mod_end;

		check_higher(mod->mod_end);
	}
	bi->bi_modules = (native_ptr_t)modules;
	DBG(bi->bi_modules);
	bi->bi_module_cnt = mb_info->mods_count;
	DBG(bi->bi_module_cnt);

	/*
	 * start out by assuming PCI can use all physical addresses
	 */
	pcimemlists[0].addr = pci_lo_limit;
	pcimemlists[0].size = pci_hi_limit - pci_lo_limit;
	pcimemlists_used = 1;

	/*
	 * Walk through the memory map from multiboot and build our memlist
	 * structures. Note these will have native format pointers.
	 */
	DBG_MSG("\nFinding Memory Map\n");
	DBG(mb_info->flags);
	max_mem = 0;
	if (mb_info->flags & 0x40) {
		DBG(mb_info->mmap_addr);
		DBG(mb_info->mmap_length);
		check_higher(mb_info->mmap_addr + mb_info->mmap_length);

		for (mmap = (mb_memory_map_t *)mb_info->mmap_addr;
		    (uint32_t)mmap < mb_info->mmap_addr + mb_info->mmap_length;
		    mmap = (mb_memory_map_t *)((uint32_t)mmap + mmap->size
		    + sizeof (mmap->size))) {

			start = ((uint64_t)mmap->base_addr_high << 32) +
			    mmap->base_addr_low;
			end = start + ((uint64_t)mmap->length_high << 32) +
			    mmap->length_low;

			if (prom_debug)
				dboot_printf("\ttype: %d %" PRIx64 "..%"
				    PRIx64 "\n", mmap->type, start, end);

			/*
			 * page align start and end
			 */
			start = (start + page_offset) & ~page_offset;
			end &= ~page_offset;
			if (end <= start)
				continue;

			exclude_from_pci(start, end);

			/*
			 * only type 1 is usable RAM
			 */
			if (mmap->type != 1)
				continue;

			if (end > max_mem)
				max_mem = end;

			memlists[memlists_used].addr = start;
			memlists[memlists_used].size = end - start;
			++memlists_used;
			if (memlists_used > MAX_MEMLIST)
				dboot_panic("too many memlists");
		}
	} else if (mb_info->flags & 0x01) {
		DBG(mb_info->mem_lower);
		memlists[memlists_used].addr = 0;
		memlists[memlists_used].size = mb_info->mem_lower * 1024;
		++memlists_used;
		DBG(mb_info->mem_upper);
		memlists[memlists_used].addr = 1024 * 1024;
		memlists[memlists_used].size = mb_info->mem_upper * 1024;
		++memlists_used;
		exclude_from_pci(memlists[0].addr,
		    memlists[0].addr + memlists[memlists_used].size);
		exclude_from_pci(memlists[1].addr,
		    memlists[1].addr + memlists[memlists_used].size);
	} else {
		dboot_panic("No memory info from boot loader!!!\n");
	}

	check_higher(bi->bi_cmdline);

	/*
	 * finish processing the physinstall list
	 */
	sort_physinstall();

	/*
	 * Finish off the pcimemlist
	 */
	if (prom_debug) {
		for (i = 0; i < pcimemlists_used; ++i) {
			dboot_printf("pcimemlist entry 0x%" PRIx64 "..0x%"
				    PRIx64 "\n", pcimemlists[i].addr,
				pcimemlists[i].addr + pcimemlists[i].size);
		}
	}
	pcimemlists[0].next = 0;
	pcimemlists[0].prev = 0;
	for (i = 1; i < pcimemlists_used; ++i) {
		pcimemlists[i].prev =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i - 1);
		pcimemlists[i].next = 0;
		pcimemlists[i - 1].next =
		    (native_ptr_t)(uintptr_t)(pcimemlists + i);
	}
	bi->bi_pcimem = (native_ptr_t)pcimemlists;
	DBG(bi->bi_pcimem);
}

/*
 * Simple memory allocator, allocates aligned physical memory.
 * Note that startup_kernel() only allocates memory, never frees.
 * Memory usage just grows in an upward direction.
 *
 * size  - number of bytes wanted; rounded up to a multiple of the page size
 * align - required alignment of the returned address
 *
 * Returns a pointer to zeroed memory taken from one of the memlists[]
 * ranges and advances next_avail_addr past it. Panics when no range can
 * satisfy the request.
 */
static void *
do_mem_alloc(uint32_t size, uint32_t align)
{
	uint_t i;
	uint64_t best;
	uint64_t start;
	uint64_t end;
	int found = 0;

	/*
	 * make sure size is a multiple of pagesize
	 */
	size = RNDUP(size, MMU_PAGESIZE);
	next_avail_addr = RNDUP(next_avail_addr, align);

	/*
	 * "best" starts out as an upper limit: size is negated as a 32 bit
	 * value, so this is the highest start address at which size bytes
	 * still end within the first 4 Gig.
	 */
	/* LINTED E_UNEXPECTED_UINT_PROMOTION */
	best = (uint64_t)-size;
	for (i = 0; i < memlists_used; ++i) {
		start = memlists[i].addr;
		end = start + memlists[i].size;

		/*
		 * did we find the desired address?
		 */
		if (start <= next_avail_addr && next_avail_addr + size <= end) {
			best = next_avail_addr;
			found = 1;
			break;
		}

		/*
		 * if not is this address the best so far?
		 * (a range above the desired address, lower than any
		 * candidate seen so far, with room for an aligned chunk)
		 */
		if (start > next_avail_addr && start < best &&
		    RNDUP(start, align) + size <= end) {
			best = RNDUP(start, align);
			found = 1;
		}
	}

	/*
	 * Nothing fits, e.g. because a really large boot archive used up
	 * all the memory. "best" is still only the search limit and not a
	 * usable address, so give up here instead of zeroing and returning
	 * memory we do not own.
	 */
	if (!found)
		dboot_panic("out of memory in boot loader allocator\n");

	/*
	 * Either the desired address was usable, or we went off the end of
	 * a memory region and take the best address found instead.
	 */
	next_avail_addr = best + size;
	(void) memset((void *)(uintptr_t)best, 0, size);
	return ((void *)(uintptr_t)best);
}

/*
 * Allocate 'size' bytes (rounded up to whole pages by the allocator) of
 * page aligned memory.
 */
void *
mem_alloc(uint32_t size)
{
	void *pages = do_mem_alloc(size, MMU_PAGESIZE);

	return (pages);
}


/*
 * Build page tables to map all of memory used so far as well as the kernel.
 *
 * Three sets of mappings are created:
 *	- the kernel nucleus: ksize bytes of ktext_phys at target_kernel_text,
 *	  using large pages when the processor supports them;
 *	- everything below 1 Meg, mapped 1:1;
 *	- all installed memory from _start up to next_avail_addr, mapped 1:1.
 * A page for the kernel's page table window is allocated on the way and
 * reported through bi->bi_pt_window / bi->bi_pte_to_pt_window.
 */
static void
build_page_tables(void)
{
	uint32_t psize;
	uint32_t level;
	uint32_t off;
	uint32_t i;
	uint64_t start;
	uint64_t end;
	uint64_t next_mapping;

	/*
	 * If we're not using Xen, we need to create the top level pagetable.
	 */
	top_page_table = (paddr_t)(uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG((uintptr_t)top_page_table);

	/*
	 * Determine if we'll use large mappings for kernel, then map it.
	 */
	if (largepage_support) {
		psize = lpagesize;
		level = 1;
	} else {
		psize = MMU_PAGESIZE;
		level = 0;
	}

	DBG_MSG("Mapping kernel\n");
	DBG(ktext_phys);
	DBG(target_kernel_text);
	DBG(ksize);
	DBG(psize);
	for (off = 0; off < ksize; off += psize)
		map_pa_at_va(ktext_phys + off, target_kernel_text + off, level);

	/*
	 * The kernel will need a 1 page window to work with page tables
	 */
	bi->bi_pt_window = (uintptr_t)mem_alloc(MMU_PAGESIZE);
	DBG(bi->bi_pt_window);
	bi->bi_pte_to_pt_window =
	    (uintptr_t)find_pte(bi->bi_pt_window, NULL, 0, 0);
	DBG(bi->bi_pte_to_pt_window);

	/*
	 * Under multiboot we need 1:1 mappings for all of low memory, which
	 * includes our pagetables. The following code works because our
	 * simple memory allocator only grows usage in an upwards direction.
	 *
	 * We map *all* possible addresses below 1 Meg, since things like
	 * the video RAM are down there.
	 *
	 * Skip memory between 1M and _start, this acts as a reserve
	 * of memory usable for DMA.
	 */
	next_mapping = (uintptr_t)_start & MMU_PAGEMASK;
	if (map_debug)
		dboot_printf("1:1 map pa=0..1Meg\n");
	for (start = 0; start < 1024 * 1024; start += MMU_PAGESIZE)
		map_pa_at_va(start, start, 0);

	for (i = 0; i < memlists_used; ++i) {
		/*
		 * Work out the end of the region from its real base before
		 * clamping the start, otherwise a clamped region would be
		 * treated as extending beyond its actual end.
		 */
		start = memlists[i].addr;
		end = start + memlists[i].size;
		if (start < next_mapping)
			start = next_mapping;

		/* region lies entirely below _start: nothing to map */
		if (start >= end)
			continue;

		if (map_debug)
			dboot_printf("1:1 map pa=%" PRIx64 "..%" PRIx64 "\n",
			    start, end);

		/* only memory handed out so far needs a mapping */
		while (start < end && start < next_avail_addr) {
			map_pa_at_va(start, start, 0);
			start += MMU_PAGESIZE;
		}
	}

	DBG_MSG("\nPage tables constructed\n");
}

/*
 * Panic message used by startup_kernel() when the boot command line
 * contains the word "multiboot".
 */
#define	NO_MULTIBOOT	\
"multiboot is no longer used to boot the Solaris Operating System.\n\
The grub entry should be changed to:\n\
kernel$ /platform/i86pc/kernel/$ISADIR/unix\n\
module$ /platform/i86pc/$ISADIR/boot_archive\n\
See http://www.sun.com/msg/SUNOS-8000-AK for details.\n"

/*
 * startup_kernel has a pretty simple job. It builds pagetables which reflect
 * 1:1 mappings for all memory in use. It then also adds mappings for
 * the kernel nucleus at virtual address of target_kernel_text using large page
 * mappings. The page table pages are also accessible at 1:1 mapped
 * virtual addresses.
 *
 * Sequence:
 *	1. pick up the command line and the debug flags, start the console;
 *	2. set up the 16 byte aligned xboot_info (bi);
 *	3. probe the MMU features with cpuid;
 *	4. initialize the memory allocator and the memlists;
 *	5. choose the page table format (PAE or not);
 *	6. for a 64 bit target, allocate the nucleus and load the ELF64 image;
 *	7. build the page tables and fill in the rest of bi.
 * On return the assembler caller switches over to the kernel.
 */
/*ARGSUSED*/
void
startup_kernel(void)
{
	char *cmdline;
	uintptr_t addr;

	/*
	 * At this point we are executing as a 32 bit program with paging
	 * disabled, so physical memory can be addressed directly.
	 */
	cmdline = (char *)mb_info->cmdline;
	prom_debug = (strstr(cmdline, "prom_debug") != NULL);
	map_debug = (strstr(cmdline, "map_debug") != NULL);
	bcons_init(cmdline);
	DBG_MSG("\n\nSolaris prekernel set: ");
	/*
	 * The command line comes from the boot loader, so print it as data
	 * through an explicit "%s" rather than risk any '%' in it being
	 * treated as a conversion specification.
	 */
	if (prom_debug)
		dboot_printf("%s", cmdline);
	DBG_MSG("\n");

	if (strstr(cmdline, "multiboot") != NULL) {
		dboot_panic(NO_MULTIBOOT);
	}

	/*
	 * boot info must be 16 byte aligned for 64 bit kernel ABI
	 * (boot_info[] has a spare element to leave room for the round up)
	 */
	addr = (uintptr_t)boot_info;
	addr = (addr + 0xf) & ~0xf;
	bi = (struct xboot_info *)addr;
	DBG((uintptr_t)bi);
	bi->bi_cmdline = (native_ptr_t)(uintptr_t)cmdline;

	/*
	 * Need correct target_kernel_text value
	 */
#if defined(_BOOT_TARGET_amd64)
	target_kernel_text = KERNEL_TEXT_amd64;
#else
	target_kernel_text = KERNEL_TEXT_i386;
#endif
	DBG(target_kernel_text);

	/*
	 * use cpuid to enable MMU features
	 */
	if (have_cpuid()) {
		uint32_t eax, edx;

		/* function 1: standard feature flags */
		eax = 1;
		edx = get_cpuid_edx(&eax);
		if (edx & CPUID_INTC_EDX_PSE)
			largepage_support = 1;
		if (edx & CPUID_INTC_EDX_PGE)
			pge_support = 1;
		if (edx & CPUID_INTC_EDX_PAE)
			pae_support = 1;

		/*
		 * function 0x80000000 reports (in eax) the highest extended
		 * function; the extended feature flags are only looked at
		 * when function 0x80000001 exists
		 */
		eax = 0x80000000;
		edx = get_cpuid_edx(&eax);
		if (eax >= 0x80000001) {
			eax = 0x80000001;
			edx = get_cpuid_edx(&eax);
			if (edx & CPUID_AMD_EDX_LM)
				amd64_support = 1;
			if (edx & CPUID_AMD_EDX_NX)
				NX_support = 1;
		}
	} else {
		dboot_printf("cpuid not supported\n");
	}

#if defined(_BOOT_TARGET_amd64)
	if (amd64_support == 0)
		dboot_panic("long mode not supported, rebooting\n");
	else if (pae_support == 0)
		dboot_panic("long mode, but no PAE; rebooting\n");
#endif

	/*
	 * initialize our memory allocator
	 */
	init_mem_alloc();

	/*
	 * configure mmu information
	 *
	 * A 64 bit target always uses the PAE style tables. A 32 bit target
	 * only does so when the processor has PAE and there is either memory
	 * above 4 Gig or NX support to make use of; otherwise it falls back
	 * to the non-PAE format and turns PAE and NX off.
	 */
#if !defined(_BOOT_TARGET_amd64)
	if (pae_support && (max_mem > FOUR_GIG || NX_support)) {
#endif
		shift_amt = shift_amt_pae;
		ptes_per_table = 512;
		pte_size = 8;
		lpagesize = TWO_MEG;
#if defined(_BOOT_TARGET_amd64)
		top_level = 3;
#else
		top_level = 2;
#endif
#if !defined(_BOOT_TARGET_amd64)
	} else {
		pae_support = 0;
		NX_support = 0;
		shift_amt = shift_amt_nopae;
		ptes_per_table = 1024;
		pte_size = 4;
		lpagesize = FOUR_MEG;
		top_level = 1;
	}
#endif

	DBG(pge_support);
	DBG(NX_support);
	DBG(largepage_support);
	DBG(amd64_support);
	DBG(top_level);
	DBG(pte_size);
	DBG(ptes_per_table);
	DBG(lpagesize);

	ktext_phys = FOUR_MEG;		/* from UNIX Mapfile */

#if defined(_BOOT_TARGET_amd64)
	/*
	 * For grub, copy kernel bits from the ELF64 file to final place.
	 */
	DBG_MSG("\nAllocating nucleus pages.\n");
	ktext_phys = (uintptr_t)do_mem_alloc(ksize, FOUR_MEG);
	if (ktext_phys == 0)
		dboot_panic("failed to allocate aligned kernel memory\n");
	if (dboot_elfload64(mb_header.load_addr) != 0)
		dboot_panic("failed to parse kernel ELF image, rebooting\n");

#endif
	DBG(ktext_phys);

	/*
	 * Allocate page tables.
	 */
	build_page_tables();

	/*
	 * return to assembly code to switch to running kernel
	 * (after recording what the kernel needs to know in bi)
	 */
	entry_addr_low = (uint32_t)target_kernel_text;
	DBG(entry_addr_low);
	bi->bi_use_largepage = largepage_support;
	bi->bi_use_pae = pae_support;
	bi->bi_use_pge = pge_support;
	bi->bi_use_nx = NX_support;
	bi->bi_next_paddr = next_avail_addr;
	DBG(bi->bi_next_paddr);
	bi->bi_next_vaddr = (uintptr_t)next_avail_addr;
	DBG(bi->bi_next_vaddr);
	bi->bi_mb_info = (uintptr_t)mb_info;
	bi->bi_top_page_table = (uintptr_t)top_page_table;

	bi->bi_kseg_size = FOUR_MEG;
	DBG(bi->bi_kseg_size);

#if 0		/* useful if debugging initial page tables */
	if (prom_debug)
		dump_tables();
#endif

	DBG_MSG("\n\n*** DBOOT DONE -- back to asm to jump to kernel\n\n");
}