xref: /illumos-gate/usr/src/uts/i86pc/os/fakebop.c (revision 1fceb383a3f0b59711832b9dc4e8329d7f216604)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * This file contains the functionality that mimics the boot operations
31  * on SPARC systems or the old boot.bin/multiboot programs on x86 systems.
32  * The x86 kernel now does everything on its own.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/bootconf.h>
37 #include <sys/bootsvcs.h>
38 #include <sys/bootinfo.h>
39 #include <sys/multiboot.h>
40 #include <sys/bootvfs.h>
41 #include <sys/bootprops.h>
42 #include <sys/varargs.h>
43 #include <sys/param.h>
44 #include <sys/machparam.h>
45 #include <sys/archsystm.h>
46 #include <sys/boot_console.h>
47 #include <sys/cmn_err.h>
48 #include <sys/systm.h>
49 #include <sys/promif.h>
50 #include <sys/archsystm.h>
51 #include <sys/x86_archext.h>
52 #include <sys/kobj.h>
53 #include <sys/privregs.h>
54 #include <sys/sysmacros.h>
55 #include <sys/ctype.h>
56 #ifdef __xpv
57 #include <sys/hypervisor.h>
58 #include <net/if.h>
59 #endif
60 #include <vm/kboot_mmu.h>
61 #include <vm/hat_pte.h>
62 #include "acpi_fw.h"
63 
static int have_console = 0;	/* set once primitive console is initialized */
/*
 * Non "-B" part of the kernel command line. Starts out as an empty string
 * and is pointed at an allocated buffer by build_boot_properties().
 */
static char *boot_args = "";

/*
 * Debugging macros
 *
 * Both macros are silent unless kbm_debug is non-zero. DBG_MSG() prints a
 * string; DBG() prints the text of an expression followed by its value in
 * hex (the value is cast to uint64_t).
 */
static uint_t kbm_debug = 0;
#define	DBG_MSG(s)	{ if (kbm_debug) bop_printf(NULL, "%s", s); }
#define	DBG(x)		{ if (kbm_debug)			\
	bop_printf(NULL, "%s is %" PRIx64 "\n", #x, (uint64_t)(x));	\
	}

/*
 * Write a NUL terminated string to the boot console one character at a
 * time using bcons_putchar(). Expands to a brace block that declares its
 * own local "cp".
 */
#define	PUT_STRING(s) {				\
	char *cp;				\
	for (cp = (s); *cp; ++cp)		\
		bcons_putchar(*cp);		\
	}

struct xboot_info *xbootp;	/* boot info from "glue" code in low memory */
bootops_t bootop;	/* simple bootops we'll pass on to kernel */
struct bsys_mem bm;	/* NOTE(review): not referenced in this part of file */

static uintptr_t next_virt;	/* next available virtual address */
static paddr_t next_phys;	/* next available physical address from dboot */
static paddr_t high_phys = -(paddr_t)1;	/* last used physical address */

/*
 * buffer for vsnprintf for console I/O
 */
#define	BUFFERSIZE	256
static char buffer[BUFFERSIZE];
/*
 * stuff to store/report/manipulate boot property settings.
 *
 * Each property is one bootprop_t immediately followed in memory by its
 * name and value (see bsetprop()).
 */
typedef struct bootprop {
	struct bootprop *bp_next;	/* next (older) property, or NULL */
	char *bp_name;			/* NUL terminated property name */
	uint_t bp_vlen;			/* length of the value in bytes */
	char *bp_value;			/* value bytes; no terminator is added */
} bootprop_t;

static bootprop_t *bprops = NULL;	/* list head; newest property first */
static char *curr_page = NULL;		/* ptr to avail bprop memory */
static int curr_space = 0;		/* amount of memory at curr_page */

#ifdef __xpv
start_info_t *xen_info;
shared_info_t *HYPERVISOR_shared_info;
#endif

/*
 * some allocator statistics
 *
 * Bytes handed out by do_bsys_alloc(), split by whether the caller
 * supplied a virtual address (kernel) or not (scratch).
 */
static ulong_t total_bop_alloc_scratch = 0;
static ulong_t total_bop_alloc_kernel = 0;

static void build_firmware_properties(void);

/*
 * While non-zero, do_bop_phys_alloc() hands out the lowest suitable
 * physical memory; boot_prop_finish() clears it, after which allocations
 * come from the highest suitable memory.
 */
static int early_allocation = 1;
123 
124 /*
125  * Allocate aligned physical memory at boot time. This allocator allocates
126  * from the highest possible addresses. This avoids exhausting memory that
127  * would be useful for DMA buffers.
128  */
129 paddr_t
130 do_bop_phys_alloc(uint64_t size, uint64_t align)
131 {
132 	paddr_t	pa = 0;
133 	paddr_t	start;
134 	paddr_t	end;
135 	struct memlist	*ml = (struct memlist *)xbootp->bi_phys_install;
136 
137 	/*
138 	 * Be careful if high memory usage is limited in startup.c
139 	 * Since there are holes in the low part of the physical address
140 	 * space we can treat physmem as a pfn (not just a pgcnt) and
141 	 * get a conservative upper limit.
142 	 */
143 	if (physmem != 0 && high_phys > pfn_to_pa(physmem))
144 		high_phys = pfn_to_pa(physmem);
145 
146 	/*
147 	 * find the lowest or highest available memory in physinstalled
148 	 * On 32 bit avoid physmem above 4Gig if PAE isn't enabled
149 	 */
150 #if defined(__i386)
151 	if (xbootp->bi_use_pae == 0 && high_phys > FOUR_GIG)
152 		high_phys = FOUR_GIG;
153 #endif
154 
155 	/*
156 	 * find the highest available memory in physinstalled
157 	 */
158 	size = P2ROUNDUP(size, align);
159 	for (; ml; ml = ml->next) {
160 		start = P2ROUNDUP(ml->address, align);
161 		end = P2ALIGN(ml->address + ml->size, align);
162 		if (start < next_phys)
163 			start = P2ROUNDUP(next_phys, align);
164 		if (end > high_phys)
165 			end = P2ALIGN(high_phys, align);
166 
167 		if (end <= start)
168 			continue;
169 		if (end - start < size)
170 			continue;
171 
172 		/*
173 		 * Early allocations need to use low memory, since
174 		 * physmem might be further limited by bootenv.rc
175 		 */
176 		if (early_allocation) {
177 			if (pa == 0 || start < pa)
178 				pa = start;
179 		} else {
180 			if (end - size > pa)
181 				pa = end - size;
182 		}
183 	}
184 	if (pa != 0) {
185 		if (early_allocation)
186 			next_phys = pa + size;
187 		else
188 			high_phys = pa;
189 		return (pa);
190 	}
191 	panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64 ") Out of memory\n",
192 	    size, align);
193 	/*NOTREACHED*/
194 }
195 
196 static uintptr_t
197 alloc_vaddr(size_t size, paddr_t align)
198 {
199 	uintptr_t rv;
200 
201 	next_virt = P2ROUNDUP(next_virt, (uintptr_t)align);
202 	rv = (uintptr_t)next_virt;
203 	next_virt += size;
204 	return (rv);
205 }
206 
207 /*
208  * Allocate virtual memory. The size is always rounded up to a multiple
209  * of base pagesize.
210  */
211 
212 /*ARGSUSED*/
213 static caddr_t
214 do_bsys_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
215 {
216 	paddr_t a = align;	/* same type as pa for masking */
217 	uint_t pgsize;
218 	paddr_t pa;
219 	uintptr_t va;
220 	ssize_t s;		/* the aligned size */
221 	uint_t level;
222 	uint_t is_kernel = (virthint != 0);
223 
224 	if (a < MMU_PAGESIZE)
225 		a = MMU_PAGESIZE;
226 	else if (!ISP2(a))
227 		prom_panic("do_bsys_alloc() incorrect alignment");
228 	size = P2ROUNDUP(size, MMU_PAGESIZE);
229 
230 	/*
231 	 * Use the next aligned virtual address if we weren't given one.
232 	 */
233 	if (virthint == NULL) {
234 		virthint = (caddr_t)alloc_vaddr(size, a);
235 		total_bop_alloc_scratch += size;
236 	} else {
237 		total_bop_alloc_kernel += size;
238 	}
239 
240 	/*
241 	 * allocate the physical memory
242 	 */
243 	pa = do_bop_phys_alloc(size, a);
244 
245 	/*
246 	 * Add the mappings to the page tables, try large pages first.
247 	 */
248 	va = (uintptr_t)virthint;
249 	s = size;
250 	level = 1;
251 	pgsize = xbootp->bi_use_pae ? TWO_MEG : FOUR_MEG;
252 	if (xbootp->bi_use_largepage && a == pgsize) {
253 		while (IS_P2ALIGNED(pa, pgsize) && IS_P2ALIGNED(va, pgsize) &&
254 		    s >= pgsize) {
255 			kbm_map(va, pa, level, is_kernel);
256 			va += pgsize;
257 			pa += pgsize;
258 			s -= pgsize;
259 		}
260 	}
261 
262 	/*
263 	 * Map remaining pages use small mappings
264 	 */
265 	level = 0;
266 	pgsize = MMU_PAGESIZE;
267 	while (s > 0) {
268 		kbm_map(va, pa, level, is_kernel);
269 		va += pgsize;
270 		pa += pgsize;
271 		s -= pgsize;
272 	}
273 	return (virthint);
274 }
275 
276 /*
277  * Free virtual memory - we'll just ignore these.
278  */
279 /*ARGSUSED*/
280 static void
281 do_bsys_free(bootops_t *bop, caddr_t virt, size_t size)
282 {
283 	bop_printf(NULL, "do_bsys_free(virt=0x%p, size=0x%lx) ignored\n",
284 	    (void *)virt, size);
285 }
286 
287 /*
288  * Old interface
289  */
290 /*ARGSUSED*/
291 static caddr_t
292 do_bsys_ealloc(
293 	bootops_t *bop,
294 	caddr_t virthint,
295 	size_t size,
296 	int align,
297 	int flags)
298 {
299 	prom_panic("unsupported call to BOP_EALLOC()\n");
300 	return (0);
301 }
302 
303 
304 static void
305 bsetprop(char *name, int nlen, void *value, int vlen)
306 {
307 	uint_t size;
308 	uint_t need_size;
309 	bootprop_t *b;
310 
311 	/*
312 	 * align the size to 16 byte boundary
313 	 */
314 	size = sizeof (bootprop_t) + nlen + 1 + vlen;
315 	size = (size + 0xf) & ~0xf;
316 	if (size > curr_space) {
317 		need_size = (size + (MMU_PAGEOFFSET)) & MMU_PAGEMASK;
318 		curr_page = do_bsys_alloc(NULL, 0, need_size, MMU_PAGESIZE);
319 		curr_space = need_size;
320 	}
321 
322 	/*
323 	 * use a bootprop_t at curr_page and link into list
324 	 */
325 	b = (bootprop_t *)curr_page;
326 	curr_page += sizeof (bootprop_t);
327 	curr_space -=  sizeof (bootprop_t);
328 	b->bp_next = bprops;
329 	bprops = b;
330 
331 	/*
332 	 * follow by name and ending zero byte
333 	 */
334 	b->bp_name = curr_page;
335 	bcopy(name, curr_page, nlen);
336 	curr_page += nlen;
337 	*curr_page++ = 0;
338 	curr_space -= nlen + 1;
339 
340 	/*
341 	 * copy in value, but no ending zero byte
342 	 */
343 	b->bp_value = curr_page;
344 	b->bp_vlen = vlen;
345 	if (vlen > 0) {
346 		bcopy(value, curr_page, vlen);
347 		curr_page += vlen;
348 		curr_space -= vlen;
349 	}
350 
351 	/*
352 	 * align new values of curr_page, curr_space
353 	 */
354 	while (curr_space & 0xf) {
355 		++curr_page;
356 		--curr_space;
357 	}
358 }
359 
/*
 * Set a string valued property. The stored value includes the string's
 * terminating zero byte.
 */
static void
bsetprops(char *name, char *value)
{
	int nlen = strlen(name);
	int vlen = strlen(value) + 1;

	bsetprop(name, nlen, value, vlen);
}
365 
/*
 * Set a property whose value is the raw bytes of a 64 bit integer.
 */
static void
bsetprop64(char *name, uint64_t value)
{
	void *bytes = (void *)&value;

	bsetprop(name, strlen(name), bytes, sizeof (value));
}
371 
/*
 * Set a string property holding the decimal text form of an int.
 */
static void
bsetpropsi(char *name, int value)
{
	char text[32];

	(void) snprintf(text, sizeof (text), "%d", value);
	bsetprops(name, text);
}
380 
381 /*
382  * to find the size of the buffer to allocate
383  */
384 /*ARGSUSED*/
385 int
386 do_bsys_getproplen(bootops_t *bop, char *name)
387 {
388 	bootprop_t *b;
389 
390 	for (b = bprops; b; b = b->bp_next) {
391 		if (strcmp(name, b->bp_name) != 0)
392 			continue;
393 		return (b->bp_vlen);
394 	}
395 	return (-1);
396 }
397 
398 /*
399  * get the value associated with this name
400  */
401 /*ARGSUSED*/
402 int
403 do_bsys_getprop(bootops_t *bop, char *name, void *value)
404 {
405 	bootprop_t *b;
406 
407 	for (b = bprops; b; b = b->bp_next) {
408 		if (strcmp(name, b->bp_name) != 0)
409 			continue;
410 		bcopy(b->bp_value, value, b->bp_vlen);
411 		return (0);
412 	}
413 	return (-1);
414 }
415 
416 /*
417  * get the name of the next property in succession from the standalone
418  */
419 /*ARGSUSED*/
420 static char *
421 do_bsys_nextprop(bootops_t *bop, char *name)
422 {
423 	bootprop_t *b;
424 
425 	/*
426 	 * A null name is a special signal for the 1st boot property
427 	 */
428 	if (name == NULL || strlen(name) == 0) {
429 		if (bprops == NULL)
430 			return (NULL);
431 		return (bprops->bp_name);
432 	}
433 
434 	for (b = bprops; b; b = b->bp_next) {
435 		if (name != b->bp_name)
436 			continue;
437 		b = b->bp_next;
438 		if (b == NULL)
439 			return (NULL);
440 		return (b->bp_name);
441 	}
442 	return (NULL);
443 }
444 
/*
 * Parse numeric value from a string. Understands decimal, hex (leading
 * "0x" or "0X"), octal (leading "0") and an optional leading '-' (the
 * result is negated) or '~' (the result is complemented).
 *
 * On success 0 is returned and the value is stored in *retval. If the
 * string contains a character that isn't a digit of the selected radix,
 * -1 is returned and *retval is left as 0. A string without any digits
 * parses as 0 (before the '-' / '~' adjustment). Overflow is not detected.
 */
static int
parse_value(char *p, uint64_t *retval)
{
	int adjust = 0;
	uint64_t tmp = 0;
	int digit;
	int radix = 10;

	*retval = 0;
	if (*p == '-' || *p == '~')
		adjust = *p++;

	/*
	 * A leading zero selects octal, or hex if followed by 'x'. Only
	 * the prefix is skipped here; the character following a plain
	 * leading zero is the first octal digit and must not be dropped.
	 */
	if (*p == '0') {
		++p;
		if (*p == 'x' || *p == 'X') {
			radix = 16;
			++p;
		} else {
			radix = 8;
		}
	}
	while (*p) {
		if ('0' <= *p && *p <= '9')
			digit = *p - '0';
		else if ('a' <= *p && *p <= 'f')
			digit = 10 + *p - 'a';
		else if ('A' <= *p && *p <= 'F')
			digit = 10 + *p - 'A';
		else
			return (-1);
		if (digit >= radix)
			return (-1);
		tmp = tmp * radix + digit;
		++p;
	}

	/*
	 * Applied even when the digits were just "0", so "~0" is all ones.
	 */
	if (adjust == '-')
		tmp = -tmp;
	else if (adjust == '~')
		tmp = ~tmp;
	*retval = tmp;
	return (0);
}
493 
494 /*
495  * 2nd part of building the table of boot properties. This includes:
496  * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
497  *
498  * lines look like one of:
499  * ^$
500  * ^# comment till end of line
501  * setprop name 'value'
502  * setprop name value
503  * setprop name "value"
504  *
505  * we do single character I/O since this is really just looking at memory
506  */
507 void
508 boot_prop_finish(void)
509 {
510 	int fd;
511 	char *line;
512 	int c;
513 	int bytes_read;
514 	char *name;
515 	int n_len;
516 	char *value;
517 	int v_len;
518 	char *inputdev;	/* these override the command line if serial ports */
519 	char *outputdev;
520 	char *consoledev;
521 	uint64_t lvalue;
522 	int use_xencons = 0;
523 
524 #ifdef __xpv
525 	if (!DOMAIN_IS_INITDOMAIN(xen_info))
526 		use_xencons = 1;
527 #endif /* __xpv */
528 
529 	DBG_MSG("Opening /boot/solaris/bootenv.rc\n");
530 	fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0);
531 	DBG(fd);
532 
533 	line = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
534 	while (fd >= 0) {
535 
536 		/*
537 		 * get a line
538 		 */
539 		for (c = 0; ; ++c) {
540 			bytes_read = BRD_READ(bfs_ops, fd, line + c, 1);
541 			if (bytes_read == 0) {
542 				if (c == 0)
543 					goto done;
544 				break;
545 			}
546 			if (line[c] == '\n')
547 				break;
548 		}
549 		line[c] = 0;
550 
551 		/*
552 		 * ignore comment lines
553 		 */
554 		c = 0;
555 		while (ISSPACE(line[c]))
556 			++c;
557 		if (line[c] == '#' || line[c] == 0)
558 			continue;
559 
560 		/*
561 		 * must have "setprop " or "setprop\t"
562 		 */
563 		if (strncmp(line + c, "setprop ", 8) != 0 &&
564 		    strncmp(line + c, "setprop\t", 8) != 0)
565 			continue;
566 		c += 8;
567 		while (ISSPACE(line[c]))
568 			++c;
569 		if (line[c] == 0)
570 			continue;
571 
572 		/*
573 		 * gather up the property name
574 		 */
575 		name = line + c;
576 		n_len = 0;
577 		while (line[c] && !ISSPACE(line[c]))
578 			++n_len, ++c;
579 
580 		/*
581 		 * gather up the value, if any
582 		 */
583 		value = "";
584 		v_len = 0;
585 		while (ISSPACE(line[c]))
586 			++c;
587 		if (line[c] != 0) {
588 			value = line + c;
589 			while (line[c] && !ISSPACE(line[c]))
590 				++v_len, ++c;
591 		}
592 
593 		if (v_len >= 2 && value[0] == value[v_len - 1] &&
594 		    (value[0] == '\'' || value[0] == '"')) {
595 			++value;
596 			v_len -= 2;
597 		}
598 		name[n_len] = 0;
599 		if (v_len > 0)
600 			value[v_len] = 0;
601 		else
602 			continue;
603 
604 		/*
605 		 * ignore "boot-file" property, it's now meaningless
606 		 */
607 		if (strcmp(name, "boot-file") == 0)
608 			continue;
609 		if (strcmp(name, "boot-args") == 0 &&
610 		    strlen(boot_args) > 0)
611 			continue;
612 
613 		/*
614 		 * If a property was explicitly set on the command line
615 		 * it will override a setting in bootenv.rc
616 		 */
617 		if (do_bsys_getproplen(NULL, name) > 0)
618 			continue;
619 
620 		bsetprop(name, n_len, value, v_len + 1);
621 	}
622 done:
623 	if (fd >= 0)
624 		BRD_CLOSE(bfs_ops, fd);
625 
626 	/*
627 	 * Check if we have to limit the boot time allocator
628 	 */
629 	if (do_bsys_getproplen(NULL, "physmem") != -1 &&
630 	    do_bsys_getprop(NULL, "physmem", line) >= 0 &&
631 	    parse_value(line, &lvalue) != -1) {
632 		if (0 < lvalue && (lvalue < physmem || physmem == 0)) {
633 			physmem = (pgcnt_t)lvalue;
634 			DBG(physmem);
635 		}
636 	}
637 	early_allocation = 0;
638 
639 	/*
640 	 * check to see if we have to override the default value of the console
641 	 */
642 	if (!use_xencons) {
643 		inputdev = line;
644 		v_len = do_bsys_getproplen(NULL, "input-device");
645 		if (v_len > 0)
646 			(void) do_bsys_getprop(NULL, "input-device", inputdev);
647 		else
648 			v_len = 0;
649 		inputdev[v_len] = 0;
650 
651 		outputdev = inputdev + v_len + 1;
652 		v_len = do_bsys_getproplen(NULL, "output-device");
653 		if (v_len > 0)
654 			(void) do_bsys_getprop(NULL, "output-device",
655 			    outputdev);
656 		else
657 			v_len = 0;
658 		outputdev[v_len] = 0;
659 
660 		consoledev = outputdev + v_len + 1;
661 		v_len = do_bsys_getproplen(NULL, "console");
662 		if (v_len > 0)
663 			(void) do_bsys_getprop(NULL, "console", consoledev);
664 		else
665 			v_len = 0;
666 		consoledev[v_len] = 0;
667 		bcons_init2(inputdev, outputdev, consoledev);
668 	} else {
669 		/*
670 		 * Ensure console property exists
671 		 * If not create it as "hypervisor"
672 		 */
673 		v_len = do_bsys_getproplen(NULL, "console");
674 		if (v_len < 0)
675 			bsetprops("console", "hypervisor");
676 		inputdev = outputdev = consoledev = "hypervisor";
677 		bcons_init2(inputdev, outputdev, consoledev);
678 	}
679 
680 	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
681 		value = line;
682 		bop_printf(NULL, "\nBoot properties:\n");
683 		name = "";
684 		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
685 			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
686 			(void) do_bsys_getprop(NULL, name, value);
687 			v_len = do_bsys_getproplen(NULL, name);
688 			bop_printf(NULL, "len=%d ", v_len);
689 			value[v_len] = 0;
690 			bop_printf(NULL, "%s\n", value);
691 		}
692 	}
693 }
694 
695 /*
696  * print formatted output
697  */
698 /*PRINTFLIKE2*/
699 /*ARGSUSED*/
700 void
701 bop_printf(bootops_t *bop, char *fmt, ...)
702 {
703 	va_list	ap;
704 
705 	if (have_console == 0)
706 		return;
707 
708 	va_start(ap, fmt);
709 	(void) vsnprintf(buffer, BUFFERSIZE, fmt, ap);
710 	va_end(ap);
711 	PUT_STRING(buffer);
712 }
713 
714 /*
715  * Another panic() variant; this one can be used even earlier during boot than
716  * prom_panic().
717  */
718 /*PRINTFLIKE1*/
719 void
720 bop_panic(char *fmt, ...)
721 {
722 	va_list ap;
723 
724 	va_start(ap, fmt);
725 	bop_printf(NULL, fmt, ap);
726 	va_end(ap);
727 
728 	bop_printf(NULL, "\nPress any key to reboot.\n");
729 	(void) bcons_getchar();
730 	bop_printf(NULL, "Resetting...\n");
731 	pc_reset();
732 }
733 
/*
 * Do a real mode interrupt BIOS call
 *
 * bios_regs_t is the set of 16 bit register values that do_bsys_doint()
 * hands to, and gets back from, the BIOS call code it copies into low
 * memory. bios_func_t is the type of that code's entry point: it takes
 * the interrupt number and a pointer to the register values, and its
 * return value is stored by the caller as the resulting eflags.
 */
typedef struct bios_regs {
	unsigned short ax, bx, cx, dx, si, di, bp, es, ds;
} bios_regs_t;
typedef int (*bios_func_t)(int, bios_regs_t *);
741 
742 /*ARGSUSED*/
743 static void
744 do_bsys_doint(bootops_t *bop, int intnum, struct bop_regs *rp)
745 {
746 #if defined(__xpv)
747 	prom_panic("unsupported call to BOP_DOINT()\n");
748 #else	/* __xpv */
749 	static int firsttime = 1;
750 	bios_func_t bios_func = (bios_func_t)(void *)(uintptr_t)0x5000;
751 	bios_regs_t br;
752 
753 	/*
754 	 * The first time we do this, we have to copy the pre-packaged
755 	 * low memory bios call code image into place.
756 	 */
757 	if (firsttime) {
758 		extern char bios_image[];
759 		extern uint32_t bios_size;
760 
761 		bcopy(bios_image, (void *)bios_func, bios_size);
762 		firsttime = 0;
763 	}
764 
765 	br.ax = rp->eax.word.ax;
766 	br.bx = rp->ebx.word.bx;
767 	br.cx = rp->ecx.word.cx;
768 	br.dx = rp->edx.word.dx;
769 	br.bp = rp->ebp.word.bp;
770 	br.si = rp->esi.word.si;
771 	br.di = rp->edi.word.di;
772 	br.ds = rp->ds;
773 	br.es = rp->es;
774 
775 	DBG_MSG("Doing BIOS call...");
776 	rp->eflags = bios_func(intnum, &br);
777 	DBG_MSG("done\n");
778 
779 	rp->eax.word.ax = br.ax;
780 	rp->ebx.word.bx = br.bx;
781 	rp->ecx.word.cx = br.cx;
782 	rp->edx.word.dx = br.dx;
783 	rp->ebp.word.bp = br.bp;
784 	rp->esi.word.si = br.si;
785 	rp->edi.word.di = br.di;
786 	rp->ds = br.ds;
787 	rp->es = br.es;
788 #endif /* __xpv */
789 }
790 
/*
 * Console entry points, all provided by the boot console (bcons) code.
 */
static struct boot_syscalls bop_sysp = {
	bcons_getchar,
	bcons_putchar,
	bcons_ischar,
};

/*
 * Name of the booted file; set by build_boot_properties() with any path
 * in front of "/platform/" removed.
 */
static char *whoami;

/* size of the buffer used for formatting IP / MAC address strings */
#define	BUFLEN	64
800 
801 #if defined(__xpv)
802 
/* holds "xpv-" prefixed property names built by build_boot_properties() */
static char namebuf[32];
804 
/*
 * Split the ':' separated fields of s and set each non-empty field as the
 * string property named by the matching prop_map[] entry. A NULL entry in
 * prop_map[] means the field at that position is skipped. At most n_prop
 * fields are looked at, and parsing stops at the end of the string, so
 * trailing fields may be left out of s.
 *
 * s is modified: the ':' ending each field that is stored gets replaced
 * by a zero byte.
 */
static void
xen_parse_props(char *s, char *prop_map[], int n_prop)
{
	char **prop_name = prop_map;
	char *cp = s, *scp;
	int at_end;

	do {
		scp = cp;
		while ((*cp != '\0') && (*cp != ':'))
			cp++;

		/*
		 * Remember whether this field ended the string before the
		 * separator is (possibly) overwritten below.
		 */
		at_end = (*cp == '\0');

		if ((scp != cp) && (*prop_name != NULL)) {
			*cp = '\0';
			bsetprops(*prop_name, scp);
		}

		/*
		 * Don't step over the string's terminator.
		 */
		if (at_end)
			break;

		cp++;
		prop_name++;
		n_prop--;
	} while (n_prop > 0);
}
826 
827 #define	VBDPATHLEN	64
828 
829 /*
830  * parse the 'xpv-root' property to create properties used by
831  * ufs_mountroot.
832  */
833 static void
834 xen_vbdroot_props(char *s)
835 {
836 	char vbdpath[VBDPATHLEN] = "/xpvd/xdf@";
837 	const char lnamefix[] = "/dev/dsk/c0d";
838 	char *pnp;
839 	char *prop_p;
840 	char mi;
841 	short minor;
842 	long addr = 0;
843 
844 	pnp = vbdpath + strlen(vbdpath);
845 	prop_p = s + strlen(lnamefix);
846 	while ((*prop_p != '\0') && (*prop_p != 's') && (*prop_p != 'p'))
847 		addr = addr * 10 + *prop_p++ - '0';
848 	(void) snprintf(pnp, VBDPATHLEN, "%lx", addr);
849 	pnp = vbdpath + strlen(vbdpath);
850 	if (*prop_p == 's')
851 		mi = 'a';
852 	else if (*prop_p == 'p')
853 		mi = 'q';
854 	else
855 		ASSERT(0); /* shouldn't be here */
856 	prop_p++;
857 	ASSERT(*prop_p != '\0');
858 	if (ISDIGIT(*prop_p)) {
859 		minor = *prop_p - '0';
860 		prop_p++;
861 		if (ISDIGIT(*prop_p)) {
862 			minor = minor * 10 + *prop_p - '0';
863 		}
864 	} else {
865 		/* malformed root path, use 0 as default */
866 		minor = 0;
867 	}
868 	ASSERT(minor < 16); /* at most 16 partitions */
869 	mi += minor;
870 	*pnp++ = ':';
871 	*pnp++ = mi;
872 	*pnp++ = '\0';
873 	bsetprops("fstype", "ufs");
874 	bsetprops("bootpath", vbdpath);
875 
876 	DBG_MSG("VBD bootpath set to ");
877 	DBG_MSG(vbdpath);
878 	DBG_MSG("\n");
879 }
880 
881 /*
882  * parse the xpv-nfsroot property to create properties used by
883  * nfs_mountroot.
884  */
885 static void
886 xen_nfsroot_props(char *s)
887 {
888 	char *prop_map[] = {
889 		BP_SERVER_IP,	/* server IP address */
890 		BP_SERVER_NAME,	/* server hostname */
891 		BP_SERVER_PATH,	/* root path */
892 	};
893 	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
894 
895 	bsetprop("fstype", 6, "nfsdyn", 7);
896 
897 	xen_parse_props(s, prop_map, n_prop);
898 
899 	/*
900 	 * If a server name wasn't specified, use a default.
901 	 */
902 	if (do_bsys_getproplen(NULL, BP_SERVER_NAME) == -1)
903 		bsetprops(BP_SERVER_NAME, "unknown");
904 }
905 
906 /*
907  * Extract our IP address, etc. from the "xpv-ip" property.
908  */
909 static void
910 xen_ip_props(char *s)
911 {
912 	char *prop_map[] = {
913 		BP_HOST_IP,		/* IP address */
914 		NULL,			/* NFS server IP address (ignored in */
915 					/* favour of xpv-nfsroot) */
916 		BP_ROUTER_IP,		/* IP gateway */
917 		BP_SUBNET_MASK,		/* IP subnet mask */
918 		"xpv-hostname",		/* hostname (ignored) */
919 		BP_NETWORK_INTERFACE,	/* interface name */
920 		"xpv-hcp",		/* host configuration protocol */
921 	};
922 	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
923 	char ifname[IFNAMSIZ];
924 
925 	xen_parse_props(s, prop_map, n_prop);
926 
927 	/*
928 	 * A Linux dom0 administrator expects all interfaces to be
929 	 * called "ethX", which is not the case here.
930 	 *
931 	 * If the interface name specified is "eth0", presume that
932 	 * this is really intended to be "xnf0" (the first domU ->
933 	 * dom0 interface for this domain).
934 	 */
935 	if ((do_bsys_getprop(NULL, BP_NETWORK_INTERFACE, ifname) == 0) &&
936 	    (strcmp("eth0", ifname) == 0)) {
937 		bsetprops(BP_NETWORK_INTERFACE, "xnf0");
938 		bop_printf(NULL,
939 		    "network interface name 'eth0' replaced with 'xnf0'\n");
940 	}
941 }
942 
943 #else	/* __xpv */
944 
945 static void
946 setup_rarp_props(struct sol_netinfo *sip)
947 {
948 	char buf[BUFLEN];	/* to hold ip/mac addrs */
949 	uint8_t *val;
950 
951 	val = (uint8_t *)&sip->sn_ciaddr;
952 	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
953 	    val[0], val[1], val[2], val[3]);
954 	bsetprops(BP_HOST_IP, buf);
955 
956 	val = (uint8_t *)&sip->sn_siaddr;
957 	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
958 	    val[0], val[1], val[2], val[3]);
959 	bsetprops(BP_SERVER_IP, buf);
960 
961 	if (sip->sn_giaddr != 0) {
962 		val = (uint8_t *)&sip->sn_giaddr;
963 		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
964 		    val[0], val[1], val[2], val[3]);
965 		bsetprops(BP_ROUTER_IP, buf);
966 	}
967 
968 	if (sip->sn_netmask != 0) {
969 		val = (uint8_t *)&sip->sn_netmask;
970 		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
971 		    val[0], val[1], val[2], val[3]);
972 		bsetprops(BP_SUBNET_MASK, buf);
973 	}
974 
975 	if (sip->sn_mactype != 4 || sip->sn_maclen != 6) {
976 		bop_printf(NULL, "unsupported mac type %d, mac len %d\n",
977 		    sip->sn_mactype, sip->sn_maclen);
978 	} else {
979 		val = sip->sn_macaddr;
980 		(void) snprintf(buf, BUFLEN, "%x:%x:%x:%x:%x:%x",
981 		    val[0], val[1], val[2], val[3], val[4], val[5]);
982 		bsetprops(BP_BOOT_MAC, buf);
983 	}
984 }
985 
986 #endif	/* __xpv */
987 
988 /*
989  * 1st pass at building the table of boot properties. This includes:
990  * - values set on the command line: -B a=x,b=y,c=z ....
991  * - known values we just compute (ie. from xbootp)
992  * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
993  *
994  * the grub command line looked like:
995  * kernel boot-file [-B prop=value[,prop=value]...] [boot-args]
996  *
997  * whoami is the same as boot-file
998  */
999 static void
1000 build_boot_properties(void)
1001 {
1002 	char *name;
1003 	int name_len;
1004 	char *value;
1005 	int value_len;
1006 	struct boot_modules *bm;
1007 	char *propbuf;
1008 	int quoted = 0;
1009 	int boot_arg_len;
1010 #ifndef __xpv
1011 	static int stdout_val = 0;
1012 	uchar_t boot_device;
1013 	char str[3];
1014 	multiboot_info_t *mbi;
1015 	int netboot;
1016 	struct sol_netinfo *sip;
1017 #endif
1018 
1019 	/*
1020 	 * These have to be done first, so that kobj_mount_root() works
1021 	 */
1022 	DBG_MSG("Building boot properties\n");
1023 	propbuf = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, 0);
1024 	DBG((uintptr_t)propbuf);
1025 	if (xbootp->bi_module_cnt > 0) {
1026 		bm = xbootp->bi_modules;
1027 		bsetprop64("ramdisk_start", (uint64_t)(uintptr_t)bm->bm_addr);
1028 		bsetprop64("ramdisk_end", (uint64_t)(uintptr_t)bm->bm_addr +
1029 		    bm->bm_size);
1030 	}
1031 
1032 	DBG_MSG("Parsing command line for boot properties\n");
1033 	value = xbootp->bi_cmdline;
1034 
1035 	/*
1036 	 * allocate memory to collect boot_args into
1037 	 */
1038 	boot_arg_len = strlen(xbootp->bi_cmdline) + 1;
1039 	boot_args = do_bsys_alloc(NULL, NULL, boot_arg_len, MMU_PAGESIZE);
1040 	boot_args[0] = 0;
1041 	boot_arg_len = 0;
1042 
1043 #ifdef __xpv
1044 	/*
1045 	 * Xen puts a lot of device information in front of the kernel name
1046 	 * let's grab them and make them boot properties.  The first
1047 	 * string w/o an "=" in it will be the boot-file property.
1048 	 */
1049 	(void) strcpy(namebuf, "xpv-");
1050 	for (;;) {
1051 		/*
1052 		 * get to next property
1053 		 */
1054 		while (ISSPACE(*value))
1055 			++value;
1056 		name = value;
1057 		/*
1058 		 * look for an "="
1059 		 */
1060 		while (*value && !ISSPACE(*value) && *value != '=') {
1061 			value++;
1062 		}
1063 		if (*value != '=') { /* no "=" in the property */
1064 			value = name;
1065 			break;
1066 		}
1067 		name_len = value - name;
1068 		value_len = 0;
1069 		/*
1070 		 * skip over the "="
1071 		 */
1072 		value++;
1073 		while (value[value_len] && !ISSPACE(value[value_len])) {
1074 			++value_len;
1075 		}
1076 		/*
1077 		 * build property name with "xpv-" prefix
1078 		 */
1079 		if (name_len + 4 > 32) { /* skip if name too long */
1080 			value += value_len;
1081 			continue;
1082 		}
1083 		bcopy(name, &namebuf[4], name_len);
1084 		name_len += 4;
1085 		namebuf[name_len] = 0;
1086 		bcopy(value, propbuf, value_len);
1087 		propbuf[value_len] = 0;
1088 		bsetprops(namebuf, propbuf);
1089 
1090 		/*
1091 		 * xpv-root is set to the logical disk name of the xen
1092 		 * VBD when booting from a disk-based filesystem.
1093 		 */
1094 		if (strcmp(namebuf, "xpv-root") == 0)
1095 			xen_vbdroot_props(propbuf);
1096 		/*
1097 		 * While we're here, if we have a "xpv-nfsroot" property
1098 		 * then we need to set "fstype" to "nfsdyn" so we mount
1099 		 * our root from the nfs server.  Also parse the xpv-nfsroot
1100 		 * property to create the properties that nfs_mountroot will
1101 		 * need to find the root and mount it.
1102 		 */
1103 		if (strcmp(namebuf, "xpv-nfsroot") == 0)
1104 			xen_nfsroot_props(propbuf);
1105 
1106 		if (strcmp(namebuf, "xpv-ip") == 0)
1107 			xen_ip_props(propbuf);
1108 		value += value_len;
1109 	}
1110 #endif
1111 
1112 	while (ISSPACE(*value))
1113 		++value;
1114 	/*
1115 	 * value now points at the boot-file
1116 	 */
1117 	value_len = 0;
1118 	while (value[value_len] && !ISSPACE(value[value_len]))
1119 		++value_len;
1120 	if (value_len > 0) {
1121 		whoami = propbuf;
1122 		bcopy(value, whoami, value_len);
1123 		whoami[value_len] = 0;
1124 		bsetprops("boot-file", whoami);
1125 		/*
1126 		 * strip leading path stuff from whoami, so running from
1127 		 * PXE/miniroot makes sense.
1128 		 */
1129 		if (strstr(whoami, "/platform/") != NULL)
1130 			whoami = strstr(whoami, "/platform/");
1131 		bsetprops("whoami", whoami);
1132 	}
1133 
1134 	/*
1135 	 * Values forcibly set boot properties on the command line via -B.
1136 	 * Allow use of quotes in values. Other stuff goes on kernel
1137 	 * command line.
1138 	 */
1139 	name = value + value_len;
1140 	while (*name != 0) {
1141 		/*
1142 		 * anything not " -B" is copied to the command line
1143 		 */
1144 		if (!ISSPACE(name[0]) || name[1] != '-' || name[2] != 'B') {
1145 			boot_args[boot_arg_len++] = *name;
1146 			boot_args[boot_arg_len] = 0;
1147 			++name;
1148 			continue;
1149 		}
1150 
1151 		/*
1152 		 * skip the " -B" and following white space
1153 		 */
1154 		name += 3;
1155 		while (ISSPACE(*name))
1156 			++name;
1157 		while (*name && !ISSPACE(*name)) {
1158 			value = strstr(name, "=");
1159 			if (value == NULL)
1160 				break;
1161 			name_len = value - name;
1162 			++value;
1163 			value_len = 0;
1164 			quoted = 0;
1165 			for (; ; ++value_len) {
1166 				if (!value[value_len])
1167 					break;
1168 
1169 				/*
1170 				 * is this value quoted?
1171 				 */
1172 				if (value_len == 0 &&
1173 				    (value[0] == '\'' || value[0] == '"')) {
1174 					quoted = value[0];
1175 					++value_len;
1176 				}
1177 
1178 				/*
1179 				 * In the quote accept any character,
1180 				 * but look for ending quote.
1181 				 */
1182 				if (quoted) {
1183 					if (value[value_len] == quoted)
1184 						quoted = 0;
1185 					continue;
1186 				}
1187 
1188 				/*
1189 				 * a comma or white space ends the value
1190 				 */
1191 				if (value[value_len] == ',' ||
1192 				    ISSPACE(value[value_len]))
1193 					break;
1194 			}
1195 
1196 			if (value_len == 0) {
1197 				bsetprop(name, name_len, "true", 5);
1198 			} else {
1199 				char *v = value;
1200 				int l = value_len;
1201 				if (v[0] == v[l - 1] &&
1202 				    (v[0] == '\'' || v[0] == '"')) {
1203 					++v;
1204 					l -= 2;
1205 				}
1206 				bcopy(v, propbuf, l);
1207 				propbuf[l] = '\0';
1208 				bsetprop(name, name_len, propbuf,
1209 				    l + 1);
1210 			}
1211 			name = value + value_len;
1212 			while (*name == ',')
1213 				++name;
1214 		}
1215 	}
1216 
1217 	/*
1218 	 * set boot-args property
1219 	 */
1220 	bsetprops("boot-args", boot_args);
1221 
1222 #ifndef __xpv
1223 	/*
1224 	 * set the BIOS boot device from GRUB
1225 	 */
1226 	netboot = 0;
1227 	mbi = xbootp->bi_mb_info;
1228 	if (mbi != NULL && mbi->flags & 0x2) {
1229 		boot_device = mbi->boot_device >> 24;
1230 		if (boot_device == 0x20)
1231 			netboot++;
1232 		str[0] = (boot_device >> 4) + '0';
1233 		str[1] = (boot_device & 0xf) + '0';
1234 		str[2] = 0;
1235 		bsetprops("bios-boot-device", str);
1236 	} else {
1237 		netboot = 1;
1238 	}
1239 
1240 	/*
1241 	 * In the netboot case, drives_info is overloaded with the dhcp ack.
1242 	 * This is not multiboot compliant and requires special pxegrub!
1243 	 */
1244 	if (netboot && mbi->drives_length != 0) {
1245 		sip = (struct sol_netinfo *)(uintptr_t)mbi->drives_addr;
1246 		if (sip->sn_infotype == SN_TYPE_BOOTP)
1247 			bsetprop("bootp-response", sizeof ("bootp-response"),
1248 			    (void *)(uintptr_t)mbi->drives_addr,
1249 			    mbi->drives_length);
1250 		else if (sip->sn_infotype == SN_TYPE_BOOTP)
1251 			setup_rarp_props(sip);
1252 	}
1253 	bsetprop("stdout", strlen("stdout"),
1254 	    &stdout_val, sizeof (stdout_val));
1255 #endif /* __xpv */
1256 
1257 	/*
1258 	 * more conjured up values for made up things....
1259 	 */
1260 #if defined(__xpv)
1261 	bsetprops("mfg-name", "i86xpv");
1262 	bsetprops("impl-arch-name", "i86xpv");
1263 #else
1264 	bsetprops("mfg-name", "i86pc");
1265 	bsetprops("impl-arch-name", "i86pc");
1266 #endif
1267 
1268 	/*
1269 	 * Build firmware-provided system properties
1270 	 */
1271 	build_firmware_properties();
1272 
1273 	/*
1274 	 * XXPV
1275 	 *
1276 	 * Find out what these are:
1277 	 * - cpuid_feature_ecx_include
1278 	 * - cpuid_feature_ecx_exclude
1279 	 * - cpuid_feature_edx_include
1280 	 * - cpuid_feature_edx_exclude
1281 	 *
1282 	 * Find out what these are in multiboot:
1283 	 * - bootp-response
1284 	 * - netdev-path
1285 	 * - fstype
1286 	 */
1287 }
1288 
1289 #ifdef __xpv
1290 /*
1291  * Under the Hypervisor, memory usable for DMA may be scarce. One
1292  * very likely large pool of DMA friendly memory is occupied by
1293  * the boot_archive, as it was loaded by grub into low MFNs.
1294  *
1295  * Here we free up that memory by copying the boot archive to what are
1296  * likely higher MFN pages and then swapping the mfn/pfn mappings.
1297  */
1298 #define	PFN_2GIG	0x80000
1299 static void
1300 relocate_boot_archive(void)
1301 {
1302 	mfn_t max_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
1303 	struct boot_modules *bm = xbootp->bi_modules;
1304 	uintptr_t va;
1305 	pfn_t va_pfn;
1306 	mfn_t va_mfn;
1307 	caddr_t copy;
1308 	pfn_t copy_pfn;
1309 	mfn_t copy_mfn;
1310 	size_t	len;
1311 	int slop;
1312 	int total = 0;
1313 	int relocated = 0;
1314 	int mmu_update_return;
1315 	mmu_update_t t[2];
1316 	x86pte_t pte;
1317 
1318 	/*
1319 	 * If all MFN's are below 2Gig, don't bother doing this.
1320 	 */
1321 	if (max_mfn < PFN_2GIG)
1322 		return;
1323 	if (xbootp->bi_module_cnt < 1) {
1324 		DBG_MSG("no boot_archive!");
1325 		return;
1326 	}
1327 
1328 	DBG_MSG("moving boot_archive to high MFN memory\n");
1329 	va = (uintptr_t)bm->bm_addr;
1330 	len = bm->bm_size;
1331 	slop = va & MMU_PAGEOFFSET;
1332 	if (slop) {
1333 		va += MMU_PAGESIZE - slop;
1334 		len -= MMU_PAGESIZE - slop;
1335 	}
1336 	len = P2ALIGN(len, MMU_PAGESIZE);
1337 
1338 	/*
1339 	 * Go through all boot_archive pages, swapping any low MFN pages
1340 	 * with memory at next_phys.
1341 	 */
1342 	while (len != 0) {
1343 		++total;
1344 		va_pfn = mmu_btop(va - ONE_GIG);
1345 		va_mfn = mfn_list[va_pfn];
1346 		if (mfn_list[va_pfn] < PFN_2GIG) {
1347 			copy = kbm_remap_window(next_phys, 1);
1348 			bcopy((void *)va, copy, MMU_PAGESIZE);
1349 			copy_pfn = mmu_btop(next_phys);
1350 			copy_mfn = mfn_list[copy_pfn];
1351 
1352 			pte = mfn_to_ma(copy_mfn) | PT_NOCONSIST | PT_VALID;
1353 			if (HYPERVISOR_update_va_mapping(va, pte,
1354 			    UVMF_INVLPG | UVMF_LOCAL))
1355 				bop_panic("relocate_boot_archive():  "
1356 				    "HYPERVISOR_update_va_mapping() failed");
1357 
1358 			mfn_list[va_pfn] = copy_mfn;
1359 			mfn_list[copy_pfn] = va_mfn;
1360 
1361 			t[0].ptr = mfn_to_ma(copy_mfn) | MMU_MACHPHYS_UPDATE;
1362 			t[0].val = va_pfn;
1363 			t[1].ptr = mfn_to_ma(va_mfn) | MMU_MACHPHYS_UPDATE;
1364 			t[1].val = copy_pfn;
1365 			if (HYPERVISOR_mmu_update(t, 2, &mmu_update_return,
1366 			    DOMID_SELF) != 0 || mmu_update_return != 2)
1367 				bop_panic("relocate_boot_archive():  "
1368 				    "HYPERVISOR_mmu_update() failed");
1369 
1370 			next_phys += MMU_PAGESIZE;
1371 			++relocated;
1372 		}
1373 		len -= MMU_PAGESIZE;
1374 		va += MMU_PAGESIZE;
1375 	}
1376 	DBG_MSG("Relocated pages:\n");
1377 	DBG(relocated);
1378 	DBG_MSG("Out of total pages:\n");
1379 	DBG(total);
1380 }
1381 #endif /* __xpv */
1382 
1383 #if !defined(__xpv)
1384 /*
1385  * Install a temporary IDT that lets us catch errors in the boot time code.
1386  * We shouldn't get any faults at all while this is installed, so we'll
1387  * just generate a traceback and exit.
1388  */
1389 #ifdef __amd64
1390 static const int bcode_sel = B64CODE_SEL;
1391 #else
1392 static const int bcode_sel = B32CODE_SEL;
1393 #endif
1394 
1395 /*
1396  * simple description of a stack frame (args are 32 bit only currently)
1397  */
1398 typedef struct bop_frame {
1399 	struct bop_frame *old_frame;
1400 	pc_t retaddr;
1401 	long arg[1];
1402 } bop_frame_t;
1403 
1404 void
1405 bop_traceback(bop_frame_t *frame)
1406 {
1407 	pc_t pc;
1408 	int cnt;
1409 	int a;
1410 	char *ksym;
1411 	ulong_t off;
1412 
1413 	bop_printf(NULL, "Stack traceback:\n");
1414 	for (cnt = 0; cnt < 30; ++cnt) {	/* up to 30 frames */
1415 		pc = frame->retaddr;
1416 		if (pc == 0)
1417 			break;
1418 		ksym = kobj_getsymname(pc, &off);
1419 		if (ksym)
1420 			bop_printf(NULL, "  %s+%lx", ksym, off);
1421 		else
1422 			bop_printf(NULL, "  0x%lx", pc);
1423 
1424 		frame = frame->old_frame;
1425 		if (frame == 0) {
1426 			bop_printf(NULL, "\n");
1427 			break;
1428 		}
1429 		for (a = 0; a < 6; ++a) {	/* try for 6 args */
1430 #if defined(__i386)
1431 			if ((void *)&frame->arg[a] == (void *)frame->old_frame)
1432 				break;
1433 			if (a == 0)
1434 				bop_printf(NULL, "(");
1435 			else
1436 				bop_printf(NULL, ",");
1437 			bop_printf(NULL, "0x%lx", frame->arg[a]);
1438 #endif
1439 		}
1440 		bop_printf(NULL, ")\n");
1441 	}
1442 }
1443 
1444 struct trapframe {
1445 	ulong_t frame_ptr;	/* %[er]bp pushed by our code */
1446 	ulong_t error_code;	/* optional */
1447 	ulong_t inst_ptr;
1448 	ulong_t code_seg;
1449 	ulong_t flags_reg;
1450 #ifdef __amd64
1451 	ulong_t stk_ptr;
1452 	ulong_t stk_seg;
1453 #endif
1454 };
1455 
1456 void
1457 bop_trap(struct trapframe *tf)
1458 {
1459 	bop_frame_t fakeframe;
1460 	static int depth = 0;
1461 
1462 	/*
1463 	 * Check for an infinite loop of traps.
1464 	 */
1465 	if (++depth > 2)
1466 		bop_panic("Nested trap");
1467 
1468 	/*
1469 	 * adjust the tf for optional error_code by detecting the code selector
1470 	 */
1471 	if (tf->code_seg != bcode_sel)
1472 		tf = (struct trapframe *)((uintptr_t)tf - sizeof (ulong_t));
1473 
1474 	bop_printf(NULL, "Unexpected trap\n");
1475 	bop_printf(NULL, "instruction pointer  0x%lx\n", tf->inst_ptr);
1476 	bop_printf(NULL, "error code, optional 0x%lx\n",
1477 	    tf->error_code & 0xffffffff);
1478 	bop_printf(NULL, "code segment         0x%lx\n", tf->code_seg & 0xffff);
1479 	bop_printf(NULL, "flags register       0x%lx\n", tf->flags_reg);
1480 #ifdef __amd64
1481 	bop_printf(NULL, "return %%rsp         0x%lx\n", tf->stk_ptr);
1482 	bop_printf(NULL, "return %%ss          0x%lx\n", tf->stk_seg & 0xffff);
1483 #endif
1484 	fakeframe.old_frame = (bop_frame_t *)tf->frame_ptr;
1485 	fakeframe.retaddr = (pc_t)tf->inst_ptr;
1486 	bop_printf(NULL, "Attempting stack backtrace:\n");
1487 	bop_traceback(&fakeframe);
1488 	bop_panic("unexpected trap in early boot");
1489 }
1490 
1491 extern void bop_trap_handler(void);
1492 
1493 static gate_desc_t bop_idt[NIDT];
1494 
1495 static desctbr_t bop_idt_info;
1496 
1497 static void
1498 bop_idt_init(void)
1499 {
1500 	int t;
1501 
1502 	bzero(&bop_idt, sizeof (bop_idt));
1503 	for (t = 0; t < NIDT; ++t) {
1504 		set_gatesegd(&bop_idt[t], &bop_trap_handler, bcode_sel,
1505 		    SDT_SYSIGT, TRP_KPL);
1506 	}
1507 	bop_idt_info.dtr_limit = sizeof (bop_idt) - 1;
1508 	bop_idt_info.dtr_base = (uintptr_t)&bop_idt;
1509 	wr_idtr(&bop_idt_info);
1510 }
1511 #endif
1512 
1513 /*
1514  * This is where we enter the kernel. It dummies up the boot_ops and
1515  * boot_syscalls vectors and jumps off to _kobj_boot()
1516  */
1517 void
1518 _start(struct xboot_info *xbp)
1519 {
1520 	bootops_t *bops = &bootop;
1521 	extern void _kobj_boot();
1522 
1523 	/*
1524 	 * 1st off - initialize the console for any error messages
1525 	 */
1526 	xbootp = xbp;
1527 #ifdef __xpv
1528 	HYPERVISOR_shared_info = (void *)xbootp->bi_shared_info;
1529 	xen_info = xbootp->bi_xen_start_info;
1530 #endif
1531 	bcons_init((void *)xbootp->bi_cmdline);
1532 	have_console = 1;
1533 
1534 	/*
1535 	 * enable debugging
1536 	 */
1537 	if (strstr((char *)xbootp->bi_cmdline, "kbm_debug"))
1538 		kbm_debug = 1;
1539 
1540 	DBG_MSG("\n\n*** Entered Solaris in _start() cmdline is: ");
1541 	DBG_MSG((char *)xbootp->bi_cmdline);
1542 	DBG_MSG("\n\n\n");
1543 
1544 #ifndef __xpv
1545 	/*
1546 	 * Install an IDT to catch early pagefaults (shouldn't have any).
1547 	 * Also needed for kmdb.
1548 	 */
1549 	bop_idt_init();
1550 #endif
1551 
1552 	/*
1553 	 * physavail is no longer used by startup
1554 	 */
1555 	bm.physinstalled = xbp->bi_phys_install;
1556 	bm.pcimem = xbp->bi_pcimem;
1557 	bm.physavail = NULL;
1558 
1559 	/*
1560 	 * initialize the boot time allocator
1561 	 */
1562 	next_phys = xbootp->bi_next_paddr;
1563 	DBG(next_phys);
1564 	next_virt = (uintptr_t)xbootp->bi_next_vaddr;
1565 	DBG(next_virt);
1566 	DBG_MSG("Initializing boot time memory management...");
1567 #ifdef __xpv
1568 	{
1569 		xen_platform_parameters_t p;
1570 
1571 		/* This call shouldn't fail, dboot already did it once. */
1572 		(void) HYPERVISOR_xen_version(XENVER_platform_parameters, &p);
1573 		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
1574 		DBG(xen_virt_start);
1575 	}
1576 #endif
1577 	kbm_init(xbootp);
1578 	DBG_MSG("done\n");
1579 
1580 	/*
1581 	 * Fill in the bootops vector
1582 	 */
1583 	bops->bsys_version = BO_VERSION;
1584 	bops->boot_mem = &bm;
1585 	bops->bsys_alloc = do_bsys_alloc;
1586 	bops->bsys_free = do_bsys_free;
1587 	bops->bsys_getproplen = do_bsys_getproplen;
1588 	bops->bsys_getprop = do_bsys_getprop;
1589 	bops->bsys_nextprop = do_bsys_nextprop;
1590 	bops->bsys_printf = bop_printf;
1591 	bops->bsys_doint = do_bsys_doint;
1592 
1593 	/*
1594 	 * BOP_EALLOC() is no longer needed
1595 	 */
1596 	bops->bsys_ealloc = do_bsys_ealloc;
1597 
1598 #ifdef __xpv
1599 	/*
1600 	 * On domain 0 we need to free up some physical memory that is
1601 	 * usable for DMA. Since GRUB loaded the boot_archive, it is
1602 	 * sitting in low MFN memory. We'll relocated the boot archive
1603 	 * pages to high PFN memory.
1604 	 */
1605 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1606 		relocate_boot_archive();
1607 #endif
1608 
1609 	/*
1610 	 *
1611 	 */
1612 	DBG_MSG("Initializing boot properties:\n");
1613 	build_boot_properties();
1614 
1615 	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
1616 		char *name;
1617 		char *value;
1618 		char *cp;
1619 		int len;
1620 
1621 		value = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1622 		bop_printf(NULL, "\nBoot properties:\n");
1623 		name = "";
1624 		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
1625 			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
1626 			(void) do_bsys_getprop(NULL, name, value);
1627 			len = do_bsys_getproplen(NULL, name);
1628 			bop_printf(NULL, "len=%d ", len);
1629 			value[len] = 0;
1630 			for (cp = value; *cp; ++cp) {
1631 				if (' ' <= *cp && *cp <= '~')
1632 					bop_printf(NULL, "%c", *cp);
1633 				else
1634 					bop_printf(NULL, "-0x%x-", *cp);
1635 			}
1636 			bop_printf(NULL, "\n");
1637 		}
1638 	}
1639 
1640 	/*
1641 	 * jump into krtld...
1642 	 */
1643 	_kobj_boot(&bop_sysp, NULL, bops, NULL);
1644 }
1645 
1646 
1647 /*ARGSUSED*/
1648 static caddr_t
1649 no_more_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
1650 {
1651 	panic("Attempt to bsys_alloc() too late\n");
1652 	return (NULL);
1653 }
1654 
1655 /*ARGSUSED*/
1656 static void
1657 no_more_free(bootops_t *bop, caddr_t virt, size_t size)
1658 {
1659 	panic("Attempt to bsys_free() too late\n");
1660 }
1661 
1662 void
1663 bop_no_more_mem(void)
1664 {
1665 	DBG(total_bop_alloc_scratch);
1666 	DBG(total_bop_alloc_kernel);
1667 	bootops->bsys_alloc = no_more_alloc;
1668 	bootops->bsys_free = no_more_free;
1669 }
1670 
1671 
1672 #ifndef __xpv
1673 /*
1674  * Set ACPI firmware properties
1675  */
1676 
1677 static caddr_t
1678 vmap_phys(size_t length, paddr_t pa)
1679 {
1680 	paddr_t	start, end;
1681 	caddr_t	va;
1682 	size_t	len, page;
1683 
1684 	start = P2ALIGN(pa, MMU_PAGESIZE);
1685 	end = P2ROUNDUP(pa + length, MMU_PAGESIZE);
1686 	len = end - start;
1687 	va = (caddr_t)alloc_vaddr(len, MMU_PAGESIZE);
1688 	for (page = 0; page < len; page += MMU_PAGESIZE)
1689 		kbm_map((uintptr_t)va + page, start + page, 0, 0);
1690 	return (va + (pa & MMU_PAGEOFFSET));
1691 }
1692 
/*
 * Return the sum, modulo 256, of the "len" bytes starting at "tp".
 * A result of 0 is what callers treat as a valid checksum.
 */
static uint8_t
checksum_table(uint8_t *tp, size_t len)
{
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total += tp[i];

	return (total);
}
1703 
1704 static int
1705 valid_rsdp(struct rsdp *rp)
1706 {
1707 
1708 	/* validate the V1.x checksum */
1709 	if (checksum_table((uint8_t *)&rp->v1, sizeof (struct rsdp_v1)) != 0)
1710 		return (0);
1711 
1712 	/* If pre-ACPI 2.0, this is a valid RSDP */
1713 	if (rp->v1.revision < 2)
1714 		return (1);
1715 
1716 	/* validate the V2.x checksum */
1717 	if (checksum_table((uint8_t *)rp, sizeof (struct rsdp)) != 0)
1718 		return (0);
1719 
1720 	return (1);
1721 }
1722 
1723 /*
1724  * Scan memory range for an RSDP;
1725  * see ACPI 3.0 Spec, 5.2.5.1
1726  */
1727 static struct rsdp *
1728 scan_rsdp(paddr_t start, paddr_t end)
1729 {
1730 	size_t len  = end - start + 1;
1731 	caddr_t ptr;
1732 
1733 	ptr = vmap_phys(len, start);
1734 	while (len > 0) {
1735 		if (strncmp(ptr, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN) == 0)
1736 			if (valid_rsdp((struct rsdp *)ptr))
1737 				return ((struct rsdp *)ptr);
1738 		ptr += 16;
1739 		len -= 16;
1740 	}
1741 
1742 	return (NULL);
1743 }
1744 
1745 /*
1746  * Refer to ACPI 3.0 Spec, section 5.2.5.1 to understand this function
1747  */
1748 static struct rsdp *
1749 find_rsdp() {
1750 	struct rsdp *rsdp;
1751 	uint16_t *ebda_seg;
1752 	paddr_t  ebda_addr;
1753 
1754 	/*
1755 	 * Get the EBDA segment and scan the first 1K
1756 	 */
1757 	ebda_seg = (uint16_t *)vmap_phys(sizeof (uint16_t), ACPI_EBDA_SEG_ADDR);
1758 	ebda_addr = *ebda_seg << 4;
1759 	rsdp = scan_rsdp(ebda_addr, ebda_addr + ACPI_EBDA_LEN - 1);
1760 	if (rsdp == NULL)
1761 		/* if EBDA doesn't contain RSDP, look in BIOS memory */
1762 		rsdp = scan_rsdp(0xe0000, 0xfffff);
1763 	return (rsdp);
1764 }
1765 
1766 static struct table_header *
1767 map_fw_table(paddr_t table_addr)
1768 {
1769 	struct table_header *tp;
1770 	size_t len = MAX(sizeof (struct table_header), MMU_PAGESIZE);
1771 
1772 	/*
1773 	 * Map at least a page; if the table is larger than this, remap it
1774 	 */
1775 	tp = (struct table_header *)vmap_phys(len, table_addr);
1776 	if (tp->len > len)
1777 		tp = (struct table_header *)vmap_phys(tp->len, table_addr);
1778 	return (tp);
1779 }
1780 
1781 static struct table_header *
1782 find_fw_table(char *signature)
1783 {
1784 	static int revision = 0;
1785 	static struct xsdt *xsdt;
1786 	static int len;
1787 	paddr_t xsdt_addr;
1788 	struct rsdp *rsdp;
1789 	struct table_header *tp;
1790 	paddr_t table_addr;
1791 	int	n;
1792 
1793 	if (strlen(signature) != ACPI_TABLE_SIG_LEN)
1794 		return (NULL);
1795 
1796 	/*
1797 	 * Reading the ACPI 3.0 Spec, section 5.2.5.3 will help
1798 	 * understand this code.  If we haven't already found the RSDT/XSDT,
1799 	 * revision will be 0. Find the RSDP and check the revision
1800 	 * to find out whether to use the RSDT or XSDT.  If revision is
1801 	 * 0 or 1, use the RSDT and set internal revision to 1; if it is 2,
1802 	 * use the XSDT.  If the XSDT address is 0, though, fall back to
1803 	 * revision 1 and use the RSDT.
1804 	 */
1805 	if (revision == 0) {
1806 		if ((rsdp = (struct rsdp *)find_rsdp()) != NULL) {
1807 			revision = rsdp->v1.revision;
1808 			switch (revision) {
1809 			case 2:
1810 				/*
1811 				 * Use the XSDT unless BIOS is buggy and
1812 				 * claims to be rev 2 but has a null XSDT
1813 				 * address
1814 				 */
1815 				xsdt_addr = rsdp->xsdt;
1816 				if (xsdt_addr != 0)
1817 					break;
1818 				/* FALLTHROUGH */
1819 			case 0:
1820 				/* treat RSDP rev 0 as revision 1 internally */
1821 				revision = 1;
1822 				/* FALLTHROUGH */
1823 			case 1:
1824 				/* use the RSDT for rev 0/1 */
1825 				xsdt_addr = rsdp->v1.rsdt;
1826 				break;
1827 			default:
1828 				/* unknown revision */
1829 				revision = 0;
1830 				break;
1831 			}
1832 		}
1833 		if (revision == 0)
1834 			return (NULL);
1835 
1836 		/* cache the XSDT info */
1837 		xsdt = (struct xsdt *)map_fw_table(xsdt_addr);
1838 		len = (xsdt->hdr.len - sizeof (xsdt->hdr)) /
1839 		    ((revision == 1) ? sizeof (uint32_t) : sizeof (uint64_t));
1840 	}
1841 
1842 	/*
1843 	 * Scan the table headers looking for a signature match
1844 	 */
1845 	for (n = 0; n < len; n++) {
1846 		table_addr = (revision == 1) ? xsdt->p.r[n] : xsdt->p.x[n];
1847 		if (table_addr == 0)
1848 			continue;
1849 		tp = map_fw_table(table_addr);
1850 		if (strncmp(tp->sig, signature, ACPI_TABLE_SIG_LEN) == 0) {
1851 			return (tp);
1852 		}
1853 	}
1854 	return (NULL);
1855 }
1856 
1857 static void
1858 process_madt(struct madt *tp)
1859 {
1860 	struct madt_processor *cpu, *end;
1861 	uint32_t cpu_count = 0;
1862 
1863 	/*
1864 	 * User-set boot-ncpus overrides firmware count
1865 	 */
1866 	if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
1867 		return;
1868 
1869 	if (tp != NULL) {
1870 		end = (struct madt_processor *)(tp->hdr.len + (uintptr_t)tp);
1871 		cpu = tp->list;
1872 		while (cpu < end) {
1873 			if (cpu->type == MADT_PROCESSOR)
1874 				if (cpu->flags & 1)
1875 					cpu_count++;
1876 
1877 			cpu = (struct madt_processor *)
1878 			    (cpu->len + (uintptr_t)cpu);
1879 		}
1880 		bsetpropsi("boot-ncpus", cpu_count);
1881 	}
1882 
1883 }
1884 
1885 static void
1886 process_srat(struct srat *tp)
1887 {
1888 	struct srat_item *item, *end;
1889 	int i;
1890 	int proc_num, mem_num;
1891 #pragma pack(1)
1892 	struct {
1893 		uint32_t domain;
1894 		uint32_t apic_id;
1895 		uint32_t sapic_id;
1896 	} processor;
1897 	struct {
1898 		uint32_t domain;
1899 		uint64_t addr;
1900 		uint64_t length;
1901 		uint32_t flags;
1902 	} memory;
1903 #pragma pack()
1904 	char prop_name[30];
1905 
1906 	if (tp == NULL)
1907 		return;
1908 
1909 	proc_num = mem_num = 0;
1910 	end = (struct srat_item *)(tp->hdr.len + (uintptr_t)tp);
1911 	item = tp->list;
1912 	while (item < end) {
1913 		switch (item->type) {
1914 		case SRAT_PROCESSOR:
1915 			if (!(item->i.p.flags & SRAT_ENABLED))
1916 				break;
1917 			processor.domain = item->i.p.domain1;
1918 			for (i = 0; i < 3; i++)
1919 				processor.domain +=
1920 				    item->i.p.domain2[i] << ((i + 1) * 8);
1921 			processor.apic_id = item->i.p.apic_id;
1922 			processor.sapic_id = item->i.p.local_sapic_eid;
1923 			(void) snprintf(prop_name, 30, "acpi-srat-processor-%d",
1924 			    proc_num);
1925 			bsetprop(prop_name, strlen(prop_name), &processor,
1926 			    sizeof (processor));
1927 			proc_num++;
1928 			break;
1929 		case SRAT_MEMORY:
1930 			if (!(item->i.m.flags & SRAT_ENABLED))
1931 				break;
1932 			memory.domain = item->i.m.domain;
1933 			memory.addr = item->i.m.base_addr;
1934 			memory.length = item->i.m.len;
1935 			memory.flags = item->i.m.flags;
1936 			(void) snprintf(prop_name, 30, "acpi-srat-memory-%d",
1937 			    mem_num);
1938 			bsetprop(prop_name, strlen(prop_name), &memory,
1939 			    sizeof (memory));
1940 			mem_num++;
1941 			break;
1942 		}
1943 
1944 		item = (struct srat_item *)
1945 		    (item->len + (caddr_t)item);
1946 	}
1947 }
1948 
1949 static void
1950 process_slit(struct slit *tp)
1951 {
1952 
1953 	/*
1954 	 * Check the number of localities; if it's too huge, we just
1955 	 * return and locality enumeration code will handle this later,
1956 	 * if possible.
1957 	 *
1958 	 * Note that the size of the table is the square of the
1959 	 * number of localities; if the number of localities exceeds
1960 	 * UINT16_MAX, the table size may overflow an int when being
1961 	 * passed to bsetprop() below.
1962 	 */
1963 	if (tp->number >= SLIT_LOCALITIES_MAX)
1964 		return;
1965 
1966 	bsetprop(SLIT_NUM_PROPNAME, strlen(SLIT_NUM_PROPNAME), &tp->number,
1967 	    sizeof (tp->number));
1968 	bsetprop(SLIT_PROPNAME, strlen(SLIT_PROPNAME), &tp->entry,
1969 	    tp->number * tp->number);
1970 }
1971 #else /* __xpv */
1972 static void
1973 enumerate_xen_cpus()
1974 {
1975 	processorid_t	id, max_id;
1976 
1977 	/*
1978 	 * User-set boot-ncpus overrides enumeration
1979 	 */
1980 	if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
1981 		return;
1982 
1983 	/*
1984 	 * Probe every possible virtual CPU id and remember the
1985 	 * highest id present; the count of CPUs is one greater
1986 	 * than this.  This tacitly assumes at least cpu 0 is present.
1987 	 */
1988 	max_id = 0;
1989 	for (id = 0; id < MAX_VIRT_CPUS; id++)
1990 		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0)
1991 			max_id = id;
1992 
1993 	bsetpropsi("boot-ncpus", max_id+1);
1994 
1995 }
1996 #endif /* __xpv */
1997 
1998 static void
1999 build_firmware_properties(void)
2000 {
2001 #ifndef __xpv
2002 	struct table_header *tp;
2003 
2004 	if ((tp = find_fw_table("APIC")) != NULL)
2005 		process_madt((struct madt *)tp);
2006 
2007 	if ((tp = find_fw_table("SRAT")) != NULL)
2008 		process_srat((struct srat *)tp);
2009 
2010 	if (tp = find_fw_table("SLIT"))
2011 		process_slit((struct slit *)tp);
2012 #else /* __xpv */
2013 	enumerate_xen_cpus();
2014 #endif /* __xpv */
2015 }
2016 
2017 /*
2018  * fake up a boot property for USB serial console early boot output
2019  */
2020 void *
2021 usbser_init(size_t size)
2022 {
2023 	static char *p = NULL;
2024 
2025 	p = do_bsys_alloc(NULL, NULL, size, MMU_PAGESIZE);
2026 	*p = 0;
2027 	bsetprop("usb-serial-buf", strlen("usb-serial-buf") + 1,
2028 	    &p, sizeof (p));
2029 	return (p);
2030 }
2031