xref: /titanic_51/usr/src/uts/i86pc/os/fakebop.c (revision bf4de67d4dd018e5f4bb0b566de3dac4ca5ba286)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  *
29  * Copyright 2013 Joyent, Inc.  All rights reserved.
30  */
31 
32 /*
33  * This file contains the functionality that mimics the boot operations
34  * on SPARC systems or the old boot.bin/multiboot programs on x86 systems.
35  * The x86 kernel now does everything on its own.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/bootconf.h>
40 #include <sys/bootsvcs.h>
41 #include <sys/bootinfo.h>
42 #include <sys/multiboot.h>
43 #include <sys/bootvfs.h>
44 #include <sys/bootprops.h>
45 #include <sys/varargs.h>
46 #include <sys/param.h>
47 #include <sys/machparam.h>
48 #include <sys/machsystm.h>
49 #include <sys/archsystm.h>
50 #include <sys/boot_console.h>
51 #include <sys/cmn_err.h>
52 #include <sys/systm.h>
53 #include <sys/promif.h>
54 #include <sys/archsystm.h>
55 #include <sys/x86_archext.h>
56 #include <sys/kobj.h>
57 #include <sys/privregs.h>
58 #include <sys/sysmacros.h>
59 #include <sys/ctype.h>
60 #include <sys/fastboot.h>
61 #ifdef __xpv
62 #include <sys/hypervisor.h>
63 #include <net/if.h>
64 #endif
65 #include <vm/kboot_mmu.h>
66 #include <vm/hat_pte.h>
67 #include <sys/kobj.h>
68 #include <sys/kobj_lex.h>
69 #include <sys/pci_cfgspace_impl.h>
70 #include <sys/fastboot_impl.h>
71 #include <sys/acpi/acconfig.h>
72 #include <sys/acpi/acpi.h>
73 
74 static int have_console = 0;	/* set once primitive console is initialized */
75 static char *boot_args = "";
76 
77 /*
78  * Debugging macros
79  */
80 static uint_t kbm_debug = 0;
81 #define	DBG_MSG(s)	{ if (kbm_debug) bop_printf(NULL, "%s", s); }
82 #define	DBG(x)		{ if (kbm_debug)			\
83 	bop_printf(NULL, "%s is %" PRIx64 "\n", #x, (uint64_t)(x));	\
84 	}
85 
86 #define	PUT_STRING(s) {				\
87 	char *cp;				\
88 	for (cp = (s); *cp; ++cp)		\
89 		bcons_putchar(*cp);		\
90 	}
91 
92 struct xboot_info *xbootp;	/* boot info from "glue" code in low memory */
93 bootops_t bootop;	/* simple bootops we'll pass on to kernel */
94 struct bsys_mem bm;
95 
96 static uintptr_t next_virt;	/* next available virtual address */
97 static paddr_t next_phys;	/* next available physical address from dboot */
98 static paddr_t high_phys = -(paddr_t)1;	/* last used physical address */
99 
100 /*
101  * buffer for vsnprintf for console I/O
102  */
103 #define	BUFFERSIZE	512
104 static char buffer[BUFFERSIZE];
105 /*
106  * stuff to store/report/manipulate boot property settings.
107  */
108 typedef struct bootprop {
109 	struct bootprop *bp_next;
110 	char *bp_name;
111 	uint_t bp_vlen;
112 	char *bp_value;
113 } bootprop_t;
114 
115 static bootprop_t *bprops = NULL;
116 static char *curr_page = NULL;		/* ptr to avail bprop memory */
117 static int curr_space = 0;		/* amount of memory at curr_page */
118 
119 #ifdef __xpv
120 start_info_t *xen_info;
121 shared_info_t *HYPERVISOR_shared_info;
122 #endif
123 
124 /*
125  * some allocator statistics
126  */
127 static ulong_t total_bop_alloc_scratch = 0;
128 static ulong_t total_bop_alloc_kernel = 0;
129 
130 static void build_firmware_properties(void);
131 
132 static int early_allocation = 1;
133 
134 int force_fastreboot = 0;
135 volatile int fastreboot_onpanic = 0;
136 int post_fastreboot = 0;
137 #ifdef	__xpv
138 volatile int fastreboot_capable = 0;
139 #else
140 volatile int fastreboot_capable = 1;
141 #endif
142 
143 /*
144  * Information saved from current boot for fast reboot.
145  * If the information size exceeds what we have allocated, fast reboot
146  * will not be supported.
147  */
148 multiboot_info_t saved_mbi;
149 mb_memory_map_t saved_mmap[FASTBOOT_SAVED_MMAP_COUNT];
150 uint8_t saved_drives[FASTBOOT_SAVED_DRIVES_SIZE];
151 char saved_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
152 int saved_cmdline_len = 0;
153 size_t saved_file_size[FASTBOOT_MAX_FILES_MAP];
154 
155 /*
156  * Turn off fastreboot_onpanic to avoid panic loop.
157  */
158 char fastreboot_onpanic_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
159 static const char fastreboot_onpanic_args[] = " -B fastreboot_onpanic=0";
160 
161 /*
162  * Pointers to where System Resource Affinity Table (SRAT), System Locality
163  * Information Table (SLIT) and Maximum System Capability Table (MSCT)
164  * are mapped into virtual memory
165  */
166 ACPI_TABLE_SRAT	*srat_ptr = NULL;
167 ACPI_TABLE_SLIT	*slit_ptr = NULL;
168 ACPI_TABLE_MSCT	*msct_ptr = NULL;
169 
170 /*
171  * Arbitrary limit on number of localities we handle; if
172  * this limit is raised to more than UINT16_MAX, make sure
173  * process_slit() knows how to handle it.
174  */
175 #define	SLIT_LOCALITIES_MAX	(4096)
176 
177 #define	SLIT_NUM_PROPNAME	"acpi-slit-localities"
178 #define	SLIT_PROPNAME		"acpi-slit"
179 
180 /*
181  * Allocate aligned physical memory at boot time. This allocator allocates
182  * from the highest possible addresses. This avoids exhausting memory that
183  * would be useful for DMA buffers.
184  */
185 paddr_t
186 do_bop_phys_alloc(uint64_t size, uint64_t align)
187 {
188 	paddr_t	pa = 0;
189 	paddr_t	start;
190 	paddr_t	end;
191 	struct memlist	*ml = (struct memlist *)xbootp->bi_phys_install;
192 
193 	/*
194 	 * Be careful if high memory usage is limited in startup.c
195 	 * Since there are holes in the low part of the physical address
196 	 * space we can treat physmem as a pfn (not just a pgcnt) and
197 	 * get a conservative upper limit.
198 	 */
199 	if (physmem != 0 && high_phys > pfn_to_pa(physmem))
200 		high_phys = pfn_to_pa(physmem);
201 
202 	/*
203 	 * find the lowest or highest available memory in physinstalled
204 	 * On 32 bit avoid physmem above 4Gig if PAE isn't enabled
205 	 */
206 #if defined(__i386)
207 	if (xbootp->bi_use_pae == 0 && high_phys > FOUR_GIG)
208 		high_phys = FOUR_GIG;
209 #endif
210 
211 	/*
212 	 * find the highest available memory in physinstalled
213 	 */
214 	size = P2ROUNDUP(size, align);
215 	for (; ml; ml = ml->ml_next) {
216 		start = P2ROUNDUP(ml->ml_address, align);
217 		end = P2ALIGN(ml->ml_address + ml->ml_size, align);
218 		if (start < next_phys)
219 			start = P2ROUNDUP(next_phys, align);
220 		if (end > high_phys)
221 			end = P2ALIGN(high_phys, align);
222 
223 		if (end <= start)
224 			continue;
225 		if (end - start < size)
226 			continue;
227 
228 		/*
229 		 * Early allocations need to use low memory, since
230 		 * physmem might be further limited by bootenv.rc
231 		 */
232 		if (early_allocation) {
233 			if (pa == 0 || start < pa)
234 				pa = start;
235 		} else {
236 			if (end - size > pa)
237 				pa = end - size;
238 		}
239 	}
240 	if (pa != 0) {
241 		if (early_allocation)
242 			next_phys = pa + size;
243 		else
244 			high_phys = pa;
245 		return (pa);
246 	}
247 	bop_panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64
248 	    ") Out of memory\n", size, align);
249 	/*NOTREACHED*/
250 }
251 
252 uintptr_t
253 alloc_vaddr(size_t size, paddr_t align)
254 {
255 	uintptr_t rv;
256 
257 	next_virt = P2ROUNDUP(next_virt, (uintptr_t)align);
258 	rv = (uintptr_t)next_virt;
259 	next_virt += size;
260 	return (rv);
261 }
262 
263 /*
264  * Allocate virtual memory. The size is always rounded up to a multiple
265  * of base pagesize.
266  */
267 
268 /*ARGSUSED*/
269 static caddr_t
270 do_bsys_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
271 {
272 	paddr_t a = align;	/* same type as pa for masking */
273 	uint_t pgsize;
274 	paddr_t pa;
275 	uintptr_t va;
276 	ssize_t s;		/* the aligned size */
277 	uint_t level;
278 	uint_t is_kernel = (virthint != 0);
279 
280 	if (a < MMU_PAGESIZE)
281 		a = MMU_PAGESIZE;
282 	else if (!ISP2(a))
283 		prom_panic("do_bsys_alloc() incorrect alignment");
284 	size = P2ROUNDUP(size, MMU_PAGESIZE);
285 
286 	/*
287 	 * Use the next aligned virtual address if we weren't given one.
288 	 */
289 	if (virthint == NULL) {
290 		virthint = (caddr_t)alloc_vaddr(size, a);
291 		total_bop_alloc_scratch += size;
292 	} else {
293 		total_bop_alloc_kernel += size;
294 	}
295 
296 	/*
297 	 * allocate the physical memory
298 	 */
299 	pa = do_bop_phys_alloc(size, a);
300 
301 	/*
302 	 * Add the mappings to the page tables, try large pages first.
303 	 */
304 	va = (uintptr_t)virthint;
305 	s = size;
306 	level = 1;
307 	pgsize = xbootp->bi_use_pae ? TWO_MEG : FOUR_MEG;
308 	if (xbootp->bi_use_largepage && a == pgsize) {
309 		while (IS_P2ALIGNED(pa, pgsize) && IS_P2ALIGNED(va, pgsize) &&
310 		    s >= pgsize) {
311 			kbm_map(va, pa, level, is_kernel);
312 			va += pgsize;
313 			pa += pgsize;
314 			s -= pgsize;
315 		}
316 	}
317 
318 	/*
319 	 * Map remaining pages use small mappings
320 	 */
321 	level = 0;
322 	pgsize = MMU_PAGESIZE;
323 	while (s > 0) {
324 		kbm_map(va, pa, level, is_kernel);
325 		va += pgsize;
326 		pa += pgsize;
327 		s -= pgsize;
328 	}
329 	return (virthint);
330 }
331 
332 /*
333  * Free virtual memory - we'll just ignore these.
334  */
335 /*ARGSUSED*/
336 static void
337 do_bsys_free(bootops_t *bop, caddr_t virt, size_t size)
338 {
339 	bop_printf(NULL, "do_bsys_free(virt=0x%p, size=0x%lx) ignored\n",
340 	    (void *)virt, size);
341 }
342 
343 /*
344  * Old interface
345  */
346 /*ARGSUSED*/
347 static caddr_t
348 do_bsys_ealloc(
349 	bootops_t *bop,
350 	caddr_t virthint,
351 	size_t size,
352 	int align,
353 	int flags)
354 {
355 	prom_panic("unsupported call to BOP_EALLOC()\n");
356 	return (0);
357 }
358 
359 
360 static void
361 bsetprop(char *name, int nlen, void *value, int vlen)
362 {
363 	uint_t size;
364 	uint_t need_size;
365 	bootprop_t *b;
366 
367 	/*
368 	 * align the size to 16 byte boundary
369 	 */
370 	size = sizeof (bootprop_t) + nlen + 1 + vlen;
371 	size = (size + 0xf) & ~0xf;
372 	if (size > curr_space) {
373 		need_size = (size + (MMU_PAGEOFFSET)) & MMU_PAGEMASK;
374 		curr_page = do_bsys_alloc(NULL, 0, need_size, MMU_PAGESIZE);
375 		curr_space = need_size;
376 	}
377 
378 	/*
379 	 * use a bootprop_t at curr_page and link into list
380 	 */
381 	b = (bootprop_t *)curr_page;
382 	curr_page += sizeof (bootprop_t);
383 	curr_space -=  sizeof (bootprop_t);
384 	b->bp_next = bprops;
385 	bprops = b;
386 
387 	/*
388 	 * follow by name and ending zero byte
389 	 */
390 	b->bp_name = curr_page;
391 	bcopy(name, curr_page, nlen);
392 	curr_page += nlen;
393 	*curr_page++ = 0;
394 	curr_space -= nlen + 1;
395 
396 	/*
397 	 * copy in value, but no ending zero byte
398 	 */
399 	b->bp_value = curr_page;
400 	b->bp_vlen = vlen;
401 	if (vlen > 0) {
402 		bcopy(value, curr_page, vlen);
403 		curr_page += vlen;
404 		curr_space -= vlen;
405 	}
406 
407 	/*
408 	 * align new values of curr_page, curr_space
409 	 */
410 	while (curr_space & 0xf) {
411 		++curr_page;
412 		--curr_space;
413 	}
414 }
415 
/*
 * Set a string-valued property; the stored value includes the
 * terminating zero byte.
 */
static void
bsetprops(char *name, char *value)
{
	int name_len = strlen(name);
	int value_len = strlen(value) + 1;

	bsetprop(name, name_len, value, value_len);
}
421 
/*
 * Set a property whose value is the raw 8 bytes of a uint64_t.
 */
static void
bsetprop64(char *name, uint64_t value)
{
	void *raw = (void *)&value;

	bsetprop(name, strlen(name), raw, sizeof (value));
}
427 
/*
 * Set a string property holding the decimal representation of "value".
 */
static void
bsetpropsi(char *name, int value)
{
	char digits[32];

	(void) snprintf(digits, sizeof (digits), "%d", value);
	bsetprop(name, strlen(name), digits, strlen(digits) + 1);
}
436 
437 /*
438  * to find the size of the buffer to allocate
439  */
440 /*ARGSUSED*/
441 int
442 do_bsys_getproplen(bootops_t *bop, const char *name)
443 {
444 	bootprop_t *b;
445 
446 	for (b = bprops; b; b = b->bp_next) {
447 		if (strcmp(name, b->bp_name) != 0)
448 			continue;
449 		return (b->bp_vlen);
450 	}
451 	return (-1);
452 }
453 
454 /*
455  * get the value associated with this name
456  */
457 /*ARGSUSED*/
458 int
459 do_bsys_getprop(bootops_t *bop, const char *name, void *value)
460 {
461 	bootprop_t *b;
462 
463 	for (b = bprops; b; b = b->bp_next) {
464 		if (strcmp(name, b->bp_name) != 0)
465 			continue;
466 		bcopy(b->bp_value, value, b->bp_vlen);
467 		return (0);
468 	}
469 	return (-1);
470 }
471 
472 /*
473  * get the name of the next property in succession from the standalone
474  */
475 /*ARGSUSED*/
476 static char *
477 do_bsys_nextprop(bootops_t *bop, char *name)
478 {
479 	bootprop_t *b;
480 
481 	/*
482 	 * A null name is a special signal for the 1st boot property
483 	 */
484 	if (name == NULL || strlen(name) == 0) {
485 		if (bprops == NULL)
486 			return (NULL);
487 		return (bprops->bp_name);
488 	}
489 
490 	for (b = bprops; b; b = b->bp_next) {
491 		if (name != b->bp_name)
492 			continue;
493 		b = b->bp_next;
494 		if (b == NULL)
495 			return (NULL);
496 		return (b->bp_name);
497 	}
498 	return (NULL);
499 }
500 
/*
 * Parse numeric value from a string. Understands decimal, hex (0x/0X
 * prefix), octal (leading 0), and an optional leading '-' (negate) or
 * '~' (bitwise complement).
 *
 * On success the value is stored in *retval and 0 is returned.  If the
 * string contains a character that is not a digit of the selected radix,
 * -1 is returned and *retval is left as 0.  Overflow of the 64-bit result
 * is not detected.
 */
static int
parse_value(char *p, uint64_t *retval)
{
	int adjust = 0;
	uint64_t tmp = 0;
	int digit;
	int radix = 10;

	*retval = 0;
	if (*p == '-' || *p == '~')
		adjust = *p++;

	if (*p == '0') {
		++p;
		if (*p == 'x' || *p == 'X') {
			radix = 16;
			++p;
		} else {
			/*
			 * p already points at the first octal digit (or at
			 * the end of a plain "0"), so it must not be
			 * advanced again here.
			 */
			radix = 8;
		}
	}
	while (*p) {
		if ('0' <= *p && *p <= '9')
			digit = *p - '0';
		else if ('a' <= *p && *p <= 'f')
			digit = 10 + *p - 'a';
		else if ('A' <= *p && *p <= 'F')
			digit = 10 + *p - 'A';
		else
			return (-1);
		if (digit >= radix)
			return (-1);
		tmp = tmp * radix + digit;
		++p;
	}

	/* applied to a bare "0" as well, so that "~0" is all ones */
	if (adjust == '-')
		tmp = -tmp;
	else if (adjust == '~')
		tmp = ~tmp;
	*retval = tmp;
	return (0);
}
549 
550 /*
551  * 2nd part of building the table of boot properties. This includes:
552  * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
553  *
554  * lines look like one of:
555  * ^$
556  * ^# comment till end of line
557  * setprop name 'value'
558  * setprop name value
559  * setprop name "value"
560  *
561  * we do single character I/O since this is really just looking at memory
562  */
563 void
564 boot_prop_finish(void)
565 {
566 	int fd;
567 	char *line;
568 	int c;
569 	int bytes_read;
570 	char *name;
571 	int n_len;
572 	char *value;
573 	int v_len;
574 	char *inputdev;	/* these override the command line if serial ports */
575 	char *outputdev;
576 	char *consoledev;
577 	uint64_t lvalue;
578 	int use_xencons = 0;
579 
580 #ifdef __xpv
581 	if (!DOMAIN_IS_INITDOMAIN(xen_info))
582 		use_xencons = 1;
583 #endif /* __xpv */
584 
585 	DBG_MSG("Opening /boot/solaris/bootenv.rc\n");
586 	fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0);
587 	DBG(fd);
588 
589 	line = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
590 	while (fd >= 0) {
591 
592 		/*
593 		 * get a line
594 		 */
595 		for (c = 0; ; ++c) {
596 			bytes_read = BRD_READ(bfs_ops, fd, line + c, 1);
597 			if (bytes_read == 0) {
598 				if (c == 0)
599 					goto done;
600 				break;
601 			}
602 			if (line[c] == '\n')
603 				break;
604 		}
605 		line[c] = 0;
606 
607 		/*
608 		 * ignore comment lines
609 		 */
610 		c = 0;
611 		while (ISSPACE(line[c]))
612 			++c;
613 		if (line[c] == '#' || line[c] == 0)
614 			continue;
615 
616 		/*
617 		 * must have "setprop " or "setprop\t"
618 		 */
619 		if (strncmp(line + c, "setprop ", 8) != 0 &&
620 		    strncmp(line + c, "setprop\t", 8) != 0)
621 			continue;
622 		c += 8;
623 		while (ISSPACE(line[c]))
624 			++c;
625 		if (line[c] == 0)
626 			continue;
627 
628 		/*
629 		 * gather up the property name
630 		 */
631 		name = line + c;
632 		n_len = 0;
633 		while (line[c] && !ISSPACE(line[c]))
634 			++n_len, ++c;
635 
636 		/*
637 		 * gather up the value, if any
638 		 */
639 		value = "";
640 		v_len = 0;
641 		while (ISSPACE(line[c]))
642 			++c;
643 		if (line[c] != 0) {
644 			value = line + c;
645 			while (line[c] && !ISSPACE(line[c]))
646 				++v_len, ++c;
647 		}
648 
649 		if (v_len >= 2 && value[0] == value[v_len - 1] &&
650 		    (value[0] == '\'' || value[0] == '"')) {
651 			++value;
652 			v_len -= 2;
653 		}
654 		name[n_len] = 0;
655 		if (v_len > 0)
656 			value[v_len] = 0;
657 		else
658 			continue;
659 
660 		/*
661 		 * ignore "boot-file" property, it's now meaningless
662 		 */
663 		if (strcmp(name, "boot-file") == 0)
664 			continue;
665 		if (strcmp(name, "boot-args") == 0 &&
666 		    strlen(boot_args) > 0)
667 			continue;
668 
669 		/*
670 		 * If a property was explicitly set on the command line
671 		 * it will override a setting in bootenv.rc
672 		 */
673 		if (do_bsys_getproplen(NULL, name) > 0)
674 			continue;
675 
676 		bsetprop(name, n_len, value, v_len + 1);
677 	}
678 done:
679 	if (fd >= 0)
680 		(void) BRD_CLOSE(bfs_ops, fd);
681 
682 	/*
683 	 * Check if we have to limit the boot time allocator
684 	 */
685 	if (do_bsys_getproplen(NULL, "physmem") != -1 &&
686 	    do_bsys_getprop(NULL, "physmem", line) >= 0 &&
687 	    parse_value(line, &lvalue) != -1) {
688 		if (0 < lvalue && (lvalue < physmem || physmem == 0)) {
689 			physmem = (pgcnt_t)lvalue;
690 			DBG(physmem);
691 		}
692 	}
693 	early_allocation = 0;
694 
695 	/*
696 	 * check to see if we have to override the default value of the console
697 	 */
698 	if (!use_xencons) {
699 		inputdev = line;
700 		v_len = do_bsys_getproplen(NULL, "input-device");
701 		if (v_len > 0)
702 			(void) do_bsys_getprop(NULL, "input-device", inputdev);
703 		else
704 			v_len = 0;
705 		inputdev[v_len] = 0;
706 
707 		outputdev = inputdev + v_len + 1;
708 		v_len = do_bsys_getproplen(NULL, "output-device");
709 		if (v_len > 0)
710 			(void) do_bsys_getprop(NULL, "output-device",
711 			    outputdev);
712 		else
713 			v_len = 0;
714 		outputdev[v_len] = 0;
715 
716 		consoledev = outputdev + v_len + 1;
717 		v_len = do_bsys_getproplen(NULL, "console");
718 		if (v_len > 0) {
719 			(void) do_bsys_getprop(NULL, "console", consoledev);
720 			if (post_fastreboot &&
721 			    strcmp(consoledev, "graphics") == 0) {
722 				bsetprops("console", "text");
723 				v_len = strlen("text");
724 				bcopy("text", consoledev, v_len);
725 			}
726 		} else {
727 			v_len = 0;
728 		}
729 		consoledev[v_len] = 0;
730 		bcons_init2(inputdev, outputdev, consoledev);
731 	} else {
732 		/*
733 		 * Ensure console property exists
734 		 * If not create it as "hypervisor"
735 		 */
736 		v_len = do_bsys_getproplen(NULL, "console");
737 		if (v_len < 0)
738 			bsetprops("console", "hypervisor");
739 		inputdev = outputdev = consoledev = "hypervisor";
740 		bcons_init2(inputdev, outputdev, consoledev);
741 	}
742 
743 	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
744 		value = line;
745 		bop_printf(NULL, "\nBoot properties:\n");
746 		name = "";
747 		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
748 			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
749 			(void) do_bsys_getprop(NULL, name, value);
750 			v_len = do_bsys_getproplen(NULL, name);
751 			bop_printf(NULL, "len=%d ", v_len);
752 			value[v_len] = 0;
753 			bop_printf(NULL, "%s\n", value);
754 		}
755 	}
756 }
757 
758 /*
759  * print formatted output
760  */
761 /*PRINTFLIKE2*/
762 /*ARGSUSED*/
763 void
764 bop_printf(bootops_t *bop, const char *fmt, ...)
765 {
766 	va_list	ap;
767 
768 	if (have_console == 0)
769 		return;
770 
771 	va_start(ap, fmt);
772 	(void) vsnprintf(buffer, BUFFERSIZE, fmt, ap);
773 	va_end(ap);
774 	PUT_STRING(buffer);
775 }
776 
777 /*
778  * Another panic() variant; this one can be used even earlier during boot than
779  * prom_panic().
780  */
781 /*PRINTFLIKE1*/
782 void
783 bop_panic(const char *fmt, ...)
784 {
785 	va_list ap;
786 
787 	va_start(ap, fmt);
788 	bop_printf(NULL, fmt, ap);
789 	va_end(ap);
790 
791 	bop_printf(NULL, "\nPress any key to reboot.\n");
792 	(void) bcons_getchar();
793 	bop_printf(NULL, "Resetting...\n");
794 	pc_reset();
795 }
796 
797 /*
798  * Do a real mode interrupt BIOS call
799  */
800 typedef struct bios_regs {
801 	unsigned short ax, bx, cx, dx, si, di, bp, es, ds;
802 } bios_regs_t;
803 typedef int (*bios_func_t)(int, bios_regs_t *);
804 
805 /*ARGSUSED*/
806 static void
807 do_bsys_doint(bootops_t *bop, int intnum, struct bop_regs *rp)
808 {
809 #if defined(__xpv)
810 	prom_panic("unsupported call to BOP_DOINT()\n");
811 #else	/* __xpv */
812 	static int firsttime = 1;
813 	bios_func_t bios_func = (bios_func_t)(void *)(uintptr_t)0x5000;
814 	bios_regs_t br;
815 
816 	/*
817 	 * The first time we do this, we have to copy the pre-packaged
818 	 * low memory bios call code image into place.
819 	 */
820 	if (firsttime) {
821 		extern char bios_image[];
822 		extern uint32_t bios_size;
823 
824 		bcopy(bios_image, (void *)bios_func, bios_size);
825 		firsttime = 0;
826 	}
827 
828 	br.ax = rp->eax.word.ax;
829 	br.bx = rp->ebx.word.bx;
830 	br.cx = rp->ecx.word.cx;
831 	br.dx = rp->edx.word.dx;
832 	br.bp = rp->ebp.word.bp;
833 	br.si = rp->esi.word.si;
834 	br.di = rp->edi.word.di;
835 	br.ds = rp->ds;
836 	br.es = rp->es;
837 
838 	DBG_MSG("Doing BIOS call...");
839 	DBG(br.ax);
840 	DBG(br.bx);
841 	DBG(br.dx);
842 	rp->eflags = bios_func(intnum, &br);
843 	DBG_MSG("done\n");
844 
845 	rp->eax.word.ax = br.ax;
846 	rp->ebx.word.bx = br.bx;
847 	rp->ecx.word.cx = br.cx;
848 	rp->edx.word.dx = br.dx;
849 	rp->ebp.word.bp = br.bp;
850 	rp->esi.word.si = br.si;
851 	rp->edi.word.di = br.di;
852 	rp->ds = br.ds;
853 	rp->es = br.es;
854 #endif /* __xpv */
855 }
856 
857 static struct boot_syscalls bop_sysp = {
858 	bcons_getchar,
859 	bcons_putchar,
860 	bcons_ischar,
861 };
862 
863 static char *whoami;
864 
865 #define	BUFLEN	64
866 
867 #if defined(__xpv)
868 
869 static char namebuf[32];
870 
/*
 * Split the ':' separated string "s" into fields and set each non-empty
 * field as the string property named by the corresponding prop_map[]
 * entry; a NULL entry means the field is ignored.  At most n_prop fields
 * are consumed, and parsing stops at the end of the string.
 *
 * The string is modified in place: each ':' that ends a stored field is
 * overwritten with a zero byte.
 */
static void
xen_parse_props(char *s, char *prop_map[], int n_prop)
{
	char **prop_name = prop_map;
	char *cp = s, *scp;
	int last;	/* this field ends at the end of the string */

	do {
		scp = cp;
		while ((*cp != '\0') && (*cp != ':'))
			cp++;
		last = (*cp == '\0');

		if ((scp != cp) && (*prop_name != NULL)) {
			*cp = '\0';
			bsetprops(*prop_name, scp);
		}

		/*
		 * Never step over the terminator: the remaining map
		 * entries simply have no value.
		 */
		if (last)
			break;

		cp++;
		prop_name++;
		n_prop--;
	} while (n_prop > 0);
}
892 
893 #define	VBDPATHLEN	64
894 
895 /*
896  * parse the 'xpv-root' property to create properties used by
897  * ufs_mountroot.
898  */
899 static void
900 xen_vbdroot_props(char *s)
901 {
902 	char vbdpath[VBDPATHLEN] = "/xpvd/xdf@";
903 	const char lnamefix[] = "/dev/dsk/c0d";
904 	char *pnp;
905 	char *prop_p;
906 	char mi;
907 	short minor;
908 	long addr = 0;
909 
910 	pnp = vbdpath + strlen(vbdpath);
911 	prop_p = s + strlen(lnamefix);
912 	while ((*prop_p != '\0') && (*prop_p != 's') && (*prop_p != 'p'))
913 		addr = addr * 10 + *prop_p++ - '0';
914 	(void) snprintf(pnp, VBDPATHLEN, "%lx", addr);
915 	pnp = vbdpath + strlen(vbdpath);
916 	if (*prop_p == 's')
917 		mi = 'a';
918 	else if (*prop_p == 'p')
919 		mi = 'q';
920 	else
921 		ASSERT(0); /* shouldn't be here */
922 	prop_p++;
923 	ASSERT(*prop_p != '\0');
924 	if (ISDIGIT(*prop_p)) {
925 		minor = *prop_p - '0';
926 		prop_p++;
927 		if (ISDIGIT(*prop_p)) {
928 			minor = minor * 10 + *prop_p - '0';
929 		}
930 	} else {
931 		/* malformed root path, use 0 as default */
932 		minor = 0;
933 	}
934 	ASSERT(minor < 16); /* at most 16 partitions */
935 	mi += minor;
936 	*pnp++ = ':';
937 	*pnp++ = mi;
938 	*pnp++ = '\0';
939 	bsetprops("fstype", "ufs");
940 	bsetprops("bootpath", vbdpath);
941 
942 	DBG_MSG("VBD bootpath set to ");
943 	DBG_MSG(vbdpath);
944 	DBG_MSG("\n");
945 }
946 
947 /*
948  * parse the xpv-nfsroot property to create properties used by
949  * nfs_mountroot.
950  */
951 static void
952 xen_nfsroot_props(char *s)
953 {
954 	char *prop_map[] = {
955 		BP_SERVER_IP,	/* server IP address */
956 		BP_SERVER_NAME,	/* server hostname */
957 		BP_SERVER_PATH,	/* root path */
958 	};
959 	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
960 
961 	bsetprop("fstype", 6, "nfs", 4);
962 
963 	xen_parse_props(s, prop_map, n_prop);
964 
965 	/*
966 	 * If a server name wasn't specified, use a default.
967 	 */
968 	if (do_bsys_getproplen(NULL, BP_SERVER_NAME) == -1)
969 		bsetprops(BP_SERVER_NAME, "unknown");
970 }
971 
972 /*
973  * Extract our IP address, etc. from the "xpv-ip" property.
974  */
975 static void
976 xen_ip_props(char *s)
977 {
978 	char *prop_map[] = {
979 		BP_HOST_IP,		/* IP address */
980 		NULL,			/* NFS server IP address (ignored in */
981 					/* favour of xpv-nfsroot) */
982 		BP_ROUTER_IP,		/* IP gateway */
983 		BP_SUBNET_MASK,		/* IP subnet mask */
984 		"xpv-hostname",		/* hostname (ignored) */
985 		BP_NETWORK_INTERFACE,	/* interface name */
986 		"xpv-hcp",		/* host configuration protocol */
987 	};
988 	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
989 	char ifname[IFNAMSIZ];
990 
991 	xen_parse_props(s, prop_map, n_prop);
992 
993 	/*
994 	 * A Linux dom0 administrator expects all interfaces to be
995 	 * called "ethX", which is not the case here.
996 	 *
997 	 * If the interface name specified is "eth0", presume that
998 	 * this is really intended to be "xnf0" (the first domU ->
999 	 * dom0 interface for this domain).
1000 	 */
1001 	if ((do_bsys_getprop(NULL, BP_NETWORK_INTERFACE, ifname) == 0) &&
1002 	    (strcmp("eth0", ifname) == 0)) {
1003 		bsetprops(BP_NETWORK_INTERFACE, "xnf0");
1004 		bop_printf(NULL,
1005 		    "network interface name 'eth0' replaced with 'xnf0'\n");
1006 	}
1007 }
1008 
1009 #else	/* __xpv */
1010 
1011 static void
1012 setup_rarp_props(struct sol_netinfo *sip)
1013 {
1014 	char buf[BUFLEN];	/* to hold ip/mac addrs */
1015 	uint8_t *val;
1016 
1017 	val = (uint8_t *)&sip->sn_ciaddr;
1018 	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
1019 	    val[0], val[1], val[2], val[3]);
1020 	bsetprops(BP_HOST_IP, buf);
1021 
1022 	val = (uint8_t *)&sip->sn_siaddr;
1023 	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
1024 	    val[0], val[1], val[2], val[3]);
1025 	bsetprops(BP_SERVER_IP, buf);
1026 
1027 	if (sip->sn_giaddr != 0) {
1028 		val = (uint8_t *)&sip->sn_giaddr;
1029 		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
1030 		    val[0], val[1], val[2], val[3]);
1031 		bsetprops(BP_ROUTER_IP, buf);
1032 	}
1033 
1034 	if (sip->sn_netmask != 0) {
1035 		val = (uint8_t *)&sip->sn_netmask;
1036 		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
1037 		    val[0], val[1], val[2], val[3]);
1038 		bsetprops(BP_SUBNET_MASK, buf);
1039 	}
1040 
1041 	if (sip->sn_mactype != 4 || sip->sn_maclen != 6) {
1042 		bop_printf(NULL, "unsupported mac type %d, mac len %d\n",
1043 		    sip->sn_mactype, sip->sn_maclen);
1044 	} else {
1045 		val = sip->sn_macaddr;
1046 		(void) snprintf(buf, BUFLEN, "%x:%x:%x:%x:%x:%x",
1047 		    val[0], val[1], val[2], val[3], val[4], val[5]);
1048 		bsetprops(BP_BOOT_MAC, buf);
1049 	}
1050 }
1051 
1052 #endif	/* __xpv */
1053 
1054 static void
1055 build_panic_cmdline(const char *cmd, int cmdlen)
1056 {
1057 	int proplen;
1058 	size_t arglen;
1059 
1060 	arglen = sizeof (fastreboot_onpanic_args);
1061 	/*
1062 	 * If we allready have fastreboot-onpanic set to zero,
1063 	 * don't add them again.
1064 	 */
1065 	if ((proplen = do_bsys_getproplen(NULL, FASTREBOOT_ONPANIC)) > 0 &&
1066 	    proplen <=  sizeof (fastreboot_onpanic_cmdline)) {
1067 		(void) do_bsys_getprop(NULL, FASTREBOOT_ONPANIC,
1068 		    fastreboot_onpanic_cmdline);
1069 		if (FASTREBOOT_ONPANIC_NOTSET(fastreboot_onpanic_cmdline))
1070 			arglen = 1;
1071 	}
1072 
1073 	/*
1074 	 * construct fastreboot_onpanic_cmdline
1075 	 */
1076 	if (cmdlen + arglen > sizeof (fastreboot_onpanic_cmdline)) {
1077 		DBG_MSG("Command line too long: clearing "
1078 		    FASTREBOOT_ONPANIC "\n");
1079 		fastreboot_onpanic = 0;
1080 	} else {
1081 		bcopy(cmd, fastreboot_onpanic_cmdline, cmdlen);
1082 		if (arglen != 1)
1083 			bcopy(fastreboot_onpanic_args,
1084 			    fastreboot_onpanic_cmdline + cmdlen, arglen);
1085 		else
1086 			fastreboot_onpanic_cmdline[cmdlen] = 0;
1087 	}
1088 }
1089 
1090 
1091 #ifndef	__xpv
1092 /*
1093  * Construct boot command line for Fast Reboot
1094  */
1095 static void
1096 build_fastboot_cmdline(void)
1097 {
1098 	saved_cmdline_len =  strlen(xbootp->bi_cmdline) + 1;
1099 	if (saved_cmdline_len > FASTBOOT_SAVED_CMDLINE_LEN) {
1100 		DBG(saved_cmdline_len);
1101 		DBG_MSG("Command line too long: clearing fastreboot_capable\n");
1102 		fastreboot_capable = 0;
1103 	} else {
1104 		bcopy((void *)(xbootp->bi_cmdline), (void *)saved_cmdline,
1105 		    saved_cmdline_len);
1106 		saved_cmdline[saved_cmdline_len - 1] = '\0';
1107 		build_panic_cmdline(saved_cmdline, saved_cmdline_len - 1);
1108 	}
1109 }
1110 
1111 /*
1112  * Save memory layout, disk drive information, unix and boot archive sizes for
1113  * Fast Reboot.
1114  */
1115 static void
1116 save_boot_info(multiboot_info_t *mbi, struct xboot_info *xbi)
1117 {
1118 	struct boot_modules *modp;
1119 	int i;
1120 
1121 	bcopy(mbi, &saved_mbi, sizeof (multiboot_info_t));
1122 	if (mbi->mmap_length > sizeof (saved_mmap)) {
1123 		DBG_MSG("mbi->mmap_length too big: clearing "
1124 		    "fastreboot_capable\n");
1125 		fastreboot_capable = 0;
1126 	} else {
1127 		bcopy((void *)(uintptr_t)mbi->mmap_addr, (void *)saved_mmap,
1128 		    mbi->mmap_length);
1129 	}
1130 
1131 	if ((mbi->flags & MB_INFO_DRIVE_INFO) != 0) {
1132 		if (mbi->drives_length > sizeof (saved_drives)) {
1133 			DBG(mbi->drives_length);
1134 			DBG_MSG("mbi->drives_length too big: clearing "
1135 			    "fastreboot_capable\n");
1136 			fastreboot_capable = 0;
1137 		} else {
1138 			bcopy((void *)(uintptr_t)mbi->drives_addr,
1139 			    (void *)saved_drives, mbi->drives_length);
1140 		}
1141 	} else {
1142 		saved_mbi.drives_length = 0;
1143 		saved_mbi.drives_addr = NULL;
1144 	}
1145 
1146 	/*
1147 	 * Current file sizes.  Used by fastboot.c to figure out how much
1148 	 * memory to reserve for panic reboot.
1149 	 * Use the module list from the dboot-constructed xboot_info
1150 	 * instead of the list referenced by the multiboot structure
1151 	 * because that structure may not be addressable now.
1152 	 */
1153 	saved_file_size[FASTBOOT_NAME_UNIX] = FOUR_MEG - PAGESIZE;
1154 	for (i = 0, modp = (struct boot_modules *)(uintptr_t)xbi->bi_modules;
1155 	    i < xbi->bi_module_cnt; i++, modp++) {
1156 		saved_file_size[FASTBOOT_NAME_BOOTARCHIVE] += modp->bm_size;
1157 	}
1158 }
1159 #endif	/* __xpv */
1160 
1161 
1162 /*
1163  * 1st pass at building the table of boot properties. This includes:
1164  * - values set on the command line: -B a=x,b=y,c=z ....
1165  * - known values we just compute (ie. from xbootp)
1166  * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
1167  *
1168  * the grub command line looked like:
1169  * kernel boot-file [-B prop=value[,prop=value]...] [boot-args]
1170  *
1171  * whoami is the same as boot-file
1172  */
1173 static void
1174 build_boot_properties(void)
1175 {
1176 	char *name;
1177 	int name_len;
1178 	char *value;
1179 	int value_len;
1180 	struct boot_modules *bm, *rdbm;
1181 	char *propbuf;
1182 	int quoted = 0;
1183 	int boot_arg_len;
1184 	uint_t i, midx;
1185 	char modid[32];
1186 #ifndef __xpv
1187 	static int stdout_val = 0;
1188 	uchar_t boot_device;
1189 	char str[3];
1190 	multiboot_info_t *mbi;
1191 	int netboot;
1192 	struct sol_netinfo *sip;
1193 #endif
1194 
1195 	/*
1196 	 * These have to be done first, so that kobj_mount_root() works
1197 	 */
1198 	DBG_MSG("Building boot properties\n");
1199 	propbuf = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, 0);
1200 	DBG((uintptr_t)propbuf);
1201 	if (xbootp->bi_module_cnt > 0) {
1202 		bm = xbootp->bi_modules;
1203 		rdbm = NULL;
1204 		for (midx = i = 0; i < xbootp->bi_module_cnt; i++) {
1205 			if (bm[i].bm_type == BMT_ROOTFS) {
1206 				rdbm = &bm[i];
1207 				continue;
1208 			}
1209 			if (bm[i].bm_type == BMT_HASH || bm[i].bm_name == NULL)
1210 				continue;
1211 
1212 			(void) snprintf(modid, sizeof (modid),
1213 			    "module-name-%u", midx);
1214 			bsetprops(modid, (char *)bm[i].bm_name);
1215 			(void) snprintf(modid, sizeof (modid),
1216 			    "module-addr-%u", midx);
1217 			bsetprop64(modid, (uint64_t)(uintptr_t)bm[i].bm_addr);
1218 			(void) snprintf(modid, sizeof (modid),
1219 			    "module-size-%u", midx);
1220 			bsetprop64(modid, (uint64_t)bm[i].bm_size);
1221 			++midx;
1222 		}
1223 		if (rdbm != NULL) {
1224 			bsetprop64("ramdisk_start",
1225 			    (uint64_t)(uintptr_t)rdbm->bm_addr);
1226 			bsetprop64("ramdisk_end",
1227 			    (uint64_t)(uintptr_t)rdbm->bm_addr + rdbm->bm_size);
1228 		}
1229 	}
1230 
1231 	/*
1232 	 * If there are any boot time modules or hashes present, then disable
1233 	 * fast reboot.
1234 	 */
1235 	if (xbootp->bi_module_cnt > 1) {
1236 		fastreboot_disable(FBNS_BOOTMOD);
1237 	}
1238 
1239 	DBG_MSG("Parsing command line for boot properties\n");
1240 	value = xbootp->bi_cmdline;
1241 
1242 	/*
1243 	 * allocate memory to collect boot_args into
1244 	 */
1245 	boot_arg_len = strlen(xbootp->bi_cmdline) + 1;
1246 	boot_args = do_bsys_alloc(NULL, NULL, boot_arg_len, MMU_PAGESIZE);
1247 	boot_args[0] = 0;
1248 	boot_arg_len = 0;
1249 
1250 #ifdef __xpv
1251 	/*
1252 	 * Xen puts a lot of device information in front of the kernel name
1253 	 * let's grab them and make them boot properties.  The first
1254 	 * string w/o an "=" in it will be the boot-file property.
1255 	 */
1256 	(void) strcpy(namebuf, "xpv-");
1257 	for (;;) {
1258 		/*
1259 		 * get to next property
1260 		 */
1261 		while (ISSPACE(*value))
1262 			++value;
1263 		name = value;
1264 		/*
1265 		 * look for an "="
1266 		 */
1267 		while (*value && !ISSPACE(*value) && *value != '=') {
1268 			value++;
1269 		}
1270 		if (*value != '=') { /* no "=" in the property */
1271 			value = name;
1272 			break;
1273 		}
1274 		name_len = value - name;
1275 		value_len = 0;
1276 		/*
1277 		 * skip over the "="
1278 		 */
1279 		value++;
1280 		while (value[value_len] && !ISSPACE(value[value_len])) {
1281 			++value_len;
1282 		}
1283 		/*
1284 		 * build property name with "xpv-" prefix
1285 		 */
1286 		if (name_len + 4 > 32) { /* skip if name too long */
1287 			value += value_len;
1288 			continue;
1289 		}
1290 		bcopy(name, &namebuf[4], name_len);
1291 		name_len += 4;
1292 		namebuf[name_len] = 0;
1293 		bcopy(value, propbuf, value_len);
1294 		propbuf[value_len] = 0;
1295 		bsetprops(namebuf, propbuf);
1296 
1297 		/*
1298 		 * xpv-root is set to the logical disk name of the xen
1299 		 * VBD when booting from a disk-based filesystem.
1300 		 */
1301 		if (strcmp(namebuf, "xpv-root") == 0)
1302 			xen_vbdroot_props(propbuf);
1303 		/*
1304 		 * While we're here, if we have a "xpv-nfsroot" property
1305 		 * then we need to set "fstype" to "nfs" so we mount
1306 		 * our root from the nfs server.  Also parse the xpv-nfsroot
1307 		 * property to create the properties that nfs_mountroot will
1308 		 * need to find the root and mount it.
1309 		 */
1310 		if (strcmp(namebuf, "xpv-nfsroot") == 0)
1311 			xen_nfsroot_props(propbuf);
1312 
1313 		if (strcmp(namebuf, "xpv-ip") == 0)
1314 			xen_ip_props(propbuf);
1315 		value += value_len;
1316 	}
1317 #endif
1318 
1319 	while (ISSPACE(*value))
1320 		++value;
1321 	/*
1322 	 * value now points at the boot-file
1323 	 */
1324 	value_len = 0;
1325 	while (value[value_len] && !ISSPACE(value[value_len]))
1326 		++value_len;
1327 	if (value_len > 0) {
1328 		whoami = propbuf;
1329 		bcopy(value, whoami, value_len);
1330 		whoami[value_len] = 0;
1331 		bsetprops("boot-file", whoami);
1332 		/*
1333 		 * strip leading path stuff from whoami, so running from
1334 		 * PXE/miniroot makes sense.
1335 		 */
1336 		if (strstr(whoami, "/platform/") != NULL)
1337 			whoami = strstr(whoami, "/platform/");
1338 		bsetprops("whoami", whoami);
1339 	}
1340 
1341 	/*
1342 	 * Values forcibly set boot properties on the command line via -B.
1343 	 * Allow use of quotes in values. Other stuff goes on kernel
1344 	 * command line.
1345 	 */
1346 	name = value + value_len;
1347 	while (*name != 0) {
1348 		/*
1349 		 * anything not " -B" is copied to the command line
1350 		 */
1351 		if (!ISSPACE(name[0]) || name[1] != '-' || name[2] != 'B') {
1352 			boot_args[boot_arg_len++] = *name;
1353 			boot_args[boot_arg_len] = 0;
1354 			++name;
1355 			continue;
1356 		}
1357 
1358 		/*
1359 		 * skip the " -B" and following white space
1360 		 */
1361 		name += 3;
1362 		while (ISSPACE(*name))
1363 			++name;
1364 		while (*name && !ISSPACE(*name)) {
1365 			value = strstr(name, "=");
1366 			if (value == NULL)
1367 				break;
1368 			name_len = value - name;
1369 			++value;
1370 			value_len = 0;
1371 			quoted = 0;
1372 			for (; ; ++value_len) {
1373 				if (!value[value_len])
1374 					break;
1375 
1376 				/*
1377 				 * is this value quoted?
1378 				 */
1379 				if (value_len == 0 &&
1380 				    (value[0] == '\'' || value[0] == '"')) {
1381 					quoted = value[0];
1382 					++value_len;
1383 				}
1384 
1385 				/*
1386 				 * In the quote accept any character,
1387 				 * but look for ending quote.
1388 				 */
1389 				if (quoted) {
1390 					if (value[value_len] == quoted)
1391 						quoted = 0;
1392 					continue;
1393 				}
1394 
1395 				/*
1396 				 * a comma or white space ends the value
1397 				 */
1398 				if (value[value_len] == ',' ||
1399 				    ISSPACE(value[value_len]))
1400 					break;
1401 			}
1402 
1403 			if (value_len == 0) {
1404 				bsetprop(name, name_len, "true", 5);
1405 			} else {
1406 				char *v = value;
1407 				int l = value_len;
1408 				if (v[0] == v[l - 1] &&
1409 				    (v[0] == '\'' || v[0] == '"')) {
1410 					++v;
1411 					l -= 2;
1412 				}
1413 				bcopy(v, propbuf, l);
1414 				propbuf[l] = '\0';
1415 				bsetprop(name, name_len, propbuf,
1416 				    l + 1);
1417 			}
1418 			name = value + value_len;
1419 			while (*name == ',')
1420 				++name;
1421 		}
1422 	}
1423 
1424 	/*
1425 	 * set boot-args property
1426 	 * 1275 name is bootargs, so set
1427 	 * that too
1428 	 */
1429 	bsetprops("boot-args", boot_args);
1430 	bsetprops("bootargs", boot_args);
1431 
1432 #ifndef __xpv
1433 	/*
1434 	 * set the BIOS boot device from GRUB
1435 	 */
1436 	netboot = 0;
1437 	mbi = xbootp->bi_mb_info;
1438 
1439 	/*
1440 	 * Build boot command line for Fast Reboot
1441 	 */
1442 	build_fastboot_cmdline();
1443 
1444 	/*
1445 	 * Save various boot information for Fast Reboot
1446 	 */
1447 	save_boot_info(mbi, xbootp);
1448 
1449 	if (mbi != NULL && mbi->flags & MB_INFO_BOOTDEV) {
1450 		boot_device = mbi->boot_device >> 24;
1451 		if (boot_device == 0x20)
1452 			netboot++;
1453 		str[0] = (boot_device >> 4) + '0';
1454 		str[1] = (boot_device & 0xf) + '0';
1455 		str[2] = 0;
1456 		bsetprops("bios-boot-device", str);
1457 	} else {
1458 		netboot = 1;
1459 	}
1460 
1461 	/*
1462 	 * In the netboot case, drives_info is overloaded with the dhcp ack.
1463 	 * This is not multiboot compliant and requires special pxegrub!
1464 	 */
1465 	if (netboot && mbi->drives_length != 0) {
1466 		sip = (struct sol_netinfo *)(uintptr_t)mbi->drives_addr;
1467 		if (sip->sn_infotype == SN_TYPE_BOOTP)
1468 			bsetprop("bootp-response", sizeof ("bootp-response"),
1469 			    (void *)(uintptr_t)mbi->drives_addr,
1470 			    mbi->drives_length);
1471 		else if (sip->sn_infotype == SN_TYPE_RARP)
1472 			setup_rarp_props(sip);
1473 	}
1474 	bsetprop("stdout", strlen("stdout"),
1475 	    &stdout_val, sizeof (stdout_val));
1476 #endif /* __xpv */
1477 
1478 	/*
1479 	 * more conjured up values for made up things....
1480 	 */
1481 #if defined(__xpv)
1482 	bsetprops("mfg-name", "i86xpv");
1483 	bsetprops("impl-arch-name", "i86xpv");
1484 #else
1485 	bsetprops("mfg-name", "i86pc");
1486 	bsetprops("impl-arch-name", "i86pc");
1487 #endif
1488 
1489 	/*
1490 	 * Build firmware-provided system properties
1491 	 */
1492 	build_firmware_properties();
1493 
1494 	/*
1495 	 * XXPV
1496 	 *
1497 	 * Find out what these are:
1498 	 * - cpuid_feature_ecx_include
1499 	 * - cpuid_feature_ecx_exclude
1500 	 * - cpuid_feature_edx_include
1501 	 * - cpuid_feature_edx_exclude
1502 	 *
1503 	 * Find out what these are in multiboot:
1504 	 * - netdev-path
1505 	 * - fstype
1506 	 */
1507 }
1508 
1509 #ifdef __xpv
1510 /*
1511  * Under the Hypervisor, memory usable for DMA may be scarce. One
1512  * very likely large pool of DMA friendly memory is occupied by
1513  * the boot_archive, as it was loaded by grub into low MFNs.
1514  *
1515  * Here we free up that memory by copying the boot archive to what are
1516  * likely higher MFN pages and then swapping the mfn/pfn mappings.
1517  */
1518 #define	PFN_2GIG	0x80000
1519 static void
1520 relocate_boot_archive(void)
1521 {
1522 	mfn_t max_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
1523 	struct boot_modules *bm = xbootp->bi_modules;
1524 	uintptr_t va;
1525 	pfn_t va_pfn;
1526 	mfn_t va_mfn;
1527 	caddr_t copy;
1528 	pfn_t copy_pfn;
1529 	mfn_t copy_mfn;
1530 	size_t	len;
1531 	int slop;
1532 	int total = 0;
1533 	int relocated = 0;
1534 	int mmu_update_return;
1535 	mmu_update_t t[2];
1536 	x86pte_t pte;
1537 
1538 	/*
1539 	 * If all MFN's are below 2Gig, don't bother doing this.
1540 	 */
1541 	if (max_mfn < PFN_2GIG)
1542 		return;
1543 	if (xbootp->bi_module_cnt < 1) {
1544 		DBG_MSG("no boot_archive!");
1545 		return;
1546 	}
1547 
1548 	DBG_MSG("moving boot_archive to high MFN memory\n");
1549 	va = (uintptr_t)bm->bm_addr;
1550 	len = bm->bm_size;
1551 	slop = va & MMU_PAGEOFFSET;
1552 	if (slop) {
1553 		va += MMU_PAGESIZE - slop;
1554 		len -= MMU_PAGESIZE - slop;
1555 	}
1556 	len = P2ALIGN(len, MMU_PAGESIZE);
1557 
1558 	/*
1559 	 * Go through all boot_archive pages, swapping any low MFN pages
1560 	 * with memory at next_phys.
1561 	 */
1562 	while (len != 0) {
1563 		++total;
1564 		va_pfn = mmu_btop(va - ONE_GIG);
1565 		va_mfn = mfn_list[va_pfn];
1566 		if (mfn_list[va_pfn] < PFN_2GIG) {
1567 			copy = kbm_remap_window(next_phys, 1);
1568 			bcopy((void *)va, copy, MMU_PAGESIZE);
1569 			copy_pfn = mmu_btop(next_phys);
1570 			copy_mfn = mfn_list[copy_pfn];
1571 
1572 			pte = mfn_to_ma(copy_mfn) | PT_NOCONSIST | PT_VALID;
1573 			if (HYPERVISOR_update_va_mapping(va, pte,
1574 			    UVMF_INVLPG | UVMF_LOCAL))
1575 				bop_panic("relocate_boot_archive():  "
1576 				    "HYPERVISOR_update_va_mapping() failed");
1577 
1578 			mfn_list[va_pfn] = copy_mfn;
1579 			mfn_list[copy_pfn] = va_mfn;
1580 
1581 			t[0].ptr = mfn_to_ma(copy_mfn) | MMU_MACHPHYS_UPDATE;
1582 			t[0].val = va_pfn;
1583 			t[1].ptr = mfn_to_ma(va_mfn) | MMU_MACHPHYS_UPDATE;
1584 			t[1].val = copy_pfn;
1585 			if (HYPERVISOR_mmu_update(t, 2, &mmu_update_return,
1586 			    DOMID_SELF) != 0 || mmu_update_return != 2)
1587 				bop_panic("relocate_boot_archive():  "
1588 				    "HYPERVISOR_mmu_update() failed");
1589 
1590 			next_phys += MMU_PAGESIZE;
1591 			++relocated;
1592 		}
1593 		len -= MMU_PAGESIZE;
1594 		va += MMU_PAGESIZE;
1595 	}
1596 	DBG_MSG("Relocated pages:\n");
1597 	DBG(relocated);
1598 	DBG_MSG("Out of total pages:\n");
1599 	DBG(total);
1600 }
1601 #endif /* __xpv */
1602 
1603 #if !defined(__xpv)
1604 /*
1605  * Install a temporary IDT that lets us catch errors in the boot time code.
1606  * We shouldn't get any faults at all while this is installed, so we'll
1607  * just generate a traceback and exit.
1608  */
1609 #ifdef __amd64
1610 static const int bcode_sel = B64CODE_SEL;
1611 #else
1612 static const int bcode_sel = B32CODE_SEL;
1613 #endif
1614 
1615 /*
1616  * simple description of a stack frame (args are 32 bit only currently)
1617  */
1618 typedef struct bop_frame {
1619 	struct bop_frame *old_frame;
1620 	pc_t retaddr;
1621 	long arg[1];
1622 } bop_frame_t;
1623 
1624 void
1625 bop_traceback(bop_frame_t *frame)
1626 {
1627 	pc_t pc;
1628 	int cnt;
1629 	char *ksym;
1630 	ulong_t off;
1631 #if defined(__i386)
1632 	int a;
1633 #endif
1634 
1635 	bop_printf(NULL, "Stack traceback:\n");
1636 	for (cnt = 0; cnt < 30; ++cnt) {	/* up to 30 frames */
1637 		pc = frame->retaddr;
1638 		if (pc == 0)
1639 			break;
1640 		ksym = kobj_getsymname(pc, &off);
1641 		if (ksym)
1642 			bop_printf(NULL, "  %s+%lx", ksym, off);
1643 		else
1644 			bop_printf(NULL, "  0x%lx", pc);
1645 
1646 		frame = frame->old_frame;
1647 		if (frame == 0) {
1648 			bop_printf(NULL, "\n");
1649 			break;
1650 		}
1651 #if defined(__i386)
1652 		for (a = 0; a < 6; ++a) {	/* try for 6 args */
1653 			if ((void *)&frame->arg[a] == (void *)frame->old_frame)
1654 				break;
1655 			if (a == 0)
1656 				bop_printf(NULL, "(");
1657 			else
1658 				bop_printf(NULL, ",");
1659 			bop_printf(NULL, "0x%lx", frame->arg[a]);
1660 		}
1661 		bop_printf(NULL, ")");
1662 #endif
1663 		bop_printf(NULL, "\n");
1664 	}
1665 }
1666 
1667 struct trapframe {
1668 	ulong_t error_code;	/* optional */
1669 	ulong_t inst_ptr;
1670 	ulong_t code_seg;
1671 	ulong_t flags_reg;
1672 #ifdef __amd64
1673 	ulong_t stk_ptr;
1674 	ulong_t stk_seg;
1675 #endif
1676 };
1677 
1678 void
1679 bop_trap(ulong_t *tfp)
1680 {
1681 	struct trapframe *tf = (struct trapframe *)tfp;
1682 	bop_frame_t fakeframe;
1683 	static int depth = 0;
1684 
1685 	/*
1686 	 * Check for an infinite loop of traps.
1687 	 */
1688 	if (++depth > 2)
1689 		bop_panic("Nested trap");
1690 
1691 	bop_printf(NULL, "Unexpected trap\n");
1692 
1693 	/*
1694 	 * adjust the tf for optional error_code by detecting the code selector
1695 	 */
1696 	if (tf->code_seg != bcode_sel)
1697 		tf = (struct trapframe *)(tfp - 1);
1698 	else
1699 		bop_printf(NULL, "error code           0x%lx\n",
1700 		    tf->error_code & 0xffffffff);
1701 
1702 	bop_printf(NULL, "instruction pointer  0x%lx\n", tf->inst_ptr);
1703 	bop_printf(NULL, "code segment         0x%lx\n", tf->code_seg & 0xffff);
1704 	bop_printf(NULL, "flags register       0x%lx\n", tf->flags_reg);
1705 #ifdef __amd64
1706 	bop_printf(NULL, "return %%rsp          0x%lx\n", tf->stk_ptr);
1707 	bop_printf(NULL, "return %%ss           0x%lx\n", tf->stk_seg & 0xffff);
1708 #endif
1709 
1710 	/* grab %[er]bp pushed by our code from the stack */
1711 	fakeframe.old_frame = (bop_frame_t *)*(tfp - 3);
1712 	fakeframe.retaddr = (pc_t)tf->inst_ptr;
1713 	bop_printf(NULL, "Attempting stack backtrace:\n");
1714 	bop_traceback(&fakeframe);
1715 	bop_panic("unexpected trap in early boot");
1716 }
1717 
1718 extern void bop_trap_handler(void);
1719 
1720 static gate_desc_t *bop_idt;
1721 
1722 static desctbr_t bop_idt_info;
1723 
1724 static void
1725 bop_idt_init(void)
1726 {
1727 	int t;
1728 
1729 	bop_idt = (gate_desc_t *)
1730 	    do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1731 	bzero(bop_idt, MMU_PAGESIZE);
1732 	for (t = 0; t < NIDT; ++t) {
1733 		/*
1734 		 * Note that since boot runs without a TSS, the
1735 		 * double fault handler cannot use an alternate stack
1736 		 * (64-bit) or a task gate (32-bit).
1737 		 */
1738 		set_gatesegd(&bop_idt[t], &bop_trap_handler, bcode_sel,
1739 		    SDT_SYSIGT, TRP_KPL, 0);
1740 	}
1741 	bop_idt_info.dtr_limit = (NIDT * sizeof (gate_desc_t)) - 1;
1742 	bop_idt_info.dtr_base = (uintptr_t)bop_idt;
1743 	wr_idtr(&bop_idt_info);
1744 }
1745 #endif	/* !defined(__xpv) */
1746 
1747 /*
1748  * This is where we enter the kernel. It dummies up the boot_ops and
1749  * boot_syscalls vectors and jumps off to _kobj_boot()
1750  */
1751 void
1752 _start(struct xboot_info *xbp)
1753 {
1754 	bootops_t *bops = &bootop;
1755 	extern void _kobj_boot();
1756 
1757 	/*
1758 	 * 1st off - initialize the console for any error messages
1759 	 */
1760 	xbootp = xbp;
1761 #ifdef __xpv
1762 	HYPERVISOR_shared_info = (void *)xbootp->bi_shared_info;
1763 	xen_info = xbootp->bi_xen_start_info;
1764 #endif
1765 
1766 #ifndef __xpv
1767 	if (*((uint32_t *)(FASTBOOT_SWTCH_PA + FASTBOOT_STACK_OFFSET)) ==
1768 	    FASTBOOT_MAGIC) {
1769 		post_fastreboot = 1;
1770 		*((uint32_t *)(FASTBOOT_SWTCH_PA + FASTBOOT_STACK_OFFSET)) = 0;
1771 	}
1772 #endif
1773 
1774 	bcons_init((void *)xbootp->bi_cmdline);
1775 	have_console = 1;
1776 
1777 	/*
1778 	 * enable debugging
1779 	 */
1780 	if (strstr((char *)xbootp->bi_cmdline, "kbm_debug"))
1781 		kbm_debug = 1;
1782 
1783 	DBG_MSG("\n\n*** Entered Solaris in _start() cmdline is: ");
1784 	DBG_MSG((char *)xbootp->bi_cmdline);
1785 	DBG_MSG("\n\n\n");
1786 
1787 	/*
1788 	 * physavail is no longer used by startup
1789 	 */
1790 	bm.physinstalled = xbp->bi_phys_install;
1791 	bm.pcimem = xbp->bi_pcimem;
1792 	bm.rsvdmem = xbp->bi_rsvdmem;
1793 	bm.physavail = NULL;
1794 
1795 	/*
1796 	 * initialize the boot time allocator
1797 	 */
1798 	next_phys = xbootp->bi_next_paddr;
1799 	DBG(next_phys);
1800 	next_virt = (uintptr_t)xbootp->bi_next_vaddr;
1801 	DBG(next_virt);
1802 	DBG_MSG("Initializing boot time memory management...");
1803 #ifdef __xpv
1804 	{
1805 		xen_platform_parameters_t p;
1806 
1807 		/* This call shouldn't fail, dboot already did it once. */
1808 		(void) HYPERVISOR_xen_version(XENVER_platform_parameters, &p);
1809 		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
1810 		DBG(xen_virt_start);
1811 	}
1812 #endif
1813 	kbm_init(xbootp);
1814 	DBG_MSG("done\n");
1815 
1816 	/*
1817 	 * Fill in the bootops vector
1818 	 */
1819 	bops->bsys_version = BO_VERSION;
1820 	bops->boot_mem = &bm;
1821 	bops->bsys_alloc = do_bsys_alloc;
1822 	bops->bsys_free = do_bsys_free;
1823 	bops->bsys_getproplen = do_bsys_getproplen;
1824 	bops->bsys_getprop = do_bsys_getprop;
1825 	bops->bsys_nextprop = do_bsys_nextprop;
1826 	bops->bsys_printf = bop_printf;
1827 	bops->bsys_doint = do_bsys_doint;
1828 
1829 	/*
1830 	 * BOP_EALLOC() is no longer needed
1831 	 */
1832 	bops->bsys_ealloc = do_bsys_ealloc;
1833 
1834 #ifdef __xpv
1835 	/*
1836 	 * On domain 0 we need to free up some physical memory that is
1837 	 * usable for DMA. Since GRUB loaded the boot_archive, it is
1838 	 * sitting in low MFN memory. We'll relocated the boot archive
1839 	 * pages to high PFN memory.
1840 	 */
1841 	if (DOMAIN_IS_INITDOMAIN(xen_info))
1842 		relocate_boot_archive();
1843 #endif
1844 
1845 #ifndef __xpv
1846 	/*
1847 	 * Install an IDT to catch early pagefaults (shouldn't have any).
1848 	 * Also needed for kmdb.
1849 	 */
1850 	bop_idt_init();
1851 #endif
1852 
1853 	/*
1854 	 * Start building the boot properties from the command line
1855 	 */
1856 	DBG_MSG("Initializing boot properties:\n");
1857 	build_boot_properties();
1858 
1859 	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
1860 		char *name;
1861 		char *value;
1862 		char *cp;
1863 		int len;
1864 
1865 		value = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1866 		bop_printf(NULL, "\nBoot properties:\n");
1867 		name = "";
1868 		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
1869 			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
1870 			(void) do_bsys_getprop(NULL, name, value);
1871 			len = do_bsys_getproplen(NULL, name);
1872 			bop_printf(NULL, "len=%d ", len);
1873 			value[len] = 0;
1874 			for (cp = value; *cp; ++cp) {
1875 				if (' ' <= *cp && *cp <= '~')
1876 					bop_printf(NULL, "%c", *cp);
1877 				else
1878 					bop_printf(NULL, "-0x%x-", *cp);
1879 			}
1880 			bop_printf(NULL, "\n");
1881 		}
1882 	}
1883 
1884 	/*
1885 	 * jump into krtld...
1886 	 */
1887 	_kobj_boot(&bop_sysp, NULL, bops, NULL);
1888 }
1889 
1890 
1891 /*ARGSUSED*/
1892 static caddr_t
1893 no_more_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
1894 {
1895 	panic("Attempt to bsys_alloc() too late\n");
1896 	return (NULL);
1897 }
1898 
1899 /*ARGSUSED*/
1900 static void
1901 no_more_free(bootops_t *bop, caddr_t virt, size_t size)
1902 {
1903 	panic("Attempt to bsys_free() too late\n");
1904 }
1905 
1906 void
1907 bop_no_more_mem(void)
1908 {
1909 	DBG(total_bop_alloc_scratch);
1910 	DBG(total_bop_alloc_kernel);
1911 	bootops->bsys_alloc = no_more_alloc;
1912 	bootops->bsys_free = no_more_free;
1913 }
1914 
1915 
1916 /*
1917  * Set ACPI firmware properties
1918  */
1919 
1920 static caddr_t
1921 vmap_phys(size_t length, paddr_t pa)
1922 {
1923 	paddr_t	start, end;
1924 	caddr_t	va;
1925 	size_t	len, page;
1926 
1927 #ifdef __xpv
1928 	pa = pfn_to_pa(xen_assign_pfn(mmu_btop(pa))) | (pa & MMU_PAGEOFFSET);
1929 #endif
1930 	start = P2ALIGN(pa, MMU_PAGESIZE);
1931 	end = P2ROUNDUP(pa + length, MMU_PAGESIZE);
1932 	len = end - start;
1933 	va = (caddr_t)alloc_vaddr(len, MMU_PAGESIZE);
1934 	for (page = 0; page < len; page += MMU_PAGESIZE)
1935 		kbm_map((uintptr_t)va + page, start + page, 0, 0);
1936 	return (va + (pa & MMU_PAGEOFFSET));
1937 }
1938 
/*
 * Return the sum, modulo 256, of the len bytes starting at tp.  A table
 * whose checksum is good sums to 0.
 */
static uint8_t
checksum_table(uint8_t *tp, size_t len)
{
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total += tp[i];

	return (total);
}
1949 
1950 static int
1951 valid_rsdp(ACPI_TABLE_RSDP *rp)
1952 {
1953 
1954 	/* validate the V1.x checksum */
1955 	if (checksum_table((uint8_t *)rp, ACPI_RSDP_CHECKSUM_LENGTH) != 0)
1956 		return (0);
1957 
1958 	/* If pre-ACPI 2.0, this is a valid RSDP */
1959 	if (rp->Revision < 2)
1960 		return (1);
1961 
1962 	/* validate the V2.x checksum */
1963 	if (checksum_table((uint8_t *)rp, ACPI_RSDP_XCHECKSUM_LENGTH) != 0)
1964 		return (0);
1965 
1966 	return (1);
1967 }
1968 
1969 /*
1970  * Scan memory range for an RSDP;
1971  * see ACPI 3.0 Spec, 5.2.5.1
1972  */
1973 static ACPI_TABLE_RSDP *
1974 scan_rsdp(paddr_t start, paddr_t end)
1975 {
1976 	size_t len  = end - start;
1977 	caddr_t ptr;
1978 
1979 	ptr = vmap_phys(len, start);
1980 	while (len > 0) {
1981 		if (strncmp(ptr, ACPI_SIG_RSDP, strlen(ACPI_SIG_RSDP)) == 0 &&
1982 		    valid_rsdp((ACPI_TABLE_RSDP *)ptr))
1983 			return ((ACPI_TABLE_RSDP *)ptr);
1984 
1985 		ptr += ACPI_RSDP_SCAN_STEP;
1986 		len -= ACPI_RSDP_SCAN_STEP;
1987 	}
1988 
1989 	return (NULL);
1990 }
1991 
1992 /*
1993  * Refer to ACPI 3.0 Spec, section 5.2.5.1 to understand this function
1994  */
1995 static ACPI_TABLE_RSDP *
1996 find_rsdp()
1997 {
1998 	ACPI_TABLE_RSDP *rsdp;
1999 	uint16_t *ebda_seg;
2000 	paddr_t  ebda_addr;
2001 
2002 	/*
2003 	 * Get the EBDA segment and scan the first 1K
2004 	 */
2005 	ebda_seg = (uint16_t *)vmap_phys(sizeof (uint16_t),
2006 	    ACPI_EBDA_PTR_LOCATION);
2007 	ebda_addr = *ebda_seg << 4;
2008 	rsdp = scan_rsdp(ebda_addr, ebda_addr + ACPI_EBDA_WINDOW_SIZE);
2009 	if (rsdp == NULL)
2010 		/* if EBDA doesn't contain RSDP, look in BIOS memory */
2011 		rsdp = scan_rsdp(ACPI_HI_RSDP_WINDOW_BASE,
2012 		    ACPI_HI_RSDP_WINDOW_BASE + ACPI_HI_RSDP_WINDOW_SIZE);
2013 	return (rsdp);
2014 }
2015 
2016 static ACPI_TABLE_HEADER *
2017 map_fw_table(paddr_t table_addr)
2018 {
2019 	ACPI_TABLE_HEADER *tp;
2020 	size_t len = MAX(sizeof (*tp), MMU_PAGESIZE);
2021 
2022 	/*
2023 	 * Map at least a page; if the table is larger than this, remap it
2024 	 */
2025 	tp = (ACPI_TABLE_HEADER *)vmap_phys(len, table_addr);
2026 	if (tp->Length > len)
2027 		tp = (ACPI_TABLE_HEADER *)vmap_phys(tp->Length, table_addr);
2028 	return (tp);
2029 }
2030 
2031 static ACPI_TABLE_HEADER *
2032 find_fw_table(char *signature)
2033 {
2034 	static int revision = 0;
2035 	static ACPI_TABLE_XSDT *xsdt;
2036 	static int len;
2037 	paddr_t xsdt_addr;
2038 	ACPI_TABLE_RSDP *rsdp;
2039 	ACPI_TABLE_HEADER *tp;
2040 	paddr_t table_addr;
2041 	int	n;
2042 
2043 	if (strlen(signature) != ACPI_NAME_SIZE)
2044 		return (NULL);
2045 
2046 	/*
2047 	 * Reading the ACPI 3.0 Spec, section 5.2.5.3 will help
2048 	 * understand this code.  If we haven't already found the RSDT/XSDT,
2049 	 * revision will be 0. Find the RSDP and check the revision
2050 	 * to find out whether to use the RSDT or XSDT.  If revision is
2051 	 * 0 or 1, use the RSDT and set internal revision to 1; if it is 2,
2052 	 * use the XSDT.  If the XSDT address is 0, though, fall back to
2053 	 * revision 1 and use the RSDT.
2054 	 */
2055 	if (revision == 0) {
2056 		if ((rsdp = find_rsdp()) != NULL) {
2057 			revision = rsdp->Revision;
2058 			/*
2059 			 * ACPI 6.0 states that current revision is 2
2060 			 * from acpi_table_rsdp definition:
2061 			 * Must be (0) for ACPI 1.0 or (2) for ACPI 2.0+
2062 			 */
2063 			if (revision > 2)
2064 				revision = 2;
2065 			switch (revision) {
2066 			case 2:
2067 				/*
2068 				 * Use the XSDT unless BIOS is buggy and
2069 				 * claims to be rev 2 but has a null XSDT
2070 				 * address
2071 				 */
2072 				xsdt_addr = rsdp->XsdtPhysicalAddress;
2073 				if (xsdt_addr != 0)
2074 					break;
2075 				/* FALLTHROUGH */
2076 			case 0:
2077 				/* treat RSDP rev 0 as revision 1 internally */
2078 				revision = 1;
2079 				/* FALLTHROUGH */
2080 			case 1:
2081 				/* use the RSDT for rev 0/1 */
2082 				xsdt_addr = rsdp->RsdtPhysicalAddress;
2083 				break;
2084 			default:
2085 				/* unknown revision */
2086 				revision = 0;
2087 				break;
2088 			}
2089 		}
2090 		if (revision == 0)
2091 			return (NULL);
2092 
2093 		/* cache the XSDT info */
2094 		xsdt = (ACPI_TABLE_XSDT *)map_fw_table(xsdt_addr);
2095 		len = (xsdt->Header.Length - sizeof (xsdt->Header)) /
2096 		    ((revision == 1) ? sizeof (uint32_t) : sizeof (uint64_t));
2097 	}
2098 
2099 	/*
2100 	 * Scan the table headers looking for a signature match
2101 	 */
2102 	for (n = 0; n < len; n++) {
2103 		ACPI_TABLE_RSDT *rsdt = (ACPI_TABLE_RSDT *)xsdt;
2104 		table_addr = (revision == 1) ? rsdt->TableOffsetEntry[n] :
2105 		    xsdt->TableOffsetEntry[n];
2106 
2107 		if (table_addr == 0)
2108 			continue;
2109 		tp = map_fw_table(table_addr);
2110 		if (strncmp(tp->Signature, signature, ACPI_NAME_SIZE) == 0) {
2111 			return (tp);
2112 		}
2113 	}
2114 	return (NULL);
2115 }
2116 
2117 static void
2118 process_mcfg(ACPI_TABLE_MCFG *tp)
2119 {
2120 	ACPI_MCFG_ALLOCATION *cfg_baap;
2121 	char *cfg_baa_endp;
2122 	int64_t ecfginfo[4];
2123 
2124 	cfg_baap = (ACPI_MCFG_ALLOCATION *)((uintptr_t)tp + sizeof (*tp));
2125 	cfg_baa_endp = ((char *)tp) + tp->Header.Length;
2126 	while ((char *)cfg_baap < cfg_baa_endp) {
2127 		if (cfg_baap->Address != 0 && cfg_baap->PciSegment == 0) {
2128 			ecfginfo[0] = cfg_baap->Address;
2129 			ecfginfo[1] = cfg_baap->PciSegment;
2130 			ecfginfo[2] = cfg_baap->StartBusNumber;
2131 			ecfginfo[3] = cfg_baap->EndBusNumber;
2132 			bsetprop(MCFG_PROPNAME, strlen(MCFG_PROPNAME),
2133 			    ecfginfo, sizeof (ecfginfo));
2134 			break;
2135 		}
2136 		cfg_baap++;
2137 	}
2138 }
2139 
2140 #ifndef __xpv
2141 static void
2142 process_madt_entries(ACPI_TABLE_MADT *tp, uint32_t *cpu_countp,
2143     uint32_t *cpu_possible_countp, uint32_t *cpu_apicid_array)
2144 {
2145 	ACPI_SUBTABLE_HEADER *item, *end;
2146 	uint32_t cpu_count = 0;
2147 	uint32_t cpu_possible_count = 0;
2148 
2149 	/*
2150 	 * Determine number of CPUs and keep track of "final" APIC ID
2151 	 * for each CPU by walking through ACPI MADT processor list
2152 	 */
2153 	end = (ACPI_SUBTABLE_HEADER *)(tp->Header.Length + (uintptr_t)tp);
2154 	item = (ACPI_SUBTABLE_HEADER *)((uintptr_t)tp + sizeof (*tp));
2155 
2156 	while (item < end) {
2157 		switch (item->Type) {
2158 		case ACPI_MADT_TYPE_LOCAL_APIC: {
2159 			ACPI_MADT_LOCAL_APIC *cpu =
2160 			    (ACPI_MADT_LOCAL_APIC *) item;
2161 
2162 			if (cpu->LapicFlags & ACPI_MADT_ENABLED) {
2163 				if (cpu_apicid_array != NULL)
2164 					cpu_apicid_array[cpu_count] = cpu->Id;
2165 				cpu_count++;
2166 			}
2167 			cpu_possible_count++;
2168 			break;
2169 		}
2170 		case ACPI_MADT_TYPE_LOCAL_X2APIC: {
2171 			ACPI_MADT_LOCAL_X2APIC *cpu =
2172 			    (ACPI_MADT_LOCAL_X2APIC *) item;
2173 
2174 			if (cpu->LapicFlags & ACPI_MADT_ENABLED) {
2175 				if (cpu_apicid_array != NULL)
2176 					cpu_apicid_array[cpu_count] =
2177 					    cpu->LocalApicId;
2178 				cpu_count++;
2179 			}
2180 			cpu_possible_count++;
2181 			break;
2182 		}
2183 		default:
2184 			if (kbm_debug)
2185 				bop_printf(NULL, "MADT type %d\n", item->Type);
2186 			break;
2187 		}
2188 
2189 		item = (ACPI_SUBTABLE_HEADER *)((uintptr_t)item + item->Length);
2190 	}
2191 	if (cpu_countp)
2192 		*cpu_countp = cpu_count;
2193 	if (cpu_possible_countp)
2194 		*cpu_possible_countp = cpu_possible_count;
2195 }
2196 
2197 static void
2198 process_madt(ACPI_TABLE_MADT *tp)
2199 {
2200 	uint32_t cpu_count = 0;
2201 	uint32_t cpu_possible_count = 0;
2202 	uint32_t *cpu_apicid_array; /* x2APIC ID is 32bit! */
2203 
2204 	if (tp != NULL) {
2205 		/* count cpu's */
2206 		process_madt_entries(tp, &cpu_count, &cpu_possible_count, NULL);
2207 
2208 		cpu_apicid_array = (uint32_t *)do_bsys_alloc(NULL, NULL,
2209 		    cpu_count * sizeof (*cpu_apicid_array), MMU_PAGESIZE);
2210 		if (cpu_apicid_array == NULL)
2211 			bop_panic("Not enough memory for APIC ID array");
2212 
2213 		/* copy IDs */
2214 		process_madt_entries(tp, NULL, NULL, cpu_apicid_array);
2215 
2216 		/*
2217 		 * Make boot property for array of "final" APIC IDs for each
2218 		 * CPU
2219 		 */
2220 		bsetprop(BP_CPU_APICID_ARRAY, strlen(BP_CPU_APICID_ARRAY),
2221 		    cpu_apicid_array, cpu_count * sizeof (*cpu_apicid_array));
2222 	}
2223 
2224 	/*
2225 	 * Check whether property plat-max-ncpus is already set.
2226 	 */
2227 	if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
2228 		/*
2229 		 * Set plat-max-ncpus to number of maximum possible CPUs given
2230 		 * in MADT if it hasn't been set.
2231 		 * There's no formal way to detect max possible CPUs supported
2232 		 * by platform according to ACPI spec3.0b. So current CPU
2233 		 * hotplug implementation expects that all possible CPUs will
2234 		 * have an entry in MADT table and set plat-max-ncpus to number
2235 		 * of entries in MADT.
2236 		 * With introducing of ACPI4.0, Maximum System Capability Table
2237 		 * (MSCT) provides maximum number of CPUs supported by platform.
2238 		 * If MSCT is unavailable, fall back to old way.
2239 		 */
2240 		if (tp != NULL)
2241 			bsetpropsi(PLAT_MAX_NCPUS_NAME, cpu_possible_count);
2242 	}
2243 
2244 	/*
2245 	 * Set boot property boot-max-ncpus to number of CPUs existing at
2246 	 * boot time. boot-max-ncpus is mainly used for optimization.
2247 	 */
2248 	if (tp != NULL)
2249 		bsetpropsi(BOOT_MAX_NCPUS_NAME, cpu_count);
2250 
2251 	/*
2252 	 * User-set boot-ncpus overrides firmware count
2253 	 */
2254 	if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
2255 		return;
2256 
2257 	/*
2258 	 * Set boot property boot-ncpus to number of active CPUs given in MADT
2259 	 * if it hasn't been set yet.
2260 	 */
2261 	if (tp != NULL)
2262 		bsetpropsi(BOOT_NCPUS_NAME, cpu_count);
2263 }
2264 
2265 static void
2266 process_srat(ACPI_TABLE_SRAT *tp)
2267 {
2268 	ACPI_SUBTABLE_HEADER *item, *end;
2269 	int i;
2270 	int proc_num, mem_num;
2271 #pragma pack(1)
2272 	struct {
2273 		uint32_t domain;
2274 		uint32_t apic_id;
2275 		uint32_t sapic_id;
2276 	} processor;
2277 	struct {
2278 		uint32_t domain;
2279 		uint32_t x2apic_id;
2280 	} x2apic;
2281 	struct {
2282 		uint32_t domain;
2283 		uint64_t addr;
2284 		uint64_t length;
2285 		uint32_t flags;
2286 	} memory;
2287 #pragma pack()
2288 	char prop_name[30];
2289 	uint64_t maxmem = 0;
2290 
2291 	if (tp == NULL)
2292 		return;
2293 
2294 	proc_num = mem_num = 0;
2295 	end = (ACPI_SUBTABLE_HEADER *)(tp->Header.Length + (uintptr_t)tp);
2296 	item = (ACPI_SUBTABLE_HEADER *)((uintptr_t)tp + sizeof (*tp));
2297 	while (item < end) {
2298 		switch (item->Type) {
2299 		case ACPI_SRAT_TYPE_CPU_AFFINITY: {
2300 			ACPI_SRAT_CPU_AFFINITY *cpu =
2301 			    (ACPI_SRAT_CPU_AFFINITY *) item;
2302 
2303 			if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED))
2304 				break;
2305 			processor.domain = cpu->ProximityDomainLo;
2306 			for (i = 0; i < 3; i++)
2307 				processor.domain +=
2308 				    cpu->ProximityDomainHi[i] << ((i + 1) * 8);
2309 			processor.apic_id = cpu->ApicId;
2310 			processor.sapic_id = cpu->LocalSapicEid;
2311 			(void) snprintf(prop_name, 30, "acpi-srat-processor-%d",
2312 			    proc_num);
2313 			bsetprop(prop_name, strlen(prop_name), &processor,
2314 			    sizeof (processor));
2315 			proc_num++;
2316 			break;
2317 		}
2318 		case ACPI_SRAT_TYPE_MEMORY_AFFINITY: {
2319 			ACPI_SRAT_MEM_AFFINITY *mem =
2320 			    (ACPI_SRAT_MEM_AFFINITY *)item;
2321 
2322 			if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED))
2323 				break;
2324 			memory.domain = mem->ProximityDomain;
2325 			memory.addr = mem->BaseAddress;
2326 			memory.length = mem->Length;
2327 			memory.flags = mem->Flags;
2328 			(void) snprintf(prop_name, 30, "acpi-srat-memory-%d",
2329 			    mem_num);
2330 			bsetprop(prop_name, strlen(prop_name), &memory,
2331 			    sizeof (memory));
2332 			if ((mem->Flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
2333 			    (memory.addr + memory.length > maxmem)) {
2334 				maxmem = memory.addr + memory.length;
2335 			}
2336 			mem_num++;
2337 			break;
2338 		}
2339 		case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: {
2340 			ACPI_SRAT_X2APIC_CPU_AFFINITY *x2cpu =
2341 			    (ACPI_SRAT_X2APIC_CPU_AFFINITY *) item;
2342 
2343 			if (!(x2cpu->Flags & ACPI_SRAT_CPU_ENABLED))
2344 				break;
2345 			x2apic.domain = x2cpu->ProximityDomain;
2346 			x2apic.x2apic_id = x2cpu->ApicId;
2347 			(void) snprintf(prop_name, 30, "acpi-srat-processor-%d",
2348 			    proc_num);
2349 			bsetprop(prop_name, strlen(prop_name), &x2apic,
2350 			    sizeof (x2apic));
2351 			proc_num++;
2352 			break;
2353 		}
2354 		default:
2355 			if (kbm_debug)
2356 				bop_printf(NULL, "SRAT type %d\n", item->Type);
2357 			break;
2358 		}
2359 
2360 		item = (ACPI_SUBTABLE_HEADER *)
2361 		    (item->Length + (uintptr_t)item);
2362 	}
2363 
2364 	/*
2365 	 * The maximum physical address calculated from the SRAT table is more
2366 	 * accurate than that calculated from the MSCT table.
2367 	 */
2368 	if (maxmem != 0) {
2369 		plat_dr_physmax = btop(maxmem);
2370 	}
2371 }
2372 
2373 static void
2374 process_slit(ACPI_TABLE_SLIT *tp)
2375 {
2376 
2377 	/*
2378 	 * Check the number of localities; if it's too huge, we just
2379 	 * return and locality enumeration code will handle this later,
2380 	 * if possible.
2381 	 *
2382 	 * Note that the size of the table is the square of the
2383 	 * number of localities; if the number of localities exceeds
2384 	 * UINT16_MAX, the table size may overflow an int when being
2385 	 * passed to bsetprop() below.
2386 	 */
2387 	if (tp->LocalityCount >= SLIT_LOCALITIES_MAX)
2388 		return;
2389 
2390 	bsetprop(SLIT_NUM_PROPNAME, strlen(SLIT_NUM_PROPNAME),
2391 	    &tp->LocalityCount, sizeof (tp->LocalityCount));
2392 	bsetprop(SLIT_PROPNAME, strlen(SLIT_PROPNAME), &tp->Entry,
2393 	    tp->LocalityCount * tp->LocalityCount);
2394 }
2395 
2396 static ACPI_TABLE_MSCT *
2397 process_msct(ACPI_TABLE_MSCT *tp)
2398 {
2399 	int last_seen = 0;
2400 	int proc_num = 0;
2401 	ACPI_MSCT_PROXIMITY *item, *end;
2402 	extern uint64_t plat_dr_options;
2403 
2404 	ASSERT(tp != NULL);
2405 
2406 	end = (ACPI_MSCT_PROXIMITY *)(tp->Header.Length + (uintptr_t)tp);
2407 	for (item = (void *)((uintptr_t)tp + tp->ProximityOffset);
2408 	    item < end;
2409 	    item = (void *)(item->Length + (uintptr_t)item)) {
2410 		/*
2411 		 * Sanity check according to section 5.2.19.1 of ACPI 4.0.
2412 		 * Revision 	1
2413 		 * Length	22
2414 		 */
2415 		if (item->Revision != 1 || item->Length != 22) {
2416 			cmn_err(CE_CONT,
2417 			    "?boot: unknown proximity domain structure in MSCT "
2418 			    "with Revision(%d), Length(%d).\n",
2419 			    (int)item->Revision, (int)item->Length);
2420 			return (NULL);
2421 		} else if (item->RangeStart > item->RangeEnd) {
2422 			cmn_err(CE_CONT,
2423 			    "?boot: invalid proximity domain structure in MSCT "
2424 			    "with RangeStart(%u), RangeEnd(%u).\n",
2425 			    item->RangeStart, item->RangeEnd);
2426 			return (NULL);
2427 		} else if (item->RangeStart != last_seen) {
2428 			/*
2429 			 * Items must be organized in ascending order of the
2430 			 * proximity domain enumerations.
2431 			 */
2432 			cmn_err(CE_CONT,
2433 			    "?boot: invalid proximity domain structure in MSCT,"
2434 			    " items are not orginized in ascending order.\n");
2435 			return (NULL);
2436 		}
2437 
2438 		/*
2439 		 * If ProcessorCapacity is 0 then there would be no CPUs in this
2440 		 * domain.
2441 		 */
2442 		if (item->ProcessorCapacity != 0) {
2443 			proc_num += (item->RangeEnd - item->RangeStart + 1) *
2444 			    item->ProcessorCapacity;
2445 		}
2446 
2447 		last_seen = item->RangeEnd - item->RangeStart + 1;
2448 		/*
2449 		 * Break out if all proximity domains have been processed.
2450 		 * Some BIOSes may have unused items at the end of MSCT table.
2451 		 */
2452 		if (last_seen > tp->MaxProximityDomains) {
2453 			break;
2454 		}
2455 	}
2456 	if (last_seen != tp->MaxProximityDomains + 1) {
2457 		cmn_err(CE_CONT,
2458 		    "?boot: invalid proximity domain structure in MSCT, "
2459 		    "proximity domain count doesn't match.\n");
2460 		return (NULL);
2461 	}
2462 
2463 	/*
2464 	 * Set plat-max-ncpus property if it hasn't been set yet.
2465 	 */
2466 	if (do_bsys_getproplen(NULL, PLAT_MAX_NCPUS_NAME) < 0) {
2467 		if (proc_num != 0) {
2468 			bsetpropsi(PLAT_MAX_NCPUS_NAME, proc_num);
2469 		}
2470 	}
2471 
2472 	/*
2473 	 * Use Maximum Physical Address from the MSCT table as upper limit for
2474 	 * memory hot-adding by default. It may be overridden by value from
2475 	 * the SRAT table or the "plat-dr-physmax" boot option.
2476 	 */
2477 	plat_dr_physmax = btop(tp->MaxAddress + 1);
2478 
2479 	/*
2480 	 * Existence of MSCT implies CPU/memory hotplug-capability for the
2481 	 * platform.
2482 	 */
2483 	plat_dr_options |= PLAT_DR_FEATURE_CPU;
2484 	plat_dr_options |= PLAT_DR_FEATURE_MEMORY;
2485 
2486 	return (tp);
2487 }
2488 
2489 #else /* __xpv */
2490 static void
2491 enumerate_xen_cpus()
2492 {
2493 	processorid_t	id, max_id;
2494 
2495 	/*
2496 	 * User-set boot-ncpus overrides enumeration
2497 	 */
2498 	if (do_bsys_getproplen(NULL, BOOT_NCPUS_NAME) >= 0)
2499 		return;
2500 
2501 	/*
2502 	 * Probe every possible virtual CPU id and remember the
2503 	 * highest id present; the count of CPUs is one greater
2504 	 * than this.  This tacitly assumes at least cpu 0 is present.
2505 	 */
2506 	max_id = 0;
2507 	for (id = 0; id < MAX_VIRT_CPUS; id++)
2508 		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0)
2509 			max_id = id;
2510 
2511 	bsetpropsi(BOOT_NCPUS_NAME, max_id+1);
2512 
2513 }
2514 #endif /* __xpv */
2515 
2516 static void
2517 build_firmware_properties(void)
2518 {
2519 	ACPI_TABLE_HEADER *tp = NULL;
2520 
2521 #ifndef __xpv
2522 	if ((tp = find_fw_table(ACPI_SIG_MSCT)) != NULL)
2523 		msct_ptr = process_msct((ACPI_TABLE_MSCT *)tp);
2524 	else
2525 		msct_ptr = NULL;
2526 
2527 	if ((tp = find_fw_table(ACPI_SIG_MADT)) != NULL)
2528 		process_madt((ACPI_TABLE_MADT *)tp);
2529 
2530 	if ((srat_ptr = (ACPI_TABLE_SRAT *)
2531 	    find_fw_table(ACPI_SIG_SRAT)) != NULL)
2532 		process_srat(srat_ptr);
2533 
2534 	if (slit_ptr = (ACPI_TABLE_SLIT *)find_fw_table(ACPI_SIG_SLIT))
2535 		process_slit(slit_ptr);
2536 
2537 	tp = find_fw_table(ACPI_SIG_MCFG);
2538 #else /* __xpv */
2539 	enumerate_xen_cpus();
2540 	if (DOMAIN_IS_INITDOMAIN(xen_info))
2541 		tp = find_fw_table(ACPI_SIG_MCFG);
2542 #endif /* __xpv */
2543 	if (tp != NULL)
2544 		process_mcfg((ACPI_TABLE_MCFG *)tp);
2545 }
2546 
2547 /*
2548  * fake up a boot property for deferred early console output
2549  * this is used by both graphical boot and the (developer only)
2550  * USB serial console
2551  */
2552 void *
2553 defcons_init(size_t size)
2554 {
2555 	static char *p = NULL;
2556 
2557 	p = do_bsys_alloc(NULL, NULL, size, MMU_PAGESIZE);
2558 	*p = 0;
2559 	bsetprop("deferred-console-buf", strlen("deferred-console-buf") + 1,
2560 	    &p, sizeof (p));
2561 	return (p);
2562 }
2563 
2564 /*ARGSUSED*/
2565 int
2566 boot_compinfo(int fd, struct compinfo *cbp)
2567 {
2568 	cbp->iscmp = 0;
2569 	cbp->blksize = MAXBSIZE;
2570 	return (0);
2571 }
2572 
2573 #define	BP_MAX_STRLEN	32
2574 
2575 /*
2576  * Get value for given boot property
2577  */
2578 int
2579 bootprop_getval(const char *prop_name, u_longlong_t *prop_value)
2580 {
2581 	int		boot_prop_len;
2582 	char		str[BP_MAX_STRLEN];
2583 	u_longlong_t	value;
2584 
2585 	boot_prop_len = BOP_GETPROPLEN(bootops, prop_name);
2586 	if (boot_prop_len < 0 || boot_prop_len > sizeof (str) ||
2587 	    BOP_GETPROP(bootops, prop_name, str) < 0 ||
2588 	    kobj_getvalue(str, &value) == -1)
2589 		return (-1);
2590 
2591 	if (prop_value)
2592 		*prop_value = value;
2593 
2594 	return (0);
2595 }
2596