xref: /titanic_51/usr/src/boot/sys/boot/common/multiboot2.c (revision c5e7425f0365f3ad327b8cd9446d510ee3472c9f)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2017 Toomas Soome <tsoome@me.com>
14  */
15 
16 /*
17  * This module adds support for loading and booting illumos multiboot2
18  * kernel. This code is only built to support the illumos kernel, it does
19  * not support xen.
20  */
21 
22 #include <sys/cdefs.h>
23 #include <sys/stddef.h>
24 
25 #include <sys/param.h>
26 #include <sys/exec.h>
27 #include <sys/linker.h>
28 #include <sys/module.h>
29 #include <sys/stdint.h>
30 #include <sys/multiboot2.h>
31 #include <stand.h>
32 #include <stdbool.h>
33 #include <machine/elf.h>
34 #include "libzfs.h"
35 
36 #include "bootstrap.h"
37 
38 #include <machine/metadata.h>
39 #include <machine/pc/bios.h>
40 
41 #define	SUPPORT_DHCP
42 #include <bootp.h>
43 
44 #if !defined(EFI)
45 #include "../i386/libi386/libi386.h"
46 #include "../i386/btx/lib/btxv86.h"
47 
48 #else
49 #include <efi.h>
50 #include <efilib.h>
51 #include "loader_efi.h"
52 
53 static void (*trampoline)(uint32_t, struct relocator *, uint64_t);
54 #endif
55 
56 #include "platform/acfreebsd.h"
57 #include "acconfig.h"
58 #define ACPI_SYSTEM_XFACE
59 #include "actypes.h"
60 #include "actbl.h"
61 
/*
 * ACPI root pointer, defined elsewhere. When it is not NULL, an ACPI
 * (old or new, depending on rsdp->Revision) tag is sized in mbi_size()
 * and emitted in multiboot2_exec().
 */
extern ACPI_TABLE_RSDP *rsdp;

/* MB data heap pointer; mb_malloc() hands out space from here. */
static vm_offset_t last_addr;
/* Loader identification string, copied into the boot loader name tag. */
extern char bootprog_info[];

static int multiboot2_loadfile(char *, u_int64_t, struct preloaded_file **);
static int multiboot2_exec(struct preloaded_file *);

/* File format handler: load callback followed by exec callback. */
struct file_format multiboot2 = { multiboot2_loadfile, multiboot2_exec };
/*
 * Set by multiboot2_loadfile() when the kernel header carries the EFI_BS
 * tag; when it is not set, multiboot2_exec() calls ExitBootServices().
 */
static bool keep_bs = false;
/* Set by multiboot2_loadfile() when the header has a framebuffer tag. */
static bool have_framebuffer = false;
/* Kernel load and entry addresses, recorded by multiboot2_loadfile(). */
static vm_offset_t load_addr;
static vm_offset_t entry_addr;
76 
77 /*
78  * Validate tags in info request. This function is provided just to
79  * recognize the current tag list and only serves as a limited
80  * safe guard against possibly corrupt information.
81  */
82 static bool
83 is_info_request_valid(multiboot_header_tag_information_request_t *rtag)
84 {
85 	int i;
86 
87 	/*
88 	 * If the tag is optional and we do not support it, we do not
89 	 * have to do anything special, so we skip optional tags.
90 	 */
91 	if (rtag->mbh_flags & MULTIBOOT_HEADER_TAG_OPTIONAL)
92 		return (true);
93 
94 	for (i = 0; i < (rtag->mbh_size - sizeof (*rtag)) /
95 	    sizeof (rtag->mbh_requests[0]); i++)
96 		switch (rtag->mbh_requests[i]) {
97 		case MULTIBOOT_TAG_TYPE_END:
98 		case MULTIBOOT_TAG_TYPE_CMDLINE:
99 		case MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME:
100 		case MULTIBOOT_TAG_TYPE_MODULE:
101 		case MULTIBOOT_TAG_TYPE_BASIC_MEMINFO:
102 		case MULTIBOOT_TAG_TYPE_BOOTDEV:
103 		case MULTIBOOT_TAG_TYPE_MMAP:
104 		case MULTIBOOT_TAG_TYPE_FRAMEBUFFER:
105 		case MULTIBOOT_TAG_TYPE_VBE:
106 		case MULTIBOOT_TAG_TYPE_ELF_SECTIONS:
107 		case MULTIBOOT_TAG_TYPE_APM:
108 		case MULTIBOOT_TAG_TYPE_EFI32:
109 		case MULTIBOOT_TAG_TYPE_EFI64:
110 		case MULTIBOOT_TAG_TYPE_ACPI_OLD:
111 		case MULTIBOOT_TAG_TYPE_ACPI_NEW:
112 		case MULTIBOOT_TAG_TYPE_NETWORK:
113 		case MULTIBOOT_TAG_TYPE_EFI_MMAP:
114 		case MULTIBOOT_TAG_TYPE_EFI_BS:
115 		case MULTIBOOT_TAG_TYPE_EFI32_IH:
116 		case MULTIBOOT_TAG_TYPE_EFI64_IH:
117 		case MULTIBOOT_TAG_TYPE_LOAD_BASE_ADDR:
118 			break;
119 		default:
120 			printf("unsupported information tag: 0x%x\n",
121 			    rtag->mbh_requests[i]);
122 			return (false);
123 		}
124 	return (true);
125 }
126 
127 static int
128 multiboot2_loadfile(char *filename, u_int64_t dest,
129     struct preloaded_file **result)
130 {
131 	int fd, error;
132 	uint32_t i;
133 	struct stat st;
134 	caddr_t header_search;
135 	multiboot2_header_t *header;
136 	multiboot_header_tag_t *tag;
137 	multiboot_header_tag_address_t *addr_tag = NULL;
138 	multiboot_header_tag_entry_address_t *entry_tag = NULL;
139 	struct preloaded_file *fp;
140 
141 	/* This allows to check other file formats from file_formats array. */
142 	error = EFTYPE;
143 	if (filename == NULL)
144 		return (error);
145 
146 	/* is kernel already loaded? */
147 	fp = file_findfile(NULL, NULL);
148 	if (fp != NULL)
149 		return (error);
150 
151 	if ((fd = open(filename, O_RDONLY)) == -1)
152 		return (errno);
153 
154 	/*
155 	 * Read MULTIBOOT_SEARCH size in order to search for the
156 	 * multiboot magic header.
157 	 */
158 	header_search = malloc(MULTIBOOT_SEARCH);
159 	if (header_search == NULL) {
160 		close(fd);
161 		return (ENOMEM);
162 	}
163 
164 	if (read(fd, header_search, MULTIBOOT_SEARCH) != MULTIBOOT_SEARCH)
165 		goto out;
166 
167 	header = NULL;
168 	for (i = 0; i <= (MULTIBOOT_SEARCH - sizeof (multiboot2_header_t));
169 	    i += MULTIBOOT_HEADER_ALIGN) {
170 		header = (multiboot2_header_t *)(header_search + i);
171 
172 		/* Do we have match on magic? */
173 		if (header->mb2_magic != MULTIBOOT2_HEADER_MAGIC) {
174 			header = NULL;
175 			continue;
176 		}
177 		/*
178 		 * Validate checksum, the sum of magic + architecture +
179 		 * header_length + checksum must equal 0.
180 		 */
181 		if (header->mb2_magic + header->mb2_architecture +
182 		    header->mb2_header_length + header->mb2_checksum != 0) {
183 			header = NULL;
184 			continue;
185 		}
186 		/*
187 		 * Finally, the entire header must fit within MULTIBOOT_SEARCH.
188 		 */
189 		if (i + header->mb2_header_length > MULTIBOOT_SEARCH) {
190 			header = NULL;
191 			continue;
192 		}
193 		break;
194 	}
195 
196 	if (header == NULL)
197 		goto out;
198 
199 	for (tag = header->mb2_tags; tag->mbh_type != MULTIBOOT_TAG_TYPE_END;
200 	    tag = (multiboot_header_tag_t *)((uintptr_t)tag +
201 	    roundup2(tag->mbh_size, MULTIBOOT_TAG_ALIGN))) {
202 		switch (tag->mbh_type) {
203 		case MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST:
204 			if (is_info_request_valid((void*)tag) == false)
205 				goto out;
206 			break;
207 		case MULTIBOOT_HEADER_TAG_ADDRESS:
208 			addr_tag = (multiboot_header_tag_address_t *)tag;
209 			break;
210 		case MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS:
211 			entry_tag =
212 			    (multiboot_header_tag_entry_address_t *)tag;
213 			break;
214 		case MULTIBOOT_HEADER_TAG_CONSOLE_FLAGS:
215 			break;
216 		case MULTIBOOT_HEADER_TAG_FRAMEBUFFER:
217 			have_framebuffer = true;
218 			break;
219 		case MULTIBOOT_HEADER_TAG_MODULE_ALIGN:
220 			/* we always align modules */
221 			break;
222 		case MULTIBOOT_HEADER_TAG_EFI_BS:
223 			keep_bs = true;
224 			break;
225 		default:
226 			if (!(tag->mbh_flags & MULTIBOOT_HEADER_TAG_OPTIONAL)) {
227 				printf("unsupported tag: 0x%x\n",
228 				    tag->mbh_type);
229 				goto out;
230 			}
231 		}
232 	}
233 
234 	/*
235 	 * We must have addr_tag and entry_tag to load a 64-bit kernel.
236 	 * If these tags are missing, we either have a 32-bit kernel, or
237 	 * this is not our kernel at all.
238 	 */
239 	if (addr_tag != NULL && entry_tag != NULL) {
240 		fp = file_alloc();
241 		if (fp == NULL) {
242 			error = ENOMEM;
243 			goto out;
244 		}
245 		if (lseek(fd, 0, SEEK_SET) == -1) {
246 			printf("lseek failed\n");
247 			error = EIO;
248 			file_discard(fp);
249 			goto out;
250 		}
251 		if (fstat(fd, &st) < 0) {
252 			printf("fstat failed\n");
253 			error = EIO;
254 			file_discard(fp);
255 			goto out;
256 		}
257 
258 		load_addr = addr_tag->mbh_load_addr;
259 		entry_addr = entry_tag->mbh_entry_addr;
260 		fp->f_addr = archsw.arch_loadaddr(LOAD_KERN, filename,
261 		    addr_tag->mbh_load_addr);
262 		if (fp->f_addr == 0) {
263 			error = ENOMEM;
264 			file_discard(fp);
265 			goto out;
266 		}
267 		fp->f_size = archsw.arch_readin(fd, fp->f_addr, st.st_size);
268 
269 		if (fp->f_size != st.st_size) {
270 			printf("error reading: %s", strerror(errno));
271 			file_discard(fp);
272 			error = EIO;
273 			goto out;
274 		}
275 
276 		fp->f_name = strdup(filename);
277 		fp->f_type = strdup("aout multiboot2 kernel");
278 		if (fp->f_name == NULL || fp->f_type == NULL) {
279 			error = ENOMEM;
280 			file_discard(fp);
281 			goto out;
282 		}
283 
284 		fp->f_metadata = NULL;
285 		error = 0;
286 	} else {
287 #if defined(EFI)
288 		/* 32-bit kernel is not yet supported for EFI */
289 		printf("32-bit kernel is not supported by UEFI loader\n");
290 		error = ENOTSUP;
291 		goto out;
292 #endif
293 		/* elf32_loadfile_raw will fill the attributes in fp. */
294 		error = elf32_loadfile_raw(filename, dest, &fp, 2);
295 		if (error != 0) {
296 			printf("elf32_loadfile_raw failed: %d unable to "
297 			    "load multiboot2 kernel\n", error);
298 			goto out;
299 		}
300 		entry_addr = fp->f_addr;
301 		/*
302 		 * We want the load_addr to have some legal value,
303 		 * so we set it same as the entry_addr.
304 		 * The distinction is important with UEFI, but not
305 		 * with BIOS version, because BIOS version does not use
306 		 * staging area.
307 		 */
308 		load_addr = fp->f_addr;
309 	}
310 
311 	setenv("kernelname", fp->f_name, 1);
312 #if defined(EFI)
313 	efi_addsmapdata(fp);
314 #else
315 	bios_addsmapdata(fp);
316 #endif
317 	*result = fp;
318 out:
319 	free(header_search);
320 	close(fd);
321 	return (error);
322 }
323 
/*
 * Search the command line for named property.
 *
 * Properties are looked for after each "-B" option, as a comma separated
 * list of name=value pairs. A value may be enclosed in single or double
 * quotes, in which case it may contain commas and white space; the
 * quotes are part of the returned value. The property name must match
 * name exactly. The returned value points into cmd and is not NUL
 * terminated, use len.
 *
 * Return codes:
 *	0	The name is found, we return the data in value and len.
 *	ENOENT	The name is not found.
 *	EINVAL	The provided command line is badly formed.
 *
 * On ENOENT and EINVAL, *value is NULL and *len is 0.
 */
static int
find_property_value(const char *cmd, const char *name, const char **value,
    size_t *len)
{
	const char *namep, *valuep;
	size_t name_len, value_len, want_len;
	int quoted;

	*value = NULL;
	*len = 0;

	if (cmd == NULL)
		return (ENOENT);

	want_len = strlen(name);

	while (*cmd != '\0') {
		if (cmd[0] != '-' || cmd[1] != 'B') {
			cmd++;
			continue;
		}
		cmd += 2;	/* Skip -B */
		while (cmd[0] == ' ' || cmd[0] == '\t')
			cmd++;	/* Skip whitespaces. */
		while (*cmd != '\0' && cmd[0] != ' ' && cmd[0] != '\t') {
			namep = cmd;
			valuep = strchr(cmd, '=');
			if (valuep == NULL)
				break;
			name_len = valuep - namep;
			valuep++;
			value_len = 0;
			quoted = 0;

			/* Is this value quoted? */
			if (valuep[0] == '\'' || valuep[0] == '"') {
				quoted = valuep[0];
				value_len = 1;
			}

			/*
			 * The terminator is tested on every step, including
			 * right after the opening quote, so we never look
			 * past the end of the command line.
			 */
			for (; valuep[value_len] != '\0'; ++value_len) {
				/*
				 * In the quote accept any character,
				 * but look for ending quote.
				 */
				if (quoted != 0) {
					if (valuep[value_len] == quoted)
						quoted = 0;
					continue;
				}

				/* A comma or white space ends the value. */
				if (valuep[value_len] == ',' ||
				    valuep[value_len] == ' ' ||
				    valuep[value_len] == '\t')
					break;
			}
			if (quoted != 0) {
				printf("Missing closing '%c' in \"%s\"\n",
				    quoted, valuep);
				return (EINVAL);
			}
			/*
			 * Compare the lengths too; comparing only name_len
			 * characters would let "con=x" match "console".
			 */
			if (value_len != 0 && name_len == want_len &&
			    strncmp(namep, name, name_len) == 0) {
				*value = valuep;
				*len = value_len;
				return (0);
			}
			cmd = valuep + value_len;
			while (*cmd == ',')
				cmd++;
		}
	}
	return (ENOENT);
}
409 
/*
 * If command line has " -B ", insert property after "-B ", otherwise
 * append to command line.
 *
 * head is the existing command line and prop the "name=value" string to
 * add. When inserting, prop is followed by a comma and the remainder of
 * the original property list. head is not modified.
 *
 * Returns a newly allocated string the caller must free, or NULL if
 * the string could not be built.
 */
static char *
insert_cmdline(const char *head, const char *prop)
{
	const char *prop_opt = " -B ";
	char *cmdline = NULL;
	char *tail;
	int rv;

	tail = strstr(head, prop_opt);
	if (tail == NULL) {
		/* No property list yet, start one at the end. */
		rv = asprintf(&cmdline, "%s%s%s", head, prop_opt, prop);
	} else {
		ptrdiff_t diff;

		/* Split just past "-B "; the length is used with %.*s. */
		tail += strlen(prop_opt);
		diff = tail - head;
		if (diff >= INT_MAX)
			return (NULL);
		rv = asprintf(&cmdline, "%.*s%s,%s", (int)diff, head, prop,
		    tail);
	}

	/*
	 * Do not rely on asprintf() setting the pointer on failure,
	 * callers test our return value against NULL.
	 */
	if (rv < 0)
		return (NULL);

	return (cmdline);
}
438 
/*
 * Since we have no way to pass the environment to the mb1 kernel other than
 * through arguments, we need to take care of console setup.
 *
 * If the console is in mirror mode, set the kernel console from $os_console.
 * If it's unset, use first item from $console.
 * If $console is "ttyX", also pass $ttyX-mode, since it may have been set by
 * the user.
 *
 * In case of memory allocation errors, just return the original command line
 * so we have a chance of booting. The same applies when neither $os_console
 * nor $console is set.
 *
 * On success, cl will be freed and a new, allocated command line string is
 * returned.
 *
 * For the mb2 kernel, we only set command line console if os_console is set.
 * We can not overwrite console in the environment, as it can disrupt the
 * loader console messages, and we do not want to deal with the os_console
 * in the kernel.
 */
static char *
update_cmdline(char *cl, bool mb2)
{
	char *os_console = getenv("os_console");
	char *ttymode = NULL;
	char mode[10];
	char *tmp;
	const char *prop;
	size_t plen;
	int rv;

	if (mb2 == true && os_console == NULL)
		return (cl);

	/* From here on os_console is our own copy and must be freed. */
	if (os_console == NULL) {
		const char *console = getenv("console");

		/* Nothing to derive the kernel console from. */
		if (console == NULL)
			return (cl);

		/*
		 * strsep() returns the start of the copy (or NULL if the
		 * copy failed), so the result can be passed to free().
		 */
		tmp = strdup(console);
		os_console = strsep(&tmp, ", ");
	} else {
		os_console = strdup(os_console);
	}

	if (os_console == NULL)
		return (cl);

	if (mb2 == false && strncmp(os_console, "tty", 3) == 0) {
		snprintf(mode, sizeof (mode), "%s-mode", os_console);
		/*
		 * The ttyX-mode variable is set by our serial console
		 * driver for ttya-ttyd. However, since the os_console
		 * values are not verified, it is possible we get bogus
		 * name and no mode variable. If so, we do not set console
		 * property and let the kernel use defaults.
		 */
		if ((ttymode = getenv(mode)) == NULL) {
			free(os_console);
			return (cl);
		}
	}

	rv = find_property_value(cl, "console", &prop, &plen);
	if (rv != 0 && rv != ENOENT) {
		free(os_console);
		return (cl);
	}

	/* If console is set and this is MB2 boot, we are done. */
	if (rv == 0 && mb2 == true) {
		free(os_console);
		return (cl);
	}

	/* If console is set, do we need to set tty mode? */
	if (rv == 0) {
		const char *ttyp = NULL;
		size_t ttylen;

		free(os_console);
		os_console = NULL;
		*mode = '\0';
		if (strncmp(prop, "tty", 3) == 0 && plen == 4) {
			/* "ttyX-mode" and the terminator fill mode exactly. */
			snprintf(mode, sizeof (mode), "%.*s-mode",
			    (int)plen, prop);
			find_property_value(cl, mode, &ttyp, &ttylen);
		}

		/* Use the environment only if cl has no ttyX-mode itself. */
		if (*mode != '\0' && ttyp == NULL)
			ttymode = getenv(mode);
		else
			return (cl);
	}

	/* Build updated command line. */
	if (os_console != NULL) {
		char *propstr = NULL;
		int n;

		n = asprintf(&propstr, "console=%s", os_console);
		free(os_console);
		if (n < 0 || propstr == NULL)
			return (cl);

		tmp = insert_cmdline(cl, propstr);
		free(propstr);
		if (tmp == NULL)
			return (cl);

		free(cl);
		cl = tmp;
	}
	if (ttymode != NULL) {
		char *propstr = NULL;

		if (asprintf(&propstr, "%s=\"%s\"", mode, ttymode) < 0 ||
		    propstr == NULL)
			return (cl);

		tmp = insert_cmdline(cl, propstr);
		free(propstr);
		if (tmp == NULL)
			return (cl);
		free(cl);
		cl = tmp;
	}

	return (cl);
}
564 
565 /*
566  * Build the kernel command line. Shared function between MB1 and MB2.
567  *
568  * In both cases, if fstype is set and is not zfs, we do not set up
569  * zfs-bootfs property. But we set kernel file name and options.
570  *
571  * For the MB1, we only can pass properties on command line, so
572  * we will set console, ttyX-mode (for serial console) and zfs-bootfs.
573  *
574  * For the MB2, we can pass properties in environment, but if os_console
575  * is set in environment, we need to add console property on the kernel
576  * command line.
577  *
578  * The console properties are managed in update_cmdline().
579  */
580 int
581 mb_kernel_cmdline(struct preloaded_file *fp, struct devdesc *rootdev,
582     char **line)
583 {
584 	const char *fs = getenv("fstype");
585 	char *cmdline;
586 	size_t len;
587 	bool zfs_root = false;
588 	bool mb2;
589 	int rv;
590 
591 	/*
592 	 * 64-bit kernel has aout header, 32-bit kernel is elf, and the
593 	 * type strings are different. Lets just search for "multiboot2".
594 	 */
595 	if (strstr(fp->f_type, "multiboot2") == NULL)
596 		mb2 = false;
597 	else
598 		mb2 = true;
599 
600 	if (rootdev->d_type == DEVT_ZFS)
601 		zfs_root = true;
602 
603 	/* If we have fstype set in env, reset zfs_root if needed. */
604 	if (fs != NULL && strcmp(fs, "zfs") != 0)
605 		zfs_root = false;
606 
607 	/*
608 	 * If we have fstype set on the command line,
609 	 * reset zfs_root if needed.
610 	 */
611 	rv = find_property_value(fp->f_args, "fstype", &fs, &len);
612 	if (rv != 0 && rv != ENOENT)
613 		return (rv);
614 
615 	if (fs != NULL && strncmp(fs, "zfs", len) != 0)
616 		zfs_root = false;
617 
618 	/* zfs_bootfs() will set the environment, it must be called. */
619 	if (zfs_root == true)
620 		fs = zfs_bootfs(rootdev);
621 
622 	if (fp->f_args == NULL)
623 		cmdline = strdup(fp->f_name);
624 	else
625 		asprintf(&cmdline, "%s %s", fp->f_name, fp->f_args);
626 
627 	if (cmdline == NULL)
628 		return (ENOMEM);
629 
630 	/* Append zfs-bootfs for MB1 command line. */
631 	if (mb2 == false && zfs_root == true) {
632 		char *tmp;
633 
634 		tmp = insert_cmdline(cmdline, fs);
635 		free(cmdline);
636 		if (tmp == NULL)
637 			return (ENOMEM);
638 		cmdline = tmp;
639 	}
640 
641 	*line = update_cmdline(cmdline, mb2);
642 	return (0);
643 }
644 
645 /*
646  * Returns allocated virtual address from MB info area.
647  */
648 static vm_offset_t
649 mb_malloc(size_t n)
650 {
651 	vm_offset_t ptr = last_addr;
652 	last_addr = roundup(last_addr + n, MULTIBOOT_TAG_ALIGN);
653 	return (ptr);
654 }
655 
656 /*
657  * Calculate size for module tag list.
658  */
659 static size_t
660 module_size(struct preloaded_file *fp)
661 {
662 	size_t len, size;
663 	struct preloaded_file *mfp;
664 
665 	size = 0;
666 	for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
667 		len = strlen(mfp->f_name) + 1;
668 		len += strlen(mfp->f_type) + 5 + 1; /* 5 is for "type=" */
669 		if (mfp->f_args != NULL)
670 			len += strlen(mfp->f_args) + 1;
671 		size += sizeof (multiboot_tag_module_t) + len;
672 		size = roundup(size, MULTIBOOT_TAG_ALIGN);
673 	}
674 	return (size);
675 }
676 
677 #if defined (EFI)
678 /*
679  * Calculate size for UEFI memory map tag.
680  */
681 static int
682 efimemmap_size(void)
683 {
684 	UINTN size, cur_size, desc_size;
685 	EFI_MEMORY_DESCRIPTOR *mmap;
686 	EFI_STATUS ret;
687 
688 	size = EFI_PAGE_SIZE;		/* Start with 4k. */
689 	while (1) {
690 		cur_size = size;
691 		mmap = malloc(cur_size);
692 		if (mmap == NULL)
693 			return (0);
694 		ret = BS->GetMemoryMap(&cur_size, mmap, NULL, &desc_size, NULL);
695 		free(mmap);
696 		if (ret == EFI_SUCCESS)
697 			break;
698 		if (ret == EFI_BUFFER_TOO_SMALL) {
699 			if (size < cur_size)
700 				size = cur_size;
701 			size += (EFI_PAGE_SIZE);
702 		} else
703 			return (0);
704 	}
705 
706 	/* EFI MMAP will grow when we allocate MBI, set some buffer. */
707 	size += (3 << EFI_PAGE_SHIFT);
708 	size = roundup(size, desc_size);
709 	return (sizeof (multiboot_tag_efi_mmap_t) + size);
710 }
711 #endif
712 
713 /*
714  * Calculate size for bios smap tag.
715  */
716 static size_t
717 biossmap_size(struct preloaded_file *fp)
718 {
719 	int num;
720 	struct file_metadata *md;
721 
722 	md = file_findmetadata(fp, MODINFOMD_SMAP);
723 	if (md == NULL)
724 		return (0);
725 
726 	num = md->md_size / sizeof(struct bios_smap); /* number of entries */
727 	return (sizeof (multiboot_tag_mmap_t) +
728 	    num * sizeof (multiboot_mmap_entry_t));
729 }
730 
731 static size_t
732 mbi_size(struct preloaded_file *fp, char *cmdline)
733 {
734 	size_t size;
735 
736 	size = sizeof (uint32_t) * 2; /* first 2 fields from MBI header */
737 	size += sizeof (multiboot_tag_string_t) + strlen(cmdline) + 1;
738 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
739 	size += sizeof (multiboot_tag_string_t) + strlen(bootprog_info) + 1;
740 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
741 #if !defined (EFI)
742 	size += sizeof (multiboot_tag_basic_meminfo_t);
743 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
744 #endif
745 	size += module_size(fp);
746 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
747 #if defined (EFI)
748 	size += sizeof (multiboot_tag_efi64_t);
749 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
750 	size += efimemmap_size();
751 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
752 
753 	if (have_framebuffer == true) {
754 		size += sizeof (multiboot_tag_framebuffer_t);
755 		size = roundup2(size, MULTIBOOT_TAG_ALIGN);
756 	}
757 #endif
758 	size += biossmap_size(fp);
759 	size = roundup2(size, MULTIBOOT_TAG_ALIGN);
760 
761 	if (bootp_response != NULL) {
762 		size += sizeof(multiboot_tag_network_t) + bootp_response_size;
763 		size = roundup2(size, MULTIBOOT_TAG_ALIGN);
764 	}
765 
766 	if (rsdp != NULL) {
767 		if (rsdp->Revision == 0) {
768 			size += sizeof (multiboot_tag_old_acpi_t) +
769 			    sizeof(ACPI_RSDP_COMMON);
770 		} else {
771 			size += sizeof (multiboot_tag_new_acpi_t) +
772 			    rsdp->Length;
773 		}
774 		size = roundup2(size, MULTIBOOT_TAG_ALIGN);
775 	}
776 	size += sizeof(multiboot_tag_t);
777 
778 	return (size);
779 }
780 
781 static int
782 multiboot2_exec(struct preloaded_file *fp)
783 {
784 	struct preloaded_file *mfp;
785 	multiboot2_info_header_t *mbi;
786 	char *cmdline = NULL;
787 	struct devdesc *rootdev;
788 	struct file_metadata *md;
789 	int i, error, num;
790 	int rootfs = 0;
791 	size_t size;
792 	struct bios_smap *smap;
793 #if defined (EFI)
794 	multiboot_tag_module_t *module;
795 	EFI_MEMORY_DESCRIPTOR *map;
796 	struct relocator *relocator;
797 	struct chunk_head *head;
798 	struct chunk *chunk;
799 	vm_offset_t tmp;
800 
801 	efi_getdev((void **)(&rootdev), NULL, NULL);
802 #else
803 	i386_getdev((void **)(&rootdev), NULL, NULL);
804 #endif
805 
806 	error = EINVAL;
807 	if (rootdev == NULL) {
808 		printf("can't determine root device\n");
809 		goto error;
810 	}
811 
812 	/*
813 	 * Set the image command line.
814 	 */
815 	if (fp->f_args == NULL) {
816 		cmdline = getenv("boot-args");
817 		if (cmdline != NULL) {
818 			fp->f_args = strdup(cmdline);
819 			if (fp->f_args == NULL) {
820 				error = ENOMEM;
821 				goto error;
822 			}
823 		}
824 	}
825 
826 	error = mb_kernel_cmdline(fp, rootdev, &cmdline);
827 	if (error != 0)
828 		goto error;
829 
830 	/* mb_kernel_cmdline() updates the environment. */
831 	build_environment_module();
832 
833 	size = mbi_size(fp, cmdline);	/* Get the size for MBI. */
834 
835 	/* Set up the base for mb_malloc. */
836 	i = 0;
837 	for (mfp = fp; mfp->f_next != NULL; mfp = mfp->f_next)
838 		i++;
839 
840 #if defined (EFI)
841 	/* We need space for kernel + MBI + # modules */
842 	num = (EFI_PAGE_SIZE - offsetof(struct relocator, rel_chunklist)) /
843 	    sizeof (struct chunk);
844 	if (i + 2 >= num) {
845 		printf("Too many modules, do not have space for relocator.\n");
846 		error = ENOMEM;
847 		goto error;
848 	}
849 
850 	last_addr = efi_loadaddr(LOAD_MEM, &size, mfp->f_addr + mfp->f_size);
851 	mbi = (multiboot2_info_header_t *)last_addr;
852 	if (mbi == NULL) {
853 		error = ENOMEM;
854 		goto error;
855 	}
856 	last_addr = (vm_offset_t)mbi->mbi_tags;
857 #else
858 	/* Start info block from the new page. */
859 	last_addr = i386_loadaddr(LOAD_MEM, &size, mfp->f_addr + mfp->f_size);
860 
861 	/* Do we have space for multiboot info? */
862 	if (last_addr + size >= memtop_copyin) {
863 		error = ENOMEM;
864 		goto error;
865 	}
866 
867 	mbi = (multiboot2_info_header_t *)PTOV(last_addr);
868 	last_addr = (vm_offset_t)mbi->mbi_tags;
869 #endif	/* EFI */
870 
871 	{
872 		multiboot_tag_string_t *tag;
873 		i = sizeof (multiboot_tag_string_t) + strlen(cmdline) + 1;
874 		tag = (multiboot_tag_string_t *) mb_malloc(i);
875 
876 		tag->mb_type = MULTIBOOT_TAG_TYPE_CMDLINE;
877 		tag->mb_size = i;
878 		memcpy(tag->mb_string, cmdline, strlen(cmdline) + 1);
879 		free(cmdline);
880 		cmdline = NULL;
881 	}
882 
883 	{
884 		multiboot_tag_string_t *tag;
885 		i = sizeof (multiboot_tag_string_t) + strlen(bootprog_info) + 1;
886 		tag = (multiboot_tag_string_t *) mb_malloc(i);
887 
888 		tag->mb_type = MULTIBOOT_TAG_TYPE_BOOT_LOADER_NAME;
889 		tag->mb_size = i;
890 		memcpy(tag->mb_string, bootprog_info,
891 		    strlen(bootprog_info) + 1);
892 	}
893 
894 #if !defined (EFI)
895 	/* Only set in case of BIOS. */
896 	{
897 		multiboot_tag_basic_meminfo_t *tag;
898 		tag = (multiboot_tag_basic_meminfo_t *)
899 		    mb_malloc(sizeof (*tag));
900 
901 		tag->mb_type = MULTIBOOT_TAG_TYPE_BASIC_MEMINFO;
902 		tag->mb_size = sizeof (*tag);
903 		tag->mb_mem_lower = bios_basemem / 1024;
904 		tag->mb_mem_upper = bios_extmem / 1024;
905 	}
906 #endif
907 
908 	num = 0;
909 	for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
910 		num++;
911 		if (mfp->f_type != NULL && strcmp(mfp->f_type, "rootfs") == 0)
912 			rootfs++;
913 	}
914 
915 	if (num == 0 || rootfs == 0) {
916 		/* We need at least one module - rootfs. */
917 		printf("No rootfs module provided, aborting\n");
918 		error = EINVAL;
919 		goto error;
920 	}
921 
922 	/*
923 	 * Set the stage for physical memory layout:
924 	 * - We have kernel at load_addr.
925 	 * - Modules are aligned to page boundary.
926 	 * - MBI is aligned to page boundary.
927 	 * - Set the tmp to point to physical address of the first module.
928 	 * - tmp != mfp->f_addr only in case of EFI.
929 	 */
930 #if defined (EFI)
931 	tmp = roundup2(load_addr + fp->f_size, MULTIBOOT_MOD_ALIGN);
932 	module = (multiboot_tag_module_t *)last_addr;
933 #endif
934 
935 	for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
936 		multiboot_tag_module_t *tag;
937 
938 		num = strlen(mfp->f_name) + 1;
939 		num += strlen(mfp->f_type) + 5 + 1;
940 		if (mfp->f_args != NULL) {
941 			num += strlen(mfp->f_args) + 1;
942 		}
943 		cmdline = malloc(num);
944 		if (cmdline == NULL) {
945 			error = ENOMEM;
946 			goto error;
947 		}
948 
949 		if (mfp->f_args != NULL)
950 			snprintf(cmdline, num, "%s type=%s %s",
951 			    mfp->f_name, mfp->f_type, mfp->f_args);
952 		else
953 			snprintf(cmdline, num, "%s type=%s",
954 			    mfp->f_name, mfp->f_type);
955 
956 		tag = (multiboot_tag_module_t *)mb_malloc(sizeof (*tag) + num);
957 
958 		tag->mb_type = MULTIBOOT_TAG_TYPE_MODULE;
959 		tag->mb_size = sizeof (*tag) + num;
960 #if defined (EFI)
961 		tag->mb_mod_start = tmp;
962 		tag->mb_mod_end = tmp + mfp->f_size;
963 		tmp = roundup2(tag->mb_mod_end + 1, MULTIBOOT_MOD_ALIGN);
964 #else
965 		tag->mb_mod_start = mfp->f_addr;
966 		tag->mb_mod_end = mfp->f_addr + mfp->f_size;
967 #endif
968 		memcpy(tag->mb_cmdline, cmdline, num);
969 		free(cmdline);
970 		cmdline = NULL;
971 	}
972 
973 	md = file_findmetadata(fp, MODINFOMD_SMAP);
974 	if (md == NULL) {
975 		printf("no memory smap\n");
976 		error = EINVAL;
977 		goto error;
978 	}
979 
980 	smap = (struct bios_smap *)md->md_data;
981 	num = md->md_size / sizeof(struct bios_smap); /* number of entries */
982 
983 	{
984 		multiboot_tag_mmap_t *tag;
985 		multiboot_mmap_entry_t *mmap_entry;
986 
987 		tag = (multiboot_tag_mmap_t *)
988 		    mb_malloc(sizeof (*tag) +
989 		    num * sizeof (multiboot_mmap_entry_t));
990 
991 		tag->mb_type = MULTIBOOT_TAG_TYPE_MMAP;
992 		tag->mb_size = sizeof (*tag) +
993 		    num * sizeof (multiboot_mmap_entry_t);
994 		tag->mb_entry_size = sizeof (multiboot_mmap_entry_t);
995 		tag->mb_entry_version = 0;
996 		mmap_entry = (multiboot_mmap_entry_t *)tag->mb_entries;
997 
998 		for (i = 0; i < num; i++) {
999 			mmap_entry[i].mmap_addr = smap[i].base;
1000 			mmap_entry[i].mmap_len = smap[i].length;
1001 			mmap_entry[i].mmap_type = smap[i].type;
1002 			mmap_entry[i].mmap_reserved = 0;
1003 		}
1004 	}
1005 
1006 	if (bootp_response != NULL) {
1007 		multiboot_tag_network_t *tag;
1008 		tag = (multiboot_tag_network_t *)
1009 		    mb_malloc(sizeof(*tag) + bootp_response_size);
1010 
1011 		tag->mb_type = MULTIBOOT_TAG_TYPE_NETWORK;
1012 		tag->mb_size = sizeof(*tag) + bootp_response_size;
1013 		memcpy(tag->mb_dhcpack, bootp_response, bootp_response_size);
1014 	}
1015 
1016 	if (rsdp != NULL) {
1017 		multiboot_tag_new_acpi_t *ntag;
1018 		multiboot_tag_old_acpi_t *otag;
1019 		uint32_t tsize;
1020 
1021 		if (rsdp->Revision == 0) {
1022 			tsize = sizeof (*otag) + sizeof (ACPI_RSDP_COMMON);
1023 			otag = (multiboot_tag_old_acpi_t *)mb_malloc(tsize);
1024 			otag->mb_type = MULTIBOOT_TAG_TYPE_ACPI_OLD;
1025 			otag->mb_size = tsize;
1026 			memcpy(otag->mb_rsdp, rsdp, sizeof (ACPI_RSDP_COMMON));
1027 		} else {
1028 			tsize = sizeof (*ntag) + rsdp->Length;
1029 			ntag = (multiboot_tag_new_acpi_t *)mb_malloc(tsize);
1030 			ntag->mb_type = MULTIBOOT_TAG_TYPE_ACPI_NEW;
1031 			ntag->mb_size = tsize;
1032 			memcpy(ntag->mb_rsdp, rsdp, rsdp->Length);
1033 		}
1034 	}
1035 
1036 #if defined (EFI)
1037 	{
1038 		multiboot_tag_efi64_t *tag;
1039 		tag = (multiboot_tag_efi64_t *)
1040 		    mb_malloc(sizeof (*tag));
1041 
1042 		tag->mb_type = MULTIBOOT_TAG_TYPE_EFI64;
1043 		tag->mb_size = sizeof (*tag);
1044 		tag->mb_pointer = (uint64_t)(uintptr_t)ST;
1045 	}
1046 
1047 	if (have_framebuffer == true) {
1048 		multiboot_tag_framebuffer_t *tag;
1049 		int bpp;
1050 		struct efi_fb fb;
1051 		extern int efi_find_framebuffer(struct efi_fb *efifb);
1052 
1053 		if (efi_find_framebuffer(&fb) == 0) {
1054 			tag = (multiboot_tag_framebuffer_t *)
1055 			    mb_malloc(sizeof (*tag));
1056 
1057 			/*
1058 			 * We assume contiguous color bitmap, and use
1059 			 * the msb for bits per pixel calculation.
1060 			 */
1061 			bpp = fls(fb.fb_mask_red | fb.fb_mask_green |
1062 			    fb.fb_mask_blue | fb.fb_mask_reserved);
1063 
1064 			tag->framebuffer_common.mb_type =
1065 			    MULTIBOOT_TAG_TYPE_FRAMEBUFFER;
1066 			tag->framebuffer_common.mb_size =
1067 			    sizeof (multiboot_tag_framebuffer_t);
1068 			tag->framebuffer_common.framebuffer_addr = fb.fb_addr;
1069 			tag->framebuffer_common.framebuffer_width = fb.fb_width;
1070 			tag->framebuffer_common.framebuffer_height =
1071 			    fb.fb_height;
1072 			tag->framebuffer_common.framebuffer_bpp = bpp;
1073 			/*
1074 			 * Pitch is stride * bytes per pixel.
1075 			 * Stride is pixels per scanline.
1076 			 */
1077 			tag->framebuffer_common.framebuffer_pitch =
1078 			    fb.fb_stride * (bpp / 8);
1079 			tag->framebuffer_common.framebuffer_type =
1080 			    MULTIBOOT_FRAMEBUFFER_TYPE_RGB;
1081 			tag->framebuffer_common.mb_reserved = 0;
1082 
1083 			/*
1084 			 * The RGB or BGR color ordering.
1085 			 */
1086 			if (fb.fb_mask_red & 0x000000ff) {
1087 				tag->u.fb2.framebuffer_red_field_position = 0;
1088 				tag->u.fb2.framebuffer_blue_field_position = 16;
1089 			} else {
1090 				tag->u.fb2.framebuffer_red_field_position = 16;
1091 				tag->u.fb2.framebuffer_blue_field_position = 0;
1092 			}
1093 			tag->u.fb2.framebuffer_red_mask_size = 8;
1094 			tag->u.fb2.framebuffer_green_field_position = 8;
1095 			tag->u.fb2.framebuffer_green_mask_size = 8;
1096 			tag->u.fb2.framebuffer_blue_mask_size = 8;
1097 		}
1098 	}
1099 
1100 	/* Leave EFI memmap last as we will also switch off the BS. */
1101 	{
1102 		multiboot_tag_efi_mmap_t *tag;
1103 		UINTN size, desc_size, key;
1104 		EFI_STATUS status;
1105 
1106 		tag = (multiboot_tag_efi_mmap_t *)
1107 		    mb_malloc(sizeof (*tag));
1108 
1109 		size = 0;
1110 		status = BS->GetMemoryMap(&size,
1111 		    (EFI_MEMORY_DESCRIPTOR *)tag->mb_efi_mmap, &key,
1112 		    &desc_size, &tag->mb_descr_vers);
1113 		if (status != EFI_BUFFER_TOO_SMALL) {
1114 			error = EINVAL;
1115 			goto error;
1116 		}
1117 		status = BS->GetMemoryMap(&size,
1118 		    (EFI_MEMORY_DESCRIPTOR *)tag->mb_efi_mmap, &key,
1119 		    &desc_size, &tag->mb_descr_vers);
1120 		if (EFI_ERROR(status)) {
1121 			error = EINVAL;
1122 			goto error;
1123 		}
1124 		tag->mb_type = MULTIBOOT_TAG_TYPE_EFI_MMAP;
1125 		tag->mb_size = sizeof (*tag) + size;
1126 		tag->mb_descr_size = (uint32_t) desc_size;
1127 
1128 		/*
1129 		 * Find relocator pages. We assume we have free pages
1130 		 * below kernel load address.
1131 		 * In this version we are using 5 pages:
1132 		 * relocator data, trampoline, copy, memmove, stack.
1133 		 */
1134 		for (i = 0, map = (EFI_MEMORY_DESCRIPTOR *)tag->mb_efi_mmap;
1135 		    i < size / desc_size;
1136 		    i++, map = NextMemoryDescriptor(map, desc_size)) {
1137 			if (map->PhysicalStart == 0)
1138 				continue;
1139 			if (map->Type != EfiConventionalMemory)
1140 				continue;
1141 			if (map->PhysicalStart < load_addr &&
1142 			    map->NumberOfPages > 5)
1143 				break;
1144 		}
1145 		if (map->PhysicalStart == 0)
1146 			panic("Could not find memory for relocater\n");
1147 
1148 		if (keep_bs == 0) {
1149 			status = BS->ExitBootServices(IH, key);
1150 			if (EFI_ERROR(status)) {
1151 				printf("Call to ExitBootServices failed\n");
1152 				error = EINVAL;
1153 				goto error;
1154 			}
1155 		}
1156 
1157 		last_addr += size;
1158 		last_addr = roundup2(last_addr, MULTIBOOT_TAG_ALIGN);
1159 	}
1160 #endif
1161 
1162 	/*
1163 	 * MB tag list end marker.
1164 	 */
1165 	{
1166 		multiboot_tag_t *tag = (multiboot_tag_t *)
1167 		    mb_malloc(sizeof(*tag));
1168 		tag->mb_type = MULTIBOOT_TAG_TYPE_END;
1169 		tag->mb_size = sizeof(*tag);
1170 	}
1171 
1172 	mbi->mbi_total_size = last_addr - (vm_offset_t)mbi;
1173 	mbi->mbi_reserved = 0;
1174 
1175 #if defined (EFI)
1176 	/* At this point we have load_addr pointing to the kernel load
1177 	 * address, the module list in MBI holding physical addresses,
1178 	 * the module list in fp holding logical addresses, and tmp pointing
1179 	 * to the physical address for the MBI.
1180 	 * Now we must move all pieces into place and start the kernel.
1181 	 */
1182 	relocator = (struct relocator *)(uintptr_t)map->PhysicalStart;
1183 	head = &relocator->rel_chunk_head;
1184 	STAILQ_INIT(head);
1185 
1186 	i = 0;
1187 	chunk = &relocator->rel_chunklist[i++];
1188 	chunk->chunk_vaddr = fp->f_addr;
1189 	chunk->chunk_paddr = load_addr;
1190 	chunk->chunk_size = fp->f_size;
1191 
1192 	STAILQ_INSERT_TAIL(head, chunk, chunk_next);
1193 
1194 	for (mfp = fp->f_next; mfp != NULL; mfp = mfp->f_next) {
1195 		chunk = &relocator->rel_chunklist[i++];
1196 		chunk->chunk_vaddr = mfp->f_addr;
1197 		chunk->chunk_paddr = module->mb_mod_start;
1198 		chunk->chunk_size = mfp->f_size;
1199 		STAILQ_INSERT_TAIL(head, chunk, chunk_next);
1200 
1201 		module = (multiboot_tag_module_t *)
1202 		    roundup2((uintptr_t)module + module->mb_size,
1203 		    MULTIBOOT_TAG_ALIGN);
1204 	}
1205 	chunk = &relocator->rel_chunklist[i++];
1206 	chunk->chunk_vaddr = (EFI_VIRTUAL_ADDRESS)mbi;
1207 	chunk->chunk_paddr = tmp;
1208 	chunk->chunk_size = mbi->mbi_total_size;
1209 	STAILQ_INSERT_TAIL(head, chunk, chunk_next);
1210 
1211 	trampoline = (void *)(uintptr_t)relocator + EFI_PAGE_SIZE;
1212 	memmove(trampoline, multiboot_tramp, EFI_PAGE_SIZE);
1213 
1214 	relocator->rel_copy = (uintptr_t)trampoline + EFI_PAGE_SIZE;
1215 	memmove((void *)relocator->rel_copy, efi_copy_finish, EFI_PAGE_SIZE);
1216 
1217 	relocator->rel_memmove = (uintptr_t)relocator->rel_copy + EFI_PAGE_SIZE;
1218 	memmove((void *)relocator->rel_memmove, memmove, EFI_PAGE_SIZE);
1219 	relocator->rel_stack = relocator->rel_memmove + EFI_PAGE_SIZE - 8;
1220 
1221 	trampoline(MULTIBOOT2_BOOTLOADER_MAGIC, relocator, entry_addr);
1222 #else
1223 	dev_cleanup();
1224 	__exec((void *)VTOP(multiboot_tramp), MULTIBOOT2_BOOTLOADER_MAGIC,
1225 	    (void *)entry_addr, (void *)VTOP(mbi));
1226 #endif
1227 	panic("exec returned");
1228 
1229 error:
1230 	if (cmdline != NULL)
1231 		free(cmdline);
1232 #if defined (EFI)
1233 	if (mbi != NULL)
1234 		efi_free_loadaddr((uint64_t)mbi, EFI_SIZE_TO_PAGES(size));
1235 #endif
1236 	return (error);
1237 }
1238