xref: /freebsd/stand/efi/loader/copy.c (revision b5a3a89c50671a1ad29e7c43fe15e7b16feac239)
1 /*-
2  * Copyright (c) 2013 The FreeBSD Foundation
3  *
4  * This software was developed by Benno Rice under sponsorship from
5  * the FreeBSD Foundation.
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include <sys/param.h>
32 
33 #include <stand.h>
34 #include <bootstrap.h>
35 
36 #include <efi.h>
37 #include <efilib.h>
38 
39 #include "loader_efi.h"
40 
/* M(x): x mebibytes expressed in bytes. */
#define	M(x)	((x) * 1024 * 1024)
/*
 * G(x): x gibibytes expressed in bytes.  The multiplication is forced to
 * unsigned long long because unsigned long is only 32 bits wide on ILP32
 * targets (e.g. arm), where G(4) would otherwise silently wrap to 0.
 */
#define	G(x)	(1ULL * (x) * 1024 * 1024 * 1024)
43 
44 #if defined(__amd64__)
45 #include <machine/cpufunc.h>
46 #include <machine/specialreg.h>
47 #include <machine/vmparam.h>
48 
49 /*
50  * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
51  * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
52  */
53 #define CPUID_LEAF_HV_MAXLEAF		0x40000000
54 #define CPUID_LEAF_HV_INTERFACE		0x40000001
55 #define CPUID_LEAF_HV_FEATURES		0x40000003
56 #define CPUID_LEAF_HV_LIMITS		0x40000005
57 #define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
58 #define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
59 #define CPUID_HV_MSR_HYPERCALL		0x0020
60 
61 static int
62 running_on_hyperv(void)
63 {
64 	char hv_vendor[16];
65 	uint32_t regs[4];
66 
67 	do_cpuid(1, regs);
68 	if ((regs[2] & CPUID2_HV) == 0)
69 		return (0);
70 
71 	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
72 	if (regs[0] < CPUID_LEAF_HV_LIMITS)
73 		return (0);
74 
75 	((uint32_t *)&hv_vendor)[0] = regs[1];
76 	((uint32_t *)&hv_vendor)[1] = regs[2];
77 	((uint32_t *)&hv_vendor)[2] = regs[3];
78 	hv_vendor[12] = '\0';
79 	if (strcmp(hv_vendor, "Microsoft Hv") != 0)
80 		return (0);
81 
82 	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
83 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
84 		return (0);
85 
86 	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
87 	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
88 		return (0);
89 	if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
90 		return (0);
91 
92 	return (1);
93 }
94 
95 static void
96 efi_verify_staging_size(unsigned long *nr_pages)
97 {
98 	UINTN sz;
99 	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
100 	EFI_PHYSICAL_ADDRESS start, end;
101 	UINTN key, dsz;
102 	UINT32 dver;
103 	EFI_STATUS status;
104 	int i, ndesc;
105 	unsigned long available_pages = 0;
106 
107 	sz = 0;
108 
109 	for (;;) {
110 		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
111 		if (!EFI_ERROR(status))
112 			break;
113 
114 		if (status != EFI_BUFFER_TOO_SMALL) {
115 			printf("Can't read memory map: %lu\n",
116 			    EFI_ERROR_CODE(status));
117 			goto out;
118 		}
119 
120 		free(map);
121 
122 		/* Allocate 10 descriptors more than the size reported,
123 		 * to allow for any fragmentation caused by calling
124 		 * malloc */
125 		map = malloc(sz + (10 * dsz));
126 		if (map == NULL) {
127 			printf("Unable to allocate memory\n");
128 			goto out;
129 		}
130 	}
131 
132 	ndesc = sz / dsz;
133 	for (i = 0, p = map; i < ndesc;
134 	     i++, p = NextMemoryDescriptor(p, dsz)) {
135 		start = p->PhysicalStart;
136 		end = start + p->NumberOfPages * EFI_PAGE_SIZE;
137 
138 		if (KERNLOAD < start || KERNLOAD >= end)
139 			continue;
140 
141 		available_pages = p->NumberOfPages -
142 			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
143 		break;
144 	}
145 
146 	if (available_pages == 0) {
147 		printf("Can't find valid memory map for staging area!\n");
148 		goto out;
149 	}
150 
151 	i++;
152 	p = NextMemoryDescriptor(p, dsz);
153 
154 	for ( ; i < ndesc;
155 	     i++, p = NextMemoryDescriptor(p, dsz)) {
156 		if (p->Type != EfiConventionalMemory &&
157 		    p->Type != EfiLoaderData)
158 			break;
159 
160 		if (p->PhysicalStart != end)
161 			break;
162 
163 		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;
164 
165 		available_pages += p->NumberOfPages;
166 	}
167 
168 	if (*nr_pages > available_pages) {
169 		printf("Staging area's size is reduced: %ld -> %ld!\n",
170 		    *nr_pages, available_pages);
171 		*nr_pages = available_pages;
172 	}
173 out:
174 	free(map);
175 }
176 #endif /* __amd64__ */
177 
178 #if defined(__arm__)
179 #define	DEFAULT_EFI_STAGING_SIZE	32
180 #else
181 #define	DEFAULT_EFI_STAGING_SIZE	64
182 #endif
183 #ifndef EFI_STAGING_SIZE
184 #define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
185 #endif
186 
187 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
188     defined(__riscv)
189 #define	EFI_STAGING_2M_ALIGN	1
190 #else
191 #define	EFI_STAGING_2M_ALIGN	0
192 #endif
193 
194 #if defined(__amd64__)
195 #define	EFI_STAGING_SLOP	M(8)
196 #else
197 #define	EFI_STAGING_SLOP	0
198 #endif
199 
200 static u_long staging_slop = EFI_STAGING_SLOP;
201 
202 EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
203 bool			stage_offset_set = false;
204 ssize_t			stage_offset;
205 
206 static void
207 efi_copy_free(void)
208 {
209 	BS->FreePages(staging_base, (staging_end - staging_base) /
210 	    EFI_PAGE_SIZE);
211 	stage_offset_set = false;
212 	stage_offset = 0;
213 }
214 
215 #ifdef __amd64__
216 int copy_staging = COPY_STAGING_AUTO;
217 
218 static int
219 command_copy_staging(int argc, char *argv[])
220 {
221 	static const char *const mode[3] = {
222 		[COPY_STAGING_ENABLE] = "enable",
223 		[COPY_STAGING_DISABLE] = "disable",
224 		[COPY_STAGING_AUTO] = "auto",
225 	};
226 	int prev, res;
227 
228 	res = CMD_OK;
229 	if (argc > 2) {
230 		res = CMD_ERROR;
231 	} else if (argc == 2) {
232 		prev = copy_staging;
233 		if (strcmp(argv[1], "enable") == 0)
234 			copy_staging = COPY_STAGING_ENABLE;
235 		else if (strcmp(argv[1], "disable") == 0)
236 			copy_staging = COPY_STAGING_DISABLE;
237 		else if (strcmp(argv[1], "auto") == 0)
238 			copy_staging = COPY_STAGING_AUTO;
239 		else {
240 			printf("usage: copy_staging enable|disable|auto\n");
241 			res = CMD_ERROR;
242 		}
243 		if (res == CMD_OK && prev != copy_staging) {
244 			printf("changed copy_staging, unloading kernel\n");
245 			unload();
246 			efi_copy_free();
247 			efi_copy_init();
248 		}
249 	} else {
250 		printf("copy staging: %s\n", mode[copy_staging]);
251 	}
252 	return (res);
253 }
254 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
255 #endif
256 
257 static int
258 command_staging_slop(int argc, char *argv[])
259 {
260 	char *endp;
261 	u_long new, prev;
262 	int res;
263 
264 	res = CMD_OK;
265 	if (argc > 2) {
266 		res = CMD_ERROR;
267 	} else if (argc == 2) {
268 		new = strtoul(argv[1], &endp, 0);
269 		if (*endp != '\0') {
270 			printf("invalid slop value\n");
271 			res = CMD_ERROR;
272 		}
273 		if (res == CMD_OK && staging_slop != new) {
274 			printf("changed slop, unloading kernel\n");
275 			unload();
276 			efi_copy_free();
277 			efi_copy_init();
278 		}
279 	} else {
280 		printf("staging slop %#lx\n", staging_slop);
281 	}
282 	return (res);
283 }
284 COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
285     command_staging_slop);
286 
287 #if defined(__amd64__)
288 /*
289  * The staging area must reside in the first 1GB or 4GB physical
290  * memory: see elf64_exec() in
291  * boot/efi/loader/arch/amd64/elf64_freebsd.c.
292  */
293 static EFI_PHYSICAL_ADDRESS
294 get_staging_max(void)
295 {
296 	EFI_PHYSICAL_ADDRESS res;
297 
298 	res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
299 	return (res);
300 }
301 #define	EFI_ALLOC_METHOD	AllocateMaxAddress
302 #else
303 #define	EFI_ALLOC_METHOD	AllocateAnyPages
304 #endif
305 
306 int
307 efi_copy_init(void)
308 {
309 	EFI_STATUS	status;
310 	unsigned long nr_pages;
311 	vm_offset_t ess;
312 
313 	ess = EFI_STAGING_SIZE;
314 	if (ess < DEFAULT_EFI_STAGING_SIZE)
315 		ess = DEFAULT_EFI_STAGING_SIZE;
316 	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);
317 
318 #if defined(__amd64__)
319 	/*
320 	 * We'll decrease nr_pages, if it's too big. Currently we only
321 	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
322 	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
323 	 */
324 	if (running_on_hyperv())
325 		efi_verify_staging_size(&nr_pages);
326 
327 	staging = get_staging_max();
328 #endif
329 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
330 	    nr_pages, &staging);
331 	if (EFI_ERROR(status)) {
332 		printf("failed to allocate staging area: %lu\n",
333 		    EFI_ERROR_CODE(status));
334 		return (status);
335 	}
336 	staging_base = staging;
337 	staging_end = staging + nr_pages * EFI_PAGE_SIZE;
338 
339 #if EFI_STAGING_2M_ALIGN
340 	/*
341 	 * Round the kernel load address to a 2MiB value. This is needed
342 	 * because the kernel builds a page table based on where it has
343 	 * been loaded in physical address space. As the kernel will use
344 	 * either a 1MiB or 2MiB page for this we need to make sure it
345 	 * is correctly aligned for both cases.
346 	 */
347 	staging = roundup2(staging, M(2));
348 #endif
349 
350 	return (0);
351 }
352 
353 static bool
354 efi_check_space(vm_offset_t end)
355 {
356 	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
357 	EFI_STATUS status;
358 	unsigned long nr_pages;
359 
360 	end = roundup2(end, EFI_PAGE_SIZE);
361 
362 	/* There is already enough space */
363 	if (end + staging_slop <= staging_end)
364 		return (true);
365 
366 	if (!boot_services_active) {
367 		if (end <= staging_end)
368 			return (true);
369 		panic("efi_check_space: cannot expand staging area "
370 		    "after boot services were exited\n");
371 	}
372 
373 	/*
374 	 * Add slop at the end:
375 	 * 1. amd64 kernel expects to do some very early allocations
376 	 *    by carving out memory after kernend.  Slop guarantees
377 	 *    that it does not ovewrite anything useful.
378 	 * 2. It seems that initial calculation of the staging size
379 	 *    could be somewhat smaller than actually copying in after
380 	 *    boot services are exited.  Slop avoids calling
381 	 *    BS->AllocatePages() when it cannot work.
382 	 */
383 	end += staging_slop;
384 
385 	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
386 #if defined(__amd64__)
387 	/*
388 	 * amd64 needs all memory to be allocated under the 1G or 4G boundary.
389 	 */
390 	if (end > get_staging_max())
391 		goto before_staging;
392 #endif
393 
394 	/* Try to allocate more space after the previous allocation */
395 	addr = staging_end;
396 	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
397 	    &addr);
398 	if (!EFI_ERROR(status)) {
399 		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
400 		return (true);
401 	}
402 
403 before_staging:
404 	/* Try allocating space before the previous allocation */
405 	if (staging < nr_pages * EFI_PAGE_SIZE)
406 		goto expand;
407 	addr = staging - nr_pages * EFI_PAGE_SIZE;
408 #if EFI_STAGING_2M_ALIGN
409 	/* See efi_copy_init for why this is needed */
410 	addr = rounddown2(addr, M(2));
411 #endif
412 	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
413 	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
414 	    &addr);
415 	if (!EFI_ERROR(status)) {
416 		/*
417 		 * Move the old allocation and update the state so
418 		 * translation still works.
419 		 */
420 		staging_base = addr;
421 		memmove((void *)(uintptr_t)staging_base,
422 		    (void *)(uintptr_t)staging, staging_end - staging);
423 		stage_offset -= staging - staging_base;
424 		staging = staging_base;
425 		return (true);
426 	}
427 
428 expand:
429 	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
430 #if EFI_STAGING_2M_ALIGN
431 	nr_pages += M(2) / EFI_PAGE_SIZE;
432 #endif
433 #if defined(__amd64__)
434 	new_base = get_staging_max();
435 #endif
436 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
437 	    nr_pages, &new_base);
438 	if (!EFI_ERROR(status)) {
439 #if EFI_STAGING_2M_ALIGN
440 		new_staging = roundup2(new_base, M(2));
441 #else
442 		new_staging = new_base;
443 #endif
444 		/*
445 		 * Move the old allocation and update the state so
446 		 * translation still works.
447 		 */
448 		memcpy((void *)(uintptr_t)new_staging,
449 		    (void *)(uintptr_t)staging, staging_end - staging);
450 		BS->FreePages(staging_base, (staging_end - staging_base) /
451 		    EFI_PAGE_SIZE);
452 		stage_offset -= staging - new_staging;
453 		staging = new_staging;
454 		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
455 		staging_base = new_base;
456 		return (true);
457 	}
458 
459 	printf("efi_check_space: Unable to expand staging area\n");
460 	return (false);
461 }
462 
463 void *
464 efi_translate(vm_offset_t ptr)
465 {
466 
467 	return ((void *)(ptr + stage_offset));
468 }
469 
470 ssize_t
471 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
472 {
473 
474 	if (!stage_offset_set) {
475 		stage_offset = (vm_offset_t)staging - dest;
476 		stage_offset_set = true;
477 	}
478 
479 	/* XXX: Callers do not check for failure. */
480 	if (!efi_check_space(dest + stage_offset + len)) {
481 		errno = ENOMEM;
482 		return (-1);
483 	}
484 	bcopy(src, (void *)(dest + stage_offset), len);
485 	return (len);
486 }
487 
488 ssize_t
489 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
490 {
491 
492 	/* XXX: Callers do not check for failure. */
493 	if (src + stage_offset + len > staging_end) {
494 		errno = ENOMEM;
495 		return (-1);
496 	}
497 	bcopy((void *)(src + stage_offset), dest, len);
498 	return (len);
499 }
500 
501 ssize_t
502 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
503 {
504 
505 	if (!stage_offset_set) {
506 		stage_offset = (vm_offset_t)staging - dest;
507 		stage_offset_set = true;
508 	}
509 
510 	if (!efi_check_space(dest + stage_offset + len)) {
511 		errno = ENOMEM;
512 		return (-1);
513 	}
514 	return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
515 }
516 
517 void
518 efi_copy_finish(void)
519 {
520 	uint64_t	*src, *dst, *last;
521 
522 	src = (uint64_t *)(uintptr_t)staging;
523 	dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
524 	last = (uint64_t *)(uintptr_t)staging_end;
525 
526 	while (src < last)
527 		*dst++ = *src++;
528 }
529 
/*
 * Variant of efi_copy_finish() that does nothing.
 * NOTE(review): presumably selected when the staged image needs no final
 * copy -- confirm at the call site.
 */
void
efi_copy_finish_nop(void)
{
	/* Intentionally empty. */
}
534