xref: /freebsd/stand/efi/loader/copy.c (revision 10ff414c14eef433d8157f0c17904d740693933b)
1 /*-
2  * Copyright (c) 2013 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Benno Rice under sponsorship from
6  * the FreeBSD Foundation.
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 
34 #include <stand.h>
35 #include <bootstrap.h>
36 
37 #include <efi.h>
38 #include <efilib.h>
39 
40 #include "loader_efi.h"
41 
/* Convert a count of MiB to bytes. */
#define	M(x)	((x) * 1024 * 1024)
/*
 * Convert a count of GiB to bytes.  The arithmetic is done in
 * unsigned long long so that values such as G(4) do not wrap to 0 on
 * platforms where unsigned long is only 32 bits wide.
 */
#define	G(x)	(1ULL * (x) * 1024 * 1024 * 1024)
44 
45 extern int boot_services_gone;
46 
47 #if defined(__i386__) || defined(__amd64__)
48 #include <machine/cpufunc.h>
49 #include <machine/specialreg.h>
50 #include <machine/vmparam.h>
51 
52 /*
53  * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
54  * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
55  */
56 #define CPUID_LEAF_HV_MAXLEAF		0x40000000
57 #define CPUID_LEAF_HV_INTERFACE		0x40000001
58 #define CPUID_LEAF_HV_FEATURES		0x40000003
59 #define CPUID_LEAF_HV_LIMITS		0x40000005
60 #define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
61 #define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
62 #define CPUID_HV_MSR_HYPERCALL		0x0020
63 
64 static int
65 running_on_hyperv(void)
66 {
67 	char hv_vendor[16];
68 	uint32_t regs[4];
69 
70 	do_cpuid(1, regs);
71 	if ((regs[2] & CPUID2_HV) == 0)
72 		return (0);
73 
74 	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
75 	if (regs[0] < CPUID_LEAF_HV_LIMITS)
76 		return (0);
77 
78 	((uint32_t *)&hv_vendor)[0] = regs[1];
79 	((uint32_t *)&hv_vendor)[1] = regs[2];
80 	((uint32_t *)&hv_vendor)[2] = regs[3];
81 	hv_vendor[12] = '\0';
82 	if (strcmp(hv_vendor, "Microsoft Hv") != 0)
83 		return (0);
84 
85 	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
86 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
87 		return (0);
88 
89 	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
90 	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
91 		return (0);
92 	if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
93 		return (0);
94 
95 	return (1);
96 }
97 
98 static void
99 efi_verify_staging_size(unsigned long *nr_pages)
100 {
101 	UINTN sz;
102 	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
103 	EFI_PHYSICAL_ADDRESS start, end;
104 	UINTN key, dsz;
105 	UINT32 dver;
106 	EFI_STATUS status;
107 	int i, ndesc;
108 	unsigned long available_pages = 0;
109 
110 	sz = 0;
111 
112 	for (;;) {
113 		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
114 		if (!EFI_ERROR(status))
115 			break;
116 
117 		if (status != EFI_BUFFER_TOO_SMALL) {
118 			printf("Can't read memory map: %lu\n",
119 			    EFI_ERROR_CODE(status));
120 			goto out;
121 		}
122 
123 		free(map);
124 
125 		/* Allocate 10 descriptors more than the size reported,
126 		 * to allow for any fragmentation caused by calling
127 		 * malloc */
128 		map = malloc(sz + (10 * dsz));
129 		if (map == NULL) {
130 			printf("Unable to allocate memory\n");
131 			goto out;
132 		}
133 	}
134 
135 	ndesc = sz / dsz;
136 	for (i = 0, p = map; i < ndesc;
137 	     i++, p = NextMemoryDescriptor(p, dsz)) {
138 		start = p->PhysicalStart;
139 		end = start + p->NumberOfPages * EFI_PAGE_SIZE;
140 
141 		if (KERNLOAD < start || KERNLOAD >= end)
142 			continue;
143 
144 		available_pages = p->NumberOfPages -
145 			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
146 		break;
147 	}
148 
149 	if (available_pages == 0) {
150 		printf("Can't find valid memory map for staging area!\n");
151 		goto out;
152 	}
153 
154 	i++;
155 	p = NextMemoryDescriptor(p, dsz);
156 
157 	for ( ; i < ndesc;
158 	     i++, p = NextMemoryDescriptor(p, dsz)) {
159 		if (p->Type != EfiConventionalMemory &&
160 		    p->Type != EfiLoaderData)
161 			break;
162 
163 		if (p->PhysicalStart != end)
164 			break;
165 
166 		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;
167 
168 		available_pages += p->NumberOfPages;
169 	}
170 
171 	if (*nr_pages > available_pages) {
172 		printf("Staging area's size is reduced: %ld -> %ld!\n",
173 		    *nr_pages, available_pages);
174 		*nr_pages = available_pages;
175 	}
176 out:
177 	free(map);
178 }
179 #endif /* __i386__ || __amd64__ */
180 
181 #if defined(__arm__)
182 #define	DEFAULT_EFI_STAGING_SIZE	32
183 #else
184 #define	DEFAULT_EFI_STAGING_SIZE	64
185 #endif
186 #ifndef EFI_STAGING_SIZE
187 #define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
188 #endif
189 
190 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
191     defined(__riscv)
192 #define	EFI_STAGING_2M_ALIGN	1
193 #else
194 #define	EFI_STAGING_2M_ALIGN	0
195 #endif
196 
197 #if defined(__amd64__)
198 #define	EFI_STAGING_SLOP	M(8)
199 #else
200 #define	EFI_STAGING_SLOP	0
201 #endif
202 
203 static u_long staging_slop = EFI_STAGING_SLOP;
204 
205 EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
206 int			stage_offset_set = 0;
207 ssize_t			stage_offset;
208 
209 static void
210 efi_copy_free(void)
211 {
212 	BS->FreePages(staging_base, (staging_end - staging_base) /
213 	    EFI_PAGE_SIZE);
214 	stage_offset_set = 0;
215 	stage_offset = 0;
216 }
217 
218 #ifdef __amd64__
219 int copy_staging = COPY_STAGING_AUTO;
220 
221 static int
222 command_copy_staging(int argc, char *argv[])
223 {
224 	static const char *const mode[3] = {
225 		[COPY_STAGING_ENABLE] = "enable",
226 		[COPY_STAGING_DISABLE] = "disable",
227 		[COPY_STAGING_AUTO] = "auto",
228 	};
229 	int prev, res;
230 
231 	res = CMD_OK;
232 	if (argc > 2) {
233 		res = CMD_ERROR;
234 	} else if (argc == 2) {
235 		prev = copy_staging;
236 		if (strcmp(argv[1], "enable") == 0)
237 			copy_staging = COPY_STAGING_ENABLE;
238 		else if (strcmp(argv[1], "disable") == 0)
239 			copy_staging = COPY_STAGING_DISABLE;
240 		else if (strcmp(argv[1], "auto") == 0)
241 			copy_staging = COPY_STAGING_AUTO;
242 		else {
243 			printf("usage: copy_staging enable|disable|auto\n");
244 			res = CMD_ERROR;
245 		}
246 		if (res == CMD_OK && prev != copy_staging) {
247 			printf("changed copy_staging, unloading kernel\n");
248 			unload();
249 			efi_copy_free();
250 			efi_copy_init();
251 		}
252 	} else {
253 		printf("copy staging: %s\n", mode[copy_staging]);
254 	}
255 	return (res);
256 }
257 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
258 #endif
259 
260 static int
261 command_staging_slop(int argc, char *argv[])
262 {
263 	char *endp;
264 	u_long new, prev;
265 	int res;
266 
267 	res = CMD_OK;
268 	if (argc > 2) {
269 		res = CMD_ERROR;
270 	} else if (argc == 2) {
271 		new = strtoul(argv[1], &endp, 0);
272 		if (*endp != '\0') {
273 			printf("invalid slop value\n");
274 			res = CMD_ERROR;
275 		}
276 		if (res == CMD_OK && staging_slop != new) {
277 			printf("changed slop, unloading kernel\n");
278 			unload();
279 			efi_copy_free();
280 			efi_copy_init();
281 		}
282 	} else {
283 		printf("staging slop %#lx\n", staging_slop);
284 	}
285 	return (res);
286 }
287 COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
288     command_staging_slop);
289 
290 #if defined(__i386__) || defined(__amd64__)
/*
 * The staging area must reside in the first 1GB or 4GB of physical
 * memory: see elf64_exec() in
 * boot/efi/loader/arch/amd64/elf64_freebsd.c.
 */
296 static EFI_PHYSICAL_ADDRESS
297 get_staging_max(void)
298 {
299 	EFI_PHYSICAL_ADDRESS res;
300 
301 #if defined(__i386__)
302 	res = G(1);
303 #elif defined(__amd64__)
304 	res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
305 #endif
306 	return (res);
307 }
308 #define	EFI_ALLOC_METHOD	AllocateMaxAddress
309 #else
310 #define	EFI_ALLOC_METHOD	AllocateAnyPages
311 #endif
312 
313 int
314 efi_copy_init(void)
315 {
316 	EFI_STATUS	status;
317 	unsigned long nr_pages;
318 	vm_offset_t ess;
319 
320 	ess = EFI_STAGING_SIZE;
321 	if (ess < DEFAULT_EFI_STAGING_SIZE)
322 		ess = DEFAULT_EFI_STAGING_SIZE;
323 	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);
324 
325 #if defined(__i386__) || defined(__amd64__)
326 	/*
327 	 * We'll decrease nr_pages, if it's too big. Currently we only
328 	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
329 	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
330 	 */
331 	if (running_on_hyperv())
332 		efi_verify_staging_size(&nr_pages);
333 
334 	staging = get_staging_max();
335 #endif
336 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData,
337 	    nr_pages, &staging);
338 	if (EFI_ERROR(status)) {
339 		printf("failed to allocate staging area: %lu\n",
340 		    EFI_ERROR_CODE(status));
341 		return (status);
342 	}
343 	staging_base = staging;
344 	staging_end = staging + nr_pages * EFI_PAGE_SIZE;
345 
346 #if EFI_STAGING_2M_ALIGN
347 	/*
348 	 * Round the kernel load address to a 2MiB value. This is needed
349 	 * because the kernel builds a page table based on where it has
350 	 * been loaded in physical address space. As the kernel will use
351 	 * either a 1MiB or 2MiB page for this we need to make sure it
352 	 * is correctly aligned for both cases.
353 	 */
354 	staging = roundup2(staging, M(2));
355 #endif
356 
357 	return (0);
358 }
359 
360 static bool
361 efi_check_space(vm_offset_t end)
362 {
363 	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
364 	EFI_STATUS status;
365 	unsigned long nr_pages;
366 
367 	end = roundup2(end, EFI_PAGE_SIZE);
368 
369 	/* There is already enough space */
370 	if (end + staging_slop <= staging_end)
371 		return (true);
372 
373 	if (boot_services_gone) {
374 		if (end <= staging_end)
375 			return (true);
376 		panic("efi_check_space: cannot expand staging area "
377 		    "after boot services were exited\n");
378 	}
379 
380 	/*
381 	 * Add slop at the end:
382 	 * 1. amd64 kernel expects to do some very early allocations
383 	 *    by carving out memory after kernend.  Slop guarantees
384 	 *    that it does not ovewrite anything useful.
385 	 * 2. It seems that initial calculation of the staging size
386 	 *    could be somewhat smaller than actually copying in after
387 	 *    boot services are exited.  Slop avoids calling
388 	 *    BS->AllocatePages() when it cannot work.
389 	 */
390 	end += staging_slop;
391 
392 	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
393 #if defined(__i386__) || defined(__amd64__)
394 	/*
395 	 * i386 needs all memory to be allocated under the 1G boundary.
396 	 * amd64 needs all memory to be allocated under the 1G or 4G boundary.
397 	 */
398 	if (end > get_staging_max())
399 		goto before_staging;
400 #endif
401 
402 	/* Try to allocate more space after the previous allocation */
403 	addr = staging_end;
404 	status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages,
405 	    &addr);
406 	if (!EFI_ERROR(status)) {
407 		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
408 		return (true);
409 	}
410 
411 before_staging:
412 	/* Try allocating space before the previous allocation */
413 	if (staging < nr_pages * EFI_PAGE_SIZE)
414 		goto expand;
415 	addr = staging - nr_pages * EFI_PAGE_SIZE;
416 #if EFI_STAGING_2M_ALIGN
417 	/* See efi_copy_init for why this is needed */
418 	addr = rounddown2(addr, M(2));
419 #endif
420 	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
421 	status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages,
422 	    &addr);
423 	if (!EFI_ERROR(status)) {
424 		/*
425 		 * Move the old allocation and update the state so
426 		 * translation still works.
427 		 */
428 		staging_base = addr;
429 		memmove((void *)(uintptr_t)staging_base,
430 		    (void *)(uintptr_t)staging, staging_end - staging);
431 		stage_offset -= staging - staging_base;
432 		staging = staging_base;
433 		return (true);
434 	}
435 
436 expand:
437 	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
438 #if EFI_STAGING_2M_ALIGN
439 	nr_pages += M(2) / EFI_PAGE_SIZE;
440 #endif
441 #if defined(__i386__) || defined(__amd64__)
442 	new_base = get_staging_max();
443 #endif
444 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData,
445 	    nr_pages, &new_base);
446 	if (!EFI_ERROR(status)) {
447 #if EFI_STAGING_2M_ALIGN
448 		new_staging = roundup2(new_base, M(2));
449 #else
450 		new_staging = new_base;
451 #endif
452 		/*
453 		 * Move the old allocation and update the state so
454 		 * translation still works.
455 		 */
456 		memcpy((void *)(uintptr_t)new_staging,
457 		    (void *)(uintptr_t)staging, staging_end - staging);
458 		BS->FreePages(staging_base, (staging_end - staging_base) /
459 		    EFI_PAGE_SIZE);
460 		stage_offset -= staging - new_staging;
461 		staging = new_staging;
462 		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
463 		staging_base = new_base;
464 		return (true);
465 	}
466 
467 	printf("efi_check_space: Unable to expand staging area\n");
468 	return (false);
469 }
470 
471 void *
472 efi_translate(vm_offset_t ptr)
473 {
474 
475 	return ((void *)(ptr + stage_offset));
476 }
477 
478 ssize_t
479 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
480 {
481 
482 	if (!stage_offset_set) {
483 		stage_offset = (vm_offset_t)staging - dest;
484 		stage_offset_set = 1;
485 	}
486 
487 	/* XXX: Callers do not check for failure. */
488 	if (!efi_check_space(dest + stage_offset + len)) {
489 		errno = ENOMEM;
490 		return (-1);
491 	}
492 	bcopy(src, (void *)(dest + stage_offset), len);
493 	return (len);
494 }
495 
496 ssize_t
497 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
498 {
499 
500 	/* XXX: Callers do not check for failure. */
501 	if (src + stage_offset + len > staging_end) {
502 		errno = ENOMEM;
503 		return (-1);
504 	}
505 	bcopy((void *)(src + stage_offset), dest, len);
506 	return (len);
507 }
508 
509 ssize_t
510 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
511 {
512 
513 	if (!stage_offset_set) {
514 		stage_offset = (vm_offset_t)staging - dest;
515 		stage_offset_set = 1;
516 	}
517 
518 	if (!efi_check_space(dest + stage_offset + len)) {
519 		errno = ENOMEM;
520 		return (-1);
521 	}
522 	return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
523 }
524 
525 void
526 efi_copy_finish(void)
527 {
528 	uint64_t	*src, *dst, *last;
529 
530 	src = (uint64_t *)(uintptr_t)staging;
531 	dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
532 	last = (uint64_t *)(uintptr_t)staging_end;
533 
534 	while (src < last)
535 		*dst++ = *src++;
536 }
537 
/*
 * Variant of efi_copy_finish() that performs no copy at all.
 * NOTE(review): presumably used when the kernel runs from the staging
 * area in place -- confirm against the callers.
 */
void
efi_copy_finish_nop(void)
{
	/* Intentionally empty. */
}
542