xref: /freebsd/stand/efi/loader/copy.c (revision ec0ea6efa1ad229d75c394c1a9b9cac33af2b1d3)
1 /*-
2  * Copyright (c) 2013 The FreeBSD Foundation
3  * All rights reserved.
4  *
5  * This software was developed by Benno Rice under sponsorship from
6  * the FreeBSD Foundation.
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 
34 #include <stand.h>
35 #include <bootstrap.h>
36 
37 #include <efi.h>
38 #include <efilib.h>
39 
40 #include "loader_efi.h"
41 
42 #define	M(x)	((x) * 1024 * 1024)
43 #define	G(x)	(1UL * (x) * 1024 * 1024 * 1024)
44 
45 #if defined(__i386__) || defined(__amd64__)
46 #include <machine/cpufunc.h>
47 #include <machine/specialreg.h>
48 #include <machine/vmparam.h>
49 
50 /*
51  * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
52  * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
53  */
54 #define CPUID_LEAF_HV_MAXLEAF		0x40000000
55 #define CPUID_LEAF_HV_INTERFACE		0x40000001
56 #define CPUID_LEAF_HV_FEATURES		0x40000003
57 #define CPUID_LEAF_HV_LIMITS		0x40000005
58 #define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
59 #define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
60 #define CPUID_HV_MSR_HYPERCALL		0x0020
61 
62 static int
63 running_on_hyperv(void)
64 {
65 	char hv_vendor[16];
66 	uint32_t regs[4];
67 
68 	do_cpuid(1, regs);
69 	if ((regs[2] & CPUID2_HV) == 0)
70 		return (0);
71 
72 	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
73 	if (regs[0] < CPUID_LEAF_HV_LIMITS)
74 		return (0);
75 
76 	((uint32_t *)&hv_vendor)[0] = regs[1];
77 	((uint32_t *)&hv_vendor)[1] = regs[2];
78 	((uint32_t *)&hv_vendor)[2] = regs[3];
79 	hv_vendor[12] = '\0';
80 	if (strcmp(hv_vendor, "Microsoft Hv") != 0)
81 		return (0);
82 
83 	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
84 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
85 		return (0);
86 
87 	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
88 	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
89 		return (0);
90 	if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
91 		return (0);
92 
93 	return (1);
94 }
95 
96 static void
97 efi_verify_staging_size(unsigned long *nr_pages)
98 {
99 	UINTN sz;
100 	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
101 	EFI_PHYSICAL_ADDRESS start, end;
102 	UINTN key, dsz;
103 	UINT32 dver;
104 	EFI_STATUS status;
105 	int i, ndesc;
106 	unsigned long available_pages = 0;
107 
108 	sz = 0;
109 
110 	for (;;) {
111 		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
112 		if (!EFI_ERROR(status))
113 			break;
114 
115 		if (status != EFI_BUFFER_TOO_SMALL) {
116 			printf("Can't read memory map: %lu\n",
117 			    EFI_ERROR_CODE(status));
118 			goto out;
119 		}
120 
121 		free(map);
122 
123 		/* Allocate 10 descriptors more than the size reported,
124 		 * to allow for any fragmentation caused by calling
125 		 * malloc */
126 		map = malloc(sz + (10 * dsz));
127 		if (map == NULL) {
128 			printf("Unable to allocate memory\n");
129 			goto out;
130 		}
131 	}
132 
133 	ndesc = sz / dsz;
134 	for (i = 0, p = map; i < ndesc;
135 	     i++, p = NextMemoryDescriptor(p, dsz)) {
136 		start = p->PhysicalStart;
137 		end = start + p->NumberOfPages * EFI_PAGE_SIZE;
138 
139 		if (KERNLOAD < start || KERNLOAD >= end)
140 			continue;
141 
142 		available_pages = p->NumberOfPages -
143 			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
144 		break;
145 	}
146 
147 	if (available_pages == 0) {
148 		printf("Can't find valid memory map for staging area!\n");
149 		goto out;
150 	}
151 
152 	i++;
153 	p = NextMemoryDescriptor(p, dsz);
154 
155 	for ( ; i < ndesc;
156 	     i++, p = NextMemoryDescriptor(p, dsz)) {
157 		if (p->Type != EfiConventionalMemory &&
158 		    p->Type != EfiLoaderData)
159 			break;
160 
161 		if (p->PhysicalStart != end)
162 			break;
163 
164 		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;
165 
166 		available_pages += p->NumberOfPages;
167 	}
168 
169 	if (*nr_pages > available_pages) {
170 		printf("Staging area's size is reduced: %ld -> %ld!\n",
171 		    *nr_pages, available_pages);
172 		*nr_pages = available_pages;
173 	}
174 out:
175 	free(map);
176 }
177 #endif /* __i386__ || __amd64__ */
178 
/*
 * Default size of the staging area.  The value is in megabytes:
 * efi_copy_init() multiplies it by M(1) before converting to pages.
 */
#if defined(__arm__)
#define	DEFAULT_EFI_STAGING_SIZE	32
#else
#define	DEFAULT_EFI_STAGING_SIZE	64
#endif
/* May be overridden at build time; efi_copy_init() never goes below the default. */
#ifndef EFI_STAGING_SIZE
#define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
#endif

/* Non-zero when the start of the staging area is rounded to a 2MiB boundary. */
#if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
    defined(__riscv)
#define	EFI_STAGING_2M_ALIGN	1
#else
#define	EFI_STAGING_2M_ALIGN	0
#endif

/* Extra bytes efi_check_space() keeps available past the requested end. */
#if defined(__amd64__)
#define	EFI_STAGING_SLOP	M(8)
#else
#define	EFI_STAGING_SLOP	0
#endif

/* Current slop in bytes; reported by the "staging_slop" command. */
static u_long staging_slop = EFI_STAGING_SLOP;

/*
 * staging_base: start of the page allocation holding the staging area.
 * staging:      start of the usable staging area (staging_base, possibly
 *               rounded up to 2MiB).
 * staging_end:  end of the page allocation.
 */
EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
/* Non-zero once stage_offset has been fixed by the first copyin/readin. */
int			stage_offset_set = 0;
/* Value added to a load address to obtain its address in the staging area. */
ssize_t			stage_offset;
206 
207 static void
208 efi_copy_free(void)
209 {
210 	BS->FreePages(staging_base, (staging_end - staging_base) /
211 	    EFI_PAGE_SIZE);
212 	stage_offset_set = 0;
213 	stage_offset = 0;
214 }
215 
216 #ifdef __amd64__
217 int copy_staging = COPY_STAGING_AUTO;
218 
219 static int
220 command_copy_staging(int argc, char *argv[])
221 {
222 	static const char *const mode[3] = {
223 		[COPY_STAGING_ENABLE] = "enable",
224 		[COPY_STAGING_DISABLE] = "disable",
225 		[COPY_STAGING_AUTO] = "auto",
226 	};
227 	int prev, res;
228 
229 	res = CMD_OK;
230 	if (argc > 2) {
231 		res = CMD_ERROR;
232 	} else if (argc == 2) {
233 		prev = copy_staging;
234 		if (strcmp(argv[1], "enable") == 0)
235 			copy_staging = COPY_STAGING_ENABLE;
236 		else if (strcmp(argv[1], "disable") == 0)
237 			copy_staging = COPY_STAGING_DISABLE;
238 		else if (strcmp(argv[1], "auto") == 0)
239 			copy_staging = COPY_STAGING_AUTO;
240 		else {
241 			printf("usage: copy_staging enable|disable|auto\n");
242 			res = CMD_ERROR;
243 		}
244 		if (res == CMD_OK && prev != copy_staging) {
245 			printf("changed copy_staging, unloading kernel\n");
246 			unload();
247 			efi_copy_free();
248 			efi_copy_init();
249 		}
250 	} else {
251 		printf("copy staging: %s\n", mode[copy_staging]);
252 	}
253 	return (res);
254 }
255 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
256 #endif
257 
258 static int
259 command_staging_slop(int argc, char *argv[])
260 {
261 	char *endp;
262 	u_long new, prev;
263 	int res;
264 
265 	res = CMD_OK;
266 	if (argc > 2) {
267 		res = CMD_ERROR;
268 	} else if (argc == 2) {
269 		new = strtoul(argv[1], &endp, 0);
270 		if (*endp != '\0') {
271 			printf("invalid slop value\n");
272 			res = CMD_ERROR;
273 		}
274 		if (res == CMD_OK && staging_slop != new) {
275 			printf("changed slop, unloading kernel\n");
276 			unload();
277 			efi_copy_free();
278 			efi_copy_init();
279 		}
280 	} else {
281 		printf("staging slop %#lx\n", staging_slop);
282 	}
283 	return (res);
284 }
285 COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
286     command_staging_slop);
287 
288 #if defined(__i386__) || defined(__amd64__)
/*
 * The staging area must reside in the first 1GB or 4GB of physical
 * memory: see elf64_exec() in
 * boot/efi/loader/arch/amd64/elf64_freebsd.c.
 */
294 static EFI_PHYSICAL_ADDRESS
295 get_staging_max(void)
296 {
297 	EFI_PHYSICAL_ADDRESS res;
298 
299 #if defined(__i386__)
300 	res = G(1);
301 #elif defined(__amd64__)
302 	res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
303 #endif
304 	return (res);
305 }
306 #define	EFI_ALLOC_METHOD	AllocateMaxAddress
307 #else
308 #define	EFI_ALLOC_METHOD	AllocateAnyPages
309 #endif
310 
311 int
312 efi_copy_init(void)
313 {
314 	EFI_STATUS	status;
315 	unsigned long nr_pages;
316 	vm_offset_t ess;
317 
318 	ess = EFI_STAGING_SIZE;
319 	if (ess < DEFAULT_EFI_STAGING_SIZE)
320 		ess = DEFAULT_EFI_STAGING_SIZE;
321 	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);
322 
323 #if defined(__i386__) || defined(__amd64__)
324 	/*
325 	 * We'll decrease nr_pages, if it's too big. Currently we only
326 	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
327 	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
328 	 */
329 	if (running_on_hyperv())
330 		efi_verify_staging_size(&nr_pages);
331 
332 	staging = get_staging_max();
333 #endif
334 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData,
335 	    nr_pages, &staging);
336 	if (EFI_ERROR(status)) {
337 		printf("failed to allocate staging area: %lu\n",
338 		    EFI_ERROR_CODE(status));
339 		return (status);
340 	}
341 	staging_base = staging;
342 	staging_end = staging + nr_pages * EFI_PAGE_SIZE;
343 
344 #if EFI_STAGING_2M_ALIGN
345 	/*
346 	 * Round the kernel load address to a 2MiB value. This is needed
347 	 * because the kernel builds a page table based on where it has
348 	 * been loaded in physical address space. As the kernel will use
349 	 * either a 1MiB or 2MiB page for this we need to make sure it
350 	 * is correctly aligned for both cases.
351 	 */
352 	staging = roundup2(staging, M(2));
353 #endif
354 
355 	return (0);
356 }
357 
/*
 * Make sure the staging area reaches at least up to 'end' (an address
 * inside the staging area, rounded up here to a page), growing it when
 * needed.
 *
 * Growth is attempted in three ways, in order:
 *  1. allocate the missing pages directly after staging_end;
 *  2. allocate pages directly before the area and slide the contents
 *     down into them;
 *  3. allocate a new, larger area, copy the contents over and free the
 *     old one.
 * Ways 2 and 3 move the data, so stage_offset is adjusted to keep
 * efi_translate() valid; callers must not cache translated addresses
 * across this call.
 *
 * Returns true when the space is available, false when every attempt
 * failed.  Panics if growth is required after boot services were exited.
 */
static bool
efi_check_space(vm_offset_t end)
{
	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
	EFI_STATUS status;
	unsigned long nr_pages;

	end = roundup2(end, EFI_PAGE_SIZE);

	/* There is already enough space */
	if (end + staging_slop <= staging_end)
		return (true);

	/*
	 * Without boot services no allocation is possible: accept the
	 * request if it fits without slop, otherwise give up hard.
	 */
	if (!boot_services_active) {
		if (end <= staging_end)
			return (true);
		panic("efi_check_space: cannot expand staging area "
		    "after boot services were exited\n");
	}

	/*
	 * Add slop at the end:
	 * 1. amd64 kernel expects to do some very early allocations
	 *    by carving out memory after kernend.  Slop guarantees
	 *    that it does not overwrite anything useful.
	 * 2. It seems that initial calculation of the staging size
	 *    could be somewhat smaller than actually copying in after
	 *    boot services are exited.  Slop avoids calling
	 *    BS->AllocatePages() when it cannot work.
	 */
	end += staging_slop;

	/* Number of pages missing past the current end. */
	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__i386__) || defined(__amd64__)
	/*
	 * i386 needs all memory to be allocated under the 1G boundary.
	 * amd64 needs all memory to be allocated under the 1G or 4G boundary.
	 */
	if (end > get_staging_max())
		goto before_staging;
#endif

	/* Try to allocate more space after the previous allocation */
	addr = staging_end;
	status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
		return (true);
	}

before_staging:
	/* Try allocating space before the previous allocation */
	/* Not enough address space below staging for the subtraction. */
	if (staging < nr_pages * EFI_PAGE_SIZE)
		goto expand;
	addr = staging - nr_pages * EFI_PAGE_SIZE;
#if EFI_STAGING_2M_ALIGN
	/* See efi_copy_init for why this is needed */
	addr = rounddown2(addr, M(2));
#endif
	/* Allocate everything between the new start and the old allocation. */
	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
	status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		staging_base = addr;
		/* Source and destination may overlap, hence memmove. */
		memmove((void *)(uintptr_t)staging_base,
		    (void *)(uintptr_t)staging, staging_end - staging);
		stage_offset -= staging - staging_base;
		staging = staging_base;
		return (true);
	}

expand:
	/* Size of a whole new area covering everything up to 'end'. */
	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
#if EFI_STAGING_2M_ALIGN
	/* Room for rounding the new start up to a 2MiB boundary. */
	nr_pages += M(2) / EFI_PAGE_SIZE;
#endif
#if defined(__i386__) || defined(__amd64__)
	new_base = get_staging_max();
#endif
	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData,
	    nr_pages, &new_base);
	if (!EFI_ERROR(status)) {
#if EFI_STAGING_2M_ALIGN
		new_staging = roundup2(new_base, M(2));
#else
		new_staging = new_base;
#endif
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		memcpy((void *)(uintptr_t)new_staging,
		    (void *)(uintptr_t)staging, staging_end - staging);
		BS->FreePages(staging_base, (staging_end - staging_base) /
		    EFI_PAGE_SIZE);
		stage_offset -= staging - new_staging;
		staging = new_staging;
		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
		staging_base = new_base;
		return (true);
	}

	printf("efi_check_space: Unable to expand staging area\n");
	return (false);
}
468 
469 void *
470 efi_translate(vm_offset_t ptr)
471 {
472 
473 	return ((void *)(ptr + stage_offset));
474 }
475 
476 ssize_t
477 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
478 {
479 
480 	if (!stage_offset_set) {
481 		stage_offset = (vm_offset_t)staging - dest;
482 		stage_offset_set = 1;
483 	}
484 
485 	/* XXX: Callers do not check for failure. */
486 	if (!efi_check_space(dest + stage_offset + len)) {
487 		errno = ENOMEM;
488 		return (-1);
489 	}
490 	bcopy(src, (void *)(dest + stage_offset), len);
491 	return (len);
492 }
493 
494 ssize_t
495 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
496 {
497 
498 	/* XXX: Callers do not check for failure. */
499 	if (src + stage_offset + len > staging_end) {
500 		errno = ENOMEM;
501 		return (-1);
502 	}
503 	bcopy((void *)(src + stage_offset), dest, len);
504 	return (len);
505 }
506 
507 ssize_t
508 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
509 {
510 
511 	if (!stage_offset_set) {
512 		stage_offset = (vm_offset_t)staging - dest;
513 		stage_offset_set = 1;
514 	}
515 
516 	if (!efi_check_space(dest + stage_offset + len)) {
517 		errno = ENOMEM;
518 		return (-1);
519 	}
520 	return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
521 }
522 
523 void
524 efi_copy_finish(void)
525 {
526 	uint64_t	*src, *dst, *last;
527 
528 	src = (uint64_t *)(uintptr_t)staging;
529 	dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
530 	last = (uint64_t *)(uintptr_t)staging_end;
531 
532 	while (src < last)
533 		*dst++ = *src++;
534 }
535 
/*
 * Does nothing.  Presumably used in place of efi_copy_finish() when the
 * staging area does not have to be copied -- confirm against callers.
 */
void
efi_copy_finish_nop(void)
{
}
540