xref: /freebsd/stand/efi/loader/copy.c (revision a698c15a41afca19f87e11d4f8ce5664f992673b)
1 /*-
2  * Copyright (c) 2013 The FreeBSD Foundation
3  *
4  * This software was developed by Benno Rice under sponsorship from
5  * the FreeBSD Foundation.
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 
30 #include <stand.h>
31 #include <bootstrap.h>
32 
33 #include <efi.h>
34 #include <efilib.h>
35 
36 #include "loader_efi.h"
37 
/* Size helpers: M(x) is x MiB and G(x) is x GiB, both expressed in bytes. */
#define	M(x)	((x) * 1024 * 1024)
/* The leading 1ULL makes the G() product unsigned long long. */
#define	G(x)	(1ULL * (x) * 1024 * 1024 * 1024)
40 
41 #if defined(__amd64__)
42 #include <machine/cpufunc.h>
43 #include <machine/specialreg.h>
44 #include <machine/vmparam.h>
45 
/*
 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
 *
 * CPUID leaves and values consulted by running_on_hyperv() below.
 */
/* Leaf whose regs[0] is compared against the highest leaf we need. */
#define CPUID_LEAF_HV_MAXLEAF		0x40000000
/* Leaf whose regs[0] must equal CPUID_HV_IFACE_HYPERV. */
#define CPUID_LEAF_HV_INTERFACE		0x40000001
/* Leaf whose regs[0] carries the CPUID_HV_MSR_* bits. */
#define CPUID_LEAF_HV_FEATURES		0x40000003
/* Minimum value required from CPUID_LEAF_HV_MAXLEAF. */
#define CPUID_LEAF_HV_LIMITS		0x40000005
#define CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
#define CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
/* Feature bit that running_on_hyperv() requires alongside TIME_REFCNT. */
#define CPUID_HV_MSR_HYPERCALL		0x0020
57 
58 static int
59 running_on_hyperv(void)
60 {
61 	char hv_vendor[16];
62 	uint32_t regs[4];
63 
64 	do_cpuid(1, regs);
65 	if ((regs[2] & CPUID2_HV) == 0)
66 		return (0);
67 
68 	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
69 	if (regs[0] < CPUID_LEAF_HV_LIMITS)
70 		return (0);
71 
72 	((uint32_t *)&hv_vendor)[0] = regs[1];
73 	((uint32_t *)&hv_vendor)[1] = regs[2];
74 	((uint32_t *)&hv_vendor)[2] = regs[3];
75 	hv_vendor[12] = '\0';
76 	if (strcmp(hv_vendor, "Microsoft Hv") != 0)
77 		return (0);
78 
79 	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
80 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
81 		return (0);
82 
83 	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
84 	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
85 		return (0);
86 	if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
87 		return (0);
88 
89 	return (1);
90 }
91 
92 static void
93 efi_verify_staging_size(unsigned long *nr_pages)
94 {
95 	UINTN sz;
96 	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
97 	EFI_PHYSICAL_ADDRESS start, end;
98 	UINTN key, dsz;
99 	UINT32 dver;
100 	EFI_STATUS status;
101 	int i, ndesc;
102 	unsigned long available_pages = 0;
103 
104 	sz = 0;
105 
106 	for (;;) {
107 		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
108 		if (!EFI_ERROR(status))
109 			break;
110 
111 		if (status != EFI_BUFFER_TOO_SMALL) {
112 			printf("Can't read memory map: %lu\n",
113 			    EFI_ERROR_CODE(status));
114 			goto out;
115 		}
116 
117 		free(map);
118 
119 		/* Allocate 10 descriptors more than the size reported,
120 		 * to allow for any fragmentation caused by calling
121 		 * malloc */
122 		map = malloc(sz + (10 * dsz));
123 		if (map == NULL) {
124 			printf("Unable to allocate memory\n");
125 			goto out;
126 		}
127 	}
128 
129 	ndesc = sz / dsz;
130 	for (i = 0, p = map; i < ndesc;
131 	     i++, p = NextMemoryDescriptor(p, dsz)) {
132 		start = p->PhysicalStart;
133 		end = start + p->NumberOfPages * EFI_PAGE_SIZE;
134 
135 		if (KERNLOAD < start || KERNLOAD >= end)
136 			continue;
137 
138 		available_pages = p->NumberOfPages -
139 			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
140 		break;
141 	}
142 
143 	if (available_pages == 0) {
144 		printf("Can't find valid memory map for staging area!\n");
145 		goto out;
146 	}
147 
148 	i++;
149 	p = NextMemoryDescriptor(p, dsz);
150 
151 	for ( ; i < ndesc;
152 	     i++, p = NextMemoryDescriptor(p, dsz)) {
153 		if (p->Type != EfiConventionalMemory &&
154 		    p->Type != EfiLoaderData)
155 			break;
156 
157 		if (p->PhysicalStart != end)
158 			break;
159 
160 		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;
161 
162 		available_pages += p->NumberOfPages;
163 	}
164 
165 	if (*nr_pages > available_pages) {
166 		printf("Staging area's size is reduced: %ld -> %ld!\n",
167 		    *nr_pages, available_pages);
168 		*nr_pages = available_pages;
169 	}
170 out:
171 	free(map);
172 }
173 #endif /* __amd64__ */
174 
/*
 * Default size of the staging area, in MiB (efi_copy_init() multiplies
 * the value by M(1)).  32-bit arm gets a smaller default.
 */
#ifdef __arm__
#define	DEFAULT_EFI_STAGING_SIZE	32
#else
#define	DEFAULT_EFI_STAGING_SIZE	64
#endif

/* The build may predefine EFI_STAGING_SIZE; otherwise use the default. */
#if !defined(EFI_STAGING_SIZE)
#define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
#endif

/* Architectures on which the staging address is rounded to 2MiB. */
#if defined(__amd64__) || defined(__i386__) || defined(__aarch64__) || \
    defined(__arm__) || defined(__riscv)
#define	EFI_STAGING_2M_ALIGN	1
#else
#define	EFI_STAGING_2M_ALIGN	0
#endif

/* Initial value of staging_slop: 8MiB on x86, nothing elsewhere. */
#if defined(__i386__) || defined(__amd64__)
#define	EFI_STAGING_SLOP	M(8)
#else
#define	EFI_STAGING_SLOP	0
#endif
196 
/*
 * Extra bytes efi_check_space() keeps free beyond the requested end of
 * the staging area; adjustable with the "staging_slop" command.
 */
static u_long staging_slop = EFI_STAGING_SLOP;

/*
 * staging_base..staging_end is the range obtained from AllocatePages();
 * staging is where loaded data starts inside it (it may sit above
 * staging_base once rounded up to 2MiB).
 */
EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
/*
 * stage_offset is added to a load address to get the matching address
 * in the staging area.  It is established by the first efi_copyin() or
 * efi_readin() call, which also sets stage_offset_set.
 */
bool			stage_offset_set = false;
ssize_t			stage_offset;
202 
203 static void
204 efi_copy_free(void)
205 {
206 	BS->FreePages(staging_base, (staging_end - staging_base) /
207 	    EFI_PAGE_SIZE);
208 	stage_offset_set = false;
209 	stage_offset = 0;
210 }
211 
212 #if defined(__amd64__) || defined(__i386__)
213 int copy_staging = COPY_STAGING_AUTO;
214 
215 static int
216 command_copy_staging(int argc, char *argv[])
217 {
218 	static const char *const mode[3] = {
219 		[COPY_STAGING_ENABLE] = "enable",
220 		[COPY_STAGING_DISABLE] = "disable",
221 		[COPY_STAGING_AUTO] = "auto",
222 	};
223 	int prev;
224 
225 	if (argc > 2) {
226 		return (CMD_ERROR);
227 	} else if (argc == 2) {
228 		prev = copy_staging;
229 		if (strcmp(argv[1], "enable") == 0)
230 			copy_staging = COPY_STAGING_ENABLE;
231 		else if (strcmp(argv[1], "disable") == 0)
232 			copy_staging = COPY_STAGING_DISABLE;
233 		else if (strcmp(argv[1], "auto") == 0)
234 			copy_staging = COPY_STAGING_AUTO;
235 		else {
236 			printf("usage: copy_staging enable|disable|auto\n");
237 			return (CMD_ERROR);
238 		}
239 		if (prev != copy_staging) {
240 			printf("changed copy_staging, unloading kernel\n");
241 			unload();
242 			efi_copy_free();
243 			efi_copy_init();
244 		}
245 	} else {
246 		printf("copy staging: %s\n", mode[copy_staging]);
247 	}
248 	return (CMD_OK);
249 }
250 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
251 #endif
252 
253 static int
254 command_staging_slop(int argc, char *argv[])
255 {
256 	char *endp;
257 	u_long new, prev;
258 
259 	if (argc > 2) {
260 		return (CMD_ERROR);
261 	} else if (argc == 2) {
262 		new = strtoul(argv[1], &endp, 0);
263 		if (*endp != '\0') {
264 			printf("invalid slop value\n");
265 			return (CMD_ERROR);
266 		}
267 		if (staging_slop != new) {
268 			staging_slop = new;
269 			printf("changed slop, unloading kernel\n");
270 
271 			unload();
272 			efi_copy_free();
273 			efi_copy_init();
274 		}
275 	} else {
276 		printf("staging slop %#lx\n", staging_slop);
277 	}
278 	return (CMD_OK);
279 }
280 COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
281     command_staging_slop);
282 
283 #if defined(__amd64__) || defined(__i386__)
284 /*
285  * The staging area must reside in the first 1GB or 4GB physical
286  * memory: see elf64_exec() in
287  * boot/efi/loader/arch/amd64/elf64_freebsd.c.
288  */
289 static EFI_PHYSICAL_ADDRESS
290 get_staging_max(void)
291 {
292 	EFI_PHYSICAL_ADDRESS res;
293 
294 	res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
295 	return (res);
296 }
297 #define	EFI_ALLOC_METHOD	AllocateMaxAddress
298 #else
299 #define	EFI_ALLOC_METHOD	AllocateAnyPages
300 #endif
301 
302 int
303 efi_copy_init(void)
304 {
305 	EFI_STATUS	status;
306 	unsigned long nr_pages;
307 	vm_offset_t ess;
308 
309 	ess = EFI_STAGING_SIZE;
310 	if (ess < DEFAULT_EFI_STAGING_SIZE)
311 		ess = DEFAULT_EFI_STAGING_SIZE;
312 	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);
313 
314 #if defined(__amd64__)
315 	/*
316 	 * We'll decrease nr_pages, if it's too big. Currently we only
317 	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
318 	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
319 	 */
320 	if (running_on_hyperv())
321 		efi_verify_staging_size(&nr_pages);
322 #endif
323 #if defined(__amd64__) || defined(__i386__)
324 	staging = get_staging_max();
325 #endif
326 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
327 	    nr_pages, &staging);
328 	if (EFI_ERROR(status)) {
329 		printf("failed to allocate staging area: %lu\n",
330 		    EFI_ERROR_CODE(status));
331 		return (status);
332 	}
333 	staging_base = staging;
334 	staging_end = staging + nr_pages * EFI_PAGE_SIZE;
335 
336 #if EFI_STAGING_2M_ALIGN
337 	/*
338 	 * Round the kernel load address to a 2MiB value. This is needed
339 	 * because the kernel builds a page table based on where it has
340 	 * been loaded in physical address space. As the kernel will use
341 	 * either a 1MiB or 2MiB page for this we need to make sure it
342 	 * is correctly aligned for both cases.
343 	 */
344 	staging = roundup2(staging, M(2));
345 #endif
346 
347 	return (0);
348 }
349 
/*
 * Make sure the staging area reaches at least up to address 'end' plus
 * staging_slop, growing it when it does not.
 *
 * Growth is tried in three ways, in order:
 *  1. allocate the missing pages directly after staging_end;
 *  2. allocate pages directly below staging_base and slide the staged
 *     data down into them;
 *  3. allocate a brand new, larger area, copy the staged data over and
 *     free the old area.
 * Whenever the data moves, stage_offset is adjusted by the same amount
 * so that efi_translate() keeps returning valid addresses.
 *
 * Returns true when the space is available and false when every
 * allocation attempt failed.  Once boot services have been exited the
 * area cannot grow: the call succeeds only if 'end' already fits (slop
 * ignored) and panics otherwise.
 */
static bool
efi_check_space(vm_offset_t end)
{
	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
	EFI_STATUS status;
	unsigned long nr_pages;

	end = roundup2(end, EFI_PAGE_SIZE);

	/* There is already enough space */
	if (end + staging_slop <= staging_end)
		return (true);

	if (!boot_services_active) {
		if (end <= staging_end)
			return (true);
		panic("efi_check_space: cannot expand staging area "
		    "after boot services were exited\n");
	}

	/*
	 * Add slop at the end:
	 * 1. amd64 kernel expects to do some very early allocations
	 *    by carving out memory after kernend.  Slop guarantees
	 *    that it does not overwrite anything useful.
	 * 2. It seems that initial calculation of the staging size
	 *    could be somewhat smaller than actually copying in after
	 *    boot services are exited.  Slop avoids calling
	 *    BS->AllocatePages() when it cannot work.
	 */
	end += staging_slop;

	/* Number of additional pages needed past the current end. */
	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__amd64__) || defined(__i386__)
	/*
	 * The amd64 kernel needs all memory to be allocated under the 1G or
	 * 4G boundary.
	 */
	if (end > get_staging_max())
		goto before_staging;
#endif

	/* Try to allocate more space after the previous allocation */
	addr = staging_end;
	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
		return (true);
	}

before_staging:
	/* Try allocating space before the previous allocation */
	if (staging < nr_pages * EFI_PAGE_SIZE)
		goto expand;
	addr = staging - nr_pages * EFI_PAGE_SIZE;
#if EFI_STAGING_2M_ALIGN
	/* See efi_copy_init for why this is needed */
	addr = rounddown2(addr, M(2));
#endif
	/* Only the gap between addr and the existing allocation is new. */
	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		staging_base = addr;
		/* Source and destination may overlap, hence memmove(). */
		memmove((void *)(uintptr_t)staging_base,
		    (void *)(uintptr_t)staging, staging_end - staging);
		stage_offset -= staging - staging_base;
		staging = staging_base;
		return (true);
	}

expand:
	/* Size a fresh area for everything from staging up to 'end'. */
	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
#if EFI_STAGING_2M_ALIGN
	/* Extra 2MiB so the start can be rounded up inside the area. */
	nr_pages += M(2) / EFI_PAGE_SIZE;
#endif
#if defined(__amd64__) || defined(__i386__)
	new_base = get_staging_max();
#endif
	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
	    nr_pages, &new_base);
	if (!EFI_ERROR(status)) {
#if EFI_STAGING_2M_ALIGN
		new_staging = roundup2(new_base, M(2));
#else
		new_staging = new_base;
#endif
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		memcpy((void *)(uintptr_t)new_staging,
		    (void *)(uintptr_t)staging, staging_end - staging);
		BS->FreePages(staging_base, (staging_end - staging_base) /
		    EFI_PAGE_SIZE);
		stage_offset -= staging - new_staging;
		staging = new_staging;
		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
		staging_base = new_base;
		return (true);
	}

	printf("efi_check_space: Unable to expand staging area\n");
	return (false);
}
460 
461 void *
462 efi_translate(vm_offset_t ptr)
463 {
464 
465 	return ((void *)(ptr + stage_offset));
466 }
467 
468 ssize_t
469 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
470 {
471 
472 	if (!stage_offset_set) {
473 		stage_offset = (vm_offset_t)staging - dest;
474 		stage_offset_set = true;
475 	}
476 
477 	/* XXX: Callers do not check for failure. */
478 	if (!efi_check_space(dest + stage_offset + len)) {
479 		errno = ENOMEM;
480 		return (-1);
481 	}
482 	bcopy(src, (void *)(dest + stage_offset), len);
483 	return (len);
484 }
485 
486 ssize_t
487 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
488 {
489 
490 	/* XXX: Callers do not check for failure. */
491 	if (src + stage_offset + len > staging_end) {
492 		errno = ENOMEM;
493 		return (-1);
494 	}
495 	bcopy((void *)(src + stage_offset), dest, len);
496 	return (len);
497 }
498 
499 ssize_t
500 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
501 {
502 
503 	if (!stage_offset_set) {
504 		stage_offset = (vm_offset_t)staging - dest;
505 		stage_offset_set = true;
506 	}
507 
508 	if (!efi_check_space(dest + stage_offset + len)) {
509 		errno = ENOMEM;
510 		return (-1);
511 	}
512 	return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
513 }
514 
515 void
516 efi_copy_finish(void)
517 {
518 	uint64_t	*src, *dst, *last;
519 
520 	src = (uint64_t *)(uintptr_t)staging;
521 	dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
522 	last = (uint64_t *)(uintptr_t)staging_end;
523 
524 	while (src < last)
525 		*dst++ = *src++;
526 }
527 
/*
 * Do-nothing counterpart of efi_copy_finish(): has the same signature
 * but leaves the staged data where it is.
 */
void
efi_copy_finish_nop(void)
{
}
532