xref: /freebsd/stand/efi/loader/copy.c (revision 80661e2153d2bc916444cb3be8aa50d0d110ad57)
1 /*-
2  * Copyright (c) 2013 The FreeBSD Foundation
3  *
4  * This software was developed by Benno Rice under sponsorship from
5  * the FreeBSD Foundation.
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/param.h>
29 
30 #include <stand.h>
31 #include <bootstrap.h>
32 
33 #include <efi.h>
34 #include <efilib.h>
35 
36 #include "loader_efi.h"
37 
38 #if defined(__amd64__)
39 #include <machine/cpufunc.h>
40 #include <machine/specialreg.h>
41 #include <machine/vmparam.h>
42 
/*
 * Constants used to recognise Microsoft Hyper-V through CPUID.
 *
 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
 */
/* CPUID leaves queried by running_on_hyperv(). */
#define	CPUID_LEAF_HV_MAXLEAF		0x40000000
#define	CPUID_LEAF_HV_INTERFACE		0x40000001
#define	CPUID_LEAF_HV_FEATURES		0x40000003
#define	CPUID_LEAF_HV_LIMITS		0x40000005
/* Interface signature expected from CPUID_LEAF_HV_INTERFACE. */
#define	CPUID_HV_IFACE_HYPERV		0x31237648	/* HV#1 */
/* Feature bits tested in the CPUID_LEAF_HV_FEATURES result. */
#define	CPUID_HV_MSR_TIME_REFCNT	0x0002	/* MSR_HV_TIME_REF_COUNT */
#define	CPUID_HV_MSR_HYPERCALL		0x0020
54 
55 static int
running_on_hyperv(void)56 running_on_hyperv(void)
57 {
58 	char hv_vendor[16];
59 	uint32_t regs[4];
60 
61 	do_cpuid(1, regs);
62 	if ((regs[2] & CPUID2_HV) == 0)
63 		return (0);
64 
65 	do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
66 	if (regs[0] < CPUID_LEAF_HV_LIMITS)
67 		return (0);
68 
69 	((uint32_t *)&hv_vendor)[0] = regs[1];
70 	((uint32_t *)&hv_vendor)[1] = regs[2];
71 	((uint32_t *)&hv_vendor)[2] = regs[3];
72 	hv_vendor[12] = '\0';
73 	if (strcmp(hv_vendor, "Microsoft Hv") != 0)
74 		return (0);
75 
76 	do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
77 	if (regs[0] != CPUID_HV_IFACE_HYPERV)
78 		return (0);
79 
80 	do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
81 	if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
82 		return (0);
83 	if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
84 		return (0);
85 
86 	return (1);
87 }
88 
89 static void
efi_verify_staging_size(unsigned long * nr_pages)90 efi_verify_staging_size(unsigned long *nr_pages)
91 {
92 	UINTN sz;
93 	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
94 	EFI_PHYSICAL_ADDRESS start, end;
95 	UINTN key, dsz;
96 	UINT32 dver;
97 	EFI_STATUS status;
98 	int i, ndesc;
99 	unsigned long available_pages = 0;
100 
101 	sz = 0;
102 
103 	for (;;) {
104 		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
105 		if (!EFI_ERROR(status))
106 			break;
107 
108 		if (status != EFI_BUFFER_TOO_SMALL) {
109 			printf("Can't read memory map: %lu\n",
110 			    EFI_ERROR_CODE(status));
111 			goto out;
112 		}
113 
114 		free(map);
115 
116 		/* Allocate 10 descriptors more than the size reported,
117 		 * to allow for any fragmentation caused by calling
118 		 * malloc */
119 		map = malloc(sz + (10 * dsz));
120 		if (map == NULL) {
121 			printf("Unable to allocate memory\n");
122 			goto out;
123 		}
124 	}
125 
126 	ndesc = sz / dsz;
127 	for (i = 0, p = map; i < ndesc;
128 	     i++, p = NextMemoryDescriptor(p, dsz)) {
129 		start = p->PhysicalStart;
130 		end = start + p->NumberOfPages * EFI_PAGE_SIZE;
131 
132 		if (KERNLOAD < start || KERNLOAD >= end)
133 			continue;
134 
135 		available_pages = p->NumberOfPages -
136 			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
137 		break;
138 	}
139 
140 	if (available_pages == 0) {
141 		printf("Can't find valid memory map for staging area!\n");
142 		goto out;
143 	}
144 
145 	i++;
146 	p = NextMemoryDescriptor(p, dsz);
147 
148 	for ( ; i < ndesc;
149 	     i++, p = NextMemoryDescriptor(p, dsz)) {
150 		if (p->Type != EfiConventionalMemory &&
151 		    p->Type != EfiLoaderData)
152 			break;
153 
154 		if (p->PhysicalStart != end)
155 			break;
156 
157 		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;
158 
159 		available_pages += p->NumberOfPages;
160 	}
161 
162 	if (*nr_pages > available_pages) {
163 		printf("Staging area's size is reduced: %ld -> %ld!\n",
164 		    *nr_pages, available_pages);
165 		*nr_pages = available_pages;
166 	}
167 out:
168 	free(map);
169 }
170 #endif /* __amd64__ */
171 
172 #if defined(__arm__)
173 #define	DEFAULT_EFI_STAGING_SIZE	32
174 #else
175 #define	DEFAULT_EFI_STAGING_SIZE	64
176 #endif
177 #ifndef EFI_STAGING_SIZE
178 #define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
179 #endif
180 
181 #define	EFI_STAGING_2M_ALIGN	1
182 
183 #if defined(__amd64__) || defined(__i386__)
184 #define	EFI_STAGING_SLOP	M(8)
185 #else
186 #define	EFI_STAGING_SLOP	0
187 #endif
188 
189 static u_long staging_slop = EFI_STAGING_SLOP;
190 
191 EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
192 bool			stage_offset_set = false;
193 ssize_t			stage_offset;
194 
195 static void
efi_copy_free(void)196 efi_copy_free(void)
197 {
198 	BS->FreePages(staging_base, (staging_end - staging_base) /
199 	    EFI_PAGE_SIZE);
200 	stage_offset_set = false;
201 	stage_offset = 0;
202 }
203 
204 #if defined(__amd64__) || defined(__i386__)
205 int copy_staging = COPY_STAGING_AUTO;
206 
207 static int
command_copy_staging(int argc,char * argv[])208 command_copy_staging(int argc, char *argv[])
209 {
210 	static const char *const mode[3] = {
211 		[COPY_STAGING_ENABLE] = "enable",
212 		[COPY_STAGING_DISABLE] = "disable",
213 		[COPY_STAGING_AUTO] = "auto",
214 	};
215 	int prev;
216 
217 	if (argc > 2) {
218 		goto usage;
219 	} else if (argc == 2) {
220 		prev = copy_staging;
221 		if (strcmp(argv[1], "enable") == 0)
222 			copy_staging = COPY_STAGING_ENABLE;
223 		else if (strcmp(argv[1], "disable") == 0)
224 			copy_staging = COPY_STAGING_DISABLE;
225 		else if (strcmp(argv[1], "auto") == 0)
226 			copy_staging = COPY_STAGING_AUTO;
227 		else
228 			goto usage;
229 		if (prev != copy_staging) {
230 			printf("changed copy_staging, unloading kernel\n");
231 			unload();
232 			efi_copy_free();
233 			efi_copy_init();
234 		}
235 	} else {
236 		printf("copy staging: %s\n", mode[copy_staging]);
237 	}
238 	return (CMD_OK);
239 
240 usage:
241 	command_errmsg = "usage: copy_staging enable|disable|auto";
242 	return (CMD_ERROR);
243 }
244 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
245 #endif
246 
247 static int
command_staging_slop(int argc,char * argv[])248 command_staging_slop(int argc, char *argv[])
249 {
250 	char *endp;
251 	u_long new;
252 
253 	if (argc > 2) {
254 		goto err;
255 	} else if (argc == 2) {
256 		new = strtoul(argv[1], &endp, 0);
257 		if (*endp != '\0')
258 			goto err;
259 		if (staging_slop != new) {
260 			staging_slop = new;
261 			printf("changed slop, unloading kernel\n");
262 
263 			unload();
264 			efi_copy_free();
265 			efi_copy_init();
266 		}
267 	} else {
268 		printf("staging slop %#lx\n", staging_slop);
269 	}
270 	return (CMD_OK);
271 
272 err:
273 	command_errmsg = "invalid slop value";
274 	return (CMD_ERROR);
275 }
276 COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
277     command_staging_slop);
278 
279 #if defined(__amd64__) || defined(__i386__)
280 /*
281  * The staging area must reside in the first 1GB or 4GB physical
282  * memory: see elf64_exec() in
283  * boot/efi/loader/arch/amd64/elf64_freebsd.c.
284  */
285 static EFI_PHYSICAL_ADDRESS
get_staging_max(void)286 get_staging_max(void)
287 {
288 	EFI_PHYSICAL_ADDRESS res;
289 
290 	res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
291 	return (res);
292 }
293 #define	EFI_ALLOC_MAX_ADDR
294 #elif defined(__aarch64__)
295 /*
296  * Older kernels only support a 48-bit physical address space, and locore.S
297  * only supports a 50-bit space. Limit to 48 bits so older kernels can boot
298  * even if FEAT_LPA2 is supported by the hardware.
299  */
300 #define	get_staging_max()	(1ul << 48)
301 #define	EFI_ALLOC_MAX_ADDR
302 #endif
303 #ifdef EFI_ALLOC_MAX_ADDR
304 #define	EFI_ALLOC_METHOD	AllocateMaxAddress
305 #else
306 #define	EFI_ALLOC_METHOD	AllocateAnyPages
307 #endif
308 
309 int
efi_copy_init(void)310 efi_copy_init(void)
311 {
312 	EFI_STATUS	status;
313 	unsigned long nr_pages;
314 	vm_offset_t ess;
315 
316 	ess = EFI_STAGING_SIZE;
317 	if (ess < DEFAULT_EFI_STAGING_SIZE)
318 		ess = DEFAULT_EFI_STAGING_SIZE;
319 	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);
320 
321 #if defined(__amd64__)
322 	/*
323 	 * We'll decrease nr_pages, if it's too big. Currently we only
324 	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
325 	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
326 	 */
327 	if (running_on_hyperv())
328 		efi_verify_staging_size(&nr_pages);
329 #endif
330 #ifdef EFI_ALLOC_MAX_ADDR
331 	staging = get_staging_max();
332 #endif
333 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
334 	    nr_pages, &staging);
335 	if (EFI_ERROR(status)) {
336 		printf("failed to allocate staging area: %lu\n",
337 		    EFI_ERROR_CODE(status));
338 		return (status);
339 	}
340 	staging_base = staging;
341 	staging_end = staging + nr_pages * EFI_PAGE_SIZE;
342 
343 #if EFI_STAGING_2M_ALIGN
344 	/*
345 	 * Round the kernel load address to a 2MiB value. This is needed
346 	 * because the kernel builds a page table based on where it has
347 	 * been loaded in physical address space. As the kernel will use
348 	 * either a 1MiB or 2MiB page for this we need to make sure it
349 	 * is correctly aligned for both cases.
350 	 */
351 	staging = roundup2(staging, M(2));
352 #endif
353 
354 	return (0);
355 }
356 
357 static bool
efi_check_space(vm_offset_t end)358 efi_check_space(vm_offset_t end)
359 {
360 	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
361 	EFI_STATUS status;
362 	unsigned long nr_pages;
363 
364 	end = roundup2(end, EFI_PAGE_SIZE);
365 
366 	/* There is already enough space */
367 	if (end + staging_slop <= staging_end)
368 		return (true);
369 
370 	if (!boot_services_active) {
371 		if (end <= staging_end)
372 			return (true);
373 		panic("efi_check_space: cannot expand staging area "
374 		    "after boot services were exited\n");
375 	}
376 
377 	/*
378 	 * Add slop at the end:
379 	 * 1. amd64 kernel expects to do some very early allocations
380 	 *    by carving out memory after kernend.  Slop guarantees
381 	 *    that it does not ovewrite anything useful.
382 	 * 2. It seems that initial calculation of the staging size
383 	 *    could be somewhat smaller than actually copying in after
384 	 *    boot services are exited.  Slop avoids calling
385 	 *    BS->AllocatePages() when it cannot work.
386 	 */
387 	end += staging_slop;
388 
389 	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
390 #if defined(__amd64__) || defined(__i386__)
391 	/*
392 	 * The amd64 kernel needs all memory to be allocated under the 1G or
393 	 * 4G boundary.
394 	 */
395 	if (end > get_staging_max())
396 		goto before_staging;
397 #endif
398 
399 	/* Try to allocate more space after the previous allocation */
400 	addr = staging_end;
401 	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
402 	    &addr);
403 	if (!EFI_ERROR(status)) {
404 		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
405 		return (true);
406 	}
407 
408 #if defined(__amd64__) || defined(__i386__)
409 before_staging:
410 #endif
411 	/* Try allocating space before the previous allocation */
412 	if (staging < nr_pages * EFI_PAGE_SIZE)
413 		goto expand;
414 	addr = staging - nr_pages * EFI_PAGE_SIZE;
415 #if EFI_STAGING_2M_ALIGN
416 	/* See efi_copy_init for why this is needed */
417 	addr = rounddown2(addr, M(2));
418 #endif
419 	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
420 	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
421 	    &addr);
422 	if (!EFI_ERROR(status)) {
423 		/*
424 		 * Move the old allocation and update the state so
425 		 * translation still works.
426 		 */
427 		staging_base = addr;
428 		memmove((void *)(uintptr_t)staging_base,
429 		    (void *)(uintptr_t)staging, staging_end - staging);
430 		stage_offset -= staging - staging_base;
431 		staging = staging_base;
432 		return (true);
433 	}
434 
435 expand:
436 	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
437 #if EFI_STAGING_2M_ALIGN
438 	nr_pages += M(2) / EFI_PAGE_SIZE;
439 #endif
440 #ifdef EFI_ALLOC_MAX_ADDR
441 	new_base = get_staging_max();
442 #endif
443 	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
444 	    nr_pages, &new_base);
445 	if (!EFI_ERROR(status)) {
446 #if EFI_STAGING_2M_ALIGN
447 		new_staging = roundup2(new_base, M(2));
448 #else
449 		new_staging = new_base;
450 #endif
451 		/*
452 		 * Move the old allocation and update the state so
453 		 * translation still works.
454 		 */
455 		memcpy((void *)(uintptr_t)new_staging,
456 		    (void *)(uintptr_t)staging, staging_end - staging);
457 		BS->FreePages(staging_base, (staging_end - staging_base) /
458 		    EFI_PAGE_SIZE);
459 		stage_offset -= staging - new_staging;
460 		staging = new_staging;
461 		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
462 		staging_base = new_base;
463 		return (true);
464 	}
465 
466 	printf("efi_check_space: Unable to expand staging area\n");
467 	return (false);
468 }
469 
470 void *
efi_translate(vm_offset_t ptr)471 efi_translate(vm_offset_t ptr)
472 {
473 
474 	return ((void *)(ptr + stage_offset));
475 }
476 
477 ssize_t
efi_copyin(const void * src,vm_offset_t dest,const size_t len)478 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
479 {
480 
481 	if (!stage_offset_set) {
482 		stage_offset = (vm_offset_t)staging - dest;
483 		stage_offset_set = true;
484 	}
485 
486 	/* XXX: Callers do not check for failure. */
487 	if (!efi_check_space(dest + stage_offset + len)) {
488 		errno = ENOMEM;
489 		return (-1);
490 	}
491 	bcopy(src, (void *)(dest + stage_offset), len);
492 	return (len);
493 }
494 
495 ssize_t
efi_copyout(const vm_offset_t src,void * dest,const size_t len)496 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
497 {
498 
499 	/* XXX: Callers do not check for failure. */
500 	if (src + stage_offset + len > staging_end) {
501 		errno = ENOMEM;
502 		return (-1);
503 	}
504 	bcopy((void *)(src + stage_offset), dest, len);
505 	return (len);
506 }
507 
508 ssize_t
efi_readin(readin_handle_t fd,vm_offset_t dest,const size_t len)509 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
510 {
511 
512 	if (!stage_offset_set) {
513 		stage_offset = (vm_offset_t)staging - dest;
514 		stage_offset_set = true;
515 	}
516 
517 	if (!efi_check_space(dest + stage_offset + len)) {
518 		errno = ENOMEM;
519 		return (-1);
520 	}
521 	return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
522 }
523 
524 void
efi_copy_finish(void)525 efi_copy_finish(void)
526 {
527 	uint64_t	*src, *dst, *last;
528 
529 	src = (uint64_t *)(uintptr_t)staging;
530 	dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
531 	last = (uint64_t *)(uintptr_t)staging_end;
532 
533 	while (src < last)
534 		*dst++ = *src++;
535 }
536 
/*
 * Counterpart of efi_copy_finish() that performs no copy at all.
 */
void
efi_copy_finish_nop(void)
{
	/* Intentionally empty. */
}
541