1 /*-
2 * Copyright (c) 2013 The FreeBSD Foundation
3 *
4 * This software was developed by Benno Rice under sponsorship from
5 * the FreeBSD Foundation.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/param.h>
29
30 #include <stand.h>
31 #include <bootstrap.h>
32
33 #include <efi.h>
34 #include <efilib.h>
35
36 #include "loader_efi.h"
37
38 #define M(x) ((x) * 1024 * 1024)
39 #define G(x) (1ULL * (x) * 1024 * 1024 * 1024)
40
41 #if defined(__amd64__)
42 #include <machine/cpufunc.h>
43 #include <machine/specialreg.h>
44 #include <machine/vmparam.h>
45
46 /*
47 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
48 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
49 */
50 #define CPUID_LEAF_HV_MAXLEAF 0x40000000
51 #define CPUID_LEAF_HV_INTERFACE 0x40000001
52 #define CPUID_LEAF_HV_FEATURES 0x40000003
53 #define CPUID_LEAF_HV_LIMITS 0x40000005
54 #define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */
55 #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */
56 #define CPUID_HV_MSR_HYPERCALL 0x0020
57
58 static int
running_on_hyperv(void)59 running_on_hyperv(void)
60 {
61 char hv_vendor[16];
62 uint32_t regs[4];
63
64 do_cpuid(1, regs);
65 if ((regs[2] & CPUID2_HV) == 0)
66 return (0);
67
68 do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
69 if (regs[0] < CPUID_LEAF_HV_LIMITS)
70 return (0);
71
72 ((uint32_t *)&hv_vendor)[0] = regs[1];
73 ((uint32_t *)&hv_vendor)[1] = regs[2];
74 ((uint32_t *)&hv_vendor)[2] = regs[3];
75 hv_vendor[12] = '\0';
76 if (strcmp(hv_vendor, "Microsoft Hv") != 0)
77 return (0);
78
79 do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
80 if (regs[0] != CPUID_HV_IFACE_HYPERV)
81 return (0);
82
83 do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
84 if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
85 return (0);
86 if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
87 return (0);
88
89 return (1);
90 }
91
/*
 * Shrink *nr_pages, if needed, so that the staging area starting at
 * KERNLOAD fits inside contiguous usable memory as reported by the
 * firmware memory map.  On any failure the requested size is left
 * untouched (best effort).
 */
static void
efi_verify_staging_size(unsigned long *nr_pages)
{
	UINTN sz;
	EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
	EFI_PHYSICAL_ADDRESS start, end;
	UINTN key, dsz;
	UINT32 dver;
	EFI_STATUS status;
	int i, ndesc;
	unsigned long available_pages = 0;

	sz = 0;

	/*
	 * Standard two-step GetMemoryMap() dance: the first call fails
	 * with EFI_BUFFER_TOO_SMALL and tells us the size to allocate.
	 * Loop because the malloc() below can itself change the map.
	 */
	for (;;) {
		status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
		if (!EFI_ERROR(status))
			break;

		if (status != EFI_BUFFER_TOO_SMALL) {
			printf("Can't read memory map: %lu\n",
			    EFI_ERROR_CODE(status));
			goto out;
		}

		free(map);

		/* Allocate 10 descriptors more than the size reported,
		 * to allow for any fragmentation caused by calling
		 * malloc */
		map = malloc(sz + (10 * dsz));
		if (map == NULL) {
			printf("Unable to allocate memory\n");
			goto out;
		}
	}

	/*
	 * Find the descriptor that contains KERNLOAD and count the
	 * pages available from KERNLOAD to the end of that descriptor.
	 * Step by dsz, not sizeof(*p): the firmware's descriptor size
	 * may be larger than ours.
	 */
	ndesc = sz / dsz;
	for (i = 0, p = map; i < ndesc;
	    i++, p = NextMemoryDescriptor(p, dsz)) {
		start = p->PhysicalStart;
		end = start + p->NumberOfPages * EFI_PAGE_SIZE;

		if (KERNLOAD < start || KERNLOAD >= end)
			continue;

		available_pages = p->NumberOfPages -
			((KERNLOAD - start) >> EFI_PAGE_SHIFT);
		break;
	}

	if (available_pages == 0) {
		printf("Can't find valid memory map for staging area!\n");
		goto out;
	}

	i++;
	p = NextMemoryDescriptor(p, dsz);

	/*
	 * Accumulate the pages of immediately following descriptors as
	 * long as they are usable (conventional or loader data) and
	 * physically contiguous with the region found above.
	 */
	for ( ; i < ndesc;
	    i++, p = NextMemoryDescriptor(p, dsz)) {
		if (p->Type != EfiConventionalMemory &&
		    p->Type != EfiLoaderData)
			break;

		if (p->PhysicalStart != end)
			break;

		end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;

		available_pages += p->NumberOfPages;
	}

	if (*nr_pages > available_pages) {
		printf("Staging area's size is reduced: %ld -> %ld!\n",
		    *nr_pages, available_pages);
		*nr_pages = available_pages;
	}
out:
	free(map);
}
173 #endif /* __amd64__ */
174
/*
 * Default staging-area size in MiB: 32 on arm, 64 on everything else.
 * EFI_STAGING_SIZE may be set at build time; efi_copy_init() clamps it
 * to at least the default.
 */
#if defined(__arm__)
#define	DEFAULT_EFI_STAGING_SIZE	32
#else
#define	DEFAULT_EFI_STAGING_SIZE	64
#endif
#ifndef EFI_STAGING_SIZE
#define	EFI_STAGING_SIZE	DEFAULT_EFI_STAGING_SIZE
#endif

/* Architectures whose kernels require a 2MiB-aligned load address. */
#if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \
    defined(__riscv) || defined(__i386__)
#define	EFI_STAGING_2M_ALIGN	1
#else
#define	EFI_STAGING_2M_ALIGN	0
#endif

/* Extra bytes kept free past the loaded kernel; see efi_check_space(). */
#if defined(__amd64__) || defined(__i386__)
#define	EFI_STAGING_SLOP	M(8)
#else
#define	EFI_STAGING_SLOP	0
#endif

static u_long staging_slop = EFI_STAGING_SLOP;

/*
 * staging_base/staging_end bound the firmware allocation; staging is the
 * (possibly 2MiB-rounded-up) address the kernel is actually copied to.
 */
EFI_PHYSICAL_ADDRESS	staging, staging_end, staging_base;
/* stage_offset maps a copyin destination to its staging address; it is
 * latched on the first efi_copyin()/efi_readin() call. */
bool			stage_offset_set = false;
ssize_t			stage_offset;
202
203 static void
efi_copy_free(void)204 efi_copy_free(void)
205 {
206 BS->FreePages(staging_base, (staging_end - staging_base) /
207 EFI_PAGE_SIZE);
208 stage_offset_set = false;
209 stage_offset = 0;
210 }
211
212 #if defined(__amd64__) || defined(__i386__)
213 int copy_staging = COPY_STAGING_AUTO;
214
215 static int
command_copy_staging(int argc,char * argv[])216 command_copy_staging(int argc, char *argv[])
217 {
218 static const char *const mode[3] = {
219 [COPY_STAGING_ENABLE] = "enable",
220 [COPY_STAGING_DISABLE] = "disable",
221 [COPY_STAGING_AUTO] = "auto",
222 };
223 int prev;
224
225 if (argc > 2) {
226 goto usage;
227 } else if (argc == 2) {
228 prev = copy_staging;
229 if (strcmp(argv[1], "enable") == 0)
230 copy_staging = COPY_STAGING_ENABLE;
231 else if (strcmp(argv[1], "disable") == 0)
232 copy_staging = COPY_STAGING_DISABLE;
233 else if (strcmp(argv[1], "auto") == 0)
234 copy_staging = COPY_STAGING_AUTO;
235 else
236 goto usage;
237 if (prev != copy_staging) {
238 printf("changed copy_staging, unloading kernel\n");
239 unload();
240 efi_copy_free();
241 efi_copy_init();
242 }
243 } else {
244 printf("copy staging: %s\n", mode[copy_staging]);
245 }
246 return (CMD_OK);
247
248 usage:
249 command_errmsg = "usage: copy_staging enable|disable|auto";
250 return (CMD_ERROR);
251 }
252 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
253 #endif
254
255 static int
command_staging_slop(int argc,char * argv[])256 command_staging_slop(int argc, char *argv[])
257 {
258 char *endp;
259 u_long new, prev;
260
261 if (argc > 2) {
262 goto err;
263 } else if (argc == 2) {
264 new = strtoul(argv[1], &endp, 0);
265 if (*endp != '\0')
266 goto err;
267 if (staging_slop != new) {
268 staging_slop = new;
269 printf("changed slop, unloading kernel\n");
270
271 unload();
272 efi_copy_free();
273 efi_copy_init();
274 }
275 } else {
276 printf("staging slop %#lx\n", staging_slop);
277 }
278 return (CMD_OK);
279
280 err:
281 command_errmsg = "invalid slop value";
282 return (CMD_ERROR);
283 }
284 COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
285 command_staging_slop);
286
287 #if defined(__amd64__) || defined(__i386__)
288 /*
289 * The staging area must reside in the first 1GB or 4GB physical
290 * memory: see elf64_exec() in
291 * boot/efi/loader/arch/amd64/elf64_freebsd.c.
292 */
293 static EFI_PHYSICAL_ADDRESS
get_staging_max(void)294 get_staging_max(void)
295 {
296 EFI_PHYSICAL_ADDRESS res;
297
298 res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
299 return (res);
300 }
301 #define EFI_ALLOC_METHOD AllocateMaxAddress
302 #else
303 #define EFI_ALLOC_METHOD AllocateAnyPages
304 #endif
305
/*
 * Allocate the staging area used to hold the kernel and modules before
 * control is handed over.  Returns 0 on success or the EFI status code
 * on allocation failure.
 */
int
efi_copy_init(void)
{
	EFI_STATUS	status;
	unsigned long nr_pages;
	vm_offset_t ess;

	/* Clamp a build-time EFI_STAGING_SIZE to at least the default. */
	ess = EFI_STAGING_SIZE;
	if (ess < DEFAULT_EFI_STAGING_SIZE)
		ess = DEFAULT_EFI_STAGING_SIZE;
	nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);

#if defined(__amd64__)
	/*
	 * We'll decrease nr_pages, if it's too big. Currently we only
	 * apply this to FreeBSD VM running on Hyper-V. Why? Please see
	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
	 */
	if (running_on_hyperv())
		efi_verify_staging_size(&nr_pages);
#endif
#if defined(__amd64__) || defined(__i386__)
	/*
	 * With AllocateMaxAddress, staging is an in/out parameter: it
	 * carries the ceiling in and the allocated address out.
	 */
	staging = get_staging_max();
#endif
	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
	    nr_pages, &staging);
	if (EFI_ERROR(status)) {
		printf("failed to allocate staging area: %lu\n",
		    EFI_ERROR_CODE(status));
		return (status);
	}
	staging_base = staging;
	staging_end = staging + nr_pages * EFI_PAGE_SIZE;

#if EFI_STAGING_2M_ALIGN
	/*
	 * Round the kernel load address to a 2MiB value. This is needed
	 * because the kernel builds a page table based on where it has
	 * been loaded in physical address space. As the kernel will use
	 * either a 1MiB or 2MiB page for this we need to make sure it
	 * is correctly aligned for both cases.
	 */
	staging = roundup2(staging, M(2));
#endif

	return (0);
}
353
/*
 * Ensure the staging area can hold data up to address 'end' (a staging
 * address, not a load address).  Tries, in order: the existing space,
 * growing the allocation in place, moving it to lower memory, and
 * finally relocating it wholesale.  Returns true when 'end' fits.
 */
static bool
efi_check_space(vm_offset_t end)
{
	EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
	EFI_STATUS status;
	unsigned long nr_pages;

	end = roundup2(end, EFI_PAGE_SIZE);

	/* There is already enough space */
	if (end + staging_slop <= staging_end)
		return (true);

	/*
	 * After ExitBootServices() we can no longer call AllocatePages;
	 * accept what still fits and panic otherwise.
	 */
	if (!boot_services_active) {
		if (end <= staging_end)
			return (true);
		panic("efi_check_space: cannot expand staging area "
		    "after boot services were exited\n");
	}

	/*
	 * Add slop at the end:
	 * 1. amd64 kernel expects to do some very early allocations
	 *    by carving out memory after kernend.  Slop guarantees
	 *    that it does not overwrite anything useful.
	 * 2. It seems that initial calculation of the staging size
	 *    could be somewhat smaller than actually copying in after
	 *    boot services are exited.  Slop avoids calling
	 *    BS->AllocatePages() when it cannot work.
	 */
	end += staging_slop;

	nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__amd64__) || defined(__i386__)
	/*
	 * The amd64 kernel needs all memory to be allocated under the 1G or
	 * 4G boundary.
	 */
	if (end > get_staging_max())
		goto before_staging;
#endif

	/* Try to allocate more space after the previous allocation */
	addr = staging_end;
	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
		return (true);
	}

before_staging:
	/* Try allocating space before the previous allocation */
	if (staging < nr_pages * EFI_PAGE_SIZE)
		goto expand;
	addr = staging - nr_pages * EFI_PAGE_SIZE;
#if EFI_STAGING_2M_ALIGN
	/* See efi_copy_init for why this is needed */
	addr = rounddown2(addr, M(2));
#endif
	/* Recompute: rounding down may have grown the request. */
	nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
	status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
	    &addr);
	if (!EFI_ERROR(status)) {
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		staging_base = addr;
		memmove((void *)(uintptr_t)staging_base,
		    (void *)(uintptr_t)staging, staging_end - staging);
		stage_offset -= staging - staging_base;
		staging = staging_base;
		return (true);
	}

expand:
	nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
#if EFI_STAGING_2M_ALIGN
	/* Reserve room so the new area can be realigned to 2MiB. */
	nr_pages += M(2) / EFI_PAGE_SIZE;
#endif
#if defined(__amd64__) || defined(__i386__)
	new_base = get_staging_max();
#endif
	status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
	    nr_pages, &new_base);
	if (!EFI_ERROR(status)) {
#if EFI_STAGING_2M_ALIGN
		new_staging = roundup2(new_base, M(2));
#else
		new_staging = new_base;
#endif
		/*
		 * Move the old allocation and update the state so
		 * translation still works.
		 */
		memcpy((void *)(uintptr_t)new_staging,
		    (void *)(uintptr_t)staging, staging_end - staging);
		BS->FreePages(staging_base, (staging_end - staging_base) /
		    EFI_PAGE_SIZE);
		stage_offset -= staging - new_staging;
		staging = new_staging;
		staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
		staging_base = new_base;
		return (true);
	}

	printf("efi_check_space: Unable to expand staging area\n");
	return (false);
}
464
465 void *
efi_translate(vm_offset_t ptr)466 efi_translate(vm_offset_t ptr)
467 {
468
469 return ((void *)(ptr + stage_offset));
470 }
471
472 ssize_t
efi_copyin(const void * src,vm_offset_t dest,const size_t len)473 efi_copyin(const void *src, vm_offset_t dest, const size_t len)
474 {
475
476 if (!stage_offset_set) {
477 stage_offset = (vm_offset_t)staging - dest;
478 stage_offset_set = true;
479 }
480
481 /* XXX: Callers do not check for failure. */
482 if (!efi_check_space(dest + stage_offset + len)) {
483 errno = ENOMEM;
484 return (-1);
485 }
486 bcopy(src, (void *)(dest + stage_offset), len);
487 return (len);
488 }
489
490 ssize_t
efi_copyout(const vm_offset_t src,void * dest,const size_t len)491 efi_copyout(const vm_offset_t src, void *dest, const size_t len)
492 {
493
494 /* XXX: Callers do not check for failure. */
495 if (src + stage_offset + len > staging_end) {
496 errno = ENOMEM;
497 return (-1);
498 }
499 bcopy((void *)(src + stage_offset), dest, len);
500 return (len);
501 }
502
503 ssize_t
efi_readin(readin_handle_t fd,vm_offset_t dest,const size_t len)504 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
505 {
506
507 if (!stage_offset_set) {
508 stage_offset = (vm_offset_t)staging - dest;
509 stage_offset_set = true;
510 }
511
512 if (!efi_check_space(dest + stage_offset + len)) {
513 errno = ENOMEM;
514 return (-1);
515 }
516 return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
517 }
518
519 void
efi_copy_finish(void)520 efi_copy_finish(void)
521 {
522 uint64_t *src, *dst, *last;
523
524 src = (uint64_t *)(uintptr_t)staging;
525 dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
526 last = (uint64_t *)(uintptr_t)staging_end;
527
528 while (src < last)
529 *dst++ = *src++;
530 }
531
/*
 * Intentionally empty counterpart to efi_copy_finish(); appears to be
 * installed when no final relocation copy is required — confirm
 * against the callers that select between the two.
 */
void
efi_copy_finish_nop(void)
{
}
536