/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Benno Rice under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 34 #include <stand.h> 35 #include <bootstrap.h> 36 37 #include <efi.h> 38 #include <efilib.h> 39 40 #include "loader_efi.h" 41 42 #define M(x) ((x) * 1024 * 1024) 43 #define G(x) (1UL * (x) * 1024 * 1024 * 1024) 44 45 #if defined(__i386__) || defined(__amd64__) 46 #include <machine/cpufunc.h> 47 #include <machine/specialreg.h> 48 #include <machine/vmparam.h> 49 50 /* 51 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(), 52 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify(). 53 */ 54 #define CPUID_LEAF_HV_MAXLEAF 0x40000000 55 #define CPUID_LEAF_HV_INTERFACE 0x40000001 56 #define CPUID_LEAF_HV_FEATURES 0x40000003 57 #define CPUID_LEAF_HV_LIMITS 0x40000005 58 #define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */ 59 #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ 60 #define CPUID_HV_MSR_HYPERCALL 0x0020 61 62 static int 63 running_on_hyperv(void) 64 { 65 char hv_vendor[16]; 66 uint32_t regs[4]; 67 68 do_cpuid(1, regs); 69 if ((regs[2] & CPUID2_HV) == 0) 70 return (0); 71 72 do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); 73 if (regs[0] < CPUID_LEAF_HV_LIMITS) 74 return (0); 75 76 ((uint32_t *)&hv_vendor)[0] = regs[1]; 77 ((uint32_t *)&hv_vendor)[1] = regs[2]; 78 ((uint32_t *)&hv_vendor)[2] = regs[3]; 79 hv_vendor[12] = '\0'; 80 if (strcmp(hv_vendor, "Microsoft Hv") != 0) 81 return (0); 82 83 do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); 84 if (regs[0] != CPUID_HV_IFACE_HYPERV) 85 return (0); 86 87 do_cpuid(CPUID_LEAF_HV_FEATURES, regs); 88 if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) 89 return (0); 90 if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0) 91 return (0); 92 93 return (1); 94 } 95 96 static void 97 efi_verify_staging_size(unsigned long *nr_pages) 98 { 99 UINTN sz; 100 EFI_MEMORY_DESCRIPTOR *map = NULL, *p; 101 EFI_PHYSICAL_ADDRESS start, end; 102 UINTN key, dsz; 103 UINT32 dver; 104 EFI_STATUS status; 105 int i, ndesc; 106 
unsigned long available_pages = 0; 107 108 sz = 0; 109 110 for (;;) { 111 status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); 112 if (!EFI_ERROR(status)) 113 break; 114 115 if (status != EFI_BUFFER_TOO_SMALL) { 116 printf("Can't read memory map: %lu\n", 117 EFI_ERROR_CODE(status)); 118 goto out; 119 } 120 121 free(map); 122 123 /* Allocate 10 descriptors more than the size reported, 124 * to allow for any fragmentation caused by calling 125 * malloc */ 126 map = malloc(sz + (10 * dsz)); 127 if (map == NULL) { 128 printf("Unable to allocate memory\n"); 129 goto out; 130 } 131 } 132 133 ndesc = sz / dsz; 134 for (i = 0, p = map; i < ndesc; 135 i++, p = NextMemoryDescriptor(p, dsz)) { 136 start = p->PhysicalStart; 137 end = start + p->NumberOfPages * EFI_PAGE_SIZE; 138 139 if (KERNLOAD < start || KERNLOAD >= end) 140 continue; 141 142 available_pages = p->NumberOfPages - 143 ((KERNLOAD - start) >> EFI_PAGE_SHIFT); 144 break; 145 } 146 147 if (available_pages == 0) { 148 printf("Can't find valid memory map for staging area!\n"); 149 goto out; 150 } 151 152 i++; 153 p = NextMemoryDescriptor(p, dsz); 154 155 for ( ; i < ndesc; 156 i++, p = NextMemoryDescriptor(p, dsz)) { 157 if (p->Type != EfiConventionalMemory && 158 p->Type != EfiLoaderData) 159 break; 160 161 if (p->PhysicalStart != end) 162 break; 163 164 end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE; 165 166 available_pages += p->NumberOfPages; 167 } 168 169 if (*nr_pages > available_pages) { 170 printf("Staging area's size is reduced: %ld -> %ld!\n", 171 *nr_pages, available_pages); 172 *nr_pages = available_pages; 173 } 174 out: 175 free(map); 176 } 177 #endif /* __i386__ || __amd64__ */ 178 179 #if defined(__arm__) 180 #define DEFAULT_EFI_STAGING_SIZE 32 181 #else 182 #define DEFAULT_EFI_STAGING_SIZE 64 183 #endif 184 #ifndef EFI_STAGING_SIZE 185 #define EFI_STAGING_SIZE DEFAULT_EFI_STAGING_SIZE 186 #endif 187 188 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ 189 
defined(__riscv) 190 #define EFI_STAGING_2M_ALIGN 1 191 #else 192 #define EFI_STAGING_2M_ALIGN 0 193 #endif 194 195 #if defined(__amd64__) 196 #define EFI_STAGING_SLOP M(8) 197 #else 198 #define EFI_STAGING_SLOP 0 199 #endif 200 201 static u_long staging_slop = EFI_STAGING_SLOP; 202 203 EFI_PHYSICAL_ADDRESS staging, staging_end, staging_base; 204 int stage_offset_set = 0; 205 ssize_t stage_offset; 206 207 static void 208 efi_copy_free(void) 209 { 210 BS->FreePages(staging_base, (staging_end - staging_base) / 211 EFI_PAGE_SIZE); 212 stage_offset_set = 0; 213 stage_offset = 0; 214 } 215 216 #ifdef __amd64__ 217 int copy_staging = COPY_STAGING_AUTO; 218 219 static int 220 command_copy_staging(int argc, char *argv[]) 221 { 222 static const char *const mode[3] = { 223 [COPY_STAGING_ENABLE] = "enable", 224 [COPY_STAGING_DISABLE] = "disable", 225 [COPY_STAGING_AUTO] = "auto", 226 }; 227 int prev, res; 228 229 res = CMD_OK; 230 if (argc > 2) { 231 res = CMD_ERROR; 232 } else if (argc == 2) { 233 prev = copy_staging; 234 if (strcmp(argv[1], "enable") == 0) 235 copy_staging = COPY_STAGING_ENABLE; 236 else if (strcmp(argv[1], "disable") == 0) 237 copy_staging = COPY_STAGING_DISABLE; 238 else if (strcmp(argv[1], "auto") == 0) 239 copy_staging = COPY_STAGING_AUTO; 240 else { 241 printf("usage: copy_staging enable|disable|auto\n"); 242 res = CMD_ERROR; 243 } 244 if (res == CMD_OK && prev != copy_staging) { 245 printf("changed copy_staging, unloading kernel\n"); 246 unload(); 247 efi_copy_free(); 248 efi_copy_init(); 249 } 250 } else { 251 printf("copy staging: %s\n", mode[copy_staging]); 252 } 253 return (res); 254 } 255 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging); 256 #endif 257 258 static int 259 command_staging_slop(int argc, char *argv[]) 260 { 261 char *endp; 262 u_long new, prev; 263 int res; 264 265 res = CMD_OK; 266 if (argc > 2) { 267 res = CMD_ERROR; 268 } else if (argc == 2) { 269 new = strtoul(argv[1], &endp, 0); 270 if (*endp != 
'\0') { 271 printf("invalid slop value\n"); 272 res = CMD_ERROR; 273 } 274 if (res == CMD_OK && staging_slop != new) { 275 printf("changed slop, unloading kernel\n"); 276 unload(); 277 efi_copy_free(); 278 efi_copy_init(); 279 } 280 } else { 281 printf("staging slop %#lx\n", staging_slop); 282 } 283 return (res); 284 } 285 COMMAND_SET(staging_slop, "staging_slop", "set staging slop", 286 command_staging_slop); 287 288 #if defined(__i386__) || defined(__amd64__) 289 /* 290 * The staging area must reside in the the first 1GB or 4GB physical 291 * memory: see elf64_exec() in 292 * boot/efi/loader/arch/amd64/elf64_freebsd.c. 293 */ 294 static EFI_PHYSICAL_ADDRESS 295 get_staging_max(void) 296 { 297 EFI_PHYSICAL_ADDRESS res; 298 299 #if defined(__i386__) 300 res = G(1); 301 #elif defined(__amd64__) 302 res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4); 303 #endif 304 return (res); 305 } 306 #define EFI_ALLOC_METHOD AllocateMaxAddress 307 #else 308 #define EFI_ALLOC_METHOD AllocateAnyPages 309 #endif 310 311 int 312 efi_copy_init(void) 313 { 314 EFI_STATUS status; 315 unsigned long nr_pages; 316 vm_offset_t ess; 317 318 ess = EFI_STAGING_SIZE; 319 if (ess < DEFAULT_EFI_STAGING_SIZE) 320 ess = DEFAULT_EFI_STAGING_SIZE; 321 nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess); 322 323 #if defined(__i386__) || defined(__amd64__) 324 /* 325 * We'll decrease nr_pages, if it's too big. Currently we only 326 * apply this to FreeBSD VM running on Hyper-V. Why? 
Please see 327 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28 328 */ 329 if (running_on_hyperv()) 330 efi_verify_staging_size(&nr_pages); 331 332 staging = get_staging_max(); 333 #endif 334 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData, 335 nr_pages, &staging); 336 if (EFI_ERROR(status)) { 337 printf("failed to allocate staging area: %lu\n", 338 EFI_ERROR_CODE(status)); 339 return (status); 340 } 341 staging_base = staging; 342 staging_end = staging + nr_pages * EFI_PAGE_SIZE; 343 344 #if EFI_STAGING_2M_ALIGN 345 /* 346 * Round the kernel load address to a 2MiB value. This is needed 347 * because the kernel builds a page table based on where it has 348 * been loaded in physical address space. As the kernel will use 349 * either a 1MiB or 2MiB page for this we need to make sure it 350 * is correctly aligned for both cases. 351 */ 352 staging = roundup2(staging, M(2)); 353 #endif 354 355 return (0); 356 } 357 358 static bool 359 efi_check_space(vm_offset_t end) 360 { 361 EFI_PHYSICAL_ADDRESS addr, new_base, new_staging; 362 EFI_STATUS status; 363 unsigned long nr_pages; 364 365 end = roundup2(end, EFI_PAGE_SIZE); 366 367 /* There is already enough space */ 368 if (end + staging_slop <= staging_end) 369 return (true); 370 371 if (!boot_services_active) { 372 if (end <= staging_end) 373 return (true); 374 panic("efi_check_space: cannot expand staging area " 375 "after boot services were exited\n"); 376 } 377 378 /* 379 * Add slop at the end: 380 * 1. amd64 kernel expects to do some very early allocations 381 * by carving out memory after kernend. Slop guarantees 382 * that it does not ovewrite anything useful. 383 * 2. It seems that initial calculation of the staging size 384 * could be somewhat smaller than actually copying in after 385 * boot services are exited. Slop avoids calling 386 * BS->AllocatePages() when it cannot work. 
387 */ 388 end += staging_slop; 389 390 nr_pages = EFI_SIZE_TO_PAGES(end - staging_end); 391 #if defined(__i386__) || defined(__amd64__) 392 /* 393 * i386 needs all memory to be allocated under the 1G boundary. 394 * amd64 needs all memory to be allocated under the 1G or 4G boundary. 395 */ 396 if (end > get_staging_max()) 397 goto before_staging; 398 #endif 399 400 /* Try to allocate more space after the previous allocation */ 401 addr = staging_end; 402 status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages, 403 &addr); 404 if (!EFI_ERROR(status)) { 405 staging_end = staging_end + nr_pages * EFI_PAGE_SIZE; 406 return (true); 407 } 408 409 before_staging: 410 /* Try allocating space before the previous allocation */ 411 if (staging < nr_pages * EFI_PAGE_SIZE) 412 goto expand; 413 addr = staging - nr_pages * EFI_PAGE_SIZE; 414 #if EFI_STAGING_2M_ALIGN 415 /* See efi_copy_init for why this is needed */ 416 addr = rounddown2(addr, M(2)); 417 #endif 418 nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr); 419 status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages, 420 &addr); 421 if (!EFI_ERROR(status)) { 422 /* 423 * Move the old allocation and update the state so 424 * translation still works. 
425 */ 426 staging_base = addr; 427 memmove((void *)(uintptr_t)staging_base, 428 (void *)(uintptr_t)staging, staging_end - staging); 429 stage_offset -= staging - staging_base; 430 staging = staging_base; 431 return (true); 432 } 433 434 expand: 435 nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging); 436 #if EFI_STAGING_2M_ALIGN 437 nr_pages += M(2) / EFI_PAGE_SIZE; 438 #endif 439 #if defined(__i386__) || defined(__amd64__) 440 new_base = get_staging_max(); 441 #endif 442 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData, 443 nr_pages, &new_base); 444 if (!EFI_ERROR(status)) { 445 #if EFI_STAGING_2M_ALIGN 446 new_staging = roundup2(new_base, M(2)); 447 #else 448 new_staging = new_base; 449 #endif 450 /* 451 * Move the old allocation and update the state so 452 * translation still works. 453 */ 454 memcpy((void *)(uintptr_t)new_staging, 455 (void *)(uintptr_t)staging, staging_end - staging); 456 BS->FreePages(staging_base, (staging_end - staging_base) / 457 EFI_PAGE_SIZE); 458 stage_offset -= staging - new_staging; 459 staging = new_staging; 460 staging_end = new_base + nr_pages * EFI_PAGE_SIZE; 461 staging_base = new_base; 462 return (true); 463 } 464 465 printf("efi_check_space: Unable to expand staging area\n"); 466 return (false); 467 } 468 469 void * 470 efi_translate(vm_offset_t ptr) 471 { 472 473 return ((void *)(ptr + stage_offset)); 474 } 475 476 ssize_t 477 efi_copyin(const void *src, vm_offset_t dest, const size_t len) 478 { 479 480 if (!stage_offset_set) { 481 stage_offset = (vm_offset_t)staging - dest; 482 stage_offset_set = 1; 483 } 484 485 /* XXX: Callers do not check for failure. */ 486 if (!efi_check_space(dest + stage_offset + len)) { 487 errno = ENOMEM; 488 return (-1); 489 } 490 bcopy(src, (void *)(dest + stage_offset), len); 491 return (len); 492 } 493 494 ssize_t 495 efi_copyout(const vm_offset_t src, void *dest, const size_t len) 496 { 497 498 /* XXX: Callers do not check for failure. 
*/ 499 if (src + stage_offset + len > staging_end) { 500 errno = ENOMEM; 501 return (-1); 502 } 503 bcopy((void *)(src + stage_offset), dest, len); 504 return (len); 505 } 506 507 ssize_t 508 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 509 { 510 511 if (!stage_offset_set) { 512 stage_offset = (vm_offset_t)staging - dest; 513 stage_offset_set = 1; 514 } 515 516 if (!efi_check_space(dest + stage_offset + len)) { 517 errno = ENOMEM; 518 return (-1); 519 } 520 return (VECTX_READ(fd, (void *)(dest + stage_offset), len)); 521 } 522 523 void 524 efi_copy_finish(void) 525 { 526 uint64_t *src, *dst, *last; 527 528 src = (uint64_t *)(uintptr_t)staging; 529 dst = (uint64_t *)(uintptr_t)(staging - stage_offset); 530 last = (uint64_t *)(uintptr_t)staging_end; 531 532 while (src < last) 533 *dst++ = *src++; 534 } 535 536 void 537 efi_copy_finish_nop(void) 538 { 539 } 540