1 /*- 2 * Copyright (c) 2013 The FreeBSD Foundation 3 * All rights reserved. 4 * 5 * This software was developed by Benno Rice under sponsorship from 6 * the FreeBSD Foundation. 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 34 #include <stand.h> 35 #include <bootstrap.h> 36 37 #include <efi.h> 38 #include <efilib.h> 39 40 #include "loader_efi.h" 41 42 #define M(x) ((x) * 1024 * 1024) 43 #define G(x) (1UL * (x) * 1024 * 1024 * 1024) 44 45 extern int boot_services_gone; 46 47 #if defined(__i386__) || defined(__amd64__) 48 #include <machine/cpufunc.h> 49 #include <machine/specialreg.h> 50 #include <machine/vmparam.h> 51 52 /* 53 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(), 54 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify(). 55 */ 56 #define CPUID_LEAF_HV_MAXLEAF 0x40000000 57 #define CPUID_LEAF_HV_INTERFACE 0x40000001 58 #define CPUID_LEAF_HV_FEATURES 0x40000003 59 #define CPUID_LEAF_HV_LIMITS 0x40000005 60 #define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */ 61 #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ 62 #define CPUID_HV_MSR_HYPERCALL 0x0020 63 64 static int 65 running_on_hyperv(void) 66 { 67 char hv_vendor[16]; 68 uint32_t regs[4]; 69 70 do_cpuid(1, regs); 71 if ((regs[2] & CPUID2_HV) == 0) 72 return (0); 73 74 do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); 75 if (regs[0] < CPUID_LEAF_HV_LIMITS) 76 return (0); 77 78 ((uint32_t *)&hv_vendor)[0] = regs[1]; 79 ((uint32_t *)&hv_vendor)[1] = regs[2]; 80 ((uint32_t *)&hv_vendor)[2] = regs[3]; 81 hv_vendor[12] = '\0'; 82 if (strcmp(hv_vendor, "Microsoft Hv") != 0) 83 return (0); 84 85 do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); 86 if (regs[0] != CPUID_HV_IFACE_HYPERV) 87 return (0); 88 89 do_cpuid(CPUID_LEAF_HV_FEATURES, regs); 90 if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) 91 return (0); 92 if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0) 93 return (0); 94 95 return (1); 96 } 97 98 static void 99 efi_verify_staging_size(unsigned long *nr_pages) 100 { 101 UINTN sz; 102 EFI_MEMORY_DESCRIPTOR *map = NULL, *p; 103 EFI_PHYSICAL_ADDRESS start, end; 104 UINTN key, dsz; 105 UINT32 dver; 106 EFI_STATUS status; 107 int i, ndesc; 108 unsigned long available_pages = 0; 109 110 sz = 0; 111 112 for (;;) { 113 status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); 114 if (!EFI_ERROR(status)) 115 break; 116 117 if (status != EFI_BUFFER_TOO_SMALL) { 118 printf("Can't read memory map: %lu\n", 119 EFI_ERROR_CODE(status)); 120 goto out; 121 } 122 123 free(map); 124 125 /* Allocate 10 descriptors more than the size reported, 126 * to allow for any fragmentation caused by calling 127 * malloc */ 128 map = malloc(sz + (10 * dsz)); 129 if (map == NULL) { 130 printf("Unable to allocate memory\n"); 131 goto out; 132 } 133 } 134 135 ndesc = sz / dsz; 136 for (i = 0, p = map; i < ndesc; 137 i++, p = NextMemoryDescriptor(p, dsz)) { 138 start = p->PhysicalStart; 139 end = start + p->NumberOfPages * EFI_PAGE_SIZE; 140 141 if (KERNLOAD < start || KERNLOAD >= end) 142 continue; 143 144 available_pages = p->NumberOfPages - 145 ((KERNLOAD - start) >> EFI_PAGE_SHIFT); 146 break; 147 } 148 149 if (available_pages == 0) { 150 printf("Can't find valid memory map for staging area!\n"); 151 goto out; 152 } 153 154 i++; 155 p = NextMemoryDescriptor(p, dsz); 156 157 for ( ; i < ndesc; 158 i++, p = NextMemoryDescriptor(p, dsz)) { 159 if (p->Type != EfiConventionalMemory && 160 p->Type != EfiLoaderData) 161 break; 162 163 if (p->PhysicalStart != end) 164 break; 165 166 end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE; 167 168 available_pages += p->NumberOfPages; 169 } 170 171 if (*nr_pages > available_pages) { 172 printf("Staging area's size is reduced: %ld -> %ld!\n", 173 *nr_pages, available_pages); 174 *nr_pages = available_pages; 175 } 176 out: 177 free(map); 178 } 179 #endif /* __i386__ || __amd64__ */ 180 181 #if defined(__arm__) 182 #define DEFAULT_EFI_STAGING_SIZE 32 183 #else 184 #define DEFAULT_EFI_STAGING_SIZE 64 185 #endif 186 #ifndef EFI_STAGING_SIZE 187 #define EFI_STAGING_SIZE DEFAULT_EFI_STAGING_SIZE 188 #endif 189 190 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ 191 defined(__riscv) 192 #define EFI_STAGING_2M_ALIGN 1 193 #else 194 #define EFI_STAGING_2M_ALIGN 0 195 #endif 196 197 #if defined(__amd64__) 198 #define EFI_STAGING_SLOP M(8) 199 #else 200 #define EFI_STAGING_SLOP 0 201 #endif 202 203 static u_long staging_slop = EFI_STAGING_SLOP; 204 205 EFI_PHYSICAL_ADDRESS staging, staging_end, staging_base; 206 int stage_offset_set = 0; 207 ssize_t stage_offset; 208 209 static void 210 efi_copy_free(void) 211 { 212 BS->FreePages(staging_base, (staging_end - staging_base) / 213 EFI_PAGE_SIZE); 214 stage_offset_set = 0; 215 stage_offset = 0; 216 } 217 218 #ifdef __amd64__ 219 int copy_staging = COPY_STAGING_AUTO; 220 221 static int 222 command_copy_staging(int argc, char *argv[]) 223 { 224 static const char *const mode[3] = { 225 [COPY_STAGING_ENABLE] = "enable", 226 [COPY_STAGING_DISABLE] = "disable", 227 [COPY_STAGING_AUTO] = "auto", 228 }; 229 int prev, res; 230 231 res = CMD_OK; 232 if (argc > 2) { 233 res = CMD_ERROR; 234 } else if (argc == 2) { 235 prev = copy_staging; 236 if (strcmp(argv[1], "enable") == 0) 237 copy_staging = COPY_STAGING_ENABLE; 238 else if (strcmp(argv[1], "disable") == 0) 239 copy_staging = COPY_STAGING_DISABLE; 240 else if (strcmp(argv[1], "auto") == 0) 241 copy_staging = COPY_STAGING_AUTO; 242 else { 243 printf("usage: copy_staging enable|disable|auto\n"); 244 res = CMD_ERROR; 245 } 246 if (res == CMD_OK && prev != copy_staging) { 247 printf("changed copy_staging, unloading kernel\n"); 248 unload(); 249 efi_copy_free(); 250 efi_copy_init(); 251 } 252 } else { 253 printf("copy staging: %s\n", mode[copy_staging]); 254 } 255 return (res); 256 } 257 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging); 258 #endif 259 260 static int 261 command_staging_slop(int argc, char *argv[]) 262 { 263 char *endp; 264 u_long new, prev; 265 int res; 266 267 res = CMD_OK; 268 if (argc > 2) { 269 res = CMD_ERROR; 270 } else if (argc == 2) { 271 new = strtoul(argv[1], &endp, 0); 272 if (*endp != '\0') { 273 printf("invalid slop value\n"); 274 res = CMD_ERROR; 275 } 276 if (res == CMD_OK && staging_slop != new) { 277 printf("changed slop, unloading kernel\n"); 278 unload(); 279 efi_copy_free(); 280 efi_copy_init(); 281 } 282 } else { 283 printf("staging slop %#lx\n", staging_slop); 284 } 285 return (res); 286 } 287 COMMAND_SET(staging_slop, "staging_slop", "set staging slop", 288 command_staging_slop); 289 290 #if defined(__i386__) || defined(__amd64__) 291 /* 292 * The staging area must reside in the the first 1GB or 4GB physical 293 * memory: see elf64_exec() in 294 * boot/efi/loader/arch/amd64/elf64_freebsd.c. 295 */ 296 static EFI_PHYSICAL_ADDRESS 297 get_staging_max(void) 298 { 299 EFI_PHYSICAL_ADDRESS res; 300 301 #if defined(__i386__) 302 res = G(1); 303 #elif defined(__amd64__) 304 res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4); 305 #endif 306 return (res); 307 } 308 #define EFI_ALLOC_METHOD AllocateMaxAddress 309 #else 310 #define EFI_ALLOC_METHOD AllocateAnyPages 311 #endif 312 313 int 314 efi_copy_init(void) 315 { 316 EFI_STATUS status; 317 unsigned long nr_pages; 318 vm_offset_t ess; 319 320 ess = EFI_STAGING_SIZE; 321 if (ess < DEFAULT_EFI_STAGING_SIZE) 322 ess = DEFAULT_EFI_STAGING_SIZE; 323 nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess); 324 325 #if defined(__i386__) || defined(__amd64__) 326 /* 327 * We'll decrease nr_pages, if it's too big. Currently we only 328 * apply this to FreeBSD VM running on Hyper-V. Why? Please see 329 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28 330 */ 331 if (running_on_hyperv()) 332 efi_verify_staging_size(&nr_pages); 333 334 staging = get_staging_max(); 335 #endif 336 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData, 337 nr_pages, &staging); 338 if (EFI_ERROR(status)) { 339 printf("failed to allocate staging area: %lu\n", 340 EFI_ERROR_CODE(status)); 341 return (status); 342 } 343 staging_base = staging; 344 staging_end = staging + nr_pages * EFI_PAGE_SIZE; 345 346 #if EFI_STAGING_2M_ALIGN 347 /* 348 * Round the kernel load address to a 2MiB value. This is needed 349 * because the kernel builds a page table based on where it has 350 * been loaded in physical address space. As the kernel will use 351 * either a 1MiB or 2MiB page for this we need to make sure it 352 * is correctly aligned for both cases. 353 */ 354 staging = roundup2(staging, M(2)); 355 #endif 356 357 return (0); 358 } 359 360 static bool 361 efi_check_space(vm_offset_t end) 362 { 363 EFI_PHYSICAL_ADDRESS addr, new_base, new_staging; 364 EFI_STATUS status; 365 unsigned long nr_pages; 366 367 end = roundup2(end, EFI_PAGE_SIZE); 368 369 /* There is already enough space */ 370 if (end + staging_slop <= staging_end) 371 return (true); 372 373 if (boot_services_gone) { 374 if (end <= staging_end) 375 return (true); 376 panic("efi_check_space: cannot expand staging area " 377 "after boot services were exited\n"); 378 } 379 380 /* 381 * Add slop at the end: 382 * 1. amd64 kernel expects to do some very early allocations 383 * by carving out memory after kernend. Slop guarantees 384 * that it does not ovewrite anything useful. 385 * 2. It seems that initial calculation of the staging size 386 * could be somewhat smaller than actually copying in after 387 * boot services are exited. Slop avoids calling 388 * BS->AllocatePages() when it cannot work. 389 */ 390 end += staging_slop; 391 392 nr_pages = EFI_SIZE_TO_PAGES(end - staging_end); 393 #if defined(__i386__) || defined(__amd64__) 394 /* 395 * i386 needs all memory to be allocated under the 1G boundary. 396 * amd64 needs all memory to be allocated under the 1G or 4G boundary. 397 */ 398 if (end > get_staging_max()) 399 goto before_staging; 400 #endif 401 402 /* Try to allocate more space after the previous allocation */ 403 addr = staging_end; 404 status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages, 405 &addr); 406 if (!EFI_ERROR(status)) { 407 staging_end = staging_end + nr_pages * EFI_PAGE_SIZE; 408 return (true); 409 } 410 411 before_staging: 412 /* Try allocating space before the previous allocation */ 413 if (staging < nr_pages * EFI_PAGE_SIZE) 414 goto expand; 415 addr = staging - nr_pages * EFI_PAGE_SIZE; 416 #if EFI_STAGING_2M_ALIGN 417 /* See efi_copy_init for why this is needed */ 418 addr = rounddown2(addr, M(2)); 419 #endif 420 nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr); 421 status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages, 422 &addr); 423 if (!EFI_ERROR(status)) { 424 /* 425 * Move the old allocation and update the state so 426 * translation still works. 427 */ 428 staging_base = addr; 429 memmove((void *)(uintptr_t)staging_base, 430 (void *)(uintptr_t)staging, staging_end - staging); 431 stage_offset -= staging - staging_base; 432 staging = staging_base; 433 return (true); 434 } 435 436 expand: 437 nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging); 438 #if EFI_STAGING_2M_ALIGN 439 nr_pages += M(2) / EFI_PAGE_SIZE; 440 #endif 441 #if defined(__i386__) || defined(__amd64__) 442 new_base = get_staging_max(); 443 #endif 444 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData, 445 nr_pages, &new_base); 446 if (!EFI_ERROR(status)) { 447 #if EFI_STAGING_2M_ALIGN 448 new_staging = roundup2(new_base, M(2)); 449 #else 450 new_staging = new_base; 451 #endif 452 /* 453 * Move the old allocation and update the state so 454 * translation still works. 455 */ 456 memcpy((void *)(uintptr_t)new_staging, 457 (void *)(uintptr_t)staging, staging_end - staging); 458 BS->FreePages(staging_base, (staging_end - staging_base) / 459 EFI_PAGE_SIZE); 460 stage_offset -= staging - new_staging; 461 staging = new_staging; 462 staging_end = new_base + nr_pages * EFI_PAGE_SIZE; 463 staging_base = new_base; 464 return (true); 465 } 466 467 printf("efi_check_space: Unable to expand staging area\n"); 468 return (false); 469 } 470 471 void * 472 efi_translate(vm_offset_t ptr) 473 { 474 475 return ((void *)(ptr + stage_offset)); 476 } 477 478 ssize_t 479 efi_copyin(const void *src, vm_offset_t dest, const size_t len) 480 { 481 482 if (!stage_offset_set) { 483 stage_offset = (vm_offset_t)staging - dest; 484 stage_offset_set = 1; 485 } 486 487 /* XXX: Callers do not check for failure. */ 488 if (!efi_check_space(dest + stage_offset + len)) { 489 errno = ENOMEM; 490 return (-1); 491 } 492 bcopy(src, (void *)(dest + stage_offset), len); 493 return (len); 494 } 495 496 ssize_t 497 efi_copyout(const vm_offset_t src, void *dest, const size_t len) 498 { 499 500 /* XXX: Callers do not check for failure. */ 501 if (src + stage_offset + len > staging_end) { 502 errno = ENOMEM; 503 return (-1); 504 } 505 bcopy((void *)(src + stage_offset), dest, len); 506 return (len); 507 } 508 509 ssize_t 510 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 511 { 512 513 if (!stage_offset_set) { 514 stage_offset = (vm_offset_t)staging - dest; 515 stage_offset_set = 1; 516 } 517 518 if (!efi_check_space(dest + stage_offset + len)) { 519 errno = ENOMEM; 520 return (-1); 521 } 522 return (VECTX_READ(fd, (void *)(dest + stage_offset), len)); 523 } 524 525 void 526 efi_copy_finish(void) 527 { 528 uint64_t *src, *dst, *last; 529 530 src = (uint64_t *)(uintptr_t)staging; 531 dst = (uint64_t *)(uintptr_t)(staging - stage_offset); 532 last = (uint64_t *)(uintptr_t)staging_end; 533 534 while (src < last) 535 *dst++ = *src++; 536 } 537 538 void 539 efi_copy_finish_nop(void) 540 { 541 } 542