1 /*- 2 * Copyright (c) 2013 The FreeBSD Foundation 3 * 4 * This software was developed by Benno Rice under sponsorship from 5 * the FreeBSD Foundation. 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include <sys/param.h> 32 33 #include <stand.h> 34 #include <bootstrap.h> 35 36 #include <efi.h> 37 #include <efilib.h> 38 39 #include "loader_efi.h" 40 41 #define M(x) ((x) * 1024 * 1024) 42 #define G(x) (1UL * (x) * 1024 * 1024 * 1024) 43 44 #if defined(__i386__) || defined(__amd64__) 45 #include <machine/cpufunc.h> 46 #include <machine/specialreg.h> 47 #include <machine/vmparam.h> 48 49 /* 50 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(), 51 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify(). 52 */ 53 #define CPUID_LEAF_HV_MAXLEAF 0x40000000 54 #define CPUID_LEAF_HV_INTERFACE 0x40000001 55 #define CPUID_LEAF_HV_FEATURES 0x40000003 56 #define CPUID_LEAF_HV_LIMITS 0x40000005 57 #define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */ 58 #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ 59 #define CPUID_HV_MSR_HYPERCALL 0x0020 60 61 static int 62 running_on_hyperv(void) 63 { 64 char hv_vendor[16]; 65 uint32_t regs[4]; 66 67 do_cpuid(1, regs); 68 if ((regs[2] & CPUID2_HV) == 0) 69 return (0); 70 71 do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); 72 if (regs[0] < CPUID_LEAF_HV_LIMITS) 73 return (0); 74 75 ((uint32_t *)&hv_vendor)[0] = regs[1]; 76 ((uint32_t *)&hv_vendor)[1] = regs[2]; 77 ((uint32_t *)&hv_vendor)[2] = regs[3]; 78 hv_vendor[12] = '\0'; 79 if (strcmp(hv_vendor, "Microsoft Hv") != 0) 80 return (0); 81 82 do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); 83 if (regs[0] != CPUID_HV_IFACE_HYPERV) 84 return (0); 85 86 do_cpuid(CPUID_LEAF_HV_FEATURES, regs); 87 if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) 88 return (0); 89 if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0) 90 return (0); 91 92 return (1); 93 } 94 95 static void 96 efi_verify_staging_size(unsigned long *nr_pages) 97 { 98 UINTN sz; 99 EFI_MEMORY_DESCRIPTOR *map = NULL, *p; 100 EFI_PHYSICAL_ADDRESS start, end; 101 UINTN key, dsz; 102 UINT32 dver; 103 EFI_STATUS status; 104 int i, ndesc; 105 unsigned long available_pages = 0; 106 107 sz = 0; 108 109 for (;;) { 110 status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); 111 if (!EFI_ERROR(status)) 112 break; 113 114 if (status != EFI_BUFFER_TOO_SMALL) { 115 printf("Can't read memory map: %lu\n", 116 EFI_ERROR_CODE(status)); 117 goto out; 118 } 119 120 free(map); 121 122 /* Allocate 10 descriptors more than the size reported, 123 * to allow for any fragmentation caused by calling 124 * malloc */ 125 map = malloc(sz + (10 * dsz)); 126 if (map == NULL) { 127 printf("Unable to allocate memory\n"); 128 goto out; 129 } 130 } 131 132 ndesc = sz / dsz; 133 for (i = 0, p = map; i < ndesc; 134 i++, p = NextMemoryDescriptor(p, dsz)) { 135 start = p->PhysicalStart; 136 end = start + p->NumberOfPages * EFI_PAGE_SIZE; 137 138 if (KERNLOAD < start || KERNLOAD >= end) 139 continue; 140 141 available_pages = p->NumberOfPages - 142 ((KERNLOAD - start) >> EFI_PAGE_SHIFT); 143 break; 144 } 145 146 if (available_pages == 0) { 147 printf("Can't find valid memory map for staging area!\n"); 148 goto out; 149 } 150 151 i++; 152 p = NextMemoryDescriptor(p, dsz); 153 154 for ( ; i < ndesc; 155 i++, p = NextMemoryDescriptor(p, dsz)) { 156 if (p->Type != EfiConventionalMemory && 157 p->Type != EfiLoaderData) 158 break; 159 160 if (p->PhysicalStart != end) 161 break; 162 163 end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE; 164 165 available_pages += p->NumberOfPages; 166 } 167 168 if (*nr_pages > available_pages) { 169 printf("Staging area's size is reduced: %ld -> %ld!\n", 170 *nr_pages, available_pages); 171 *nr_pages = available_pages; 172 } 173 out: 174 free(map); 175 } 176 #endif /* __i386__ || __amd64__ */ 177 178 #if defined(__arm__) 179 #define DEFAULT_EFI_STAGING_SIZE 32 180 #else 181 #define DEFAULT_EFI_STAGING_SIZE 64 182 #endif 183 #ifndef EFI_STAGING_SIZE 184 #define EFI_STAGING_SIZE DEFAULT_EFI_STAGING_SIZE 185 #endif 186 187 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ 188 defined(__riscv) 189 #define EFI_STAGING_2M_ALIGN 1 190 #else 191 #define EFI_STAGING_2M_ALIGN 0 192 #endif 193 194 #if defined(__amd64__) 195 #define EFI_STAGING_SLOP M(8) 196 #else 197 #define EFI_STAGING_SLOP 0 198 #endif 199 200 static u_long staging_slop = EFI_STAGING_SLOP; 201 202 EFI_PHYSICAL_ADDRESS staging, staging_end, staging_base; 203 bool stage_offset_set = false; 204 ssize_t stage_offset; 205 206 static void 207 efi_copy_free(void) 208 { 209 BS->FreePages(staging_base, (staging_end - staging_base) / 210 EFI_PAGE_SIZE); 211 stage_offset_set = false; 212 stage_offset = 0; 213 } 214 215 #ifdef __amd64__ 216 int copy_staging = COPY_STAGING_AUTO; 217 218 static int 219 command_copy_staging(int argc, char *argv[]) 220 { 221 static const char *const mode[3] = { 222 [COPY_STAGING_ENABLE] = "enable", 223 [COPY_STAGING_DISABLE] = "disable", 224 [COPY_STAGING_AUTO] = "auto", 225 }; 226 int prev, res; 227 228 res = CMD_OK; 229 if (argc > 2) { 230 res = CMD_ERROR; 231 } else if (argc == 2) { 232 prev = copy_staging; 233 if (strcmp(argv[1], "enable") == 0) 234 copy_staging = COPY_STAGING_ENABLE; 235 else if (strcmp(argv[1], "disable") == 0) 236 copy_staging = COPY_STAGING_DISABLE; 237 else if (strcmp(argv[1], "auto") == 0) 238 copy_staging = COPY_STAGING_AUTO; 239 else { 240 printf("usage: copy_staging enable|disable|auto\n"); 241 res = CMD_ERROR; 242 } 243 if (res == CMD_OK && prev != copy_staging) { 244 printf("changed copy_staging, unloading kernel\n"); 245 unload(); 246 efi_copy_free(); 247 efi_copy_init(); 248 } 249 } else { 250 printf("copy staging: %s\n", mode[copy_staging]); 251 } 252 return (res); 253 } 254 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging); 255 #endif 256 257 static int 258 command_staging_slop(int argc, char *argv[]) 259 { 260 char *endp; 261 u_long new, prev; 262 int res; 263 264 res = CMD_OK; 265 if (argc > 2) { 266 res = CMD_ERROR; 267 } else if (argc == 2) { 268 new = strtoul(argv[1], &endp, 0); 269 if (*endp != '\0') { 270 printf("invalid slop value\n"); 271 res = CMD_ERROR; 272 } 273 if (res == CMD_OK && staging_slop != new) { 274 printf("changed slop, unloading kernel\n"); 275 unload(); 276 efi_copy_free(); 277 efi_copy_init(); 278 } 279 } else { 280 printf("staging slop %#lx\n", staging_slop); 281 } 282 return (res); 283 } 284 COMMAND_SET(staging_slop, "staging_slop", "set staging slop", 285 command_staging_slop); 286 287 #if defined(__i386__) || defined(__amd64__) 288 /* 289 * The staging area must reside in the the first 1GB or 4GB physical 290 * memory: see elf64_exec() in 291 * boot/efi/loader/arch/amd64/elf64_freebsd.c. 292 */ 293 static EFI_PHYSICAL_ADDRESS 294 get_staging_max(void) 295 { 296 EFI_PHYSICAL_ADDRESS res; 297 298 #if defined(__i386__) 299 res = G(1); 300 #elif defined(__amd64__) 301 res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4); 302 #endif 303 return (res); 304 } 305 #define EFI_ALLOC_METHOD AllocateMaxAddress 306 #else 307 #define EFI_ALLOC_METHOD AllocateAnyPages 308 #endif 309 310 int 311 efi_copy_init(void) 312 { 313 EFI_STATUS status; 314 unsigned long nr_pages; 315 vm_offset_t ess; 316 317 ess = EFI_STAGING_SIZE; 318 if (ess < DEFAULT_EFI_STAGING_SIZE) 319 ess = DEFAULT_EFI_STAGING_SIZE; 320 nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess); 321 322 #if defined(__i386__) || defined(__amd64__) 323 /* 324 * We'll decrease nr_pages, if it's too big. Currently we only 325 * apply this to FreeBSD VM running on Hyper-V. Why? Please see 326 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28 327 */ 328 if (running_on_hyperv()) 329 efi_verify_staging_size(&nr_pages); 330 331 staging = get_staging_max(); 332 #endif 333 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData, 334 nr_pages, &staging); 335 if (EFI_ERROR(status)) { 336 printf("failed to allocate staging area: %lu\n", 337 EFI_ERROR_CODE(status)); 338 return (status); 339 } 340 staging_base = staging; 341 staging_end = staging + nr_pages * EFI_PAGE_SIZE; 342 343 #if EFI_STAGING_2M_ALIGN 344 /* 345 * Round the kernel load address to a 2MiB value. This is needed 346 * because the kernel builds a page table based on where it has 347 * been loaded in physical address space. As the kernel will use 348 * either a 1MiB or 2MiB page for this we need to make sure it 349 * is correctly aligned for both cases. 350 */ 351 staging = roundup2(staging, M(2)); 352 #endif 353 354 return (0); 355 } 356 357 static bool 358 efi_check_space(vm_offset_t end) 359 { 360 EFI_PHYSICAL_ADDRESS addr, new_base, new_staging; 361 EFI_STATUS status; 362 unsigned long nr_pages; 363 364 end = roundup2(end, EFI_PAGE_SIZE); 365 366 /* There is already enough space */ 367 if (end + staging_slop <= staging_end) 368 return (true); 369 370 if (!boot_services_active) { 371 if (end <= staging_end) 372 return (true); 373 panic("efi_check_space: cannot expand staging area " 374 "after boot services were exited\n"); 375 } 376 377 /* 378 * Add slop at the end: 379 * 1. amd64 kernel expects to do some very early allocations 380 * by carving out memory after kernend. Slop guarantees 381 * that it does not ovewrite anything useful. 382 * 2. It seems that initial calculation of the staging size 383 * could be somewhat smaller than actually copying in after 384 * boot services are exited. Slop avoids calling 385 * BS->AllocatePages() when it cannot work. 386 */ 387 end += staging_slop; 388 389 nr_pages = EFI_SIZE_TO_PAGES(end - staging_end); 390 #if defined(__i386__) || defined(__amd64__) 391 /* 392 * i386 needs all memory to be allocated under the 1G boundary. 393 * amd64 needs all memory to be allocated under the 1G or 4G boundary. 394 */ 395 if (end > get_staging_max()) 396 goto before_staging; 397 #endif 398 399 /* Try to allocate more space after the previous allocation */ 400 addr = staging_end; 401 status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages, 402 &addr); 403 if (!EFI_ERROR(status)) { 404 staging_end = staging_end + nr_pages * EFI_PAGE_SIZE; 405 return (true); 406 } 407 408 before_staging: 409 /* Try allocating space before the previous allocation */ 410 if (staging < nr_pages * EFI_PAGE_SIZE) 411 goto expand; 412 addr = staging - nr_pages * EFI_PAGE_SIZE; 413 #if EFI_STAGING_2M_ALIGN 414 /* See efi_copy_init for why this is needed */ 415 addr = rounddown2(addr, M(2)); 416 #endif 417 nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr); 418 status = BS->AllocatePages(AllocateAddress, EfiLoaderData, nr_pages, 419 &addr); 420 if (!EFI_ERROR(status)) { 421 /* 422 * Move the old allocation and update the state so 423 * translation still works. 424 */ 425 staging_base = addr; 426 memmove((void *)(uintptr_t)staging_base, 427 (void *)(uintptr_t)staging, staging_end - staging); 428 stage_offset -= staging - staging_base; 429 staging = staging_base; 430 return (true); 431 } 432 433 expand: 434 nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging); 435 #if EFI_STAGING_2M_ALIGN 436 nr_pages += M(2) / EFI_PAGE_SIZE; 437 #endif 438 #if defined(__i386__) || defined(__amd64__) 439 new_base = get_staging_max(); 440 #endif 441 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderData, 442 nr_pages, &new_base); 443 if (!EFI_ERROR(status)) { 444 #if EFI_STAGING_2M_ALIGN 445 new_staging = roundup2(new_base, M(2)); 446 #else 447 new_staging = new_base; 448 #endif 449 /* 450 * Move the old allocation and update the state so 451 * translation still works. 452 */ 453 memcpy((void *)(uintptr_t)new_staging, 454 (void *)(uintptr_t)staging, staging_end - staging); 455 BS->FreePages(staging_base, (staging_end - staging_base) / 456 EFI_PAGE_SIZE); 457 stage_offset -= staging - new_staging; 458 staging = new_staging; 459 staging_end = new_base + nr_pages * EFI_PAGE_SIZE; 460 staging_base = new_base; 461 return (true); 462 } 463 464 printf("efi_check_space: Unable to expand staging area\n"); 465 return (false); 466 } 467 468 void * 469 efi_translate(vm_offset_t ptr) 470 { 471 472 return ((void *)(ptr + stage_offset)); 473 } 474 475 ssize_t 476 efi_copyin(const void *src, vm_offset_t dest, const size_t len) 477 { 478 479 if (!stage_offset_set) { 480 stage_offset = (vm_offset_t)staging - dest; 481 stage_offset_set = true; 482 } 483 484 /* XXX: Callers do not check for failure. */ 485 if (!efi_check_space(dest + stage_offset + len)) { 486 errno = ENOMEM; 487 return (-1); 488 } 489 bcopy(src, (void *)(dest + stage_offset), len); 490 return (len); 491 } 492 493 ssize_t 494 efi_copyout(const vm_offset_t src, void *dest, const size_t len) 495 { 496 497 /* XXX: Callers do not check for failure. */ 498 if (src + stage_offset + len > staging_end) { 499 errno = ENOMEM; 500 return (-1); 501 } 502 bcopy((void *)(src + stage_offset), dest, len); 503 return (len); 504 } 505 506 ssize_t 507 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 508 { 509 510 if (!stage_offset_set) { 511 stage_offset = (vm_offset_t)staging - dest; 512 stage_offset_set = true; 513 } 514 515 if (!efi_check_space(dest + stage_offset + len)) { 516 errno = ENOMEM; 517 return (-1); 518 } 519 return (VECTX_READ(fd, (void *)(dest + stage_offset), len)); 520 } 521 522 void 523 efi_copy_finish(void) 524 { 525 uint64_t *src, *dst, *last; 526 527 src = (uint64_t *)(uintptr_t)staging; 528 dst = (uint64_t *)(uintptr_t)(staging - stage_offset); 529 last = (uint64_t *)(uintptr_t)staging_end; 530 531 while (src < last) 532 *dst++ = *src++; 533 } 534 535 void 536 efi_copy_finish_nop(void) 537 { 538 } 539