1 /*- 2 * Copyright (c) 2013 The FreeBSD Foundation 3 * 4 * This software was developed by Benno Rice under sponsorship from 5 * the FreeBSD Foundation. 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/param.h> 29 30 #include <stand.h> 31 #include <bootstrap.h> 32 33 #include <efi.h> 34 #include <efilib.h> 35 36 #include "loader_efi.h" 37 38 #define M(x) ((x) * 1024 * 1024) 39 #define G(x) (1ULL * (x) * 1024 * 1024 * 1024) 40 41 #if defined(__amd64__) 42 #include <machine/cpufunc.h> 43 #include <machine/specialreg.h> 44 #include <machine/vmparam.h> 45 46 /* 47 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(), 48 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify(). 49 */ 50 #define CPUID_LEAF_HV_MAXLEAF 0x40000000 51 #define CPUID_LEAF_HV_INTERFACE 0x40000001 52 #define CPUID_LEAF_HV_FEATURES 0x40000003 53 #define CPUID_LEAF_HV_LIMITS 0x40000005 54 #define CPUID_HV_IFACE_HYPERV 0x31237648 /* HV#1 */ 55 #define CPUID_HV_MSR_TIME_REFCNT 0x0002 /* MSR_HV_TIME_REF_COUNT */ 56 #define CPUID_HV_MSR_HYPERCALL 0x0020 57 58 static int 59 running_on_hyperv(void) 60 { 61 char hv_vendor[16]; 62 uint32_t regs[4]; 63 64 do_cpuid(1, regs); 65 if ((regs[2] & CPUID2_HV) == 0) 66 return (0); 67 68 do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs); 69 if (regs[0] < CPUID_LEAF_HV_LIMITS) 70 return (0); 71 72 ((uint32_t *)&hv_vendor)[0] = regs[1]; 73 ((uint32_t *)&hv_vendor)[1] = regs[2]; 74 ((uint32_t *)&hv_vendor)[2] = regs[3]; 75 hv_vendor[12] = '\0'; 76 if (strcmp(hv_vendor, "Microsoft Hv") != 0) 77 return (0); 78 79 do_cpuid(CPUID_LEAF_HV_INTERFACE, regs); 80 if (regs[0] != CPUID_HV_IFACE_HYPERV) 81 return (0); 82 83 do_cpuid(CPUID_LEAF_HV_FEATURES, regs); 84 if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0) 85 return (0); 86 if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0) 87 return (0); 88 89 return (1); 90 } 91 92 static void 93 efi_verify_staging_size(unsigned long *nr_pages) 94 { 95 UINTN sz; 96 EFI_MEMORY_DESCRIPTOR *map = NULL, *p; 97 EFI_PHYSICAL_ADDRESS start, end; 98 UINTN key, dsz; 99 UINT32 dver; 100 EFI_STATUS status; 101 int i, ndesc; 102 unsigned long available_pages = 0; 103 104 sz = 0; 105 106 for (;;) { 107 status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver); 108 if (!EFI_ERROR(status)) 109 break; 110 111 if (status != EFI_BUFFER_TOO_SMALL) { 112 printf("Can't read memory map: %lu\n", 113 EFI_ERROR_CODE(status)); 114 goto out; 115 } 116 117 free(map); 118 119 /* Allocate 10 descriptors more than the size reported, 120 * to allow for any fragmentation caused by calling 121 * malloc */ 122 map = malloc(sz + (10 * dsz)); 123 if (map == NULL) { 124 printf("Unable to allocate memory\n"); 125 goto out; 126 } 127 } 128 129 ndesc = sz / dsz; 130 for (i = 0, p = map; i < ndesc; 131 i++, p = NextMemoryDescriptor(p, dsz)) { 132 start = p->PhysicalStart; 133 end = start + p->NumberOfPages * EFI_PAGE_SIZE; 134 135 if (KERNLOAD < start || KERNLOAD >= end) 136 continue; 137 138 available_pages = p->NumberOfPages - 139 ((KERNLOAD - start) >> EFI_PAGE_SHIFT); 140 break; 141 } 142 143 if (available_pages == 0) { 144 printf("Can't find valid memory map for staging area!\n"); 145 goto out; 146 } 147 148 i++; 149 p = NextMemoryDescriptor(p, dsz); 150 151 for ( ; i < ndesc; 152 i++, p = NextMemoryDescriptor(p, dsz)) { 153 if (p->Type != EfiConventionalMemory && 154 p->Type != EfiLoaderData) 155 break; 156 157 if (p->PhysicalStart != end) 158 break; 159 160 end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE; 161 162 available_pages += p->NumberOfPages; 163 } 164 165 if (*nr_pages > available_pages) { 166 printf("Staging area's size is reduced: %ld -> %ld!\n", 167 *nr_pages, available_pages); 168 *nr_pages = available_pages; 169 } 170 out: 171 free(map); 172 } 173 #endif /* __amd64__ */ 174 175 #if defined(__arm__) 176 #define DEFAULT_EFI_STAGING_SIZE 32 177 #else 178 #define DEFAULT_EFI_STAGING_SIZE 64 179 #endif 180 #ifndef EFI_STAGING_SIZE 181 #define EFI_STAGING_SIZE DEFAULT_EFI_STAGING_SIZE 182 #endif 183 184 #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ 185 defined(__riscv) || defined(__i386__) 186 #define EFI_STAGING_2M_ALIGN 1 187 #else 188 #define EFI_STAGING_2M_ALIGN 0 189 #endif 190 191 #if defined(__amd64__) || defined(__i386__) 192 #define EFI_STAGING_SLOP M(8) 193 #else 194 #define EFI_STAGING_SLOP 0 195 #endif 196 197 static u_long staging_slop = EFI_STAGING_SLOP; 198 199 EFI_PHYSICAL_ADDRESS staging, staging_end, staging_base; 200 bool stage_offset_set = false; 201 ssize_t stage_offset; 202 203 static void 204 efi_copy_free(void) 205 { 206 BS->FreePages(staging_base, (staging_end - staging_base) / 207 EFI_PAGE_SIZE); 208 stage_offset_set = false; 209 stage_offset = 0; 210 } 211 212 #if defined(__amd64__) || defined(__i386__) 213 int copy_staging = COPY_STAGING_AUTO; 214 215 static int 216 command_copy_staging(int argc, char *argv[]) 217 { 218 static const char *const mode[3] = { 219 [COPY_STAGING_ENABLE] = "enable", 220 [COPY_STAGING_DISABLE] = "disable", 221 [COPY_STAGING_AUTO] = "auto", 222 }; 223 int prev; 224 225 if (argc > 2) { 226 goto usage; 227 } else if (argc == 2) { 228 prev = copy_staging; 229 if (strcmp(argv[1], "enable") == 0) 230 copy_staging = COPY_STAGING_ENABLE; 231 else if (strcmp(argv[1], "disable") == 0) 232 copy_staging = COPY_STAGING_DISABLE; 233 else if (strcmp(argv[1], "auto") == 0) 234 copy_staging = COPY_STAGING_AUTO; 235 else 236 goto usage; 237 if (prev != copy_staging) { 238 printf("changed copy_staging, unloading kernel\n"); 239 unload(); 240 efi_copy_free(); 241 efi_copy_init(); 242 } 243 } else { 244 printf("copy staging: %s\n", mode[copy_staging]); 245 } 246 return (CMD_OK); 247 248 usage: 249 command_errmsg = "usage: copy_staging enable|disable|auto"; 250 return (CMD_ERROR); 251 } 252 COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging); 253 #endif 254 255 static int 256 command_staging_slop(int argc, char *argv[]) 257 { 258 char *endp; 259 u_long new, prev; 260 261 if (argc > 2) { 262 goto err; 263 } else if (argc == 2) { 264 new = strtoul(argv[1], &endp, 0); 265 if (*endp != '\0') 266 goto err; 267 if (staging_slop != new) { 268 staging_slop = new; 269 printf("changed slop, unloading kernel\n"); 270 271 unload(); 272 efi_copy_free(); 273 efi_copy_init(); 274 } 275 } else { 276 printf("staging slop %#lx\n", staging_slop); 277 } 278 return (CMD_OK); 279 280 err: 281 command_errmsg = "invalid slop value"; 282 return (CMD_ERROR); 283 } 284 COMMAND_SET(staging_slop, "staging_slop", "set staging slop", 285 command_staging_slop); 286 287 #if defined(__amd64__) || defined(__i386__) 288 /* 289 * The staging area must reside in the first 1GB or 4GB physical 290 * memory: see elf64_exec() in 291 * boot/efi/loader/arch/amd64/elf64_freebsd.c. 292 */ 293 static EFI_PHYSICAL_ADDRESS 294 get_staging_max(void) 295 { 296 EFI_PHYSICAL_ADDRESS res; 297 298 res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4); 299 return (res); 300 } 301 #define EFI_ALLOC_METHOD AllocateMaxAddress 302 #else 303 #define EFI_ALLOC_METHOD AllocateAnyPages 304 #endif 305 306 int 307 efi_copy_init(void) 308 { 309 EFI_STATUS status; 310 unsigned long nr_pages; 311 vm_offset_t ess; 312 313 ess = EFI_STAGING_SIZE; 314 if (ess < DEFAULT_EFI_STAGING_SIZE) 315 ess = DEFAULT_EFI_STAGING_SIZE; 316 nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess); 317 318 #if defined(__amd64__) 319 /* 320 * We'll decrease nr_pages, if it's too big. Currently we only 321 * apply this to FreeBSD VM running on Hyper-V. Why? Please see 322 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28 323 */ 324 if (running_on_hyperv()) 325 efi_verify_staging_size(&nr_pages); 326 #endif 327 #if defined(__amd64__) || defined(__i386__) 328 staging = get_staging_max(); 329 #endif 330 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode, 331 nr_pages, &staging); 332 if (EFI_ERROR(status)) { 333 printf("failed to allocate staging area: %lu\n", 334 EFI_ERROR_CODE(status)); 335 return (status); 336 } 337 staging_base = staging; 338 staging_end = staging + nr_pages * EFI_PAGE_SIZE; 339 340 #if EFI_STAGING_2M_ALIGN 341 /* 342 * Round the kernel load address to a 2MiB value. This is needed 343 * because the kernel builds a page table based on where it has 344 * been loaded in physical address space. As the kernel will use 345 * either a 1MiB or 2MiB page for this we need to make sure it 346 * is correctly aligned for both cases. 347 */ 348 staging = roundup2(staging, M(2)); 349 #endif 350 351 return (0); 352 } 353 354 static bool 355 efi_check_space(vm_offset_t end) 356 { 357 EFI_PHYSICAL_ADDRESS addr, new_base, new_staging; 358 EFI_STATUS status; 359 unsigned long nr_pages; 360 361 end = roundup2(end, EFI_PAGE_SIZE); 362 363 /* There is already enough space */ 364 if (end + staging_slop <= staging_end) 365 return (true); 366 367 if (!boot_services_active) { 368 if (end <= staging_end) 369 return (true); 370 panic("efi_check_space: cannot expand staging area " 371 "after boot services were exited\n"); 372 } 373 374 /* 375 * Add slop at the end: 376 * 1. amd64 kernel expects to do some very early allocations 377 * by carving out memory after kernend. Slop guarantees 378 * that it does not ovewrite anything useful. 379 * 2. It seems that initial calculation of the staging size 380 * could be somewhat smaller than actually copying in after 381 * boot services are exited. Slop avoids calling 382 * BS->AllocatePages() when it cannot work. 383 */ 384 end += staging_slop; 385 386 nr_pages = EFI_SIZE_TO_PAGES(end - staging_end); 387 #if defined(__amd64__) || defined(__i386__) 388 /* 389 * The amd64 kernel needs all memory to be allocated under the 1G or 390 * 4G boundary. 391 */ 392 if (end > get_staging_max()) 393 goto before_staging; 394 #endif 395 396 /* Try to allocate more space after the previous allocation */ 397 addr = staging_end; 398 status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages, 399 &addr); 400 if (!EFI_ERROR(status)) { 401 staging_end = staging_end + nr_pages * EFI_PAGE_SIZE; 402 return (true); 403 } 404 405 before_staging: 406 /* Try allocating space before the previous allocation */ 407 if (staging < nr_pages * EFI_PAGE_SIZE) 408 goto expand; 409 addr = staging - nr_pages * EFI_PAGE_SIZE; 410 #if EFI_STAGING_2M_ALIGN 411 /* See efi_copy_init for why this is needed */ 412 addr = rounddown2(addr, M(2)); 413 #endif 414 nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr); 415 status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages, 416 &addr); 417 if (!EFI_ERROR(status)) { 418 /* 419 * Move the old allocation and update the state so 420 * translation still works. 421 */ 422 staging_base = addr; 423 memmove((void *)(uintptr_t)staging_base, 424 (void *)(uintptr_t)staging, staging_end - staging); 425 stage_offset -= staging - staging_base; 426 staging = staging_base; 427 return (true); 428 } 429 430 expand: 431 nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging); 432 #if EFI_STAGING_2M_ALIGN 433 nr_pages += M(2) / EFI_PAGE_SIZE; 434 #endif 435 #if defined(__amd64__) || defined(__i386__) 436 new_base = get_staging_max(); 437 #endif 438 status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode, 439 nr_pages, &new_base); 440 if (!EFI_ERROR(status)) { 441 #if EFI_STAGING_2M_ALIGN 442 new_staging = roundup2(new_base, M(2)); 443 #else 444 new_staging = new_base; 445 #endif 446 /* 447 * Move the old allocation and update the state so 448 * translation still works. 449 */ 450 memcpy((void *)(uintptr_t)new_staging, 451 (void *)(uintptr_t)staging, staging_end - staging); 452 BS->FreePages(staging_base, (staging_end - staging_base) / 453 EFI_PAGE_SIZE); 454 stage_offset -= staging - new_staging; 455 staging = new_staging; 456 staging_end = new_base + nr_pages * EFI_PAGE_SIZE; 457 staging_base = new_base; 458 return (true); 459 } 460 461 printf("efi_check_space: Unable to expand staging area\n"); 462 return (false); 463 } 464 465 void * 466 efi_translate(vm_offset_t ptr) 467 { 468 469 return ((void *)(ptr + stage_offset)); 470 } 471 472 ssize_t 473 efi_copyin(const void *src, vm_offset_t dest, const size_t len) 474 { 475 476 if (!stage_offset_set) { 477 stage_offset = (vm_offset_t)staging - dest; 478 stage_offset_set = true; 479 } 480 481 /* XXX: Callers do not check for failure. */ 482 if (!efi_check_space(dest + stage_offset + len)) { 483 errno = ENOMEM; 484 return (-1); 485 } 486 bcopy(src, (void *)(dest + stage_offset), len); 487 return (len); 488 } 489 490 ssize_t 491 efi_copyout(const vm_offset_t src, void *dest, const size_t len) 492 { 493 494 /* XXX: Callers do not check for failure. */ 495 if (src + stage_offset + len > staging_end) { 496 errno = ENOMEM; 497 return (-1); 498 } 499 bcopy((void *)(src + stage_offset), dest, len); 500 return (len); 501 } 502 503 ssize_t 504 efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len) 505 { 506 507 if (!stage_offset_set) { 508 stage_offset = (vm_offset_t)staging - dest; 509 stage_offset_set = true; 510 } 511 512 if (!efi_check_space(dest + stage_offset + len)) { 513 errno = ENOMEM; 514 return (-1); 515 } 516 return (VECTX_READ(fd, (void *)(dest + stage_offset), len)); 517 } 518 519 void 520 efi_copy_finish(void) 521 { 522 uint64_t *src, *dst, *last; 523 524 src = (uint64_t *)(uintptr_t)staging; 525 dst = (uint64_t *)(uintptr_t)(staging - stage_offset); 526 last = (uint64_t *)(uintptr_t)staging_end; 527 528 while (src < last) 529 *dst++ = *src++; 530 } 531 532 void 533 efi_copy_finish_nop(void) 534 { 535 } 536