/*-
 * Copyright (c) 2013 The FreeBSD Foundation
 *
 * This software was developed by Benno Rice under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>

#include <stand.h>
#include <bootstrap.h>

#include <efi.h>
#include <efilib.h>

#include "loader_efi.h"

#if defined(__amd64__)
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <machine/vmparam.h>

/*
 * The code is excerpted from sys/x86/x86/identcpu.c: identify_cpu(),
 * identify_hypervisor(), and dev/hyperv/vmbus/hyperv.c: hyperv_identify().
 */
#define CPUID_LEAF_HV_MAXLEAF    0x40000000
#define CPUID_LEAF_HV_INTERFACE  0x40000001
#define CPUID_LEAF_HV_FEATURES   0x40000003
#define CPUID_LEAF_HV_LIMITS     0x40000005
#define CPUID_HV_IFACE_HYPERV    0x31237648  /* HV#1 */
#define CPUID_HV_MSR_TIME_REFCNT 0x0002      /* MSR_HV_TIME_REF_COUNT */
#define CPUID_HV_MSR_HYPERCALL   0x0020

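/*
 * Detect whether the loader is running as a Hyper-V guest.  The checks
 * mirror the kernel's hypervisor identification: the CPUID hypervisor
 * bit, the "Microsoft Hv" vendor signature, the HV#1 interface id, and
 * the hypercall and time-reference-counter MSR features.
 */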
static int
running_on_hyperv(void)
{
        char hv_vendor[16];
        uint32_t regs[4];

        do_cpuid(1, regs);
        if ((regs[2] & CPUID2_HV) == 0)
                return (0);

        do_cpuid(CPUID_LEAF_HV_MAXLEAF, regs);
        if (regs[0] < CPUID_LEAF_HV_LIMITS)
                return (0);

        ((uint32_t *)&hv_vendor)[0] = regs[1];
        ((uint32_t *)&hv_vendor)[1] = regs[2];
        ((uint32_t *)&hv_vendor)[2] = regs[3];
        hv_vendor[12] = '\0';
        if (strcmp(hv_vendor, "Microsoft Hv") != 0)
                return (0);

        do_cpuid(CPUID_LEAF_HV_INTERFACE, regs);
        if (regs[0] != CPUID_HV_IFACE_HYPERV)
                return (0);

        do_cpuid(CPUID_LEAF_HV_FEATURES, regs);
        if ((regs[0] & CPUID_HV_MSR_HYPERCALL) == 0)
                return (0);
        if ((regs[0] & CPUID_HV_MSR_TIME_REFCNT) == 0)
                return (0);

        return (1);
}

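/*
 * Walk the EFI memory map and clamp *nr_pages to the number of pages
 * actually usable at KERNLOAD: the pages from KERNLOAD to the end of the
 * descriptor containing it, plus any physically contiguous
 * EfiConventionalMemory/EfiLoaderData descriptors that follow it.
 */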
static void
efi_verify_staging_size(unsigned long *nr_pages)
{
        UINTN sz;
        EFI_MEMORY_DESCRIPTOR *map = NULL, *p;
        EFI_PHYSICAL_ADDRESS start, end;
        UINTN key, dsz;
        UINT32 dver;
        EFI_STATUS status;
        int i, ndesc;
        unsigned long available_pages = 0;

        sz = 0;

        for (;;) {
                status = BS->GetMemoryMap(&sz, map, &key, &dsz, &dver);
                if (!EFI_ERROR(status))
                        break;

                if (status != EFI_BUFFER_TOO_SMALL) {
                        printf("Can't read memory map: %lu\n",
                            EFI_ERROR_CODE(status));
                        goto out;
                }

                free(map);

                /* Allocate 10 descriptors more than the size reported,
                 * to allow for any fragmentation caused by calling
                 * malloc */
                map = malloc(sz + (10 * dsz));
                if (map == NULL) {
                        printf("Unable to allocate memory\n");
                        goto out;
                }
        }

        ndesc = sz / dsz;
        for (i = 0, p = map; i < ndesc;
            i++, p = NextMemoryDescriptor(p, dsz)) {
                start = p->PhysicalStart;
                end = start + p->NumberOfPages * EFI_PAGE_SIZE;

                if (KERNLOAD < start || KERNLOAD >= end)
                        continue;

                available_pages = p->NumberOfPages -
                    ((KERNLOAD - start) >> EFI_PAGE_SHIFT);
                break;
        }

        if (available_pages == 0) {
                printf("Can't find valid memory map for staging area!\n");
                goto out;
        }

        i++;
        p = NextMemoryDescriptor(p, dsz);

        for ( ; i < ndesc;
            i++, p = NextMemoryDescriptor(p, dsz)) {
                if (p->Type != EfiConventionalMemory &&
                    p->Type != EfiLoaderData)
                        break;

                if (p->PhysicalStart != end)
                        break;

                end = p->PhysicalStart + p->NumberOfPages * EFI_PAGE_SIZE;

                available_pages += p->NumberOfPages;
        }

        if (*nr_pages > available_pages) {
                printf("Staging area's size is reduced: %ld -> %ld!\n",
                    *nr_pages, available_pages);
                *nr_pages = available_pages;
        }
out:
        free(map);
}
#endif /* __amd64__ */

#if defined(__arm__)
#define DEFAULT_EFI_STAGING_SIZE 32
#else
#define DEFAULT_EFI_STAGING_SIZE 64
#endif
#ifndef EFI_STAGING_SIZE
#define EFI_STAGING_SIZE DEFAULT_EFI_STAGING_SIZE
#endif

#define EFI_STAGING_2M_ALIGN 1

#if defined(__amd64__) || defined(__i386__)
#define EFI_STAGING_SLOP M(8)
#else
#define EFI_STAGING_SLOP 0
#endif

static u_long staging_slop = EFI_STAGING_SLOP;

EFI_PHYSICAL_ADDRESS staging, staging_end, staging_base;
bool stage_offset_set = false;
ssize_t stage_offset;

static void
efi_copy_free(void)
{
        BS->FreePages(staging_base, (staging_end - staging_base) /
            EFI_PAGE_SIZE);
        stage_offset_set = false;
        stage_offset = 0;
}

#if defined(__amd64__) || defined(__i386__)
int copy_staging = COPY_STAGING_AUTO;

static int
command_copy_staging(int argc, char *argv[])
{
        static const char *const mode[3] = {
                [COPY_STAGING_ENABLE] = "enable",
                [COPY_STAGING_DISABLE] = "disable",
                [COPY_STAGING_AUTO] = "auto",
        };
        int prev;

        if (argc > 2) {
                goto usage;
        } else if (argc == 2) {
                prev = copy_staging;
                if (strcmp(argv[1], "enable") == 0)
                        copy_staging = COPY_STAGING_ENABLE;
                else if (strcmp(argv[1], "disable") == 0)
                        copy_staging = COPY_STAGING_DISABLE;
                else if (strcmp(argv[1], "auto") == 0)
                        copy_staging = COPY_STAGING_AUTO;
                else
                        goto usage;
                if (prev != copy_staging) {
                        printf("changed copy_staging, unloading kernel\n");
                        unload();
                        efi_copy_free();
                        efi_copy_init();
                }
        } else {
                printf("copy staging: %s\n", mode[copy_staging]);
        }
        return (CMD_OK);

usage:
        command_errmsg = "usage: copy_staging enable|disable|auto";
        return (CMD_ERROR);
}
COMMAND_SET(copy_staging, "copy_staging", "copy staging", command_copy_staging);
#endif

static int
command_staging_slop(int argc, char *argv[])
{
        char *endp;
        u_long new, prev;

        if (argc > 2) {
                goto err;
        } else if (argc == 2) {
                new = strtoul(argv[1], &endp, 0);
                if (*endp != '\0')
                        goto err;
                if (staging_slop != new) {
                        staging_slop = new;
                        printf("changed slop, unloading kernel\n");

                        unload();
                        efi_copy_free();
                        efi_copy_init();
                }
        } else {
                printf("staging slop %#lx\n", staging_slop);
        }
        return (CMD_OK);

err:
        command_errmsg = "invalid slop value";
        return (CMD_ERROR);
}
COMMAND_SET(staging_slop, "staging_slop", "set staging slop",
    command_staging_slop);

#if defined(__amd64__) || defined(__i386__)
/*
 * The staging area must reside in the first 1GB or 4GB of physical
 * memory: see elf64_exec() in
 * boot/efi/loader/arch/amd64/elf64_freebsd.c.
 */
static EFI_PHYSICAL_ADDRESS
get_staging_max(void)
{
        EFI_PHYSICAL_ADDRESS res;

        res = copy_staging == COPY_STAGING_ENABLE ? G(1) : G(4);
        return (res);
}
#define EFI_ALLOC_METHOD AllocateMaxAddress
#else
#define EFI_ALLOC_METHOD AllocateAnyPages
#endif

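/*
 * Allocate the staging area.  Its size defaults to EFI_STAGING_SIZE MiB
 * (never less than DEFAULT_EFI_STAGING_SIZE), is clamped on Hyper-V
 * guests, and on amd64/i386 the allocation is kept below the 1GB/4GB
 * limit returned by get_staging_max().
 */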
int
efi_copy_init(void)
{
        EFI_STATUS status;
        unsigned long nr_pages;
        vm_offset_t ess;

        ess = EFI_STAGING_SIZE;
        if (ess < DEFAULT_EFI_STAGING_SIZE)
                ess = DEFAULT_EFI_STAGING_SIZE;
        nr_pages = EFI_SIZE_TO_PAGES(M(1) * ess);

#if defined(__amd64__)
        /*
         * We'll decrease nr_pages if it's too big. Currently we only
         * apply this to a FreeBSD VM running on Hyper-V. Why? Please see
         * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=211746#c28
         */
        if (running_on_hyperv())
                efi_verify_staging_size(&nr_pages);
#endif
#if defined(__amd64__) || defined(__i386__)
        staging = get_staging_max();
#endif
        status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
            nr_pages, &staging);
        if (EFI_ERROR(status)) {
                printf("failed to allocate staging area: %lu\n",
                    EFI_ERROR_CODE(status));
                return (status);
        }
        staging_base = staging;
        staging_end = staging + nr_pages * EFI_PAGE_SIZE;

#if EFI_STAGING_2M_ALIGN
        /*
         * Round the kernel load address to a 2MiB value. This is needed
         * because the kernel builds a page table based on where it has
         * been loaded in physical address space. As the kernel will use
         * either a 1MiB or 2MiB page for this we need to make sure it
         * is correctly aligned for both cases.
         */
        staging = roundup2(staging, M(2));
#endif

        return (0);
}

static bool
efi_check_space(vm_offset_t end)
{
        EFI_PHYSICAL_ADDRESS addr, new_base, new_staging;
        EFI_STATUS status;
        unsigned long nr_pages;

        end = roundup2(end, EFI_PAGE_SIZE);

        /* There is already enough space */
        if (end + staging_slop <= staging_end)
                return (true);

        if (!boot_services_active) {
                if (end <= staging_end)
                        return (true);
                panic("efi_check_space: cannot expand staging area "
                    "after boot services were exited\n");
        }

        /*
         * Add slop at the end:
         * 1. The amd64 kernel expects to do some very early allocations
         *    by carving out memory after kernend.  Slop guarantees
         *    that it does not overwrite anything useful.
         * 2. It seems that the initial calculation of the staging size
         *    can be somewhat smaller than what is actually copied in
         *    after boot services are exited.  Slop avoids calling
         *    BS->AllocatePages() when it cannot work.
         */
        end += staging_slop;

        nr_pages = EFI_SIZE_TO_PAGES(end - staging_end);
#if defined(__amd64__) || defined(__i386__)
        /*
         * The amd64 kernel needs all memory to be allocated under the 1G or
         * 4G boundary.
         */
        if (end > get_staging_max())
                goto before_staging;
#endif

        /* Try to allocate more space after the previous allocation */
        addr = staging_end;
        status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
            &addr);
        if (!EFI_ERROR(status)) {
                staging_end = staging_end + nr_pages * EFI_PAGE_SIZE;
                return (true);
        }

before_staging:
        /* Try allocating space before the previous allocation */
        if (staging < nr_pages * EFI_PAGE_SIZE)
                goto expand;
        addr = staging - nr_pages * EFI_PAGE_SIZE;
#if EFI_STAGING_2M_ALIGN
        /* See efi_copy_init for why this is needed */
        addr = rounddown2(addr, M(2));
#endif
        nr_pages = EFI_SIZE_TO_PAGES(staging_base - addr);
        status = BS->AllocatePages(AllocateAddress, EfiLoaderCode, nr_pages,
            &addr);
        if (!EFI_ERROR(status)) {
                /*
                 * Move the old allocation and update the state so
                 * translation still works.
                 */
                staging_base = addr;
                memmove((void *)(uintptr_t)staging_base,
                    (void *)(uintptr_t)staging, staging_end - staging);
                stage_offset -= staging - staging_base;
                staging = staging_base;
                return (true);
        }

expand:
        nr_pages = EFI_SIZE_TO_PAGES(end - (vm_offset_t)staging);
#if EFI_STAGING_2M_ALIGN
        nr_pages += M(2) / EFI_PAGE_SIZE;
#endif
#if defined(__amd64__) || defined(__i386__)
        new_base = get_staging_max();
#endif
        status = BS->AllocatePages(EFI_ALLOC_METHOD, EfiLoaderCode,
            nr_pages, &new_base);
        if (!EFI_ERROR(status)) {
#if EFI_STAGING_2M_ALIGN
                new_staging = roundup2(new_base, M(2));
#else
                new_staging = new_base;
#endif
                /*
                 * Move the old allocation and update the state so
                 * translation still works.
                 */
                memcpy((void *)(uintptr_t)new_staging,
                    (void *)(uintptr_t)staging, staging_end - staging);
                BS->FreePages(staging_base, (staging_end - staging_base) /
                    EFI_PAGE_SIZE);
                stage_offset -= staging - new_staging;
                staging = new_staging;
                staging_end = new_base + nr_pages * EFI_PAGE_SIZE;
                staging_base = new_base;
                return (true);
        }

        printf("efi_check_space: Unable to expand staging area\n");
        return (false);
}

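/*
 * Address translation for the copyin/copyout/readin routines below:
 * stage_offset maps a kernel load address into the staging area.  It is
 * latched on the first efi_copyin()/efi_readin() call and adjusted
 * whenever efi_check_space() has to move the staging area.
 */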
void *
efi_translate(vm_offset_t ptr)
{

        return ((void *)(ptr + stage_offset));
}

ssize_t
efi_copyin(const void *src, vm_offset_t dest, const size_t len)
{

        if (!stage_offset_set) {
                stage_offset = (vm_offset_t)staging - dest;
                stage_offset_set = true;
        }

        /* XXX: Callers do not check for failure. */
        if (!efi_check_space(dest + stage_offset + len)) {
                errno = ENOMEM;
                return (-1);
        }
        bcopy(src, (void *)(dest + stage_offset), len);
        return (len);
}

ssize_t
efi_copyout(const vm_offset_t src, void *dest, const size_t len)
{

        /* XXX: Callers do not check for failure. */
        if (src + stage_offset + len > staging_end) {
                errno = ENOMEM;
                return (-1);
        }
        bcopy((void *)(src + stage_offset), dest, len);
        return (len);
}

ssize_t
efi_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
{

        if (!stage_offset_set) {
                stage_offset = (vm_offset_t)staging - dest;
                stage_offset_set = true;
        }

        if (!efi_check_space(dest + stage_offset + len)) {
                errno = ENOMEM;
                return (-1);
        }
        return (VECTX_READ(fd, (void *)(dest + stage_offset), len));
}

/*
 * Copy the staged image down to its final destination
 * (staging - stage_offset) once it is safe to overwrite that memory.
 */
void
efi_copy_finish(void)
{
        uint64_t *src, *dst, *last;

        src = (uint64_t *)(uintptr_t)staging;
        dst = (uint64_t *)(uintptr_t)(staging - stage_offset);
        last = (uint64_t *)(uintptr_t)staging_end;

        while (src < last)
                *dst++ = *src++;
}

void
efi_copy_finish_nop(void)
{
}