1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * kexec: kexec_file_load system call 4 * 5 * Copyright (C) 2014 Red Hat Inc. 6 * Authors: 7 * Vivek Goyal <vgoyal@redhat.com> 8 */ 9 10 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 11 12 #include <linux/capability.h> 13 #include <linux/mm.h> 14 #include <linux/file.h> 15 #include <linux/slab.h> 16 #include <linux/kexec.h> 17 #include <linux/memblock.h> 18 #include <linux/mutex.h> 19 #include <linux/list.h> 20 #include <linux/fs.h> 21 #include <linux/ima.h> 22 #include <crypto/hash.h> 23 #include <crypto/sha2.h> 24 #include <linux/elf.h> 25 #include <linux/elfcore.h> 26 #include <linux/kernel.h> 27 #include <linux/kernel_read_file.h> 28 #include <linux/syscalls.h> 29 #include <linux/vmalloc.h> 30 #include "kexec_internal.h" 31 32 static int kexec_calculate_store_digests(struct kimage *image); 33 34 /* 35 * Currently this is the only default function that is exported as some 36 * architectures need it to do additional handlings. 37 * In the future, other default functions may be exported too if required. 38 */ 39 int kexec_image_probe_default(struct kimage *image, void *buf, 40 unsigned long buf_len) 41 { 42 const struct kexec_file_ops * const *fops; 43 int ret = -ENOEXEC; 44 45 for (fops = &kexec_file_loaders[0]; *fops && (*fops)->probe; ++fops) { 46 ret = (*fops)->probe(buf, buf_len); 47 if (!ret) { 48 image->fops = *fops; 49 return ret; 50 } 51 } 52 53 return ret; 54 } 55 56 /* Architectures can provide this probe function */ 57 int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, 58 unsigned long buf_len) 59 { 60 return kexec_image_probe_default(image, buf, buf_len); 61 } 62 63 static void *kexec_image_load_default(struct kimage *image) 64 { 65 if (!image->fops || !image->fops->load) 66 return ERR_PTR(-ENOEXEC); 67 68 return image->fops->load(image, image->kernel_buf, 69 image->kernel_buf_len, image->initrd_buf, 70 image->initrd_buf_len, image->cmdline_buf, 71 image->cmdline_buf_len); 72 } 73 74 void * __weak arch_kexec_kernel_image_load(struct kimage *image) 75 { 76 return kexec_image_load_default(image); 77 } 78 79 int kexec_image_post_load_cleanup_default(struct kimage *image) 80 { 81 if (!image->fops || !image->fops->cleanup) 82 return 0; 83 84 return image->fops->cleanup(image->image_loader_data); 85 } 86 87 int __weak arch_kimage_file_post_load_cleanup(struct kimage *image) 88 { 89 return kexec_image_post_load_cleanup_default(image); 90 } 91 92 #ifdef CONFIG_KEXEC_SIG 93 static int kexec_image_verify_sig_default(struct kimage *image, void *buf, 94 unsigned long buf_len) 95 { 96 if (!image->fops || !image->fops->verify_sig) { 97 pr_debug("kernel loader does not support signature verification.\n"); 98 return -EKEYREJECTED; 99 } 100 101 return image->fops->verify_sig(buf, buf_len); 102 } 103 104 int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, 105 unsigned long buf_len) 106 { 107 return kexec_image_verify_sig_default(image, buf, buf_len); 108 } 109 #endif 110 111 /* 112 * Free up memory used by kernel, initrd, and command line. This is temporary 113 * memory allocation which is not needed any more after these buffers have 114 * been loaded into separate segments and have been copied elsewhere. 115 */ 116 void kimage_file_post_load_cleanup(struct kimage *image) 117 { 118 struct purgatory_info *pi = &image->purgatory_info; 119 120 vfree(image->kernel_buf); 121 image->kernel_buf = NULL; 122 123 vfree(image->initrd_buf); 124 image->initrd_buf = NULL; 125 126 kfree(image->cmdline_buf); 127 image->cmdline_buf = NULL; 128 129 vfree(pi->purgatory_buf); 130 pi->purgatory_buf = NULL; 131 132 vfree(pi->sechdrs); 133 pi->sechdrs = NULL; 134 135 #ifdef CONFIG_IMA_KEXEC 136 vfree(image->ima_buffer); 137 image->ima_buffer = NULL; 138 #endif /* CONFIG_IMA_KEXEC */ 139 140 /* See if architecture has anything to cleanup post load */ 141 arch_kimage_file_post_load_cleanup(image); 142 143 /* 144 * Above call should have called into bootloader to free up 145 * any data stored in kimage->image_loader_data. It should 146 * be ok now to free it up. 147 */ 148 kfree(image->image_loader_data); 149 image->image_loader_data = NULL; 150 } 151 152 #ifdef CONFIG_KEXEC_SIG 153 static int 154 kimage_validate_signature(struct kimage *image) 155 { 156 int ret; 157 158 ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, 159 image->kernel_buf_len); 160 if (ret) { 161 162 if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) { 163 pr_notice("Enforced kernel signature verification failed (%d).\n", ret); 164 return ret; 165 } 166 167 /* 168 * If IMA is guaranteed to appraise a signature on the kexec 169 * image, permit it even if the kernel is otherwise locked 170 * down. 171 */ 172 if (!ima_appraise_signature(READING_KEXEC_IMAGE) && 173 security_locked_down(LOCKDOWN_KEXEC)) 174 return -EPERM; 175 176 pr_debug("kernel signature verification failed (%d).\n", ret); 177 } 178 179 return 0; 180 } 181 #endif 182 183 /* 184 * In file mode list of segments is prepared by kernel. Copy relevant 185 * data from user space, do error checking, prepare segment list 186 */ 187 static int 188 kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd, 189 const char __user *cmdline_ptr, 190 unsigned long cmdline_len, unsigned flags) 191 { 192 int ret; 193 void *ldata; 194 195 ret = kernel_read_file_from_fd(kernel_fd, 0, &image->kernel_buf, 196 INT_MAX, NULL, READING_KEXEC_IMAGE); 197 if (ret < 0) 198 return ret; 199 image->kernel_buf_len = ret; 200 201 /* Call arch image probe handlers */ 202 ret = arch_kexec_kernel_image_probe(image, image->kernel_buf, 203 image->kernel_buf_len); 204 if (ret) 205 goto out; 206 207 #ifdef CONFIG_KEXEC_SIG 208 ret = kimage_validate_signature(image); 209 210 if (ret) 211 goto out; 212 #endif 213 /* It is possible that there no initramfs is being loaded */ 214 if (!(flags & KEXEC_FILE_NO_INITRAMFS)) { 215 ret = kernel_read_file_from_fd(initrd_fd, 0, &image->initrd_buf, 216 INT_MAX, NULL, 217 READING_KEXEC_INITRAMFS); 218 if (ret < 0) 219 goto out; 220 image->initrd_buf_len = ret; 221 ret = 0; 222 } 223 224 if (cmdline_len) { 225 image->cmdline_buf = memdup_user(cmdline_ptr, cmdline_len); 226 if (IS_ERR(image->cmdline_buf)) { 227 ret = PTR_ERR(image->cmdline_buf); 228 image->cmdline_buf = NULL; 229 goto out; 230 } 231 232 image->cmdline_buf_len = cmdline_len; 233 234 /* command line should be a string with last byte null */ 235 if (image->cmdline_buf[cmdline_len - 1] != '\0') { 236 ret = -EINVAL; 237 goto out; 238 } 239 240 ima_kexec_cmdline(kernel_fd, image->cmdline_buf, 241 image->cmdline_buf_len - 1); 242 } 243 244 /* IMA needs to pass the measurement list to the next kernel. */ 245 ima_add_kexec_buffer(image); 246 247 /* Call arch image load handlers */ 248 ldata = arch_kexec_kernel_image_load(image); 249 250 if (IS_ERR(ldata)) { 251 ret = PTR_ERR(ldata); 252 goto out; 253 } 254 255 image->image_loader_data = ldata; 256 out: 257 /* In case of error, free up all allocated memory in this function */ 258 if (ret) 259 kimage_file_post_load_cleanup(image); 260 return ret; 261 } 262 263 static int 264 kimage_file_alloc_init(struct kimage **rimage, int kernel_fd, 265 int initrd_fd, const char __user *cmdline_ptr, 266 unsigned long cmdline_len, unsigned long flags) 267 { 268 int ret; 269 struct kimage *image; 270 bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH; 271 272 image = do_kimage_alloc_init(); 273 if (!image) 274 return -ENOMEM; 275 276 image->file_mode = 1; 277 278 if (kexec_on_panic) { 279 /* Enable special crash kernel control page alloc policy. */ 280 image->control_page = crashk_res.start; 281 image->type = KEXEC_TYPE_CRASH; 282 } 283 284 ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd, 285 cmdline_ptr, cmdline_len, flags); 286 if (ret) 287 goto out_free_image; 288 289 ret = sanity_check_segment_list(image); 290 if (ret) 291 goto out_free_post_load_bufs; 292 293 ret = -ENOMEM; 294 image->control_code_page = kimage_alloc_control_pages(image, 295 get_order(KEXEC_CONTROL_PAGE_SIZE)); 296 if (!image->control_code_page) { 297 pr_err("Could not allocate control_code_buffer\n"); 298 goto out_free_post_load_bufs; 299 } 300 301 if (!kexec_on_panic) { 302 image->swap_page = kimage_alloc_control_pages(image, 0); 303 if (!image->swap_page) { 304 pr_err("Could not allocate swap buffer\n"); 305 goto out_free_control_pages; 306 } 307 } 308 309 *rimage = image; 310 return 0; 311 out_free_control_pages: 312 kimage_free_page_list(&image->control_pages); 313 out_free_post_load_bufs: 314 kimage_file_post_load_cleanup(image); 315 out_free_image: 316 kfree(image); 317 return ret; 318 } 319 320 SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, 321 unsigned long, cmdline_len, const char __user *, cmdline_ptr, 322 unsigned long, flags) 323 { 324 int ret = 0, i; 325 struct kimage **dest_image, *image; 326 327 /* We only trust the superuser with rebooting the system. */ 328 if (!capable(CAP_SYS_BOOT) || kexec_load_disabled) 329 return -EPERM; 330 331 /* Make sure we have a legal set of flags */ 332 if (flags != (flags & KEXEC_FILE_FLAGS)) 333 return -EINVAL; 334 335 image = NULL; 336 337 if (!mutex_trylock(&kexec_mutex)) 338 return -EBUSY; 339 340 dest_image = &kexec_image; 341 if (flags & KEXEC_FILE_ON_CRASH) { 342 dest_image = &kexec_crash_image; 343 if (kexec_crash_image) 344 arch_kexec_unprotect_crashkres(); 345 } 346 347 if (flags & KEXEC_FILE_UNLOAD) 348 goto exchange; 349 350 /* 351 * In case of crash, new kernel gets loaded in reserved region. It is 352 * same memory where old crash kernel might be loaded. Free any 353 * current crash dump kernel before we corrupt it. 354 */ 355 if (flags & KEXEC_FILE_ON_CRASH) 356 kimage_free(xchg(&kexec_crash_image, NULL)); 357 358 ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr, 359 cmdline_len, flags); 360 if (ret) 361 goto out; 362 363 ret = machine_kexec_prepare(image); 364 if (ret) 365 goto out; 366 367 /* 368 * Some architecture(like S390) may touch the crash memory before 369 * machine_kexec_prepare(), we must copy vmcoreinfo data after it. 370 */ 371 ret = kimage_crash_copy_vmcoreinfo(image); 372 if (ret) 373 goto out; 374 375 ret = kexec_calculate_store_digests(image); 376 if (ret) 377 goto out; 378 379 for (i = 0; i < image->nr_segments; i++) { 380 struct kexec_segment *ksegment; 381 382 ksegment = &image->segment[i]; 383 pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n", 384 i, ksegment->buf, ksegment->bufsz, ksegment->mem, 385 ksegment->memsz); 386 387 ret = kimage_load_segment(image, &image->segment[i]); 388 if (ret) 389 goto out; 390 } 391 392 kimage_terminate(image); 393 394 ret = machine_kexec_post_load(image); 395 if (ret) 396 goto out; 397 398 /* 399 * Free up any temporary buffers allocated which are not needed 400 * after image has been loaded 401 */ 402 kimage_file_post_load_cleanup(image); 403 exchange: 404 image = xchg(dest_image, image); 405 out: 406 if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image) 407 arch_kexec_protect_crashkres(); 408 409 mutex_unlock(&kexec_mutex); 410 kimage_free(image); 411 return ret; 412 } 413 414 static int locate_mem_hole_top_down(unsigned long start, unsigned long end, 415 struct kexec_buf *kbuf) 416 { 417 struct kimage *image = kbuf->image; 418 unsigned long temp_start, temp_end; 419 420 temp_end = min(end, kbuf->buf_max); 421 temp_start = temp_end - kbuf->memsz; 422 423 do { 424 /* align down start */ 425 temp_start = temp_start & (~(kbuf->buf_align - 1)); 426 427 if (temp_start < start || temp_start < kbuf->buf_min) 428 return 0; 429 430 temp_end = temp_start + kbuf->memsz - 1; 431 432 /* 433 * Make sure this does not conflict with any of existing 434 * segments 435 */ 436 if (kimage_is_destination_range(image, temp_start, temp_end)) { 437 temp_start = temp_start - PAGE_SIZE; 438 continue; 439 } 440 441 /* We found a suitable memory range */ 442 break; 443 } while (1); 444 445 /* If we are here, we found a suitable memory range */ 446 kbuf->mem = temp_start; 447 448 /* Success, stop navigating through remaining System RAM ranges */ 449 return 1; 450 } 451 452 static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, 453 struct kexec_buf *kbuf) 454 { 455 struct kimage *image = kbuf->image; 456 unsigned long temp_start, temp_end; 457 458 temp_start = max(start, kbuf->buf_min); 459 460 do { 461 temp_start = ALIGN(temp_start, kbuf->buf_align); 462 temp_end = temp_start + kbuf->memsz - 1; 463 464 if (temp_end > end || temp_end > kbuf->buf_max) 465 return 0; 466 /* 467 * Make sure this does not conflict with any of existing 468 * segments 469 */ 470 if (kimage_is_destination_range(image, temp_start, temp_end)) { 471 temp_start = temp_start + PAGE_SIZE; 472 continue; 473 } 474 475 /* We found a suitable memory range */ 476 break; 477 } while (1); 478 479 /* If we are here, we found a suitable memory range */ 480 kbuf->mem = temp_start; 481 482 /* Success, stop navigating through remaining System RAM ranges */ 483 return 1; 484 } 485 486 static int locate_mem_hole_callback(struct resource *res, void *arg) 487 { 488 struct kexec_buf *kbuf = (struct kexec_buf *)arg; 489 u64 start = res->start, end = res->end; 490 unsigned long sz = end - start + 1; 491 492 /* Returning 0 will take to next memory range */ 493 494 /* Don't use memory that will be detected and handled by a driver. */ 495 if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED) 496 return 0; 497 498 if (sz < kbuf->memsz) 499 return 0; 500 501 if (end < kbuf->buf_min || start > kbuf->buf_max) 502 return 0; 503 504 /* 505 * Allocate memory top down with-in ram range. Otherwise bottom up 506 * allocation. 507 */ 508 if (kbuf->top_down) 509 return locate_mem_hole_top_down(start, end, kbuf); 510 return locate_mem_hole_bottom_up(start, end, kbuf); 511 } 512 513 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK 514 static int kexec_walk_memblock(struct kexec_buf *kbuf, 515 int (*func)(struct resource *, void *)) 516 { 517 int ret = 0; 518 u64 i; 519 phys_addr_t mstart, mend; 520 struct resource res = { }; 521 522 if (kbuf->image->type == KEXEC_TYPE_CRASH) 523 return func(&crashk_res, kbuf); 524 525 /* 526 * Using MEMBLOCK_NONE will properly skip MEMBLOCK_DRIVER_MANAGED. See 527 * IORESOURCE_SYSRAM_DRIVER_MANAGED handling in 528 * locate_mem_hole_callback(). 529 */ 530 if (kbuf->top_down) { 531 for_each_free_mem_range_reverse(i, NUMA_NO_NODE, MEMBLOCK_NONE, 532 &mstart, &mend, NULL) { 533 /* 534 * In memblock, end points to the first byte after the 535 * range while in kexec, end points to the last byte 536 * in the range. 537 */ 538 res.start = mstart; 539 res.end = mend - 1; 540 ret = func(&res, kbuf); 541 if (ret) 542 break; 543 } 544 } else { 545 for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, 546 &mstart, &mend, NULL) { 547 /* 548 * In memblock, end points to the first byte after the 549 * range while in kexec, end points to the last byte 550 * in the range. 551 */ 552 res.start = mstart; 553 res.end = mend - 1; 554 ret = func(&res, kbuf); 555 if (ret) 556 break; 557 } 558 } 559 560 return ret; 561 } 562 #else 563 static int kexec_walk_memblock(struct kexec_buf *kbuf, 564 int (*func)(struct resource *, void *)) 565 { 566 return 0; 567 } 568 #endif 569 570 /** 571 * kexec_walk_resources - call func(data) on free memory regions 572 * @kbuf: Context info for the search. Also passed to @func. 573 * @func: Function to call for each memory region. 574 * 575 * Return: The memory walk will stop when func returns a non-zero value 576 * and that value will be returned. If all free regions are visited without 577 * func returning non-zero, then zero will be returned. 578 */ 579 static int kexec_walk_resources(struct kexec_buf *kbuf, 580 int (*func)(struct resource *, void *)) 581 { 582 if (kbuf->image->type == KEXEC_TYPE_CRASH) 583 return walk_iomem_res_desc(crashk_res.desc, 584 IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY, 585 crashk_res.start, crashk_res.end, 586 kbuf, func); 587 else 588 return walk_system_ram_res(0, ULONG_MAX, kbuf, func); 589 } 590 591 /** 592 * kexec_locate_mem_hole - find free memory for the purgatory or the next kernel 593 * @kbuf: Parameters for the memory search. 594 * 595 * On success, kbuf->mem will have the start address of the memory region found. 596 * 597 * Return: 0 on success, negative errno on error. 598 */ 599 int kexec_locate_mem_hole(struct kexec_buf *kbuf) 600 { 601 int ret; 602 603 /* Arch knows where to place */ 604 if (kbuf->mem != KEXEC_BUF_MEM_UNKNOWN) 605 return 0; 606 607 if (!IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) 608 ret = kexec_walk_resources(kbuf, locate_mem_hole_callback); 609 else 610 ret = kexec_walk_memblock(kbuf, locate_mem_hole_callback); 611 612 return ret == 1 ? 0 : -EADDRNOTAVAIL; 613 } 614 615 /** 616 * arch_kexec_locate_mem_hole - Find free memory to place the segments. 617 * @kbuf: Parameters for the memory search. 618 * 619 * On success, kbuf->mem will have the start address of the memory region found. 620 * 621 * Return: 0 on success, negative errno on error. 622 */ 623 int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) 624 { 625 return kexec_locate_mem_hole(kbuf); 626 } 627 628 /** 629 * kexec_add_buffer - place a buffer in a kexec segment 630 * @kbuf: Buffer contents and memory parameters. 631 * 632 * This function assumes that kexec_mutex is held. 633 * On successful return, @kbuf->mem will have the physical address of 634 * the buffer in memory. 635 * 636 * Return: 0 on success, negative errno on error. 637 */ 638 int kexec_add_buffer(struct kexec_buf *kbuf) 639 { 640 struct kexec_segment *ksegment; 641 int ret; 642 643 /* Currently adding segment this way is allowed only in file mode */ 644 if (!kbuf->image->file_mode) 645 return -EINVAL; 646 647 if (kbuf->image->nr_segments >= KEXEC_SEGMENT_MAX) 648 return -EINVAL; 649 650 /* 651 * Make sure we are not trying to add buffer after allocating 652 * control pages. All segments need to be placed first before 653 * any control pages are allocated. As control page allocation 654 * logic goes through list of segments to make sure there are 655 * no destination overlaps. 656 */ 657 if (!list_empty(&kbuf->image->control_pages)) { 658 WARN_ON(1); 659 return -EINVAL; 660 } 661 662 /* Ensure minimum alignment needed for segments. */ 663 kbuf->memsz = ALIGN(kbuf->memsz, PAGE_SIZE); 664 kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE); 665 666 /* Walk the RAM ranges and allocate a suitable range for the buffer */ 667 ret = arch_kexec_locate_mem_hole(kbuf); 668 if (ret) 669 return ret; 670 671 /* Found a suitable memory range */ 672 ksegment = &kbuf->image->segment[kbuf->image->nr_segments]; 673 ksegment->kbuf = kbuf->buffer; 674 ksegment->bufsz = kbuf->bufsz; 675 ksegment->mem = kbuf->mem; 676 ksegment->memsz = kbuf->memsz; 677 kbuf->image->nr_segments++; 678 return 0; 679 } 680 681 /* Calculate and store the digest of segments */ 682 static int kexec_calculate_store_digests(struct kimage *image) 683 { 684 struct crypto_shash *tfm; 685 struct shash_desc *desc; 686 int ret = 0, i, j, zero_buf_sz, sha_region_sz; 687 size_t desc_size, nullsz; 688 char *digest; 689 void *zero_buf; 690 struct kexec_sha_region *sha_regions; 691 struct purgatory_info *pi = &image->purgatory_info; 692 693 if (!IS_ENABLED(CONFIG_ARCH_HAS_KEXEC_PURGATORY)) 694 return 0; 695 696 zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT); 697 zero_buf_sz = PAGE_SIZE; 698 699 tfm = crypto_alloc_shash("sha256", 0, 0); 700 if (IS_ERR(tfm)) { 701 ret = PTR_ERR(tfm); 702 goto out; 703 } 704 705 desc_size = crypto_shash_descsize(tfm) + sizeof(*desc); 706 desc = kzalloc(desc_size, GFP_KERNEL); 707 if (!desc) { 708 ret = -ENOMEM; 709 goto out_free_tfm; 710 } 711 712 sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region); 713 sha_regions = vzalloc(sha_region_sz); 714 if (!sha_regions) { 715 ret = -ENOMEM; 716 goto out_free_desc; 717 } 718 719 desc->tfm = tfm; 720 721 ret = crypto_shash_init(desc); 722 if (ret < 0) 723 goto out_free_sha_regions; 724 725 digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL); 726 if (!digest) { 727 ret = -ENOMEM; 728 goto out_free_sha_regions; 729 } 730 731 for (j = i = 0; i < image->nr_segments; i++) { 732 struct kexec_segment *ksegment; 733 734 ksegment = &image->segment[i]; 735 /* 736 * Skip purgatory as it will be modified once we put digest 737 * info in purgatory. 738 */ 739 if (ksegment->kbuf == pi->purgatory_buf) 740 continue; 741 742 ret = crypto_shash_update(desc, ksegment->kbuf, 743 ksegment->bufsz); 744 if (ret) 745 break; 746 747 /* 748 * Assume rest of the buffer is filled with zero and 749 * update digest accordingly. 750 */ 751 nullsz = ksegment->memsz - ksegment->bufsz; 752 while (nullsz) { 753 unsigned long bytes = nullsz; 754 755 if (bytes > zero_buf_sz) 756 bytes = zero_buf_sz; 757 ret = crypto_shash_update(desc, zero_buf, bytes); 758 if (ret) 759 break; 760 nullsz -= bytes; 761 } 762 763 if (ret) 764 break; 765 766 sha_regions[j].start = ksegment->mem; 767 sha_regions[j].len = ksegment->memsz; 768 j++; 769 } 770 771 if (!ret) { 772 ret = crypto_shash_final(desc, digest); 773 if (ret) 774 goto out_free_digest; 775 ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", 776 sha_regions, sha_region_sz, 0); 777 if (ret) 778 goto out_free_digest; 779 780 ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", 781 digest, SHA256_DIGEST_SIZE, 0); 782 if (ret) 783 goto out_free_digest; 784 } 785 786 out_free_digest: 787 kfree(digest); 788 out_free_sha_regions: 789 vfree(sha_regions); 790 out_free_desc: 791 kfree(desc); 792 out_free_tfm: 793 kfree(tfm); 794 out: 795 return ret; 796 } 797 798 #ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY 799 /* 800 * kexec_purgatory_setup_kbuf - prepare buffer to load purgatory. 801 * @pi: Purgatory to be loaded. 802 * @kbuf: Buffer to setup. 803 * 804 * Allocates the memory needed for the buffer. Caller is responsible to free 805 * the memory after use. 806 * 807 * Return: 0 on success, negative errno on error. 808 */ 809 static int kexec_purgatory_setup_kbuf(struct purgatory_info *pi, 810 struct kexec_buf *kbuf) 811 { 812 const Elf_Shdr *sechdrs; 813 unsigned long bss_align; 814 unsigned long bss_sz; 815 unsigned long align; 816 int i, ret; 817 818 sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; 819 kbuf->buf_align = bss_align = 1; 820 kbuf->bufsz = bss_sz = 0; 821 822 for (i = 0; i < pi->ehdr->e_shnum; i++) { 823 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 824 continue; 825 826 align = sechdrs[i].sh_addralign; 827 if (sechdrs[i].sh_type != SHT_NOBITS) { 828 if (kbuf->buf_align < align) 829 kbuf->buf_align = align; 830 kbuf->bufsz = ALIGN(kbuf->bufsz, align); 831 kbuf->bufsz += sechdrs[i].sh_size; 832 } else { 833 if (bss_align < align) 834 bss_align = align; 835 bss_sz = ALIGN(bss_sz, align); 836 bss_sz += sechdrs[i].sh_size; 837 } 838 } 839 kbuf->bufsz = ALIGN(kbuf->bufsz, bss_align); 840 kbuf->memsz = kbuf->bufsz + bss_sz; 841 if (kbuf->buf_align < bss_align) 842 kbuf->buf_align = bss_align; 843 844 kbuf->buffer = vzalloc(kbuf->bufsz); 845 if (!kbuf->buffer) 846 return -ENOMEM; 847 pi->purgatory_buf = kbuf->buffer; 848 849 ret = kexec_add_buffer(kbuf); 850 if (ret) 851 goto out; 852 853 return 0; 854 out: 855 vfree(pi->purgatory_buf); 856 pi->purgatory_buf = NULL; 857 return ret; 858 } 859 860 /* 861 * kexec_purgatory_setup_sechdrs - prepares the pi->sechdrs buffer. 862 * @pi: Purgatory to be loaded. 863 * @kbuf: Buffer prepared to store purgatory. 864 * 865 * Allocates the memory needed for the buffer. Caller is responsible to free 866 * the memory after use. 867 * 868 * Return: 0 on success, negative errno on error. 869 */ 870 static int kexec_purgatory_setup_sechdrs(struct purgatory_info *pi, 871 struct kexec_buf *kbuf) 872 { 873 unsigned long bss_addr; 874 unsigned long offset; 875 Elf_Shdr *sechdrs; 876 int i; 877 878 /* 879 * The section headers in kexec_purgatory are read-only. In order to 880 * have them modifiable make a temporary copy. 881 */ 882 sechdrs = vzalloc(array_size(sizeof(Elf_Shdr), pi->ehdr->e_shnum)); 883 if (!sechdrs) 884 return -ENOMEM; 885 memcpy(sechdrs, (void *)pi->ehdr + pi->ehdr->e_shoff, 886 pi->ehdr->e_shnum * sizeof(Elf_Shdr)); 887 pi->sechdrs = sechdrs; 888 889 offset = 0; 890 bss_addr = kbuf->mem + kbuf->bufsz; 891 kbuf->image->start = pi->ehdr->e_entry; 892 893 for (i = 0; i < pi->ehdr->e_shnum; i++) { 894 unsigned long align; 895 void *src, *dst; 896 897 if (!(sechdrs[i].sh_flags & SHF_ALLOC)) 898 continue; 899 900 align = sechdrs[i].sh_addralign; 901 if (sechdrs[i].sh_type == SHT_NOBITS) { 902 bss_addr = ALIGN(bss_addr, align); 903 sechdrs[i].sh_addr = bss_addr; 904 bss_addr += sechdrs[i].sh_size; 905 continue; 906 } 907 908 offset = ALIGN(offset, align); 909 if (sechdrs[i].sh_flags & SHF_EXECINSTR && 910 pi->ehdr->e_entry >= sechdrs[i].sh_addr && 911 pi->ehdr->e_entry < (sechdrs[i].sh_addr 912 + sechdrs[i].sh_size)) { 913 kbuf->image->start -= sechdrs[i].sh_addr; 914 kbuf->image->start += kbuf->mem + offset; 915 } 916 917 src = (void *)pi->ehdr + sechdrs[i].sh_offset; 918 dst = pi->purgatory_buf + offset; 919 memcpy(dst, src, sechdrs[i].sh_size); 920 921 sechdrs[i].sh_addr = kbuf->mem + offset; 922 sechdrs[i].sh_offset = offset; 923 offset += sechdrs[i].sh_size; 924 } 925 926 return 0; 927 } 928 929 static int kexec_apply_relocations(struct kimage *image) 930 { 931 int i, ret; 932 struct purgatory_info *pi = &image->purgatory_info; 933 const Elf_Shdr *sechdrs; 934 935 sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; 936 937 for (i = 0; i < pi->ehdr->e_shnum; i++) { 938 const Elf_Shdr *relsec; 939 const Elf_Shdr *symtab; 940 Elf_Shdr *section; 941 942 relsec = sechdrs + i; 943 944 if (relsec->sh_type != SHT_RELA && 945 relsec->sh_type != SHT_REL) 946 continue; 947 948 /* 949 * For section of type SHT_RELA/SHT_REL, 950 * ->sh_link contains section header index of associated 951 * symbol table. And ->sh_info contains section header 952 * index of section to which relocations apply. 953 */ 954 if (relsec->sh_info >= pi->ehdr->e_shnum || 955 relsec->sh_link >= pi->ehdr->e_shnum) 956 return -ENOEXEC; 957 958 section = pi->sechdrs + relsec->sh_info; 959 symtab = sechdrs + relsec->sh_link; 960 961 if (!(section->sh_flags & SHF_ALLOC)) 962 continue; 963 964 /* 965 * symtab->sh_link contain section header index of associated 966 * string table. 967 */ 968 if (symtab->sh_link >= pi->ehdr->e_shnum) 969 /* Invalid section number? */ 970 continue; 971 972 /* 973 * Respective architecture needs to provide support for applying 974 * relocations of type SHT_RELA/SHT_REL. 975 */ 976 if (relsec->sh_type == SHT_RELA) 977 ret = arch_kexec_apply_relocations_add(pi, section, 978 relsec, symtab); 979 else if (relsec->sh_type == SHT_REL) 980 ret = arch_kexec_apply_relocations(pi, section, 981 relsec, symtab); 982 if (ret) 983 return ret; 984 } 985 986 return 0; 987 } 988 989 /* 990 * kexec_load_purgatory - Load and relocate the purgatory object. 991 * @image: Image to add the purgatory to. 992 * @kbuf: Memory parameters to use. 993 * 994 * Allocates the memory needed for image->purgatory_info.sechdrs and 995 * image->purgatory_info.purgatory_buf/kbuf->buffer. Caller is responsible 996 * to free the memory after use. 997 * 998 * Return: 0 on success, negative errno on error. 999 */ 1000 int kexec_load_purgatory(struct kimage *image, struct kexec_buf *kbuf) 1001 { 1002 struct purgatory_info *pi = &image->purgatory_info; 1003 int ret; 1004 1005 if (kexec_purgatory_size <= 0) 1006 return -EINVAL; 1007 1008 pi->ehdr = (const Elf_Ehdr *)kexec_purgatory; 1009 1010 ret = kexec_purgatory_setup_kbuf(pi, kbuf); 1011 if (ret) 1012 return ret; 1013 1014 ret = kexec_purgatory_setup_sechdrs(pi, kbuf); 1015 if (ret) 1016 goto out_free_kbuf; 1017 1018 ret = kexec_apply_relocations(image); 1019 if (ret) 1020 goto out; 1021 1022 return 0; 1023 out: 1024 vfree(pi->sechdrs); 1025 pi->sechdrs = NULL; 1026 out_free_kbuf: 1027 vfree(pi->purgatory_buf); 1028 pi->purgatory_buf = NULL; 1029 return ret; 1030 } 1031 1032 /* 1033 * kexec_purgatory_find_symbol - find a symbol in the purgatory 1034 * @pi: Purgatory to search in. 1035 * @name: Name of the symbol. 1036 * 1037 * Return: pointer to symbol in read-only symtab on success, NULL on error. 1038 */ 1039 static const Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi, 1040 const char *name) 1041 { 1042 const Elf_Shdr *sechdrs; 1043 const Elf_Ehdr *ehdr; 1044 const Elf_Sym *syms; 1045 const char *strtab; 1046 int i, k; 1047 1048 if (!pi->ehdr) 1049 return NULL; 1050 1051 ehdr = pi->ehdr; 1052 sechdrs = (void *)ehdr + ehdr->e_shoff; 1053 1054 for (i = 0; i < ehdr->e_shnum; i++) { 1055 if (sechdrs[i].sh_type != SHT_SYMTAB) 1056 continue; 1057 1058 if (sechdrs[i].sh_link >= ehdr->e_shnum) 1059 /* Invalid strtab section number */ 1060 continue; 1061 strtab = (void *)ehdr + sechdrs[sechdrs[i].sh_link].sh_offset; 1062 syms = (void *)ehdr + sechdrs[i].sh_offset; 1063 1064 /* Go through symbols for a match */ 1065 for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) { 1066 if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL) 1067 continue; 1068 1069 if (strcmp(strtab + syms[k].st_name, name) != 0) 1070 continue; 1071 1072 if (syms[k].st_shndx == SHN_UNDEF || 1073 syms[k].st_shndx >= ehdr->e_shnum) { 1074 pr_debug("Symbol: %s has bad section index %d.\n", 1075 name, syms[k].st_shndx); 1076 return NULL; 1077 } 1078 1079 /* Found the symbol we are looking for */ 1080 return &syms[k]; 1081 } 1082 } 1083 1084 return NULL; 1085 } 1086 1087 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name) 1088 { 1089 struct purgatory_info *pi = &image->purgatory_info; 1090 const Elf_Sym *sym; 1091 Elf_Shdr *sechdr; 1092 1093 sym = kexec_purgatory_find_symbol(pi, name); 1094 if (!sym) 1095 return ERR_PTR(-EINVAL); 1096 1097 sechdr = &pi->sechdrs[sym->st_shndx]; 1098 1099 /* 1100 * Returns the address where symbol will finally be loaded after 1101 * kexec_load_segment() 1102 */ 1103 return (void *)(sechdr->sh_addr + sym->st_value); 1104 } 1105 1106 /* 1107 * Get or set value of a symbol. If "get_value" is true, symbol value is 1108 * returned in buf otherwise symbol value is set based on value in buf. 1109 */ 1110 int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, 1111 void *buf, unsigned int size, bool get_value) 1112 { 1113 struct purgatory_info *pi = &image->purgatory_info; 1114 const Elf_Sym *sym; 1115 Elf_Shdr *sec; 1116 char *sym_buf; 1117 1118 sym = kexec_purgatory_find_symbol(pi, name); 1119 if (!sym) 1120 return -EINVAL; 1121 1122 if (sym->st_size != size) { 1123 pr_err("symbol %s size mismatch: expected %lu actual %u\n", 1124 name, (unsigned long)sym->st_size, size); 1125 return -EINVAL; 1126 } 1127 1128 sec = pi->sechdrs + sym->st_shndx; 1129 1130 if (sec->sh_type == SHT_NOBITS) { 1131 pr_err("symbol %s is in a bss section. Cannot %s\n", name, 1132 get_value ? "get" : "set"); 1133 return -EINVAL; 1134 } 1135 1136 sym_buf = (char *)pi->purgatory_buf + sec->sh_offset + sym->st_value; 1137 1138 if (get_value) 1139 memcpy((void *)buf, sym_buf, size); 1140 else 1141 memcpy((void *)sym_buf, buf, size); 1142 1143 return 0; 1144 } 1145 #endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */ 1146 1147 int crash_exclude_mem_range(struct crash_mem *mem, 1148 unsigned long long mstart, unsigned long long mend) 1149 { 1150 int i, j; 1151 unsigned long long start, end, p_start, p_end; 1152 struct crash_mem_range temp_range = {0, 0}; 1153 1154 for (i = 0; i < mem->nr_ranges; i++) { 1155 start = mem->ranges[i].start; 1156 end = mem->ranges[i].end; 1157 p_start = mstart; 1158 p_end = mend; 1159 1160 if (mstart > end || mend < start) 1161 continue; 1162 1163 /* Truncate any area outside of range */ 1164 if (mstart < start) 1165 p_start = start; 1166 if (mend > end) 1167 p_end = end; 1168 1169 /* Found completely overlapping range */ 1170 if (p_start == start && p_end == end) { 1171 mem->ranges[i].start = 0; 1172 mem->ranges[i].end = 0; 1173 if (i < mem->nr_ranges - 1) { 1174 /* Shift rest of the ranges to left */ 1175 for (j = i; j < mem->nr_ranges - 1; j++) { 1176 mem->ranges[j].start = 1177 mem->ranges[j+1].start; 1178 mem->ranges[j].end = 1179 mem->ranges[j+1].end; 1180 } 1181 1182 /* 1183 * Continue to check if there are another overlapping ranges 1184 * from the current position because of shifting the above 1185 * mem ranges. 1186 */ 1187 i--; 1188 mem->nr_ranges--; 1189 continue; 1190 } 1191 mem->nr_ranges--; 1192 return 0; 1193 } 1194 1195 if (p_start > start && p_end < end) { 1196 /* Split original range */ 1197 mem->ranges[i].end = p_start - 1; 1198 temp_range.start = p_end + 1; 1199 temp_range.end = end; 1200 } else if (p_start != start) 1201 mem->ranges[i].end = p_start - 1; 1202 else 1203 mem->ranges[i].start = p_end + 1; 1204 break; 1205 } 1206 1207 /* If a split happened, add the split to array */ 1208 if (!temp_range.end) 1209 return 0; 1210 1211 /* Split happened */ 1212 if (i == mem->max_nr_ranges - 1) 1213 return -ENOMEM; 1214 1215 /* Location where new range should go */ 1216 j = i + 1; 1217 if (j < mem->nr_ranges) { 1218 /* Move over all ranges one slot towards the end */ 1219 for (i = mem->nr_ranges - 1; i >= j; i--) 1220 mem->ranges[i + 1] = mem->ranges[i]; 1221 } 1222 1223 mem->ranges[j].start = temp_range.start; 1224 mem->ranges[j].end = temp_range.end; 1225 mem->nr_ranges++; 1226 return 0; 1227 } 1228 1229 int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, 1230 void **addr, unsigned long *sz) 1231 { 1232 Elf64_Ehdr *ehdr; 1233 Elf64_Phdr *phdr; 1234 unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; 1235 unsigned char *buf; 1236 unsigned int cpu, i; 1237 unsigned long long notes_addr; 1238 unsigned long mstart, mend; 1239 1240 /* extra phdr for vmcoreinfo ELF note */ 1241 nr_phdr = nr_cpus + 1; 1242 nr_phdr += mem->nr_ranges; 1243 1244 /* 1245 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping 1246 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). 1247 * I think this is required by tools like gdb. So same physical 1248 * memory will be mapped in two ELF headers. One will contain kernel 1249 * text virtual addresses and other will have __va(physical) addresses. 1250 */ 1251 1252 nr_phdr++; 1253 elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); 1254 elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); 1255 1256 buf = vzalloc(elf_sz); 1257 if (!buf) 1258 return -ENOMEM; 1259 1260 ehdr = (Elf64_Ehdr *)buf; 1261 phdr = (Elf64_Phdr *)(ehdr + 1); 1262 memcpy(ehdr->e_ident, ELFMAG, SELFMAG); 1263 ehdr->e_ident[EI_CLASS] = ELFCLASS64; 1264 ehdr->e_ident[EI_DATA] = ELFDATA2LSB; 1265 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 1266 ehdr->e_ident[EI_OSABI] = ELF_OSABI; 1267 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); 1268 ehdr->e_type = ET_CORE; 1269 ehdr->e_machine = ELF_ARCH; 1270 ehdr->e_version = EV_CURRENT; 1271 ehdr->e_phoff = sizeof(Elf64_Ehdr); 1272 ehdr->e_ehsize = sizeof(Elf64_Ehdr); 1273 ehdr->e_phentsize = sizeof(Elf64_Phdr); 1274 1275 /* Prepare one phdr of type PT_NOTE for each present CPU */ 1276 for_each_present_cpu(cpu) { 1277 phdr->p_type = PT_NOTE; 1278 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); 1279 phdr->p_offset = phdr->p_paddr = notes_addr; 1280 phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); 1281 (ehdr->e_phnum)++; 1282 phdr++; 1283 } 1284 1285 /* Prepare one PT_NOTE header for vmcoreinfo */ 1286 phdr->p_type = PT_NOTE; 1287 phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); 1288 phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; 1289 (ehdr->e_phnum)++; 1290 phdr++; 1291 1292 /* Prepare PT_LOAD type program header for kernel text region */ 1293 if (need_kernel_map) { 1294 phdr->p_type = PT_LOAD; 1295 phdr->p_flags = PF_R|PF_W|PF_X; 1296 phdr->p_vaddr = (unsigned long) _text; 1297 phdr->p_filesz = phdr->p_memsz = _end - _text; 1298 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); 1299 ehdr->e_phnum++; 1300 phdr++; 1301 } 1302 1303 /* Go through all the ranges in mem->ranges[] and prepare phdr */ 1304 for (i = 0; i < mem->nr_ranges; i++) { 1305 mstart = mem->ranges[i].start; 1306 mend = mem->ranges[i].end; 1307 1308 phdr->p_type = PT_LOAD; 1309 phdr->p_flags = PF_R|PF_W|PF_X; 1310 phdr->p_offset = mstart; 1311 1312 phdr->p_paddr = mstart; 1313 phdr->p_vaddr = (unsigned long) __va(mstart); 1314 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; 1315 phdr->p_align = 0; 1316 ehdr->e_phnum++; 1317 pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", 1318 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, 1319 ehdr->e_phnum, phdr->p_offset); 1320 phdr++; 1321 } 1322 1323 *addr = buf; 1324 *sz = elf_sz; 1325 return 0; 1326 } 1327