1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * crash.c - kernel crash support code. 4 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 5 */ 6 7 #include <linux/buildid.h> 8 #include <linux/init.h> 9 #include <linux/utsname.h> 10 #include <linux/vmalloc.h> 11 #include <linux/sizes.h> 12 #include <linux/kexec.h> 13 #include <linux/memory.h> 14 #include <linux/cpuhotplug.h> 15 #include <linux/memblock.h> 16 #include <linux/kexec.h> 17 #include <linux/kmemleak.h> 18 19 #include <asm/page.h> 20 #include <asm/sections.h> 21 22 #include <crypto/sha1.h> 23 24 #include "kallsyms_internal.h" 25 #include "kexec_internal.h" 26 27 /* Per cpu memory for storing cpu states in case of system crash. */ 28 note_buf_t __percpu *crash_notes; 29 30 /* vmcoreinfo stuff */ 31 unsigned char *vmcoreinfo_data; 32 size_t vmcoreinfo_size; 33 u32 *vmcoreinfo_note; 34 35 /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ 36 static unsigned char *vmcoreinfo_data_safecopy; 37 38 /* Location of the reserved area for the crash kernel */ 39 struct resource crashk_res = { 40 .name = "Crash kernel", 41 .start = 0, 42 .end = 0, 43 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, 44 .desc = IORES_DESC_CRASH_KERNEL 45 }; 46 struct resource crashk_low_res = { 47 .name = "Crash kernel", 48 .start = 0, 49 .end = 0, 50 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, 51 .desc = IORES_DESC_CRASH_KERNEL 52 }; 53 54 /* 55 * parsing the "crashkernel" commandline 56 * 57 * this code is intended to be called from architecture specific code 58 */ 59 60 61 /* 62 * This function parses command lines in the format 63 * 64 * crashkernel=ramsize-range:size[,...][@offset] 65 * 66 * The function returns 0 on success and -EINVAL on failure. 67 */ 68 static int __init parse_crashkernel_mem(char *cmdline, 69 unsigned long long system_ram, 70 unsigned long long *crash_size, 71 unsigned long long *crash_base) 72 { 73 char *cur = cmdline, *tmp; 74 unsigned long long total_mem = system_ram; 75 76 /* 77 * Firmware sometimes reserves some memory regions for its own use, 78 * so the system memory size is less than the actual physical memory 79 * size. Work around this by rounding up the total size to 128M, 80 * which is enough for most test cases. 81 */ 82 total_mem = roundup(total_mem, SZ_128M); 83 84 /* for each entry of the comma-separated list */ 85 do { 86 unsigned long long start, end = ULLONG_MAX, size; 87 88 /* get the start of the range */ 89 start = memparse(cur, &tmp); 90 if (cur == tmp) { 91 pr_warn("crashkernel: Memory value expected\n"); 92 return -EINVAL; 93 } 94 cur = tmp; 95 if (*cur != '-') { 96 pr_warn("crashkernel: '-' expected\n"); 97 return -EINVAL; 98 } 99 cur++; 100 101 /* if no ':' is here, than we read the end */ 102 if (*cur != ':') { 103 end = memparse(cur, &tmp); 104 if (cur == tmp) { 105 pr_warn("crashkernel: Memory value expected\n"); 106 return -EINVAL; 107 } 108 cur = tmp; 109 if (end <= start) { 110 pr_warn("crashkernel: end <= start\n"); 111 return -EINVAL; 112 } 113 } 114 115 if (*cur != ':') { 116 pr_warn("crashkernel: ':' expected\n"); 117 return -EINVAL; 118 } 119 cur++; 120 121 size = memparse(cur, &tmp); 122 if (cur == tmp) { 123 pr_warn("Memory value expected\n"); 124 return -EINVAL; 125 } 126 cur = tmp; 127 if (size >= total_mem) { 128 pr_warn("crashkernel: invalid size\n"); 129 return -EINVAL; 130 } 131 132 /* match ? */ 133 if (total_mem >= start && total_mem < end) { 134 *crash_size = size; 135 break; 136 } 137 } while (*cur++ == ','); 138 139 if (*crash_size > 0) { 140 while (*cur && *cur != ' ' && *cur != '@') 141 cur++; 142 if (*cur == '@') { 143 cur++; 144 *crash_base = memparse(cur, &tmp); 145 if (cur == tmp) { 146 pr_warn("Memory value expected after '@'\n"); 147 return -EINVAL; 148 } 149 } 150 } else 151 pr_info("crashkernel size resulted in zero bytes\n"); 152 153 return 0; 154 } 155 156 /* 157 * That function parses "simple" (old) crashkernel command lines like 158 * 159 * crashkernel=size[@offset] 160 * 161 * It returns 0 on success and -EINVAL on failure. 162 */ 163 static int __init parse_crashkernel_simple(char *cmdline, 164 unsigned long long *crash_size, 165 unsigned long long *crash_base) 166 { 167 char *cur = cmdline; 168 169 *crash_size = memparse(cmdline, &cur); 170 if (cmdline == cur) { 171 pr_warn("crashkernel: memory value expected\n"); 172 return -EINVAL; 173 } 174 175 if (*cur == '@') 176 *crash_base = memparse(cur+1, &cur); 177 else if (*cur != ' ' && *cur != '\0') { 178 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 179 return -EINVAL; 180 } 181 182 return 0; 183 } 184 185 #define SUFFIX_HIGH 0 186 #define SUFFIX_LOW 1 187 #define SUFFIX_NULL 2 188 static __initdata char *suffix_tbl[] = { 189 [SUFFIX_HIGH] = ",high", 190 [SUFFIX_LOW] = ",low", 191 [SUFFIX_NULL] = NULL, 192 }; 193 194 /* 195 * That function parses "suffix" crashkernel command lines like 196 * 197 * crashkernel=size,[high|low] 198 * 199 * It returns 0 on success and -EINVAL on failure. 200 */ 201 static int __init parse_crashkernel_suffix(char *cmdline, 202 unsigned long long *crash_size, 203 const char *suffix) 204 { 205 char *cur = cmdline; 206 207 *crash_size = memparse(cmdline, &cur); 208 if (cmdline == cur) { 209 pr_warn("crashkernel: memory value expected\n"); 210 return -EINVAL; 211 } 212 213 /* check with suffix */ 214 if (strncmp(cur, suffix, strlen(suffix))) { 215 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 216 return -EINVAL; 217 } 218 cur += strlen(suffix); 219 if (*cur != ' ' && *cur != '\0') { 220 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 221 return -EINVAL; 222 } 223 224 return 0; 225 } 226 227 static __init char *get_last_crashkernel(char *cmdline, 228 const char *name, 229 const char *suffix) 230 { 231 char *p = cmdline, *ck_cmdline = NULL; 232 233 /* find crashkernel and use the last one if there are more */ 234 p = strstr(p, name); 235 while (p) { 236 char *end_p = strchr(p, ' '); 237 char *q; 238 239 if (!end_p) 240 end_p = p + strlen(p); 241 242 if (!suffix) { 243 int i; 244 245 /* skip the one with any known suffix */ 246 for (i = 0; suffix_tbl[i]; i++) { 247 q = end_p - strlen(suffix_tbl[i]); 248 if (!strncmp(q, suffix_tbl[i], 249 strlen(suffix_tbl[i]))) 250 goto next; 251 } 252 ck_cmdline = p; 253 } else { 254 q = end_p - strlen(suffix); 255 if (!strncmp(q, suffix, strlen(suffix))) 256 ck_cmdline = p; 257 } 258 next: 259 p = strstr(p+1, name); 260 } 261 262 return ck_cmdline; 263 } 264 265 static int __init __parse_crashkernel(char *cmdline, 266 unsigned long long system_ram, 267 unsigned long long *crash_size, 268 unsigned long long *crash_base, 269 const char *suffix) 270 { 271 char *first_colon, *first_space; 272 char *ck_cmdline; 273 char *name = "crashkernel="; 274 275 BUG_ON(!crash_size || !crash_base); 276 *crash_size = 0; 277 *crash_base = 0; 278 279 ck_cmdline = get_last_crashkernel(cmdline, name, suffix); 280 if (!ck_cmdline) 281 return -ENOENT; 282 283 ck_cmdline += strlen(name); 284 285 if (suffix) 286 return parse_crashkernel_suffix(ck_cmdline, crash_size, 287 suffix); 288 /* 289 * if the commandline contains a ':', then that's the extended 290 * syntax -- if not, it must be the classic syntax 291 */ 292 first_colon = strchr(ck_cmdline, ':'); 293 first_space = strchr(ck_cmdline, ' '); 294 if (first_colon && (!first_space || first_colon < first_space)) 295 return parse_crashkernel_mem(ck_cmdline, system_ram, 296 crash_size, crash_base); 297 298 return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); 299 } 300 301 /* 302 * That function is the entry point for command line parsing and should be 303 * called from the arch-specific code. 304 * 305 * If crashkernel=,high|low is supported on architecture, non-NULL values 306 * should be passed to parameters 'low_size' and 'high'. 307 */ 308 int __init parse_crashkernel(char *cmdline, 309 unsigned long long system_ram, 310 unsigned long long *crash_size, 311 unsigned long long *crash_base, 312 unsigned long long *low_size, 313 bool *high) 314 { 315 int ret; 316 317 /* crashkernel=X[@offset] */ 318 ret = __parse_crashkernel(cmdline, system_ram, crash_size, 319 crash_base, NULL); 320 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION 321 /* 322 * If non-NULL 'high' passed in and no normal crashkernel 323 * setting detected, try parsing crashkernel=,high|low. 324 */ 325 if (high && ret == -ENOENT) { 326 ret = __parse_crashkernel(cmdline, 0, crash_size, 327 crash_base, suffix_tbl[SUFFIX_HIGH]); 328 if (ret || !*crash_size) 329 return -EINVAL; 330 331 /* 332 * crashkernel=Y,low can be specified or not, but invalid value 333 * is not allowed. 334 */ 335 ret = __parse_crashkernel(cmdline, 0, low_size, 336 crash_base, suffix_tbl[SUFFIX_LOW]); 337 if (ret == -ENOENT) { 338 *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; 339 ret = 0; 340 } else if (ret) { 341 return ret; 342 } 343 344 *high = true; 345 } 346 #endif 347 if (!*crash_size) 348 ret = -EINVAL; 349 350 return ret; 351 } 352 353 /* 354 * Add a dummy early_param handler to mark crashkernel= as a known command line 355 * parameter and suppress incorrect warnings in init/main.c. 356 */ 357 static int __init parse_crashkernel_dummy(char *arg) 358 { 359 return 0; 360 } 361 early_param("crashkernel", parse_crashkernel_dummy); 362 363 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION 364 static int __init reserve_crashkernel_low(unsigned long long low_size) 365 { 366 #ifdef CONFIG_64BIT 367 unsigned long long low_base; 368 369 low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); 370 if (!low_base) { 371 pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); 372 return -ENOMEM; 373 } 374 375 pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", 376 low_base, low_base + low_size, low_size >> 20); 377 378 crashk_low_res.start = low_base; 379 crashk_low_res.end = low_base + low_size - 1; 380 insert_resource(&iomem_resource, &crashk_low_res); 381 #endif 382 return 0; 383 } 384 385 void __init reserve_crashkernel_generic(char *cmdline, 386 unsigned long long crash_size, 387 unsigned long long crash_base, 388 unsigned long long crash_low_size, 389 bool high) 390 { 391 unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; 392 bool fixed_base = false; 393 394 /* User specifies base address explicitly. */ 395 if (crash_base) { 396 fixed_base = true; 397 search_base = crash_base; 398 search_end = crash_base + crash_size; 399 } else if (high) { 400 search_base = CRASH_ADDR_LOW_MAX; 401 search_end = CRASH_ADDR_HIGH_MAX; 402 } 403 404 retry: 405 crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, 406 search_base, search_end); 407 if (!crash_base) { 408 /* 409 * For crashkernel=size[KMG]@offset[KMG], print out failure 410 * message if can't reserve the specified region. 411 */ 412 if (fixed_base) { 413 pr_warn("crashkernel reservation failed - memory is in use.\n"); 414 return; 415 } 416 417 /* 418 * For crashkernel=size[KMG], if the first attempt was for 419 * low memory, fall back to high memory, the minimum required 420 * low memory will be reserved later. 421 */ 422 if (!high && search_end == CRASH_ADDR_LOW_MAX) { 423 search_end = CRASH_ADDR_HIGH_MAX; 424 search_base = CRASH_ADDR_LOW_MAX; 425 crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; 426 goto retry; 427 } 428 429 /* 430 * For crashkernel=size[KMG],high, if the first attempt was 431 * for high memory, fall back to low memory. 432 */ 433 if (high && search_end == CRASH_ADDR_HIGH_MAX) { 434 search_end = CRASH_ADDR_LOW_MAX; 435 search_base = 0; 436 goto retry; 437 } 438 pr_warn("cannot allocate crashkernel (size:0x%llx)\n", 439 crash_size); 440 return; 441 } 442 443 if ((crash_base >= CRASH_ADDR_LOW_MAX) && 444 crash_low_size && reserve_crashkernel_low(crash_low_size)) { 445 memblock_phys_free(crash_base, crash_size); 446 return; 447 } 448 449 pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", 450 crash_base, crash_base + crash_size, crash_size >> 20); 451 452 /* 453 * The crashkernel memory will be removed from the kernel linear 454 * map. Inform kmemleak so that it won't try to access it. 455 */ 456 kmemleak_ignore_phys(crash_base); 457 if (crashk_low_res.end) 458 kmemleak_ignore_phys(crashk_low_res.start); 459 460 crashk_res.start = crash_base; 461 crashk_res.end = crash_base + crash_size - 1; 462 insert_resource(&iomem_resource, &crashk_res); 463 } 464 #endif 465 466 int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, 467 void **addr, unsigned long *sz) 468 { 469 Elf64_Ehdr *ehdr; 470 Elf64_Phdr *phdr; 471 unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; 472 unsigned char *buf; 473 unsigned int cpu, i; 474 unsigned long long notes_addr; 475 unsigned long mstart, mend; 476 477 /* extra phdr for vmcoreinfo ELF note */ 478 nr_phdr = nr_cpus + 1; 479 nr_phdr += mem->nr_ranges; 480 481 /* 482 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping 483 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). 484 * I think this is required by tools like gdb. So same physical 485 * memory will be mapped in two ELF headers. One will contain kernel 486 * text virtual addresses and other will have __va(physical) addresses. 487 */ 488 489 nr_phdr++; 490 elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); 491 elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); 492 493 buf = vzalloc(elf_sz); 494 if (!buf) 495 return -ENOMEM; 496 497 ehdr = (Elf64_Ehdr *)buf; 498 phdr = (Elf64_Phdr *)(ehdr + 1); 499 memcpy(ehdr->e_ident, ELFMAG, SELFMAG); 500 ehdr->e_ident[EI_CLASS] = ELFCLASS64; 501 ehdr->e_ident[EI_DATA] = ELFDATA2LSB; 502 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 503 ehdr->e_ident[EI_OSABI] = ELF_OSABI; 504 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); 505 ehdr->e_type = ET_CORE; 506 ehdr->e_machine = ELF_ARCH; 507 ehdr->e_version = EV_CURRENT; 508 ehdr->e_phoff = sizeof(Elf64_Ehdr); 509 ehdr->e_ehsize = sizeof(Elf64_Ehdr); 510 ehdr->e_phentsize = sizeof(Elf64_Phdr); 511 512 /* Prepare one phdr of type PT_NOTE for each possible CPU */ 513 for_each_possible_cpu(cpu) { 514 phdr->p_type = PT_NOTE; 515 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); 516 phdr->p_offset = phdr->p_paddr = notes_addr; 517 phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); 518 (ehdr->e_phnum)++; 519 phdr++; 520 } 521 522 /* Prepare one PT_NOTE header for vmcoreinfo */ 523 phdr->p_type = PT_NOTE; 524 phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); 525 phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; 526 (ehdr->e_phnum)++; 527 phdr++; 528 529 /* Prepare PT_LOAD type program header for kernel text region */ 530 if (need_kernel_map) { 531 phdr->p_type = PT_LOAD; 532 phdr->p_flags = PF_R|PF_W|PF_X; 533 phdr->p_vaddr = (unsigned long) _text; 534 phdr->p_filesz = phdr->p_memsz = _end - _text; 535 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); 536 ehdr->e_phnum++; 537 phdr++; 538 } 539 540 /* Go through all the ranges in mem->ranges[] and prepare phdr */ 541 for (i = 0; i < mem->nr_ranges; i++) { 542 mstart = mem->ranges[i].start; 543 mend = mem->ranges[i].end; 544 545 phdr->p_type = PT_LOAD; 546 phdr->p_flags = PF_R|PF_W|PF_X; 547 phdr->p_offset = mstart; 548 549 phdr->p_paddr = mstart; 550 phdr->p_vaddr = (unsigned long) __va(mstart); 551 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; 552 phdr->p_align = 0; 553 ehdr->e_phnum++; 554 pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", 555 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, 556 ehdr->e_phnum, phdr->p_offset); 557 phdr++; 558 } 559 560 *addr = buf; 561 *sz = elf_sz; 562 return 0; 563 } 564 565 int crash_exclude_mem_range(struct crash_mem *mem, 566 unsigned long long mstart, unsigned long long mend) 567 { 568 int i, j; 569 unsigned long long start, end, p_start, p_end; 570 struct range temp_range = {0, 0}; 571 572 for (i = 0; i < mem->nr_ranges; i++) { 573 start = mem->ranges[i].start; 574 end = mem->ranges[i].end; 575 p_start = mstart; 576 p_end = mend; 577 578 if (mstart > end || mend < start) 579 continue; 580 581 /* Truncate any area outside of range */ 582 if (mstart < start) 583 p_start = start; 584 if (mend > end) 585 p_end = end; 586 587 /* Found completely overlapping range */ 588 if (p_start == start && p_end == end) { 589 mem->ranges[i].start = 0; 590 mem->ranges[i].end = 0; 591 if (i < mem->nr_ranges - 1) { 592 /* Shift rest of the ranges to left */ 593 for (j = i; j < mem->nr_ranges - 1; j++) { 594 mem->ranges[j].start = 595 mem->ranges[j+1].start; 596 mem->ranges[j].end = 597 mem->ranges[j+1].end; 598 } 599 600 /* 601 * Continue to check if there are another overlapping ranges 602 * from the current position because of shifting the above 603 * mem ranges. 604 */ 605 i--; 606 mem->nr_ranges--; 607 continue; 608 } 609 mem->nr_ranges--; 610 return 0; 611 } 612 613 if (p_start > start && p_end < end) { 614 /* Split original range */ 615 mem->ranges[i].end = p_start - 1; 616 temp_range.start = p_end + 1; 617 temp_range.end = end; 618 } else if (p_start != start) 619 mem->ranges[i].end = p_start - 1; 620 else 621 mem->ranges[i].start = p_end + 1; 622 break; 623 } 624 625 /* If a split happened, add the split to array */ 626 if (!temp_range.end) 627 return 0; 628 629 /* Split happened */ 630 if (i == mem->max_nr_ranges - 1) 631 return -ENOMEM; 632 633 /* Location where new range should go */ 634 j = i + 1; 635 if (j < mem->nr_ranges) { 636 /* Move over all ranges one slot towards the end */ 637 for (i = mem->nr_ranges - 1; i >= j; i--) 638 mem->ranges[i + 1] = mem->ranges[i]; 639 } 640 641 mem->ranges[j].start = temp_range.start; 642 mem->ranges[j].end = temp_range.end; 643 mem->nr_ranges++; 644 return 0; 645 } 646 647 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, 648 void *data, size_t data_len) 649 { 650 struct elf_note *note = (struct elf_note *)buf; 651 652 note->n_namesz = strlen(name) + 1; 653 note->n_descsz = data_len; 654 note->n_type = type; 655 buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); 656 memcpy(buf, name, note->n_namesz); 657 buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); 658 memcpy(buf, data, data_len); 659 buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); 660 661 return buf; 662 } 663 664 void final_note(Elf_Word *buf) 665 { 666 memset(buf, 0, sizeof(struct elf_note)); 667 } 668 669 static void update_vmcoreinfo_note(void) 670 { 671 u32 *buf = vmcoreinfo_note; 672 673 if (!vmcoreinfo_size) 674 return; 675 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 676 vmcoreinfo_size); 677 final_note(buf); 678 } 679 680 void crash_update_vmcoreinfo_safecopy(void *ptr) 681 { 682 if (ptr) 683 memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); 684 685 vmcoreinfo_data_safecopy = ptr; 686 } 687 688 void crash_save_vmcoreinfo(void) 689 { 690 if (!vmcoreinfo_note) 691 return; 692 693 /* Use the safe copy to generate vmcoreinfo note if have */ 694 if (vmcoreinfo_data_safecopy) 695 vmcoreinfo_data = vmcoreinfo_data_safecopy; 696 697 vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); 698 update_vmcoreinfo_note(); 699 } 700 701 void vmcoreinfo_append_str(const char *fmt, ...) 702 { 703 va_list args; 704 char buf[0x50]; 705 size_t r; 706 707 va_start(args, fmt); 708 r = vscnprintf(buf, sizeof(buf), fmt, args); 709 va_end(args); 710 711 r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); 712 713 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 714 715 vmcoreinfo_size += r; 716 717 WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES, 718 "vmcoreinfo data exceeds allocated size, truncating"); 719 } 720 721 /* 722 * provide an empty default implementation here -- architecture 723 * code may override this 724 */ 725 void __weak arch_crash_save_vmcoreinfo(void) 726 {} 727 728 phys_addr_t __weak paddr_vmcoreinfo_note(void) 729 { 730 return __pa(vmcoreinfo_note); 731 } 732 EXPORT_SYMBOL(paddr_vmcoreinfo_note); 733 734 static int __init crash_save_vmcoreinfo_init(void) 735 { 736 vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); 737 if (!vmcoreinfo_data) { 738 pr_warn("Memory allocation for vmcoreinfo_data failed\n"); 739 return -ENOMEM; 740 } 741 742 vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, 743 GFP_KERNEL | __GFP_ZERO); 744 if (!vmcoreinfo_note) { 745 free_page((unsigned long)vmcoreinfo_data); 746 vmcoreinfo_data = NULL; 747 pr_warn("Memory allocation for vmcoreinfo_note failed\n"); 748 return -ENOMEM; 749 } 750 751 VMCOREINFO_OSRELEASE(init_uts_ns.name.release); 752 VMCOREINFO_BUILD_ID(); 753 VMCOREINFO_PAGESIZE(PAGE_SIZE); 754 755 VMCOREINFO_SYMBOL(init_uts_ns); 756 VMCOREINFO_OFFSET(uts_namespace, name); 757 VMCOREINFO_SYMBOL(node_online_map); 758 #ifdef CONFIG_MMU 759 VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); 760 #endif 761 VMCOREINFO_SYMBOL(_stext); 762 VMCOREINFO_SYMBOL(vmap_area_list); 763 764 #ifndef CONFIG_NUMA 765 VMCOREINFO_SYMBOL(mem_map); 766 VMCOREINFO_SYMBOL(contig_page_data); 767 #endif 768 #ifdef CONFIG_SPARSEMEM 769 VMCOREINFO_SYMBOL_ARRAY(mem_section); 770 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 771 VMCOREINFO_STRUCT_SIZE(mem_section); 772 VMCOREINFO_OFFSET(mem_section, section_mem_map); 773 VMCOREINFO_NUMBER(SECTION_SIZE_BITS); 774 VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); 775 #endif 776 VMCOREINFO_STRUCT_SIZE(page); 777 VMCOREINFO_STRUCT_SIZE(pglist_data); 778 VMCOREINFO_STRUCT_SIZE(zone); 779 VMCOREINFO_STRUCT_SIZE(free_area); 780 VMCOREINFO_STRUCT_SIZE(list_head); 781 VMCOREINFO_SIZE(nodemask_t); 782 VMCOREINFO_OFFSET(page, flags); 783 VMCOREINFO_OFFSET(page, _refcount); 784 VMCOREINFO_OFFSET(page, mapping); 785 VMCOREINFO_OFFSET(page, lru); 786 VMCOREINFO_OFFSET(page, _mapcount); 787 VMCOREINFO_OFFSET(page, private); 788 VMCOREINFO_OFFSET(page, compound_head); 789 VMCOREINFO_OFFSET(pglist_data, node_zones); 790 VMCOREINFO_OFFSET(pglist_data, nr_zones); 791 #ifdef CONFIG_FLATMEM 792 VMCOREINFO_OFFSET(pglist_data, node_mem_map); 793 #endif 794 VMCOREINFO_OFFSET(pglist_data, node_start_pfn); 795 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); 796 VMCOREINFO_OFFSET(pglist_data, node_id); 797 VMCOREINFO_OFFSET(zone, free_area); 798 VMCOREINFO_OFFSET(zone, vm_stat); 799 VMCOREINFO_OFFSET(zone, spanned_pages); 800 VMCOREINFO_OFFSET(free_area, free_list); 801 VMCOREINFO_OFFSET(list_head, next); 802 VMCOREINFO_OFFSET(list_head, prev); 803 VMCOREINFO_OFFSET(vmap_area, va_start); 804 VMCOREINFO_OFFSET(vmap_area, list); 805 VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1); 806 log_buf_vmcoreinfo_setup(); 807 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); 808 VMCOREINFO_NUMBER(NR_FREE_PAGES); 809 VMCOREINFO_NUMBER(PG_lru); 810 VMCOREINFO_NUMBER(PG_private); 811 VMCOREINFO_NUMBER(PG_swapcache); 812 VMCOREINFO_NUMBER(PG_swapbacked); 813 VMCOREINFO_NUMBER(PG_slab); 814 #ifdef CONFIG_MEMORY_FAILURE 815 VMCOREINFO_NUMBER(PG_hwpoison); 816 #endif 817 VMCOREINFO_NUMBER(PG_head_mask); 818 #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) 819 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 820 #ifdef CONFIG_HUGETLB_PAGE 821 VMCOREINFO_NUMBER(PG_hugetlb); 822 #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) 823 VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); 824 #endif 825 826 #ifdef CONFIG_KALLSYMS 827 VMCOREINFO_SYMBOL(kallsyms_names); 828 VMCOREINFO_SYMBOL(kallsyms_num_syms); 829 VMCOREINFO_SYMBOL(kallsyms_token_table); 830 VMCOREINFO_SYMBOL(kallsyms_token_index); 831 #ifdef CONFIG_KALLSYMS_BASE_RELATIVE 832 VMCOREINFO_SYMBOL(kallsyms_offsets); 833 VMCOREINFO_SYMBOL(kallsyms_relative_base); 834 #else 835 VMCOREINFO_SYMBOL(kallsyms_addresses); 836 #endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ 837 #endif /* CONFIG_KALLSYMS */ 838 839 arch_crash_save_vmcoreinfo(); 840 update_vmcoreinfo_note(); 841 842 return 0; 843 } 844 845 subsys_initcall(crash_save_vmcoreinfo_init); 846 847 static int __init crash_notes_memory_init(void) 848 { 849 /* Allocate memory for saving cpu registers. */ 850 size_t size, align; 851 852 /* 853 * crash_notes could be allocated across 2 vmalloc pages when percpu 854 * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc 855 * pages are also on 2 continuous physical pages. In this case the 856 * 2nd part of crash_notes in 2nd page could be lost since only the 857 * starting address and size of crash_notes are exported through sysfs. 858 * Here round up the size of crash_notes to the nearest power of two 859 * and pass it to __alloc_percpu as align value. This can make sure 860 * crash_notes is allocated inside one physical page. 861 */ 862 size = sizeof(note_buf_t); 863 align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); 864 865 /* 866 * Break compile if size is bigger than PAGE_SIZE since crash_notes 867 * definitely will be in 2 pages with that. 868 */ 869 BUILD_BUG_ON(size > PAGE_SIZE); 870 871 crash_notes = __alloc_percpu(size, align); 872 if (!crash_notes) { 873 pr_warn("Memory allocation for saving cpu register states failed\n"); 874 return -ENOMEM; 875 } 876 return 0; 877 } 878 subsys_initcall(crash_notes_memory_init); 879 880 #ifdef CONFIG_CRASH_HOTPLUG 881 #undef pr_fmt 882 #define pr_fmt(fmt) "crash hp: " fmt 883 884 /* 885 * Different than kexec/kdump loading/unloading/jumping/shrinking which 886 * usually rarely happen, there will be many crash hotplug events notified 887 * during one short period, e.g one memory board is hot added and memory 888 * regions are online. So mutex lock __crash_hotplug_lock is used to 889 * serialize the crash hotplug handling specifically. 890 */ 891 DEFINE_MUTEX(__crash_hotplug_lock); 892 #define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock) 893 #define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock) 894 895 /* 896 * This routine utilized when the crash_hotplug sysfs node is read. 897 * It reflects the kernel's ability/permission to update the crash 898 * elfcorehdr directly. 899 */ 900 int crash_check_update_elfcorehdr(void) 901 { 902 int rc = 0; 903 904 crash_hotplug_lock(); 905 /* Obtain lock while reading crash information */ 906 if (!kexec_trylock()) { 907 pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); 908 crash_hotplug_unlock(); 909 return 0; 910 } 911 if (kexec_crash_image) { 912 if (kexec_crash_image->file_mode) 913 rc = 1; 914 else 915 rc = kexec_crash_image->update_elfcorehdr; 916 } 917 /* Release lock now that update complete */ 918 kexec_unlock(); 919 crash_hotplug_unlock(); 920 921 return rc; 922 } 923 924 /* 925 * To accurately reflect hot un/plug changes of cpu and memory resources 926 * (including onling and offlining of those resources), the elfcorehdr 927 * (which is passed to the crash kernel via the elfcorehdr= parameter) 928 * must be updated with the new list of CPUs and memories. 929 * 930 * In order to make changes to elfcorehdr, two conditions are needed: 931 * First, the segment containing the elfcorehdr must be large enough 932 * to permit a growing number of resources; the elfcorehdr memory size 933 * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. 934 * Second, purgatory must explicitly exclude the elfcorehdr from the 935 * list of segments it checks (since the elfcorehdr changes and thus 936 * would require an update to purgatory itself to update the digest). 937 */ 938 static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) 939 { 940 struct kimage *image; 941 942 crash_hotplug_lock(); 943 /* Obtain lock while changing crash information */ 944 if (!kexec_trylock()) { 945 pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); 946 crash_hotplug_unlock(); 947 return; 948 } 949 950 /* Check kdump is not loaded */ 951 if (!kexec_crash_image) 952 goto out; 953 954 image = kexec_crash_image; 955 956 /* Check that updating elfcorehdr is permitted */ 957 if (!(image->file_mode || image->update_elfcorehdr)) 958 goto out; 959 960 if (hp_action == KEXEC_CRASH_HP_ADD_CPU || 961 hp_action == KEXEC_CRASH_HP_REMOVE_CPU) 962 pr_debug("hp_action %u, cpu %u\n", hp_action, cpu); 963 else 964 pr_debug("hp_action %u\n", hp_action); 965 966 /* 967 * The elfcorehdr_index is set to -1 when the struct kimage 968 * is allocated. Find the segment containing the elfcorehdr, 969 * if not already found. 970 */ 971 if (image->elfcorehdr_index < 0) { 972 unsigned long mem; 973 unsigned char *ptr; 974 unsigned int n; 975 976 for (n = 0; n < image->nr_segments; n++) { 977 mem = image->segment[n].mem; 978 ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); 979 if (ptr) { 980 /* The segment containing elfcorehdr */ 981 if (memcmp(ptr, ELFMAG, SELFMAG) == 0) 982 image->elfcorehdr_index = (int)n; 983 kunmap_local(ptr); 984 } 985 } 986 } 987 988 if (image->elfcorehdr_index < 0) { 989 pr_err("unable to locate elfcorehdr segment"); 990 goto out; 991 } 992 993 /* Needed in order for the segments to be updated */ 994 arch_kexec_unprotect_crashkres(); 995 996 /* Differentiate between normal load and hotplug update */ 997 image->hp_action = hp_action; 998 999 /* Now invoke arch-specific update handler */ 1000 arch_crash_handle_hotplug_event(image); 1001 1002 /* No longer handling a hotplug event */ 1003 image->hp_action = KEXEC_CRASH_HP_NONE; 1004 image->elfcorehdr_updated = true; 1005 1006 /* Change back to read-only */ 1007 arch_kexec_protect_crashkres(); 1008 1009 /* Errors in the callback is not a reason to rollback state */ 1010 out: 1011 /* Release lock now that update complete */ 1012 kexec_unlock(); 1013 crash_hotplug_unlock(); 1014 } 1015 1016 static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v) 1017 { 1018 switch (val) { 1019 case MEM_ONLINE: 1020 crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 1021 KEXEC_CRASH_HP_INVALID_CPU); 1022 break; 1023 1024 case MEM_OFFLINE: 1025 crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 1026 KEXEC_CRASH_HP_INVALID_CPU); 1027 break; 1028 } 1029 return NOTIFY_OK; 1030 } 1031 1032 static struct notifier_block crash_memhp_nb = { 1033 .notifier_call = crash_memhp_notifier, 1034 .priority = 0 1035 }; 1036 1037 static int crash_cpuhp_online(unsigned int cpu) 1038 { 1039 crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu); 1040 return 0; 1041 } 1042 1043 static int crash_cpuhp_offline(unsigned int cpu) 1044 { 1045 crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu); 1046 return 0; 1047 } 1048 1049 static int __init crash_hotplug_init(void) 1050 { 1051 int result = 0; 1052 1053 if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 1054 register_memory_notifier(&crash_memhp_nb); 1055 1056 if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { 1057 result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, 1058 "crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline); 1059 } 1060 1061 return result; 1062 } 1063 1064 subsys_initcall(crash_hotplug_init); 1065 #endif 1066