// SPDX-License-Identifier: GPL-2.0-only
/*
 * crash.c - kernel crash support code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 */

#include <linux/buildid.h>
#include <linux/init.h>
#include <linux/utsname.h>
#include <linux/vmalloc.h>
#include <linux/sizes.h>
#include <linux/kexec.h>
#include <linux/memory.h>
#include <linux/cpuhotplug.h>
#include <linux/memblock.h>
#include <linux/kmemleak.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/sha1.h>

#include "kallsyms_internal.h"
#include "kexec_internal.h"

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
unsigned char *vmcoreinfo_data;
size_t vmcoreinfo_size;
u32 *vmcoreinfo_note;

/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
static unsigned char *vmcoreinfo_data_safecopy;

/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
	.desc  = IORES_DESC_CRASH_KERNEL
};
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
	.desc  = IORES_DESC_CRASH_KERNEL
};

/*
 * Parsing of the "crashkernel" command line.
 *
 * This code is intended to be called from architecture-specific code.
 */


/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;
	unsigned long long total_mem = system_ram;

	/*
	 * Firmware sometimes reserves some memory regions for its own use,
	 * so the system memory size is less than the actual physical memory
	 * size. Work around this by rounding up the total size to 128M,
	 * which is enough for most test cases.
	 */
	total_mem = roundup(total_mem, SZ_128M);

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= total_mem) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (total_mem >= start && total_mem < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	} else
		pr_info("crashkernel size resulted in zero bytes\n");

	return 0;
}

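/*
 * Worked example for the range syntax above (illustrative only): with
 * "crashkernel=512M-2G:64M,2G-:128M", a machine whose RAM size rounds
 * up to 1G falls into the 512M-2G range, so 64M is reserved; a 4G
 * machine matches the open-ended 2G- range and gets 128M. A trailing
 * "@offset" pins the reservation base, e.g.
 * "crashkernel=512M-2G:64M,2G-:128M@16M".
 */
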
/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

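/*
 * Illustrative examples of the two forms handled above:
 *
 *	crashkernel=128M@16M	- parse_crashkernel_simple(): reserve
 *				  128M at physical offset 16M
 *	crashkernel=512M,high	- parse_crashkernel_suffix() with the
 *				  ",high" suffix: reserve 512M, allowing
 *				  memory above CRASH_ADDR_LOW_MAX
 *	crashkernel=256M,low	- the ",low" suffix: size of the extra
 *				  low-memory reservation that accompanies
 *				  a ",high" one
 */
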
static __init char *get_last_crashkernel(char *cmdline,
					 const char *name,
					 const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
				      unsigned long long system_ram,
				      unsigned long long *crash_size,
				      unsigned long long *crash_base,
				      const char *suffix)
{
	char *first_colon, *first_space;
	char *ck_cmdline;
	char *name = "crashkernel=";

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
	if (!ck_cmdline)
		return -ENOENT;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
				suffix);
	/*
	 * if the command line contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 *
 * If crashkernel=,high|low is supported by the architecture, non-NULL
 * values should be passed for the 'low_size' and 'high' parameters.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base,
			     unsigned long long *low_size,
			     bool *high)
{
	int ret;

	/* crashkernel=X[@offset] */
	ret = __parse_crashkernel(cmdline, system_ram, crash_size,
			crash_base, NULL);
#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
	/*
	 * If a non-NULL 'high' was passed in and no normal crashkernel
	 * setting was detected, try parsing crashkernel=,high|low.
	 */
	if (high && ret == -ENOENT) {
		ret = __parse_crashkernel(cmdline, 0, crash_size,
				crash_base, suffix_tbl[SUFFIX_HIGH]);
		if (ret || !*crash_size)
			return -EINVAL;

		/*
		 * crashkernel=Y,low may or may not be specified, but an
		 * invalid value is not allowed.
		 */
		ret = __parse_crashkernel(cmdline, 0, low_size,
				crash_base, suffix_tbl[SUFFIX_LOW]);
		if (ret == -ENOENT) {
			*low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
			ret = 0;
		} else if (ret) {
			return ret;
		}

		*high = true;
	}
#endif
	if (!*crash_size)
		ret = -EINVAL;

	return ret;
}

/*
 * Add a dummy early_param handler to mark crashkernel= as a known command line
 * parameter and suppress incorrect warnings in init/main.c.
 */
static int __init parse_crashkernel_dummy(char *arg)
{
	return 0;
}
early_param("crashkernel", parse_crashkernel_dummy);

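/*
 * A minimal sketch (not compiled here) of how arch setup code that
 * selects CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION might drive
 * the parser together with the generic reservation helper below; the
 * local variable names are illustrative:
 *
 *	unsigned long long crash_size, crash_base, low_size = 0;
 *	bool high = false;
 *
 *	if (parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 *			      &crash_size, &crash_base, &low_size, &high))
 *		return;
 *	reserve_crashkernel_generic(boot_command_line, crash_size,
 *				    crash_base, low_size, high);
 */
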
#ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
static int __init reserve_crashkernel_low(unsigned long long low_size)
{
#ifdef CONFIG_64BIT
	unsigned long long low_base;

	low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
	if (!low_base) {
		pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size);
		return -ENOMEM;
	}

	pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n",
		low_base, low_base + low_size, low_size >> 20);

	crashk_low_res.start = low_base;
	crashk_low_res.end   = low_base + low_size - 1;
	insert_resource(&iomem_resource, &crashk_low_res);
#endif
	return 0;
}

void __init reserve_crashkernel_generic(char *cmdline,
					unsigned long long crash_size,
					unsigned long long crash_base,
					unsigned long long crash_low_size,
					bool high)
{
	unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0;
	bool fixed_base = false;

	/* User specifies base address explicitly. */
	if (crash_base) {
		fixed_base = true;
		search_base = crash_base;
		search_end = crash_base + crash_size;
	} else if (high) {
		search_base = CRASH_ADDR_LOW_MAX;
		search_end = CRASH_ADDR_HIGH_MAX;
	}

retry:
	crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
					       search_base, search_end);
	if (!crash_base) {
		/*
		 * For crashkernel=size[KMG]@offset[KMG], print out a failure
		 * message if we can't reserve the specified region.
		 */
		if (fixed_base) {
			pr_warn("crashkernel reservation failed - memory is in use.\n");
			return;
		}

		/*
		 * For crashkernel=size[KMG], if the first attempt was for
		 * low memory, fall back to high memory; the minimum required
		 * low memory will be reserved later.
		 */
		if (!high && search_end == CRASH_ADDR_LOW_MAX) {
			search_end = CRASH_ADDR_HIGH_MAX;
			search_base = CRASH_ADDR_LOW_MAX;
			crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE;
			goto retry;
		}

		/*
		 * For crashkernel=size[KMG],high, if the first attempt was
		 * for high memory, fall back to low memory.
		 */
		if (high && search_end == CRASH_ADDR_HIGH_MAX) {
			search_end = CRASH_ADDR_LOW_MAX;
			search_base = 0;
			goto retry;
		}
		pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
			crash_size);
		return;
	}

	if ((crash_base >= CRASH_ADDR_LOW_MAX) &&
	    crash_low_size && reserve_crashkernel_low(crash_low_size)) {
		memblock_phys_free(crash_base, crash_size);
		return;
	}

	pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
		crash_base, crash_base + crash_size, crash_size >> 20);

	/*
	 * The crashkernel memory will be removed from the kernel linear
	 * map. Inform kmemleak so that it won't try to access it.
	 */
	kmemleak_ignore_phys(crash_base);
	if (crashk_low_res.end)
		kmemleak_ignore_phys(crashk_low_res.start);

	crashk_res.start = crash_base;
	crashk_res.end = crash_base + crash_size - 1;
	insert_resource(&iomem_resource, &crashk_res);
}
#endif

int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
				void **addr, unsigned long *sz)
{
	Elf64_Ehdr *ehdr;
	Elf64_Phdr *phdr;
	unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
	unsigned char *buf;
	unsigned int cpu, i;
	unsigned long long notes_addr;
	unsigned long mstart, mend;

	/* extra phdr for vmcoreinfo ELF note */
	nr_phdr = nr_cpus + 1;
	nr_phdr += mem->nr_ranges;

	/*
	 * kexec-tools creates an extra PT_LOAD phdr for the kernel text
	 * mapping area (for example, ffffffff80000000 - ffffffffa0000000 on
	 * x86_64). I think this is required by tools like gdb. So the same
	 * physical memory will be mapped in two ELF headers. One will contain
	 * kernel text virtual addresses and the other will have __va(physical)
	 * addresses.
	 */

	nr_phdr++;
	elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
	elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);

	buf = vzalloc(elf_sz);
	if (!buf)
		return -ENOMEM;

	ehdr = (Elf64_Ehdr *)buf;
	phdr = (Elf64_Phdr *)(ehdr + 1);
	memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
	ehdr->e_ident[EI_CLASS] = ELFCLASS64;
	ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
	ehdr->e_ident[EI_VERSION] = EV_CURRENT;
	ehdr->e_ident[EI_OSABI] = ELF_OSABI;
	memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
	ehdr->e_type = ET_CORE;
	ehdr->e_machine = ELF_ARCH;
	ehdr->e_version = EV_CURRENT;
	ehdr->e_phoff = sizeof(Elf64_Ehdr);
	ehdr->e_ehsize = sizeof(Elf64_Ehdr);
	ehdr->e_phentsize = sizeof(Elf64_Phdr);

	/* Prepare one phdr of type PT_NOTE for each possible CPU */
	for_each_possible_cpu(cpu) {
		phdr->p_type = PT_NOTE;
		notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
		phdr->p_offset = phdr->p_paddr = notes_addr;
		phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
		(ehdr->e_phnum)++;
		phdr++;
	}

	/* Prepare one PT_NOTE header for vmcoreinfo */
	phdr->p_type = PT_NOTE;
	phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
	phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE;
	(ehdr->e_phnum)++;
	phdr++;

	/* Prepare PT_LOAD type program header for kernel text region */
	if (need_kernel_map) {
		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_vaddr = (unsigned long) _text;
		phdr->p_filesz = phdr->p_memsz = _end - _text;
		phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
		ehdr->e_phnum++;
		phdr++;
	}

	/* Go through all the ranges in mem->ranges[] and prepare phdr */
	for (i = 0; i < mem->nr_ranges; i++) {
		mstart = mem->ranges[i].start;
		mend = mem->ranges[i].end;

		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_offset = mstart;

		phdr->p_paddr = mstart;
		phdr->p_vaddr = (unsigned long) __va(mstart);
		phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
		phdr->p_align = 0;
		ehdr->e_phnum++;
#ifdef CONFIG_KEXEC_FILE
		kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
			      phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
			      ehdr->e_phnum, phdr->p_offset);
#endif
		phdr++;
	}

	*addr = buf;
	*sz = elf_sz;
	return 0;
}

int crash_exclude_mem_range(struct crash_mem *mem,
			    unsigned long long mstart, unsigned long long mend)
{
	int i;
	unsigned long long start, end, p_start, p_end;

	for (i = 0; i < mem->nr_ranges; i++) {
		start = mem->ranges[i].start;
		end = mem->ranges[i].end;
		p_start = mstart;
		p_end = mend;

		if (p_start > end)
			continue;

		/*
		 * Because the memory ranges in mem->ranges are stored in
		 * ascending order, when we detect `p_end < start`, we can
		 * immediately exit the for loop, as the subsequent memory
		 * ranges will definitely be outside the range we are looking
		 * for.
		 */
		if (p_end < start)
			break;

		/* Truncate any area outside of range */
		if (p_start < start)
			p_start = start;
		if (p_end > end)
			p_end = end;

		/* Found completely overlapping range */
		if (p_start == start && p_end == end) {
			memmove(&mem->ranges[i], &mem->ranges[i + 1],
				(mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i]));
			i--;
			mem->nr_ranges--;
		} else if (p_start > start && p_end < end) {
			/* Split original range */
			if (mem->nr_ranges >= mem->max_nr_ranges)
				return -ENOMEM;

			memmove(&mem->ranges[i + 2], &mem->ranges[i + 1],
				(mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i]));

			mem->ranges[i].end = p_start - 1;
			mem->ranges[i + 1].start = p_end + 1;
			mem->ranges[i + 1].end = end;

			i++;
			mem->nr_ranges++;
		} else if (p_start != start)
			mem->ranges[i].end = p_start - 1;
		else
			mem->ranges[i].start = p_end + 1;
	}

	return 0;
}

Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
			  void *data, size_t data_len)
{
	struct elf_note *note = (struct elf_note *)buf;

	note->n_namesz = strlen(name) + 1;
	note->n_descsz = data_len;
	note->n_type   = type;
	buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word));
	memcpy(buf, name, note->n_namesz);
	buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word));
	memcpy(buf, data, data_len);
	buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word));

	return buf;
}

void final_note(Elf_Word *buf)
{
	memset(buf, 0, sizeof(struct elf_note));
}

static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_update_vmcoreinfo_safecopy(void *ptr)
{
	if (ptr)
		memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);

	vmcoreinfo_data_safecopy = ptr;
}

void crash_save_vmcoreinfo(void)
{
	if (!vmcoreinfo_note)
		return;

	/* Use the safe copy to generate the vmcoreinfo note if we have one */
	if (vmcoreinfo_data_safecopy)
		vmcoreinfo_data = vmcoreinfo_data_safecopy;

	vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
	update_vmcoreinfo_note();
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;

	WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES,
		  "vmcoreinfo data exceeds allocated size, truncating");
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
	return __pa(vmcoreinfo_note);
}
EXPORT_SYMBOL(paddr_vmcoreinfo_note);

static int __init crash_save_vmcoreinfo_init(void)
{
	vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
	if (!vmcoreinfo_data) {
		pr_warn("Memory allocation for vmcoreinfo_data failed\n");
		return -ENOMEM;
	}

	vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
					    GFP_KERNEL | __GFP_ZERO);
	if (!vmcoreinfo_note) {
		free_page((unsigned long)vmcoreinfo_data);
		vmcoreinfo_data = NULL;
		pr_warn("Memory allocation for vmcoreinfo_note failed\n");
		return -ENOMEM;
	}

	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_BUILD_ID();
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_OFFSET(uts_namespace, name);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NUMA
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL_ARRAY(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
	VMCOREINFO_NUMBER(SECTION_SIZE_BITS);
	VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _refcount);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLATMEM
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS);
	log_buf_vmcoreinfo_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_swapbacked);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
#define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(PG_hugetlb);
#define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
	VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE);
#endif

#ifdef CONFIG_KALLSYMS
	VMCOREINFO_SYMBOL(kallsyms_names);
	VMCOREINFO_SYMBOL(kallsyms_num_syms);
	VMCOREINFO_SYMBOL(kallsyms_token_table);
	VMCOREINFO_SYMBOL(kallsyms_token_index);
#ifdef CONFIG_KALLSYMS_BASE_RELATIVE
	VMCOREINFO_SYMBOL(kallsyms_offsets);
	VMCOREINFO_SYMBOL(kallsyms_relative_base);
#else
	VMCOREINFO_SYMBOL(kallsyms_addresses);
#endif /* CONFIG_KALLSYMS_BASE_RELATIVE */
#endif /* CONFIG_KALLSYMS */

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based. vmalloc doesn't guarantee that 2 contiguous
	 * vmalloc pages are also on 2 contiguous physical pages. In that
	 * case the 2nd part of crash_notes in the 2nd page could be lost,
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs. Round up the size of crash_notes to the
	 * nearest power of two and pass it to __alloc_percpu as the align
	 * value, which makes sure crash_notes is allocated inside one
	 * physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break the build if size is bigger than PAGE_SIZE, since
	 * crash_notes would then definitely span 2 pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);

#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Unlike kexec/kdump loading/unloading/jumping/shrinking, which rarely
 * happens, many crash hotplug events may be notified within one short
 * period, e.g. when a memory board is hot added and its memory regions
 * come online. The mutex __crash_hotplug_lock is therefore used to
 * serialize the crash hotplug handling specifically.
 */
DEFINE_MUTEX(__crash_hotplug_lock);
#define crash_hotplug_lock()   mutex_lock(&__crash_hotplug_lock)
#define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock)

/*
 * This routine is utilized when the crash_hotplug sysfs node is read.
 * It reflects the kernel's ability/permission to update the crash
 * elfcorehdr directly.
 */
int crash_check_update_elfcorehdr(void)
{
	int rc = 0;

	crash_hotplug_lock();
	/* Obtain lock while reading crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return 0;
	}
	if (kexec_crash_image) {
		if (kexec_crash_image->file_mode)
			rc = 1;
		else
			rc = kexec_crash_image->update_elfcorehdr;
	}
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();

	return rc;
}

/*
 * To accurately reflect hot un/plug changes of cpu and memory resources
 * (including onlining and offlining of those resources), the elfcorehdr
 * (which is passed to the crash kernel via the elfcorehdr= parameter)
 * must be updated with the new list of CPUs and memories.
 *
 * In order to make changes to elfcorehdr, two conditions are needed:
 * First, the segment containing the elfcorehdr must be large enough
 * to permit a growing number of resources; the elfcorehdr memory size
 * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES.
 * Second, purgatory must explicitly exclude the elfcorehdr from the
 * list of segments it checks (since the elfcorehdr changes and thus
 * would require an update to purgatory itself to update the digest).
 */
static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu)
{
	struct kimage *image;

	crash_hotplug_lock();
	/* Obtain lock while changing crash information */
	if (!kexec_trylock()) {
		pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n");
		crash_hotplug_unlock();
		return;
	}

	/* Nothing to do if kdump is not loaded */
	if (!kexec_crash_image)
		goto out;

	image = kexec_crash_image;

	/* Check that updating elfcorehdr is permitted */
	if (!(image->file_mode || image->update_elfcorehdr))
		goto out;

	if (hp_action == KEXEC_CRASH_HP_ADD_CPU ||
	    hp_action == KEXEC_CRASH_HP_REMOVE_CPU)
		pr_debug("hp_action %u, cpu %u\n", hp_action, cpu);
	else
		pr_debug("hp_action %u\n", hp_action);

	/*
	 * The elfcorehdr_index is set to -1 when the struct kimage
	 * is allocated. Find the segment containing the elfcorehdr,
	 * if not already found.
	 */
	if (image->elfcorehdr_index < 0) {
		unsigned long mem;
		unsigned char *ptr;
		unsigned int n;

		for (n = 0; n < image->nr_segments; n++) {
			mem = image->segment[n].mem;
			ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT));
			if (ptr) {
				/* The segment containing elfcorehdr */
				if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
					image->elfcorehdr_index = (int)n;
				kunmap_local(ptr);
			}
		}
	}

	if (image->elfcorehdr_index < 0) {
		pr_err("unable to locate elfcorehdr segment");
		goto out;
	}

	/* Needed in order for the segments to be updated */
	arch_kexec_unprotect_crashkres();

	/* Differentiate between normal load and hotplug update */
	image->hp_action = hp_action;

	/* Now invoke arch-specific update handler */
	arch_crash_handle_hotplug_event(image);

	/* No longer handling a hotplug event */
	image->hp_action = KEXEC_CRASH_HP_NONE;
	image->elfcorehdr_updated = true;

	/* Change back to read-only */
	arch_kexec_protect_crashkres();

	/* Errors in the callback are not a reason to roll back state */
out:
	/* Release lock now that update complete */
	kexec_unlock();
	crash_hotplug_unlock();
}

static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v)
{
	switch (val) {
	case MEM_ONLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY,
			KEXEC_CRASH_HP_INVALID_CPU);
		break;

	case MEM_OFFLINE:
		crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY,
			KEXEC_CRASH_HP_INVALID_CPU);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block crash_memhp_nb = {
	.notifier_call = crash_memhp_notifier,
	.priority = 0
};

static int crash_cpuhp_online(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu);
	return 0;
}

static int crash_cpuhp_offline(unsigned int cpu)
{
	crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu);
	return 0;
}

static int __init crash_hotplug_init(void)
{
	int result = 0;

	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		register_memory_notifier(&crash_memhp_nb);

	if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
			"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
	}

	return result;
}

subsys_initcall(crash_hotplug_init);
#endif