1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * crash.c - kernel crash support code. 4 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> 5 */ 6 7 #include <linux/buildid.h> 8 #include <linux/init.h> 9 #include <linux/utsname.h> 10 #include <linux/vmalloc.h> 11 #include <linux/sizes.h> 12 #include <linux/kexec.h> 13 #include <linux/memory.h> 14 #include <linux/cpuhotplug.h> 15 #include <linux/memblock.h> 16 #include <linux/kmemleak.h> 17 18 #include <asm/page.h> 19 #include <asm/sections.h> 20 21 #include <crypto/sha1.h> 22 23 #include "kallsyms_internal.h" 24 #include "kexec_internal.h" 25 26 /* Per cpu memory for storing cpu states in case of system crash. */ 27 note_buf_t __percpu *crash_notes; 28 29 /* vmcoreinfo stuff */ 30 unsigned char *vmcoreinfo_data; 31 size_t vmcoreinfo_size; 32 u32 *vmcoreinfo_note; 33 34 /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ 35 static unsigned char *vmcoreinfo_data_safecopy; 36 37 /* Location of the reserved area for the crash kernel */ 38 struct resource crashk_res = { 39 .name = "Crash kernel", 40 .start = 0, 41 .end = 0, 42 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, 43 .desc = IORES_DESC_CRASH_KERNEL 44 }; 45 struct resource crashk_low_res = { 46 .name = "Crash kernel", 47 .start = 0, 48 .end = 0, 49 .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, 50 .desc = IORES_DESC_CRASH_KERNEL 51 }; 52 53 /* 54 * parsing the "crashkernel" commandline 55 * 56 * this code is intended to be called from architecture specific code 57 */ 58 59 60 /* 61 * This function parses command lines in the format 62 * 63 * crashkernel=ramsize-range:size[,...][@offset] 64 * 65 * The function returns 0 on success and -EINVAL on failure. 66 */ 67 static int __init parse_crashkernel_mem(char *cmdline, 68 unsigned long long system_ram, 69 unsigned long long *crash_size, 70 unsigned long long *crash_base) 71 { 72 char *cur = cmdline, *tmp; 73 unsigned long long total_mem = system_ram; 74 75 /* 76 * Firmware sometimes reserves some memory regions for its own use, 77 * so the system memory size is less than the actual physical memory 78 * size. Work around this by rounding up the total size to 128M, 79 * which is enough for most test cases. 80 */ 81 total_mem = roundup(total_mem, SZ_128M); 82 83 /* for each entry of the comma-separated list */ 84 do { 85 unsigned long long start, end = ULLONG_MAX, size; 86 87 /* get the start of the range */ 88 start = memparse(cur, &tmp); 89 if (cur == tmp) { 90 pr_warn("crashkernel: Memory value expected\n"); 91 return -EINVAL; 92 } 93 cur = tmp; 94 if (*cur != '-') { 95 pr_warn("crashkernel: '-' expected\n"); 96 return -EINVAL; 97 } 98 cur++; 99 100 /* if no ':' is here, than we read the end */ 101 if (*cur != ':') { 102 end = memparse(cur, &tmp); 103 if (cur == tmp) { 104 pr_warn("crashkernel: Memory value expected\n"); 105 return -EINVAL; 106 } 107 cur = tmp; 108 if (end <= start) { 109 pr_warn("crashkernel: end <= start\n"); 110 return -EINVAL; 111 } 112 } 113 114 if (*cur != ':') { 115 pr_warn("crashkernel: ':' expected\n"); 116 return -EINVAL; 117 } 118 cur++; 119 120 size = memparse(cur, &tmp); 121 if (cur == tmp) { 122 pr_warn("Memory value expected\n"); 123 return -EINVAL; 124 } 125 cur = tmp; 126 if (size >= total_mem) { 127 pr_warn("crashkernel: invalid size\n"); 128 return -EINVAL; 129 } 130 131 /* match ? */ 132 if (total_mem >= start && total_mem < end) { 133 *crash_size = size; 134 break; 135 } 136 } while (*cur++ == ','); 137 138 if (*crash_size > 0) { 139 while (*cur && *cur != ' ' && *cur != '@') 140 cur++; 141 if (*cur == '@') { 142 cur++; 143 *crash_base = memparse(cur, &tmp); 144 if (cur == tmp) { 145 pr_warn("Memory value expected after '@'\n"); 146 return -EINVAL; 147 } 148 } 149 } else 150 pr_info("crashkernel size resulted in zero bytes\n"); 151 152 return 0; 153 } 154 155 /* 156 * That function parses "simple" (old) crashkernel command lines like 157 * 158 * crashkernel=size[@offset] 159 * 160 * It returns 0 on success and -EINVAL on failure. 161 */ 162 static int __init parse_crashkernel_simple(char *cmdline, 163 unsigned long long *crash_size, 164 unsigned long long *crash_base) 165 { 166 char *cur = cmdline; 167 168 *crash_size = memparse(cmdline, &cur); 169 if (cmdline == cur) { 170 pr_warn("crashkernel: memory value expected\n"); 171 return -EINVAL; 172 } 173 174 if (*cur == '@') 175 *crash_base = memparse(cur+1, &cur); 176 else if (*cur != ' ' && *cur != '\0') { 177 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 178 return -EINVAL; 179 } 180 181 return 0; 182 } 183 184 #define SUFFIX_HIGH 0 185 #define SUFFIX_LOW 1 186 #define SUFFIX_NULL 2 187 static __initdata char *suffix_tbl[] = { 188 [SUFFIX_HIGH] = ",high", 189 [SUFFIX_LOW] = ",low", 190 [SUFFIX_NULL] = NULL, 191 }; 192 193 /* 194 * That function parses "suffix" crashkernel command lines like 195 * 196 * crashkernel=size,[high|low] 197 * 198 * It returns 0 on success and -EINVAL on failure. 199 */ 200 static int __init parse_crashkernel_suffix(char *cmdline, 201 unsigned long long *crash_size, 202 const char *suffix) 203 { 204 char *cur = cmdline; 205 206 *crash_size = memparse(cmdline, &cur); 207 if (cmdline == cur) { 208 pr_warn("crashkernel: memory value expected\n"); 209 return -EINVAL; 210 } 211 212 /* check with suffix */ 213 if (strncmp(cur, suffix, strlen(suffix))) { 214 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 215 return -EINVAL; 216 } 217 cur += strlen(suffix); 218 if (*cur != ' ' && *cur != '\0') { 219 pr_warn("crashkernel: unrecognized char: %c\n", *cur); 220 return -EINVAL; 221 } 222 223 return 0; 224 } 225 226 static __init char *get_last_crashkernel(char *cmdline, 227 const char *name, 228 const char *suffix) 229 { 230 char *p = cmdline, *ck_cmdline = NULL; 231 232 /* find crashkernel and use the last one if there are more */ 233 p = strstr(p, name); 234 while (p) { 235 char *end_p = strchr(p, ' '); 236 char *q; 237 238 if (!end_p) 239 end_p = p + strlen(p); 240 241 if (!suffix) { 242 int i; 243 244 /* skip the one with any known suffix */ 245 for (i = 0; suffix_tbl[i]; i++) { 246 q = end_p - strlen(suffix_tbl[i]); 247 if (!strncmp(q, suffix_tbl[i], 248 strlen(suffix_tbl[i]))) 249 goto next; 250 } 251 ck_cmdline = p; 252 } else { 253 q = end_p - strlen(suffix); 254 if (!strncmp(q, suffix, strlen(suffix))) 255 ck_cmdline = p; 256 } 257 next: 258 p = strstr(p+1, name); 259 } 260 261 return ck_cmdline; 262 } 263 264 static int __init __parse_crashkernel(char *cmdline, 265 unsigned long long system_ram, 266 unsigned long long *crash_size, 267 unsigned long long *crash_base, 268 const char *suffix) 269 { 270 char *first_colon, *first_space; 271 char *ck_cmdline; 272 char *name = "crashkernel="; 273 274 BUG_ON(!crash_size || !crash_base); 275 *crash_size = 0; 276 *crash_base = 0; 277 278 ck_cmdline = get_last_crashkernel(cmdline, name, suffix); 279 if (!ck_cmdline) 280 return -ENOENT; 281 282 ck_cmdline += strlen(name); 283 284 if (suffix) 285 return parse_crashkernel_suffix(ck_cmdline, crash_size, 286 suffix); 287 /* 288 * if the commandline contains a ':', then that's the extended 289 * syntax -- if not, it must be the classic syntax 290 */ 291 first_colon = strchr(ck_cmdline, ':'); 292 first_space = strchr(ck_cmdline, ' '); 293 if (first_colon && (!first_space || first_colon < first_space)) 294 return parse_crashkernel_mem(ck_cmdline, system_ram, 295 crash_size, crash_base); 296 297 return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base); 298 } 299 300 /* 301 * That function is the entry point for command line parsing and should be 302 * called from the arch-specific code. 303 * 304 * If crashkernel=,high|low is supported on architecture, non-NULL values 305 * should be passed to parameters 'low_size' and 'high'. 306 */ 307 int __init parse_crashkernel(char *cmdline, 308 unsigned long long system_ram, 309 unsigned long long *crash_size, 310 unsigned long long *crash_base, 311 unsigned long long *low_size, 312 bool *high) 313 { 314 int ret; 315 316 /* crashkernel=X[@offset] */ 317 ret = __parse_crashkernel(cmdline, system_ram, crash_size, 318 crash_base, NULL); 319 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION 320 /* 321 * If non-NULL 'high' passed in and no normal crashkernel 322 * setting detected, try parsing crashkernel=,high|low. 323 */ 324 if (high && ret == -ENOENT) { 325 ret = __parse_crashkernel(cmdline, 0, crash_size, 326 crash_base, suffix_tbl[SUFFIX_HIGH]); 327 if (ret || !*crash_size) 328 return -EINVAL; 329 330 /* 331 * crashkernel=Y,low can be specified or not, but invalid value 332 * is not allowed. 333 */ 334 ret = __parse_crashkernel(cmdline, 0, low_size, 335 crash_base, suffix_tbl[SUFFIX_LOW]); 336 if (ret == -ENOENT) { 337 *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; 338 ret = 0; 339 } else if (ret) { 340 return ret; 341 } 342 343 *high = true; 344 } 345 #endif 346 if (!*crash_size) 347 ret = -EINVAL; 348 349 return ret; 350 } 351 352 /* 353 * Add a dummy early_param handler to mark crashkernel= as a known command line 354 * parameter and suppress incorrect warnings in init/main.c. 355 */ 356 static int __init parse_crashkernel_dummy(char *arg) 357 { 358 return 0; 359 } 360 early_param("crashkernel", parse_crashkernel_dummy); 361 362 #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION 363 static int __init reserve_crashkernel_low(unsigned long long low_size) 364 { 365 #ifdef CONFIG_64BIT 366 unsigned long long low_base; 367 368 low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); 369 if (!low_base) { 370 pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n", low_size); 371 return -ENOMEM; 372 } 373 374 pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n", 375 low_base, low_base + low_size, low_size >> 20); 376 377 crashk_low_res.start = low_base; 378 crashk_low_res.end = low_base + low_size - 1; 379 #endif 380 return 0; 381 } 382 383 void __init reserve_crashkernel_generic(char *cmdline, 384 unsigned long long crash_size, 385 unsigned long long crash_base, 386 unsigned long long crash_low_size, 387 bool high) 388 { 389 unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; 390 bool fixed_base = false; 391 392 /* User specifies base address explicitly. */ 393 if (crash_base) { 394 fixed_base = true; 395 search_base = crash_base; 396 search_end = crash_base + crash_size; 397 } else if (high) { 398 search_base = CRASH_ADDR_LOW_MAX; 399 search_end = CRASH_ADDR_HIGH_MAX; 400 } 401 402 retry: 403 crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN, 404 search_base, search_end); 405 if (!crash_base) { 406 /* 407 * For crashkernel=size[KMG]@offset[KMG], print out failure 408 * message if can't reserve the specified region. 409 */ 410 if (fixed_base) { 411 pr_warn("crashkernel reservation failed - memory is in use.\n"); 412 return; 413 } 414 415 /* 416 * For crashkernel=size[KMG], if the first attempt was for 417 * low memory, fall back to high memory, the minimum required 418 * low memory will be reserved later. 419 */ 420 if (!high && search_end == CRASH_ADDR_LOW_MAX) { 421 search_end = CRASH_ADDR_HIGH_MAX; 422 search_base = CRASH_ADDR_LOW_MAX; 423 crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; 424 goto retry; 425 } 426 427 /* 428 * For crashkernel=size[KMG],high, if the first attempt was 429 * for high memory, fall back to low memory. 430 */ 431 if (high && search_end == CRASH_ADDR_HIGH_MAX) { 432 search_end = CRASH_ADDR_LOW_MAX; 433 search_base = 0; 434 goto retry; 435 } 436 pr_warn("cannot allocate crashkernel (size:0x%llx)\n", 437 crash_size); 438 return; 439 } 440 441 if ((crash_base >= CRASH_ADDR_LOW_MAX) && 442 crash_low_size && reserve_crashkernel_low(crash_low_size)) { 443 memblock_phys_free(crash_base, crash_size); 444 return; 445 } 446 447 pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n", 448 crash_base, crash_base + crash_size, crash_size >> 20); 449 450 /* 451 * The crashkernel memory will be removed from the kernel linear 452 * map. Inform kmemleak so that it won't try to access it. 453 */ 454 kmemleak_ignore_phys(crash_base); 455 if (crashk_low_res.end) 456 kmemleak_ignore_phys(crashk_low_res.start); 457 458 crashk_res.start = crash_base; 459 crashk_res.end = crash_base + crash_size - 1; 460 } 461 462 static __init int insert_crashkernel_resources(void) 463 { 464 if (crashk_res.start < crashk_res.end) 465 insert_resource(&iomem_resource, &crashk_res); 466 467 if (crashk_low_res.start < crashk_low_res.end) 468 insert_resource(&iomem_resource, &crashk_low_res); 469 470 return 0; 471 } 472 early_initcall(insert_crashkernel_resources); 473 #endif 474 475 int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map, 476 void **addr, unsigned long *sz) 477 { 478 Elf64_Ehdr *ehdr; 479 Elf64_Phdr *phdr; 480 unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; 481 unsigned char *buf; 482 unsigned int cpu, i; 483 unsigned long long notes_addr; 484 unsigned long mstart, mend; 485 486 /* extra phdr for vmcoreinfo ELF note */ 487 nr_phdr = nr_cpus + 1; 488 nr_phdr += mem->nr_ranges; 489 490 /* 491 * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping 492 * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). 493 * I think this is required by tools like gdb. So same physical 494 * memory will be mapped in two ELF headers. One will contain kernel 495 * text virtual addresses and other will have __va(physical) addresses. 496 */ 497 498 nr_phdr++; 499 elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); 500 elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); 501 502 buf = vzalloc(elf_sz); 503 if (!buf) 504 return -ENOMEM; 505 506 ehdr = (Elf64_Ehdr *)buf; 507 phdr = (Elf64_Phdr *)(ehdr + 1); 508 memcpy(ehdr->e_ident, ELFMAG, SELFMAG); 509 ehdr->e_ident[EI_CLASS] = ELFCLASS64; 510 ehdr->e_ident[EI_DATA] = ELFDATA2LSB; 511 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 512 ehdr->e_ident[EI_OSABI] = ELF_OSABI; 513 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); 514 ehdr->e_type = ET_CORE; 515 ehdr->e_machine = ELF_ARCH; 516 ehdr->e_version = EV_CURRENT; 517 ehdr->e_phoff = sizeof(Elf64_Ehdr); 518 ehdr->e_ehsize = sizeof(Elf64_Ehdr); 519 ehdr->e_phentsize = sizeof(Elf64_Phdr); 520 521 /* Prepare one phdr of type PT_NOTE for each possible CPU */ 522 for_each_possible_cpu(cpu) { 523 phdr->p_type = PT_NOTE; 524 notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); 525 phdr->p_offset = phdr->p_paddr = notes_addr; 526 phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); 527 (ehdr->e_phnum)++; 528 phdr++; 529 } 530 531 /* Prepare one PT_NOTE header for vmcoreinfo */ 532 phdr->p_type = PT_NOTE; 533 phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); 534 phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; 535 (ehdr->e_phnum)++; 536 phdr++; 537 538 /* Prepare PT_LOAD type program header for kernel text region */ 539 if (need_kernel_map) { 540 phdr->p_type = PT_LOAD; 541 phdr->p_flags = PF_R|PF_W|PF_X; 542 phdr->p_vaddr = (unsigned long) _text; 543 phdr->p_filesz = phdr->p_memsz = _end - _text; 544 phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); 545 ehdr->e_phnum++; 546 phdr++; 547 } 548 549 /* Go through all the ranges in mem->ranges[] and prepare phdr */ 550 for (i = 0; i < mem->nr_ranges; i++) { 551 mstart = mem->ranges[i].start; 552 mend = mem->ranges[i].end; 553 554 phdr->p_type = PT_LOAD; 555 phdr->p_flags = PF_R|PF_W|PF_X; 556 phdr->p_offset = mstart; 557 558 phdr->p_paddr = mstart; 559 phdr->p_vaddr = (unsigned long) __va(mstart); 560 phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; 561 phdr->p_align = 0; 562 ehdr->e_phnum++; 563 #ifdef CONFIG_KEXEC_FILE 564 kexec_dprintk("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n", 565 phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, 566 ehdr->e_phnum, phdr->p_offset); 567 #endif 568 phdr++; 569 } 570 571 *addr = buf; 572 *sz = elf_sz; 573 return 0; 574 } 575 576 int crash_exclude_mem_range(struct crash_mem *mem, 577 unsigned long long mstart, unsigned long long mend) 578 { 579 int i; 580 unsigned long long start, end, p_start, p_end; 581 582 for (i = 0; i < mem->nr_ranges; i++) { 583 start = mem->ranges[i].start; 584 end = mem->ranges[i].end; 585 p_start = mstart; 586 p_end = mend; 587 588 if (p_start > end) 589 continue; 590 591 /* 592 * Because the memory ranges in mem->ranges are stored in 593 * ascending order, when we detect `p_end < start`, we can 594 * immediately exit the for loop, as the subsequent memory 595 * ranges will definitely be outside the range we are looking 596 * for. 597 */ 598 if (p_end < start) 599 break; 600 601 /* Truncate any area outside of range */ 602 if (p_start < start) 603 p_start = start; 604 if (p_end > end) 605 p_end = end; 606 607 /* Found completely overlapping range */ 608 if (p_start == start && p_end == end) { 609 memmove(&mem->ranges[i], &mem->ranges[i + 1], 610 (mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i])); 611 i--; 612 mem->nr_ranges--; 613 } else if (p_start > start && p_end < end) { 614 /* Split original range */ 615 if (mem->nr_ranges >= mem->max_nr_ranges) 616 return -ENOMEM; 617 618 memmove(&mem->ranges[i + 2], &mem->ranges[i + 1], 619 (mem->nr_ranges - (i + 1)) * sizeof(mem->ranges[i])); 620 621 mem->ranges[i].end = p_start - 1; 622 mem->ranges[i + 1].start = p_end + 1; 623 mem->ranges[i + 1].end = end; 624 625 i++; 626 mem->nr_ranges++; 627 } else if (p_start != start) 628 mem->ranges[i].end = p_start - 1; 629 else 630 mem->ranges[i].start = p_end + 1; 631 } 632 633 return 0; 634 } 635 636 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, 637 void *data, size_t data_len) 638 { 639 struct elf_note *note = (struct elf_note *)buf; 640 641 note->n_namesz = strlen(name) + 1; 642 note->n_descsz = data_len; 643 note->n_type = type; 644 buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); 645 memcpy(buf, name, note->n_namesz); 646 buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); 647 memcpy(buf, data, data_len); 648 buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); 649 650 return buf; 651 } 652 653 void final_note(Elf_Word *buf) 654 { 655 memset(buf, 0, sizeof(struct elf_note)); 656 } 657 658 static void update_vmcoreinfo_note(void) 659 { 660 u32 *buf = vmcoreinfo_note; 661 662 if (!vmcoreinfo_size) 663 return; 664 buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data, 665 vmcoreinfo_size); 666 final_note(buf); 667 } 668 669 void crash_update_vmcoreinfo_safecopy(void *ptr) 670 { 671 if (ptr) 672 memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); 673 674 vmcoreinfo_data_safecopy = ptr; 675 } 676 677 void crash_save_vmcoreinfo(void) 678 { 679 if (!vmcoreinfo_note) 680 return; 681 682 /* Use the safe copy to generate vmcoreinfo note if have */ 683 if (vmcoreinfo_data_safecopy) 684 vmcoreinfo_data = vmcoreinfo_data_safecopy; 685 686 vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds()); 687 update_vmcoreinfo_note(); 688 } 689 690 void vmcoreinfo_append_str(const char *fmt, ...) 691 { 692 va_list args; 693 char buf[0x50]; 694 size_t r; 695 696 va_start(args, fmt); 697 r = vscnprintf(buf, sizeof(buf), fmt, args); 698 va_end(args); 699 700 r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); 701 702 memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); 703 704 vmcoreinfo_size += r; 705 706 WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES, 707 "vmcoreinfo data exceeds allocated size, truncating"); 708 } 709 710 /* 711 * provide an empty default implementation here -- architecture 712 * code may override this 713 */ 714 void __weak arch_crash_save_vmcoreinfo(void) 715 {} 716 717 phys_addr_t __weak paddr_vmcoreinfo_note(void) 718 { 719 return __pa(vmcoreinfo_note); 720 } 721 EXPORT_SYMBOL(paddr_vmcoreinfo_note); 722 723 static int __init crash_save_vmcoreinfo_init(void) 724 { 725 vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); 726 if (!vmcoreinfo_data) { 727 pr_warn("Memory allocation for vmcoreinfo_data failed\n"); 728 return -ENOMEM; 729 } 730 731 vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, 732 GFP_KERNEL | __GFP_ZERO); 733 if (!vmcoreinfo_note) { 734 free_page((unsigned long)vmcoreinfo_data); 735 vmcoreinfo_data = NULL; 736 pr_warn("Memory allocation for vmcoreinfo_note failed\n"); 737 return -ENOMEM; 738 } 739 740 VMCOREINFO_OSRELEASE(init_uts_ns.name.release); 741 VMCOREINFO_BUILD_ID(); 742 VMCOREINFO_PAGESIZE(PAGE_SIZE); 743 744 VMCOREINFO_SYMBOL(init_uts_ns); 745 VMCOREINFO_OFFSET(uts_namespace, name); 746 VMCOREINFO_SYMBOL(node_online_map); 747 #ifdef CONFIG_MMU 748 VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); 749 #endif 750 VMCOREINFO_SYMBOL(_stext); 751 VMCOREINFO_SYMBOL(vmap_area_list); 752 753 #ifndef CONFIG_NUMA 754 VMCOREINFO_SYMBOL(mem_map); 755 VMCOREINFO_SYMBOL(contig_page_data); 756 #endif 757 #ifdef CONFIG_SPARSEMEM 758 VMCOREINFO_SYMBOL_ARRAY(mem_section); 759 VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); 760 VMCOREINFO_STRUCT_SIZE(mem_section); 761 VMCOREINFO_OFFSET(mem_section, section_mem_map); 762 VMCOREINFO_NUMBER(SECTION_SIZE_BITS); 763 VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); 764 #endif 765 VMCOREINFO_STRUCT_SIZE(page); 766 VMCOREINFO_STRUCT_SIZE(pglist_data); 767 VMCOREINFO_STRUCT_SIZE(zone); 768 VMCOREINFO_STRUCT_SIZE(free_area); 769 VMCOREINFO_STRUCT_SIZE(list_head); 770 VMCOREINFO_SIZE(nodemask_t); 771 VMCOREINFO_OFFSET(page, flags); 772 VMCOREINFO_OFFSET(page, _refcount); 773 VMCOREINFO_OFFSET(page, mapping); 774 VMCOREINFO_OFFSET(page, lru); 775 VMCOREINFO_OFFSET(page, _mapcount); 776 VMCOREINFO_OFFSET(page, private); 777 VMCOREINFO_OFFSET(page, compound_head); 778 VMCOREINFO_OFFSET(pglist_data, node_zones); 779 VMCOREINFO_OFFSET(pglist_data, nr_zones); 780 #ifdef CONFIG_FLATMEM 781 VMCOREINFO_OFFSET(pglist_data, node_mem_map); 782 #endif 783 VMCOREINFO_OFFSET(pglist_data, node_start_pfn); 784 VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); 785 VMCOREINFO_OFFSET(pglist_data, node_id); 786 VMCOREINFO_OFFSET(zone, free_area); 787 VMCOREINFO_OFFSET(zone, vm_stat); 788 VMCOREINFO_OFFSET(zone, spanned_pages); 789 VMCOREINFO_OFFSET(free_area, free_list); 790 VMCOREINFO_OFFSET(list_head, next); 791 VMCOREINFO_OFFSET(list_head, prev); 792 VMCOREINFO_OFFSET(vmap_area, va_start); 793 VMCOREINFO_OFFSET(vmap_area, list); 794 VMCOREINFO_LENGTH(zone.free_area, NR_PAGE_ORDERS); 795 log_buf_vmcoreinfo_setup(); 796 VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); 797 VMCOREINFO_NUMBER(NR_FREE_PAGES); 798 VMCOREINFO_NUMBER(PG_lru); 799 VMCOREINFO_NUMBER(PG_private); 800 VMCOREINFO_NUMBER(PG_swapcache); 801 VMCOREINFO_NUMBER(PG_swapbacked); 802 VMCOREINFO_NUMBER(PG_slab); 803 #ifdef CONFIG_MEMORY_FAILURE 804 VMCOREINFO_NUMBER(PG_hwpoison); 805 #endif 806 VMCOREINFO_NUMBER(PG_head_mask); 807 #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) 808 VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); 809 #ifdef CONFIG_HUGETLB_PAGE 810 VMCOREINFO_NUMBER(PG_hugetlb); 811 #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) 812 VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); 813 #endif 814 815 #ifdef CONFIG_KALLSYMS 816 VMCOREINFO_SYMBOL(kallsyms_names); 817 VMCOREINFO_SYMBOL(kallsyms_num_syms); 818 VMCOREINFO_SYMBOL(kallsyms_token_table); 819 VMCOREINFO_SYMBOL(kallsyms_token_index); 820 #ifdef CONFIG_KALLSYMS_BASE_RELATIVE 821 VMCOREINFO_SYMBOL(kallsyms_offsets); 822 VMCOREINFO_SYMBOL(kallsyms_relative_base); 823 #else 824 VMCOREINFO_SYMBOL(kallsyms_addresses); 825 #endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ 826 #endif /* CONFIG_KALLSYMS */ 827 828 arch_crash_save_vmcoreinfo(); 829 update_vmcoreinfo_note(); 830 831 return 0; 832 } 833 834 subsys_initcall(crash_save_vmcoreinfo_init); 835 836 static int __init crash_notes_memory_init(void) 837 { 838 /* Allocate memory for saving cpu registers. */ 839 size_t size, align; 840 841 /* 842 * crash_notes could be allocated across 2 vmalloc pages when percpu 843 * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc 844 * pages are also on 2 continuous physical pages. In this case the 845 * 2nd part of crash_notes in 2nd page could be lost since only the 846 * starting address and size of crash_notes are exported through sysfs. 847 * Here round up the size of crash_notes to the nearest power of two 848 * and pass it to __alloc_percpu as align value. This can make sure 849 * crash_notes is allocated inside one physical page. 850 */ 851 size = sizeof(note_buf_t); 852 align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); 853 854 /* 855 * Break compile if size is bigger than PAGE_SIZE since crash_notes 856 * definitely will be in 2 pages with that. 857 */ 858 BUILD_BUG_ON(size > PAGE_SIZE); 859 860 crash_notes = __alloc_percpu(size, align); 861 if (!crash_notes) { 862 pr_warn("Memory allocation for saving cpu register states failed\n"); 863 return -ENOMEM; 864 } 865 return 0; 866 } 867 subsys_initcall(crash_notes_memory_init); 868 869 #ifdef CONFIG_CRASH_HOTPLUG 870 #undef pr_fmt 871 #define pr_fmt(fmt) "crash hp: " fmt 872 873 /* 874 * Different than kexec/kdump loading/unloading/jumping/shrinking which 875 * usually rarely happen, there will be many crash hotplug events notified 876 * during one short period, e.g one memory board is hot added and memory 877 * regions are online. So mutex lock __crash_hotplug_lock is used to 878 * serialize the crash hotplug handling specifically. 879 */ 880 static DEFINE_MUTEX(__crash_hotplug_lock); 881 #define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock) 882 #define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock) 883 884 /* 885 * This routine utilized when the crash_hotplug sysfs node is read. 886 * It reflects the kernel's ability/permission to update the crash 887 * elfcorehdr directly. 888 */ 889 int crash_check_update_elfcorehdr(void) 890 { 891 int rc = 0; 892 893 crash_hotplug_lock(); 894 /* Obtain lock while reading crash information */ 895 if (!kexec_trylock()) { 896 pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); 897 crash_hotplug_unlock(); 898 return 0; 899 } 900 if (kexec_crash_image) { 901 if (kexec_crash_image->file_mode) 902 rc = 1; 903 else 904 rc = kexec_crash_image->update_elfcorehdr; 905 } 906 /* Release lock now that update complete */ 907 kexec_unlock(); 908 crash_hotplug_unlock(); 909 910 return rc; 911 } 912 913 /* 914 * To accurately reflect hot un/plug changes of cpu and memory resources 915 * (including onling and offlining of those resources), the elfcorehdr 916 * (which is passed to the crash kernel via the elfcorehdr= parameter) 917 * must be updated with the new list of CPUs and memories. 918 * 919 * In order to make changes to elfcorehdr, two conditions are needed: 920 * First, the segment containing the elfcorehdr must be large enough 921 * to permit a growing number of resources; the elfcorehdr memory size 922 * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. 923 * Second, purgatory must explicitly exclude the elfcorehdr from the 924 * list of segments it checks (since the elfcorehdr changes and thus 925 * would require an update to purgatory itself to update the digest). 926 */ 927 static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) 928 { 929 struct kimage *image; 930 931 crash_hotplug_lock(); 932 /* Obtain lock while changing crash information */ 933 if (!kexec_trylock()) { 934 pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n"); 935 crash_hotplug_unlock(); 936 return; 937 } 938 939 /* Check kdump is not loaded */ 940 if (!kexec_crash_image) 941 goto out; 942 943 image = kexec_crash_image; 944 945 /* Check that updating elfcorehdr is permitted */ 946 if (!(image->file_mode || image->update_elfcorehdr)) 947 goto out; 948 949 if (hp_action == KEXEC_CRASH_HP_ADD_CPU || 950 hp_action == KEXEC_CRASH_HP_REMOVE_CPU) 951 pr_debug("hp_action %u, cpu %u\n", hp_action, cpu); 952 else 953 pr_debug("hp_action %u\n", hp_action); 954 955 /* 956 * The elfcorehdr_index is set to -1 when the struct kimage 957 * is allocated. Find the segment containing the elfcorehdr, 958 * if not already found. 959 */ 960 if (image->elfcorehdr_index < 0) { 961 unsigned long mem; 962 unsigned char *ptr; 963 unsigned int n; 964 965 for (n = 0; n < image->nr_segments; n++) { 966 mem = image->segment[n].mem; 967 ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); 968 if (ptr) { 969 /* The segment containing elfcorehdr */ 970 if (memcmp(ptr, ELFMAG, SELFMAG) == 0) 971 image->elfcorehdr_index = (int)n; 972 kunmap_local(ptr); 973 } 974 } 975 } 976 977 if (image->elfcorehdr_index < 0) { 978 pr_err("unable to locate elfcorehdr segment"); 979 goto out; 980 } 981 982 /* Needed in order for the segments to be updated */ 983 arch_kexec_unprotect_crashkres(); 984 985 /* Differentiate between normal load and hotplug update */ 986 image->hp_action = hp_action; 987 988 /* Now invoke arch-specific update handler */ 989 arch_crash_handle_hotplug_event(image); 990 991 /* No longer handling a hotplug event */ 992 image->hp_action = KEXEC_CRASH_HP_NONE; 993 image->elfcorehdr_updated = true; 994 995 /* Change back to read-only */ 996 arch_kexec_protect_crashkres(); 997 998 /* Errors in the callback is not a reason to rollback state */ 999 out: 1000 /* Release lock now that update complete */ 1001 kexec_unlock(); 1002 crash_hotplug_unlock(); 1003 } 1004 1005 static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v) 1006 { 1007 switch (val) { 1008 case MEM_ONLINE: 1009 crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, 1010 KEXEC_CRASH_HP_INVALID_CPU); 1011 break; 1012 1013 case MEM_OFFLINE: 1014 crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, 1015 KEXEC_CRASH_HP_INVALID_CPU); 1016 break; 1017 } 1018 return NOTIFY_OK; 1019 } 1020 1021 static struct notifier_block crash_memhp_nb = { 1022 .notifier_call = crash_memhp_notifier, 1023 .priority = 0 1024 }; 1025 1026 static int crash_cpuhp_online(unsigned int cpu) 1027 { 1028 crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu); 1029 return 0; 1030 } 1031 1032 static int crash_cpuhp_offline(unsigned int cpu) 1033 { 1034 crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu); 1035 return 0; 1036 } 1037 1038 static int __init crash_hotplug_init(void) 1039 { 1040 int result = 0; 1041 1042 if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) 1043 register_memory_notifier(&crash_memhp_nb); 1044 1045 if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { 1046 result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN, 1047 "crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline); 1048 } 1049 1050 return result; 1051 } 1052 1053 subsys_initcall(crash_hotplug_init); 1054 #endif 1055