1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash 4 * dump with assistance from firmware. This approach does not use kexec, 5 * instead firmware assists in booting the kdump kernel while preserving 6 * memory contents. The most of the code implementation has been adapted 7 * from phyp assisted dump implementation written by Linas Vepstas and 8 * Manish Ahuja 9 * 10 * Copyright 2011 IBM Corporation 11 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 12 */ 13 14 #undef DEBUG 15 #define pr_fmt(fmt) "fadump: " fmt 16 17 #include <linux/string.h> 18 #include <linux/memblock.h> 19 #include <linux/delay.h> 20 #include <linux/seq_file.h> 21 #include <linux/crash_dump.h> 22 #include <linux/kobject.h> 23 #include <linux/sysfs.h> 24 #include <linux/slab.h> 25 #include <linux/cma.h> 26 #include <linux/hugetlb.h> 27 28 #include <asm/debugfs.h> 29 #include <asm/page.h> 30 #include <asm/prom.h> 31 #include <asm/rtas.h> 32 #include <asm/fadump.h> 33 #include <asm/setup.h> 34 35 static struct fw_dump fw_dump; 36 static struct fadump_mem_struct fdm; 37 static const struct fadump_mem_struct *fdm_active; 38 #ifdef CONFIG_CMA 39 static struct cma *fadump_cma; 40 #endif 41 42 static DEFINE_MUTEX(fadump_mutex); 43 struct fad_crash_memory_ranges *crash_memory_ranges; 44 int crash_memory_ranges_size; 45 int crash_mem_ranges; 46 int max_crash_mem_ranges; 47 48 #ifdef CONFIG_CMA 49 /* 50 * fadump_cma_init() - Initialize CMA area from a fadump reserved memory 51 * 52 * This function initializes CMA area from fadump reserved memory. 53 * The total size of fadump reserved memory covers for boot memory size 54 * + cpu data size + hpte size and metadata. 55 * Initialize only the area equivalent to boot memory size for CMA use. 56 * The reamining portion of fadump reserved memory will be not given 57 * to CMA and pages for thoes will stay reserved. boot memory size is 58 * aligned per CMA requirement to satisy cma_init_reserved_mem() call. 59 * But for some reason even if it fails we still have the memory reservation 60 * with us and we can still continue doing fadump. 61 */ 62 int __init fadump_cma_init(void) 63 { 64 unsigned long long base, size; 65 int rc; 66 67 if (!fw_dump.fadump_enabled) 68 return 0; 69 70 /* 71 * Do not use CMA if user has provided fadump=nocma kernel parameter. 72 * Return 1 to continue with fadump old behaviour. 73 */ 74 if (fw_dump.nocma) 75 return 1; 76 77 base = fw_dump.reserve_dump_area_start; 78 size = fw_dump.boot_memory_size; 79 80 if (!size) 81 return 0; 82 83 rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); 84 if (rc) { 85 pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc); 86 /* 87 * Though the CMA init has failed we still have memory 88 * reservation with us. The reserved memory will be 89 * blocked from production system usage. Hence return 1, 90 * so that we can continue with fadump. 91 */ 92 return 1; 93 } 94 95 /* 96 * So we now have successfully initialized cma area for fadump. 97 */ 98 pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " 99 "bytes of memory reserved for firmware-assisted dump\n", 100 cma_get_size(fadump_cma), 101 (unsigned long)cma_get_base(fadump_cma) >> 20, 102 fw_dump.reserve_dump_area_size); 103 return 1; 104 } 105 #else 106 static int __init fadump_cma_init(void) { return 1; } 107 #endif /* CONFIG_CMA */ 108 109 /* Scan the Firmware Assisted dump configuration details. */ 110 int __init early_init_dt_scan_fw_dump(unsigned long node, 111 const char *uname, int depth, void *data) 112 { 113 const __be32 *sections; 114 int i, num_sections; 115 int size; 116 const __be32 *token; 117 118 if (depth != 1 || strcmp(uname, "rtas") != 0) 119 return 0; 120 121 /* 122 * Check if Firmware Assisted dump is supported. if yes, check 123 * if dump has been initiated on last reboot. 124 */ 125 token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL); 126 if (!token) 127 return 1; 128 129 fw_dump.fadump_supported = 1; 130 fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token); 131 132 /* 133 * The 'ibm,kernel-dump' rtas node is present only if there is 134 * dump data waiting for us. 135 */ 136 fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL); 137 if (fdm_active) 138 fw_dump.dump_active = 1; 139 140 /* Get the sizes required to store dump data for the firmware provided 141 * dump sections. 142 * For each dump section type supported, a 32bit cell which defines 143 * the ID of a supported section followed by two 32 bit cells which 144 * gives teh size of the section in bytes. 145 */ 146 sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes", 147 &size); 148 149 if (!sections) 150 return 1; 151 152 num_sections = size / (3 * sizeof(u32)); 153 154 for (i = 0; i < num_sections; i++, sections += 3) { 155 u32 type = (u32)of_read_number(sections, 1); 156 157 switch (type) { 158 case FADUMP_CPU_STATE_DATA: 159 fw_dump.cpu_state_data_size = 160 of_read_ulong(§ions[1], 2); 161 break; 162 case FADUMP_HPTE_REGION: 163 fw_dump.hpte_region_size = 164 of_read_ulong(§ions[1], 2); 165 break; 166 } 167 } 168 169 return 1; 170 } 171 172 /* 173 * If fadump is registered, check if the memory provided 174 * falls within boot memory area and reserved memory area. 175 */ 176 int is_fadump_memory_area(u64 addr, ulong size) 177 { 178 u64 d_start = fw_dump.reserve_dump_area_start; 179 u64 d_end = d_start + fw_dump.reserve_dump_area_size; 180 181 if (!fw_dump.dump_registered) 182 return 0; 183 184 if (((addr + size) > d_start) && (addr <= d_end)) 185 return 1; 186 187 return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size; 188 } 189 190 int should_fadump_crash(void) 191 { 192 if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr) 193 return 0; 194 return 1; 195 } 196 197 int is_fadump_active(void) 198 { 199 return fw_dump.dump_active; 200 } 201 202 /* 203 * Returns 1, if there are no holes in boot memory area, 204 * 0 otherwise. 205 */ 206 static int is_boot_memory_area_contiguous(void) 207 { 208 struct memblock_region *reg; 209 unsigned long tstart, tend; 210 unsigned long start_pfn = PHYS_PFN(RMA_START); 211 unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size); 212 unsigned int ret = 0; 213 214 for_each_memblock(memory, reg) { 215 tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); 216 tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); 217 if (tstart < tend) { 218 /* Memory hole from start_pfn to tstart */ 219 if (tstart > start_pfn) 220 break; 221 222 if (tend == end_pfn) { 223 ret = 1; 224 break; 225 } 226 227 start_pfn = tend + 1; 228 } 229 } 230 231 return ret; 232 } 233 234 /* 235 * Returns true, if there are no holes in reserved memory area, 236 * false otherwise. 237 */ 238 static bool is_reserved_memory_area_contiguous(void) 239 { 240 struct memblock_region *reg; 241 unsigned long start, end; 242 unsigned long d_start = fw_dump.reserve_dump_area_start; 243 unsigned long d_end = d_start + fw_dump.reserve_dump_area_size; 244 245 for_each_memblock(memory, reg) { 246 start = max(d_start, (unsigned long)reg->base); 247 end = min(d_end, (unsigned long)(reg->base + reg->size)); 248 if (d_start < end) { 249 /* Memory hole from d_start to start */ 250 if (start > d_start) 251 break; 252 253 if (end == d_end) 254 return true; 255 256 d_start = end + 1; 257 } 258 } 259 260 return false; 261 } 262 263 /* Print firmware assisted dump configurations for debugging purpose. */ 264 static void fadump_show_config(void) 265 { 266 pr_debug("Support for firmware-assisted dump (fadump): %s\n", 267 (fw_dump.fadump_supported ? "present" : "no support")); 268 269 if (!fw_dump.fadump_supported) 270 return; 271 272 pr_debug("Fadump enabled : %s\n", 273 (fw_dump.fadump_enabled ? "yes" : "no")); 274 pr_debug("Dump Active : %s\n", 275 (fw_dump.dump_active ? "yes" : "no")); 276 pr_debug("Dump section sizes:\n"); 277 pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size); 278 pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size); 279 pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size); 280 } 281 282 static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm, 283 unsigned long addr) 284 { 285 if (!fdm) 286 return 0; 287 288 memset(fdm, 0, sizeof(struct fadump_mem_struct)); 289 addr = addr & PAGE_MASK; 290 291 fdm->header.dump_format_version = cpu_to_be32(0x00000001); 292 fdm->header.dump_num_sections = cpu_to_be16(3); 293 fdm->header.dump_status_flag = 0; 294 fdm->header.offset_first_dump_section = 295 cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data)); 296 297 /* 298 * Fields for disk dump option. 299 * We are not using disk dump option, hence set these fields to 0. 300 */ 301 fdm->header.dd_block_size = 0; 302 fdm->header.dd_block_offset = 0; 303 fdm->header.dd_num_blocks = 0; 304 fdm->header.dd_offset_disk_path = 0; 305 306 /* set 0 to disable an automatic dump-reboot. */ 307 fdm->header.max_time_auto = 0; 308 309 /* Kernel dump sections */ 310 /* cpu state data section. */ 311 fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG); 312 fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA); 313 fdm->cpu_state_data.source_address = 0; 314 fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size); 315 fdm->cpu_state_data.destination_address = cpu_to_be64(addr); 316 addr += fw_dump.cpu_state_data_size; 317 318 /* hpte region section */ 319 fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG); 320 fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION); 321 fdm->hpte_region.source_address = 0; 322 fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size); 323 fdm->hpte_region.destination_address = cpu_to_be64(addr); 324 addr += fw_dump.hpte_region_size; 325 326 /* RMA region section */ 327 fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG); 328 fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION); 329 fdm->rmr_region.source_address = cpu_to_be64(RMA_START); 330 fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size); 331 fdm->rmr_region.destination_address = cpu_to_be64(addr); 332 addr += fw_dump.boot_memory_size; 333 334 return addr; 335 } 336 337 /** 338 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM 339 * 340 * Function to find the largest memory size we need to reserve during early 341 * boot process. This will be the size of the memory that is required for a 342 * kernel to boot successfully. 343 * 344 * This function has been taken from phyp-assisted dump feature implementation. 345 * 346 * returns larger of 256MB or 5% rounded down to multiples of 256MB. 347 * 348 * TODO: Come up with better approach to find out more accurate memory size 349 * that is required for a kernel to boot successfully. 350 * 351 */ 352 static inline unsigned long fadump_calculate_reserve_size(void) 353 { 354 int ret; 355 unsigned long long base, size; 356 357 if (fw_dump.reserve_bootvar) 358 pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n"); 359 360 /* 361 * Check if the size is specified through crashkernel= cmdline 362 * option. If yes, then use that but ignore base as fadump reserves 363 * memory at a predefined offset. 364 */ 365 ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), 366 &size, &base); 367 if (ret == 0 && size > 0) { 368 unsigned long max_size; 369 370 if (fw_dump.reserve_bootvar) 371 pr_info("Using 'crashkernel=' parameter for memory reservation.\n"); 372 373 fw_dump.reserve_bootvar = (unsigned long)size; 374 375 /* 376 * Adjust if the boot memory size specified is above 377 * the upper limit. 378 */ 379 max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO; 380 if (fw_dump.reserve_bootvar > max_size) { 381 fw_dump.reserve_bootvar = max_size; 382 pr_info("Adjusted boot memory size to %luMB\n", 383 (fw_dump.reserve_bootvar >> 20)); 384 } 385 386 return fw_dump.reserve_bootvar; 387 } else if (fw_dump.reserve_bootvar) { 388 /* 389 * 'fadump_reserve_mem=' is being used to reserve memory 390 * for firmware-assisted dump. 391 */ 392 return fw_dump.reserve_bootvar; 393 } 394 395 /* divide by 20 to get 5% of value */ 396 size = memblock_phys_mem_size() / 20; 397 398 /* round it down in multiples of 256 */ 399 size = size & ~0x0FFFFFFFUL; 400 401 /* Truncate to memory_limit. We don't want to over reserve the memory.*/ 402 if (memory_limit && size > memory_limit) 403 size = memory_limit; 404 405 return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM); 406 } 407 408 /* 409 * Calculate the total memory size required to be reserved for 410 * firmware-assisted dump registration. 411 */ 412 static unsigned long get_fadump_area_size(void) 413 { 414 unsigned long size = 0; 415 416 size += fw_dump.cpu_state_data_size; 417 size += fw_dump.hpte_region_size; 418 size += fw_dump.boot_memory_size; 419 size += sizeof(struct fadump_crash_info_header); 420 size += sizeof(struct elfhdr); /* ELF core header.*/ 421 size += sizeof(struct elf_phdr); /* place holder for cpu notes */ 422 /* Program headers for crash memory regions. */ 423 size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); 424 425 size = PAGE_ALIGN(size); 426 return size; 427 } 428 429 static void __init fadump_reserve_crash_area(unsigned long base, 430 unsigned long size) 431 { 432 struct memblock_region *reg; 433 unsigned long mstart, mend, msize; 434 435 for_each_memblock(memory, reg) { 436 mstart = max_t(unsigned long, base, reg->base); 437 mend = reg->base + reg->size; 438 mend = min(base + size, mend); 439 440 if (mstart < mend) { 441 msize = mend - mstart; 442 memblock_reserve(mstart, msize); 443 pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n", 444 (msize >> 20), mstart); 445 } 446 } 447 } 448 449 int __init fadump_reserve_mem(void) 450 { 451 unsigned long base, size, memory_boundary; 452 453 if (!fw_dump.fadump_enabled) 454 return 0; 455 456 if (!fw_dump.fadump_supported) { 457 printk(KERN_INFO "Firmware-assisted dump is not supported on" 458 " this hardware\n"); 459 fw_dump.fadump_enabled = 0; 460 return 0; 461 } 462 /* 463 * Initialize boot memory size 464 * If dump is active then we have already calculated the size during 465 * first kernel. 466 */ 467 if (fdm_active) 468 fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len); 469 else { 470 fw_dump.boot_memory_size = fadump_calculate_reserve_size(); 471 #ifdef CONFIG_CMA 472 if (!fw_dump.nocma) 473 fw_dump.boot_memory_size = 474 ALIGN(fw_dump.boot_memory_size, 475 FADUMP_CMA_ALIGNMENT); 476 #endif 477 } 478 479 /* 480 * Calculate the memory boundary. 481 * If memory_limit is less than actual memory boundary then reserve 482 * the memory for fadump beyond the memory_limit and adjust the 483 * memory_limit accordingly, so that the running kernel can run with 484 * specified memory_limit. 485 */ 486 if (memory_limit && memory_limit < memblock_end_of_DRAM()) { 487 size = get_fadump_area_size(); 488 if ((memory_limit + size) < memblock_end_of_DRAM()) 489 memory_limit += size; 490 else 491 memory_limit = memblock_end_of_DRAM(); 492 printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" 493 " dump, now %#016llx\n", memory_limit); 494 } 495 if (memory_limit) 496 memory_boundary = memory_limit; 497 else 498 memory_boundary = memblock_end_of_DRAM(); 499 500 if (fw_dump.dump_active) { 501 pr_info("Firmware-assisted dump is active.\n"); 502 503 #ifdef CONFIG_HUGETLB_PAGE 504 /* 505 * FADump capture kernel doesn't care much about hugepages. 506 * In fact, handling hugepages in capture kernel is asking for 507 * trouble. So, disable HugeTLB support when fadump is active. 508 */ 509 hugetlb_disabled = true; 510 #endif 511 /* 512 * If last boot has crashed then reserve all the memory 513 * above boot_memory_size so that we don't touch it until 514 * dump is written to disk by userspace tool. This memory 515 * will be released for general use once the dump is saved. 516 */ 517 base = fw_dump.boot_memory_size; 518 size = memory_boundary - base; 519 fadump_reserve_crash_area(base, size); 520 521 fw_dump.fadumphdr_addr = 522 be64_to_cpu(fdm_active->rmr_region.destination_address) + 523 be64_to_cpu(fdm_active->rmr_region.source_len); 524 pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr); 525 fw_dump.reserve_dump_area_start = base; 526 fw_dump.reserve_dump_area_size = size; 527 } else { 528 size = get_fadump_area_size(); 529 530 /* 531 * Reserve memory at an offset closer to bottom of the RAM to 532 * minimize the impact of memory hot-remove operation. We can't 533 * use memblock_find_in_range() here since it doesn't allocate 534 * from bottom to top. 535 */ 536 for (base = fw_dump.boot_memory_size; 537 base <= (memory_boundary - size); 538 base += size) { 539 if (memblock_is_region_memory(base, size) && 540 !memblock_is_region_reserved(base, size)) 541 break; 542 } 543 if ((base > (memory_boundary - size)) || 544 memblock_reserve(base, size)) { 545 pr_err("Failed to reserve memory\n"); 546 return 0; 547 } 548 549 pr_info("Reserved %ldMB of memory at %ldMB for firmware-" 550 "assisted dump (System RAM: %ldMB)\n", 551 (unsigned long)(size >> 20), 552 (unsigned long)(base >> 20), 553 (unsigned long)(memblock_phys_mem_size() >> 20)); 554 555 fw_dump.reserve_dump_area_start = base; 556 fw_dump.reserve_dump_area_size = size; 557 return fadump_cma_init(); 558 } 559 return 1; 560 } 561 562 unsigned long __init arch_reserved_kernel_pages(void) 563 { 564 return memblock_reserved_size() / PAGE_SIZE; 565 } 566 567 /* Look for fadump= cmdline option. */ 568 static int __init early_fadump_param(char *p) 569 { 570 if (!p) 571 return 1; 572 573 if (strncmp(p, "on", 2) == 0) 574 fw_dump.fadump_enabled = 1; 575 else if (strncmp(p, "off", 3) == 0) 576 fw_dump.fadump_enabled = 0; 577 else if (strncmp(p, "nocma", 5) == 0) { 578 fw_dump.fadump_enabled = 1; 579 fw_dump.nocma = 1; 580 } 581 582 return 0; 583 } 584 early_param("fadump", early_fadump_param); 585 586 /* 587 * Look for fadump_reserve_mem= cmdline option 588 * TODO: Remove references to 'fadump_reserve_mem=' parameter, 589 * the sooner 'crashkernel=' parameter is accustomed to. 590 */ 591 static int __init early_fadump_reserve_mem(char *p) 592 { 593 if (p) 594 fw_dump.reserve_bootvar = memparse(p, &p); 595 return 0; 596 } 597 early_param("fadump_reserve_mem", early_fadump_reserve_mem); 598 599 static int register_fw_dump(struct fadump_mem_struct *fdm) 600 { 601 int rc, err; 602 unsigned int wait_time; 603 604 pr_debug("Registering for firmware-assisted kernel dump...\n"); 605 606 /* TODO: Add upper time limit for the delay */ 607 do { 608 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, 609 FADUMP_REGISTER, fdm, 610 sizeof(struct fadump_mem_struct)); 611 612 wait_time = rtas_busy_delay_time(rc); 613 if (wait_time) 614 mdelay(wait_time); 615 616 } while (wait_time); 617 618 err = -EIO; 619 switch (rc) { 620 default: 621 pr_err("Failed to register. Unknown Error(%d).\n", rc); 622 break; 623 case -1: 624 printk(KERN_ERR "Failed to register firmware-assisted kernel" 625 " dump. Hardware Error(%d).\n", rc); 626 break; 627 case -3: 628 if (!is_boot_memory_area_contiguous()) 629 pr_err("Can't have holes in boot memory area while registering fadump\n"); 630 else if (!is_reserved_memory_area_contiguous()) 631 pr_err("Can't have holes in reserved memory area while" 632 " registering fadump\n"); 633 634 printk(KERN_ERR "Failed to register firmware-assisted kernel" 635 " dump. Parameter Error(%d).\n", rc); 636 err = -EINVAL; 637 break; 638 case -9: 639 printk(KERN_ERR "firmware-assisted kernel dump is already " 640 " registered."); 641 fw_dump.dump_registered = 1; 642 err = -EEXIST; 643 break; 644 case 0: 645 printk(KERN_INFO "firmware-assisted kernel dump registration" 646 " is successful\n"); 647 fw_dump.dump_registered = 1; 648 err = 0; 649 break; 650 } 651 return err; 652 } 653 654 void crash_fadump(struct pt_regs *regs, const char *str) 655 { 656 struct fadump_crash_info_header *fdh = NULL; 657 int old_cpu, this_cpu; 658 659 if (!should_fadump_crash()) 660 return; 661 662 /* 663 * old_cpu == -1 means this is the first CPU which has come here, 664 * go ahead and trigger fadump. 665 * 666 * old_cpu != -1 means some other CPU has already on it's way 667 * to trigger fadump, just keep looping here. 668 */ 669 this_cpu = smp_processor_id(); 670 old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); 671 672 if (old_cpu != -1) { 673 /* 674 * We can't loop here indefinitely. Wait as long as fadump 675 * is in force. If we race with fadump un-registration this 676 * loop will break and then we go down to normal panic path 677 * and reboot. If fadump is in force the first crashing 678 * cpu will definitely trigger fadump. 679 */ 680 while (fw_dump.dump_registered) 681 cpu_relax(); 682 return; 683 } 684 685 fdh = __va(fw_dump.fadumphdr_addr); 686 fdh->crashing_cpu = crashing_cpu; 687 crash_save_vmcoreinfo(); 688 689 if (regs) 690 fdh->regs = *regs; 691 else 692 ppc_save_regs(&fdh->regs); 693 694 fdh->online_mask = *cpu_online_mask; 695 696 /* Call ibm,os-term rtas call to trigger firmware assisted dump */ 697 rtas_os_term((char *)str); 698 } 699 700 #define GPR_MASK 0xffffff0000000000 701 static inline int fadump_gpr_index(u64 id) 702 { 703 int i = -1; 704 char str[3]; 705 706 if ((id & GPR_MASK) == REG_ID("GPR")) { 707 /* get the digits at the end */ 708 id &= ~GPR_MASK; 709 id >>= 24; 710 str[2] = '\0'; 711 str[1] = id & 0xff; 712 str[0] = (id >> 8) & 0xff; 713 sscanf(str, "%d", &i); 714 if (i > 31) 715 i = -1; 716 } 717 return i; 718 } 719 720 static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, 721 u64 reg_val) 722 { 723 int i; 724 725 i = fadump_gpr_index(reg_id); 726 if (i >= 0) 727 regs->gpr[i] = (unsigned long)reg_val; 728 else if (reg_id == REG_ID("NIA")) 729 regs->nip = (unsigned long)reg_val; 730 else if (reg_id == REG_ID("MSR")) 731 regs->msr = (unsigned long)reg_val; 732 else if (reg_id == REG_ID("CTR")) 733 regs->ctr = (unsigned long)reg_val; 734 else if (reg_id == REG_ID("LR")) 735 regs->link = (unsigned long)reg_val; 736 else if (reg_id == REG_ID("XER")) 737 regs->xer = (unsigned long)reg_val; 738 else if (reg_id == REG_ID("CR")) 739 regs->ccr = (unsigned long)reg_val; 740 else if (reg_id == REG_ID("DAR")) 741 regs->dar = (unsigned long)reg_val; 742 else if (reg_id == REG_ID("DSISR")) 743 regs->dsisr = (unsigned long)reg_val; 744 } 745 746 static struct fadump_reg_entry* 747 fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) 748 { 749 memset(regs, 0, sizeof(struct pt_regs)); 750 751 while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) { 752 fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), 753 be64_to_cpu(reg_entry->reg_value)); 754 reg_entry++; 755 } 756 reg_entry++; 757 return reg_entry; 758 } 759 760 static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) 761 { 762 struct elf_prstatus prstatus; 763 764 memset(&prstatus, 0, sizeof(prstatus)); 765 /* 766 * FIXME: How do i get PID? Do I really need it? 767 * prstatus.pr_pid = ???? 768 */ 769 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); 770 buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, 771 &prstatus, sizeof(prstatus)); 772 return buf; 773 } 774 775 static void fadump_update_elfcore_header(char *bufp) 776 { 777 struct elfhdr *elf; 778 struct elf_phdr *phdr; 779 780 elf = (struct elfhdr *)bufp; 781 bufp += sizeof(struct elfhdr); 782 783 /* First note is a place holder for cpu notes info. */ 784 phdr = (struct elf_phdr *)bufp; 785 786 if (phdr->p_type == PT_NOTE) { 787 phdr->p_paddr = fw_dump.cpu_notes_buf; 788 phdr->p_offset = phdr->p_paddr; 789 phdr->p_filesz = fw_dump.cpu_notes_buf_size; 790 phdr->p_memsz = fw_dump.cpu_notes_buf_size; 791 } 792 return; 793 } 794 795 static void *fadump_cpu_notes_buf_alloc(unsigned long size) 796 { 797 void *vaddr; 798 struct page *page; 799 unsigned long order, count, i; 800 801 order = get_order(size); 802 vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order); 803 if (!vaddr) 804 return NULL; 805 806 count = 1 << order; 807 page = virt_to_page(vaddr); 808 for (i = 0; i < count; i++) 809 SetPageReserved(page + i); 810 return vaddr; 811 } 812 813 static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size) 814 { 815 struct page *page; 816 unsigned long order, count, i; 817 818 order = get_order(size); 819 count = 1 << order; 820 page = virt_to_page(vaddr); 821 for (i = 0; i < count; i++) 822 ClearPageReserved(page + i); 823 __free_pages(page, order); 824 } 825 826 /* 827 * Read CPU state dump data and convert it into ELF notes. 828 * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be 829 * used to access the data to allow for additional fields to be added without 830 * affecting compatibility. Each list of registers for a CPU starts with 831 * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, 832 * 8 Byte ASCII identifier and 8 Byte register value. The register entry 833 * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part 834 * of register value. For more details refer to PAPR document. 835 * 836 * Only for the crashing cpu we ignore the CPU dump data and get exact 837 * state from fadump crash info structure populated by first kernel at the 838 * time of crash. 839 */ 840 static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm) 841 { 842 struct fadump_reg_save_area_header *reg_header; 843 struct fadump_reg_entry *reg_entry; 844 struct fadump_crash_info_header *fdh = NULL; 845 void *vaddr; 846 unsigned long addr; 847 u32 num_cpus, *note_buf; 848 struct pt_regs regs; 849 int i, rc = 0, cpu = 0; 850 851 if (!fdm->cpu_state_data.bytes_dumped) 852 return -EINVAL; 853 854 addr = be64_to_cpu(fdm->cpu_state_data.destination_address); 855 vaddr = __va(addr); 856 857 reg_header = vaddr; 858 if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) { 859 printk(KERN_ERR "Unable to read register save area.\n"); 860 return -ENOENT; 861 } 862 pr_debug("--------CPU State Data------------\n"); 863 pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number)); 864 pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset)); 865 866 vaddr += be32_to_cpu(reg_header->num_cpu_offset); 867 num_cpus = be32_to_cpu(*((__be32 *)(vaddr))); 868 pr_debug("NumCpus : %u\n", num_cpus); 869 vaddr += sizeof(u32); 870 reg_entry = (struct fadump_reg_entry *)vaddr; 871 872 /* Allocate buffer to hold cpu crash notes. */ 873 fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t); 874 fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size); 875 note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size); 876 if (!note_buf) { 877 printk(KERN_ERR "Failed to allocate 0x%lx bytes for " 878 "cpu notes buffer\n", fw_dump.cpu_notes_buf_size); 879 return -ENOMEM; 880 } 881 fw_dump.cpu_notes_buf = __pa(note_buf); 882 883 pr_debug("Allocated buffer for cpu notes of size %ld at %p\n", 884 (num_cpus * sizeof(note_buf_t)), note_buf); 885 886 if (fw_dump.fadumphdr_addr) 887 fdh = __va(fw_dump.fadumphdr_addr); 888 889 for (i = 0; i < num_cpus; i++) { 890 if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) { 891 printk(KERN_ERR "Unable to read CPU state data\n"); 892 rc = -ENOENT; 893 goto error_out; 894 } 895 /* Lower 4 bytes of reg_value contains logical cpu id */ 896 cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK; 897 if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) { 898 SKIP_TO_NEXT_CPU(reg_entry); 899 continue; 900 } 901 pr_debug("Reading register data for cpu %d...\n", cpu); 902 if (fdh && fdh->crashing_cpu == cpu) { 903 regs = fdh->regs; 904 note_buf = fadump_regs_to_elf_notes(note_buf, ®s); 905 SKIP_TO_NEXT_CPU(reg_entry); 906 } else { 907 reg_entry++; 908 reg_entry = fadump_read_registers(reg_entry, ®s); 909 note_buf = fadump_regs_to_elf_notes(note_buf, ®s); 910 } 911 } 912 final_note(note_buf); 913 914 if (fdh) { 915 pr_debug("Updating elfcore header (%llx) with cpu notes\n", 916 fdh->elfcorehdr_addr); 917 fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr)); 918 } 919 return 0; 920 921 error_out: 922 fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf), 923 fw_dump.cpu_notes_buf_size); 924 fw_dump.cpu_notes_buf = 0; 925 fw_dump.cpu_notes_buf_size = 0; 926 return rc; 927 928 } 929 930 /* 931 * Validate and process the dump data stored by firmware before exporting 932 * it through '/proc/vmcore'. 933 */ 934 static int __init process_fadump(const struct fadump_mem_struct *fdm_active) 935 { 936 struct fadump_crash_info_header *fdh; 937 int rc = 0; 938 939 if (!fdm_active || !fw_dump.fadumphdr_addr) 940 return -EINVAL; 941 942 /* Check if the dump data is valid. */ 943 if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) || 944 (fdm_active->cpu_state_data.error_flags != 0) || 945 (fdm_active->rmr_region.error_flags != 0)) { 946 printk(KERN_ERR "Dump taken by platform is not valid\n"); 947 return -EINVAL; 948 } 949 if ((fdm_active->rmr_region.bytes_dumped != 950 fdm_active->rmr_region.source_len) || 951 !fdm_active->cpu_state_data.bytes_dumped) { 952 printk(KERN_ERR "Dump taken by platform is incomplete\n"); 953 return -EINVAL; 954 } 955 956 /* Validate the fadump crash info header */ 957 fdh = __va(fw_dump.fadumphdr_addr); 958 if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { 959 printk(KERN_ERR "Crash info header is not valid.\n"); 960 return -EINVAL; 961 } 962 963 rc = fadump_build_cpu_notes(fdm_active); 964 if (rc) 965 return rc; 966 967 /* 968 * We are done validating dump info and elfcore header is now ready 969 * to be exported. set elfcorehdr_addr so that vmcore module will 970 * export the elfcore header through '/proc/vmcore'. 971 */ 972 elfcorehdr_addr = fdh->elfcorehdr_addr; 973 974 return 0; 975 } 976 977 static void free_crash_memory_ranges(void) 978 { 979 kfree(crash_memory_ranges); 980 crash_memory_ranges = NULL; 981 crash_memory_ranges_size = 0; 982 max_crash_mem_ranges = 0; 983 } 984 985 /* 986 * Allocate or reallocate crash memory ranges array in incremental units 987 * of PAGE_SIZE. 988 */ 989 static int allocate_crash_memory_ranges(void) 990 { 991 struct fad_crash_memory_ranges *new_array; 992 u64 new_size; 993 994 new_size = crash_memory_ranges_size + PAGE_SIZE; 995 pr_debug("Allocating %llu bytes of memory for crash memory ranges\n", 996 new_size); 997 998 new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL); 999 if (new_array == NULL) { 1000 pr_err("Insufficient memory for setting up crash memory ranges\n"); 1001 free_crash_memory_ranges(); 1002 return -ENOMEM; 1003 } 1004 1005 crash_memory_ranges = new_array; 1006 crash_memory_ranges_size = new_size; 1007 max_crash_mem_ranges = (new_size / 1008 sizeof(struct fad_crash_memory_ranges)); 1009 return 0; 1010 } 1011 1012 static inline int fadump_add_crash_memory(unsigned long long base, 1013 unsigned long long end) 1014 { 1015 u64 start, size; 1016 bool is_adjacent = false; 1017 1018 if (base == end) 1019 return 0; 1020 1021 /* 1022 * Fold adjacent memory ranges to bring down the memory ranges/ 1023 * PT_LOAD segments count. 1024 */ 1025 if (crash_mem_ranges) { 1026 start = crash_memory_ranges[crash_mem_ranges - 1].base; 1027 size = crash_memory_ranges[crash_mem_ranges - 1].size; 1028 1029 if ((start + size) == base) 1030 is_adjacent = true; 1031 } 1032 if (!is_adjacent) { 1033 /* resize the array on reaching the limit */ 1034 if (crash_mem_ranges == max_crash_mem_ranges) { 1035 int ret; 1036 1037 ret = allocate_crash_memory_ranges(); 1038 if (ret) 1039 return ret; 1040 } 1041 1042 start = base; 1043 crash_memory_ranges[crash_mem_ranges].base = start; 1044 crash_mem_ranges++; 1045 } 1046 1047 crash_memory_ranges[crash_mem_ranges - 1].size = (end - start); 1048 pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n", 1049 (crash_mem_ranges - 1), start, end - 1, (end - start)); 1050 return 0; 1051 } 1052 1053 static int fadump_exclude_reserved_area(unsigned long long start, 1054 unsigned long long end) 1055 { 1056 unsigned long long ra_start, ra_end; 1057 int ret = 0; 1058 1059 ra_start = fw_dump.reserve_dump_area_start; 1060 ra_end = ra_start + fw_dump.reserve_dump_area_size; 1061 1062 if ((ra_start < end) && (ra_end > start)) { 1063 if ((start < ra_start) && (end > ra_end)) { 1064 ret = fadump_add_crash_memory(start, ra_start); 1065 if (ret) 1066 return ret; 1067 1068 ret = fadump_add_crash_memory(ra_end, end); 1069 } else if (start < ra_start) { 1070 ret = fadump_add_crash_memory(start, ra_start); 1071 } else if (ra_end < end) { 1072 ret = fadump_add_crash_memory(ra_end, end); 1073 } 1074 } else 1075 ret = fadump_add_crash_memory(start, end); 1076 1077 return ret; 1078 } 1079 1080 static int fadump_init_elfcore_header(char *bufp) 1081 { 1082 struct elfhdr *elf; 1083 1084 elf = (struct elfhdr *) bufp; 1085 bufp += sizeof(struct elfhdr); 1086 memcpy(elf->e_ident, ELFMAG, SELFMAG); 1087 elf->e_ident[EI_CLASS] = ELF_CLASS; 1088 elf->e_ident[EI_DATA] = ELF_DATA; 1089 elf->e_ident[EI_VERSION] = EV_CURRENT; 1090 elf->e_ident[EI_OSABI] = ELF_OSABI; 1091 memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD); 1092 elf->e_type = ET_CORE; 1093 elf->e_machine = ELF_ARCH; 1094 elf->e_version = EV_CURRENT; 1095 elf->e_entry = 0; 1096 elf->e_phoff = sizeof(struct elfhdr); 1097 elf->e_shoff = 0; 1098 #if defined(_CALL_ELF) 1099 elf->e_flags = _CALL_ELF; 1100 #else 1101 elf->e_flags = 0; 1102 #endif 1103 elf->e_ehsize = sizeof(struct elfhdr); 1104 elf->e_phentsize = sizeof(struct elf_phdr); 1105 elf->e_phnum = 0; 1106 elf->e_shentsize = 0; 1107 elf->e_shnum = 0; 1108 elf->e_shstrndx = 0; 1109 1110 return 0; 1111 } 1112 1113 /* 1114 * Traverse through memblock structure and setup crash memory ranges. These 1115 * ranges will be used create PT_LOAD program headers in elfcore header. 1116 */ 1117 static int fadump_setup_crash_memory_ranges(void) 1118 { 1119 struct memblock_region *reg; 1120 unsigned long long start, end; 1121 int ret; 1122 1123 pr_debug("Setup crash memory ranges.\n"); 1124 crash_mem_ranges = 0; 1125 1126 /* 1127 * add the first memory chunk (RMA_START through boot_memory_size) as 1128 * a separate memory chunk. The reason is, at the time crash firmware 1129 * will move the content of this memory chunk to different location 1130 * specified during fadump registration. We need to create a separate 1131 * program header for this chunk with the correct offset. 1132 */ 1133 ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size); 1134 if (ret) 1135 return ret; 1136 1137 for_each_memblock(memory, reg) { 1138 start = (unsigned long long)reg->base; 1139 end = start + (unsigned long long)reg->size; 1140 1141 /* 1142 * skip the first memory chunk that is already added (RMA_START 1143 * through boot_memory_size). This logic needs a relook if and 1144 * when RMA_START changes to a non-zero value. 1145 */ 1146 BUILD_BUG_ON(RMA_START != 0); 1147 if (start < fw_dump.boot_memory_size) { 1148 if (end > fw_dump.boot_memory_size) 1149 start = fw_dump.boot_memory_size; 1150 else 1151 continue; 1152 } 1153 1154 /* add this range excluding the reserved dump area. */ 1155 ret = fadump_exclude_reserved_area(start, end); 1156 if (ret) 1157 return ret; 1158 } 1159 1160 return 0; 1161 } 1162 1163 /* 1164 * If the given physical address falls within the boot memory region then 1165 * return the relocated address that points to the dump region reserved 1166 * for saving initial boot memory contents. 1167 */ 1168 static inline unsigned long fadump_relocate(unsigned long paddr) 1169 { 1170 if (paddr > RMA_START && paddr < fw_dump.boot_memory_size) 1171 return be64_to_cpu(fdm.rmr_region.destination_address) + paddr; 1172 else 1173 return paddr; 1174 } 1175 1176 static int fadump_create_elfcore_headers(char *bufp) 1177 { 1178 struct elfhdr *elf; 1179 struct elf_phdr *phdr; 1180 int i; 1181 1182 fadump_init_elfcore_header(bufp); 1183 elf = (struct elfhdr *)bufp; 1184 bufp += sizeof(struct elfhdr); 1185 1186 /* 1187 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info 1188 * will be populated during second kernel boot after crash. Hence 1189 * this PT_NOTE will always be the first elf note. 1190 * 1191 * NOTE: Any new ELF note addition should be placed after this note. 1192 */ 1193 phdr = (struct elf_phdr *)bufp; 1194 bufp += sizeof(struct elf_phdr); 1195 phdr->p_type = PT_NOTE; 1196 phdr->p_flags = 0; 1197 phdr->p_vaddr = 0; 1198 phdr->p_align = 0; 1199 1200 phdr->p_offset = 0; 1201 phdr->p_paddr = 0; 1202 phdr->p_filesz = 0; 1203 phdr->p_memsz = 0; 1204 1205 (elf->e_phnum)++; 1206 1207 /* setup ELF PT_NOTE for vmcoreinfo */ 1208 phdr = (struct elf_phdr *)bufp; 1209 bufp += sizeof(struct elf_phdr); 1210 phdr->p_type = PT_NOTE; 1211 phdr->p_flags = 0; 1212 phdr->p_vaddr = 0; 1213 phdr->p_align = 0; 1214 1215 phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); 1216 phdr->p_offset = phdr->p_paddr; 1217 phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; 1218 1219 /* Increment number of program headers. */ 1220 (elf->e_phnum)++; 1221 1222 /* setup PT_LOAD sections. */ 1223 1224 for (i = 0; i < crash_mem_ranges; i++) { 1225 unsigned long long mbase, msize; 1226 mbase = crash_memory_ranges[i].base; 1227 msize = crash_memory_ranges[i].size; 1228 1229 if (!msize) 1230 continue; 1231 1232 phdr = (struct elf_phdr *)bufp; 1233 bufp += sizeof(struct elf_phdr); 1234 phdr->p_type = PT_LOAD; 1235 phdr->p_flags = PF_R|PF_W|PF_X; 1236 phdr->p_offset = mbase; 1237 1238 if (mbase == RMA_START) { 1239 /* 1240 * The entire RMA region will be moved by firmware 1241 * to the specified destination_address. Hence set 1242 * the correct offset. 1243 */ 1244 phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address); 1245 } 1246 1247 phdr->p_paddr = mbase; 1248 phdr->p_vaddr = (unsigned long)__va(mbase); 1249 phdr->p_filesz = msize; 1250 phdr->p_memsz = msize; 1251 phdr->p_align = 0; 1252 1253 /* Increment number of program headers. */ 1254 (elf->e_phnum)++; 1255 } 1256 return 0; 1257 } 1258 1259 static unsigned long init_fadump_header(unsigned long addr) 1260 { 1261 struct fadump_crash_info_header *fdh; 1262 1263 if (!addr) 1264 return 0; 1265 1266 fw_dump.fadumphdr_addr = addr; 1267 fdh = __va(addr); 1268 addr += sizeof(struct fadump_crash_info_header); 1269 1270 memset(fdh, 0, sizeof(struct fadump_crash_info_header)); 1271 fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; 1272 fdh->elfcorehdr_addr = addr; 1273 /* We will set the crashing cpu id in crash_fadump() during crash. */ 1274 fdh->crashing_cpu = CPU_UNKNOWN; 1275 1276 return addr; 1277 } 1278 1279 static int register_fadump(void) 1280 { 1281 unsigned long addr; 1282 void *vaddr; 1283 int ret; 1284 1285 /* 1286 * If no memory is reserved then we can not register for firmware- 1287 * assisted dump. 1288 */ 1289 if (!fw_dump.reserve_dump_area_size) 1290 return -ENODEV; 1291 1292 ret = fadump_setup_crash_memory_ranges(); 1293 if (ret) 1294 return ret; 1295 1296 addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len); 1297 /* Initialize fadump crash info header. */ 1298 addr = init_fadump_header(addr); 1299 vaddr = __va(addr); 1300 1301 pr_debug("Creating ELF core headers at %#016lx\n", addr); 1302 fadump_create_elfcore_headers(vaddr); 1303 1304 /* register the future kernel dump with firmware. */ 1305 return register_fw_dump(&fdm); 1306 } 1307 1308 static int fadump_unregister_dump(struct fadump_mem_struct *fdm) 1309 { 1310 int rc = 0; 1311 unsigned int wait_time; 1312 1313 pr_debug("Un-register firmware-assisted dump\n"); 1314 1315 /* TODO: Add upper time limit for the delay */ 1316 do { 1317 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, 1318 FADUMP_UNREGISTER, fdm, 1319 sizeof(struct fadump_mem_struct)); 1320 1321 wait_time = rtas_busy_delay_time(rc); 1322 if (wait_time) 1323 mdelay(wait_time); 1324 } while (wait_time); 1325 1326 if (rc) { 1327 printk(KERN_ERR "Failed to un-register firmware-assisted dump." 1328 " unexpected error(%d).\n", rc); 1329 return rc; 1330 } 1331 fw_dump.dump_registered = 0; 1332 return 0; 1333 } 1334 1335 static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm) 1336 { 1337 int rc = 0; 1338 unsigned int wait_time; 1339 1340 pr_debug("Invalidating firmware-assisted dump registration\n"); 1341 1342 /* TODO: Add upper time limit for the delay */ 1343 do { 1344 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, 1345 FADUMP_INVALIDATE, fdm, 1346 sizeof(struct fadump_mem_struct)); 1347 1348 wait_time = rtas_busy_delay_time(rc); 1349 if (wait_time) 1350 mdelay(wait_time); 1351 } while (wait_time); 1352 1353 if (rc) { 1354 pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc); 1355 return rc; 1356 } 1357 fw_dump.dump_active = 0; 1358 fdm_active = NULL; 1359 return 0; 1360 } 1361 1362 void fadump_cleanup(void) 1363 { 1364 /* Invalidate the registration only if dump is active. */ 1365 if (fw_dump.dump_active) { 1366 /* pass the same memory dump structure provided by platform */ 1367 fadump_invalidate_dump(fdm_active); 1368 } else if (fw_dump.dump_registered) { 1369 /* Un-register Firmware-assisted dump if it was registered. */ 1370 fadump_unregister_dump(&fdm); 1371 free_crash_memory_ranges(); 1372 } 1373 } 1374 1375 static void fadump_free_reserved_memory(unsigned long start_pfn, 1376 unsigned long end_pfn) 1377 { 1378 unsigned long pfn; 1379 unsigned long time_limit = jiffies + HZ; 1380 1381 pr_info("freeing reserved memory (0x%llx - 0x%llx)\n", 1382 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn)); 1383 1384 for (pfn = start_pfn; pfn < end_pfn; pfn++) { 1385 free_reserved_page(pfn_to_page(pfn)); 1386 1387 if (time_after(jiffies, time_limit)) { 1388 cond_resched(); 1389 time_limit = jiffies + HZ; 1390 } 1391 } 1392 } 1393 1394 /* 1395 * Skip memory holes and free memory that was actually reserved. 1396 */ 1397 static void fadump_release_reserved_area(unsigned long start, unsigned long end) 1398 { 1399 struct memblock_region *reg; 1400 unsigned long tstart, tend; 1401 unsigned long start_pfn = PHYS_PFN(start); 1402 unsigned long end_pfn = PHYS_PFN(end); 1403 1404 for_each_memblock(memory, reg) { 1405 tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); 1406 tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); 1407 if (tstart < tend) { 1408 fadump_free_reserved_memory(tstart, tend); 1409 1410 if (tend == end_pfn) 1411 break; 1412 1413 start_pfn = tend + 1; 1414 } 1415 } 1416 } 1417 1418 /* 1419 * Release the memory that was reserved in early boot to preserve the memory 1420 * contents. The released memory will be available for general use. 1421 */ 1422 static void fadump_release_memory(unsigned long begin, unsigned long end) 1423 { 1424 unsigned long ra_start, ra_end; 1425 1426 ra_start = fw_dump.reserve_dump_area_start; 1427 ra_end = ra_start + fw_dump.reserve_dump_area_size; 1428 1429 /* 1430 * exclude the dump reserve area. Will reuse it for next 1431 * fadump registration. 1432 */ 1433 if (begin < ra_end && end > ra_start) { 1434 if (begin < ra_start) 1435 fadump_release_reserved_area(begin, ra_start); 1436 if (end > ra_end) 1437 fadump_release_reserved_area(ra_end, end); 1438 } else 1439 fadump_release_reserved_area(begin, end); 1440 } 1441 1442 static void fadump_invalidate_release_mem(void) 1443 { 1444 unsigned long reserved_area_start, reserved_area_end; 1445 unsigned long destination_address; 1446 1447 mutex_lock(&fadump_mutex); 1448 if (!fw_dump.dump_active) { 1449 mutex_unlock(&fadump_mutex); 1450 return; 1451 } 1452 1453 destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address); 1454 fadump_cleanup(); 1455 mutex_unlock(&fadump_mutex); 1456 1457 /* 1458 * Save the current reserved memory bounds we will require them 1459 * later for releasing the memory for general use. 1460 */ 1461 reserved_area_start = fw_dump.reserve_dump_area_start; 1462 reserved_area_end = reserved_area_start + 1463 fw_dump.reserve_dump_area_size; 1464 /* 1465 * Setup reserve_dump_area_start and its size so that we can 1466 * reuse this reserved memory for Re-registration. 1467 */ 1468 fw_dump.reserve_dump_area_start = destination_address; 1469 fw_dump.reserve_dump_area_size = get_fadump_area_size(); 1470 1471 fadump_release_memory(reserved_area_start, reserved_area_end); 1472 if (fw_dump.cpu_notes_buf) { 1473 fadump_cpu_notes_buf_free( 1474 (unsigned long)__va(fw_dump.cpu_notes_buf), 1475 fw_dump.cpu_notes_buf_size); 1476 fw_dump.cpu_notes_buf = 0; 1477 fw_dump.cpu_notes_buf_size = 0; 1478 } 1479 /* Initialize the kernel dump memory structure for FAD registration. */ 1480 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); 1481 } 1482 1483 static ssize_t fadump_release_memory_store(struct kobject *kobj, 1484 struct kobj_attribute *attr, 1485 const char *buf, size_t count) 1486 { 1487 int input = -1; 1488 1489 if (!fw_dump.dump_active) 1490 return -EPERM; 1491 1492 if (kstrtoint(buf, 0, &input)) 1493 return -EINVAL; 1494 1495 if (input == 1) { 1496 /* 1497 * Take away the '/proc/vmcore'. We are releasing the dump 1498 * memory, hence it will not be valid anymore. 1499 */ 1500 #ifdef CONFIG_PROC_VMCORE 1501 vmcore_cleanup(); 1502 #endif 1503 fadump_invalidate_release_mem(); 1504 1505 } else 1506 return -EINVAL; 1507 return count; 1508 } 1509 1510 static ssize_t fadump_enabled_show(struct kobject *kobj, 1511 struct kobj_attribute *attr, 1512 char *buf) 1513 { 1514 return sprintf(buf, "%d\n", fw_dump.fadump_enabled); 1515 } 1516 1517 static ssize_t fadump_register_show(struct kobject *kobj, 1518 struct kobj_attribute *attr, 1519 char *buf) 1520 { 1521 return sprintf(buf, "%d\n", fw_dump.dump_registered); 1522 } 1523 1524 static ssize_t fadump_register_store(struct kobject *kobj, 1525 struct kobj_attribute *attr, 1526 const char *buf, size_t count) 1527 { 1528 int ret = 0; 1529 int input = -1; 1530 1531 if (!fw_dump.fadump_enabled || fdm_active) 1532 return -EPERM; 1533 1534 if (kstrtoint(buf, 0, &input)) 1535 return -EINVAL; 1536 1537 mutex_lock(&fadump_mutex); 1538 1539 switch (input) { 1540 case 0: 1541 if (fw_dump.dump_registered == 0) { 1542 goto unlock_out; 1543 } 1544 /* Un-register Firmware-assisted dump */ 1545 fadump_unregister_dump(&fdm); 1546 break; 1547 case 1: 1548 if (fw_dump.dump_registered == 1) { 1549 /* Un-register Firmware-assisted dump */ 1550 fadump_unregister_dump(&fdm); 1551 } 1552 /* Register Firmware-assisted dump */ 1553 ret = register_fadump(); 1554 break; 1555 default: 1556 ret = -EINVAL; 1557 break; 1558 } 1559 1560 unlock_out: 1561 mutex_unlock(&fadump_mutex); 1562 return ret < 0 ? ret : count; 1563 } 1564 1565 static int fadump_region_show(struct seq_file *m, void *private) 1566 { 1567 const struct fadump_mem_struct *fdm_ptr; 1568 1569 if (!fw_dump.fadump_enabled) 1570 return 0; 1571 1572 mutex_lock(&fadump_mutex); 1573 if (fdm_active) 1574 fdm_ptr = fdm_active; 1575 else { 1576 mutex_unlock(&fadump_mutex); 1577 fdm_ptr = &fdm; 1578 } 1579 1580 seq_printf(m, 1581 "CPU : [%#016llx-%#016llx] %#llx bytes, " 1582 "Dumped: %#llx\n", 1583 be64_to_cpu(fdm_ptr->cpu_state_data.destination_address), 1584 be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) + 1585 be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1, 1586 be64_to_cpu(fdm_ptr->cpu_state_data.source_len), 1587 be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped)); 1588 seq_printf(m, 1589 "HPTE: [%#016llx-%#016llx] %#llx bytes, " 1590 "Dumped: %#llx\n", 1591 be64_to_cpu(fdm_ptr->hpte_region.destination_address), 1592 be64_to_cpu(fdm_ptr->hpte_region.destination_address) + 1593 be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1, 1594 be64_to_cpu(fdm_ptr->hpte_region.source_len), 1595 be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped)); 1596 seq_printf(m, 1597 "DUMP: [%#016llx-%#016llx] %#llx bytes, " 1598 "Dumped: %#llx\n", 1599 be64_to_cpu(fdm_ptr->rmr_region.destination_address), 1600 be64_to_cpu(fdm_ptr->rmr_region.destination_address) + 1601 be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1, 1602 be64_to_cpu(fdm_ptr->rmr_region.source_len), 1603 be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped)); 1604 1605 if (!fdm_active || 1606 (fw_dump.reserve_dump_area_start == 1607 be64_to_cpu(fdm_ptr->cpu_state_data.destination_address))) 1608 goto out; 1609 1610 /* Dump is active. Show reserved memory region. */ 1611 seq_printf(m, 1612 " : [%#016llx-%#016llx] %#llx bytes, " 1613 "Dumped: %#llx\n", 1614 (unsigned long long)fw_dump.reserve_dump_area_start, 1615 be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1, 1616 be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1617 fw_dump.reserve_dump_area_start, 1618 be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1619 fw_dump.reserve_dump_area_start); 1620 out: 1621 if (fdm_active) 1622 mutex_unlock(&fadump_mutex); 1623 return 0; 1624 } 1625 1626 static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem, 1627 0200, NULL, 1628 fadump_release_memory_store); 1629 static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled, 1630 0444, fadump_enabled_show, 1631 NULL); 1632 static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered, 1633 0644, fadump_register_show, 1634 fadump_register_store); 1635 1636 DEFINE_SHOW_ATTRIBUTE(fadump_region); 1637 1638 static void fadump_init_files(void) 1639 { 1640 struct dentry *debugfs_file; 1641 int rc = 0; 1642 1643 rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr); 1644 if (rc) 1645 printk(KERN_ERR "fadump: unable to create sysfs file" 1646 " fadump_enabled (%d)\n", rc); 1647 1648 rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr); 1649 if (rc) 1650 printk(KERN_ERR "fadump: unable to create sysfs file" 1651 " fadump_registered (%d)\n", rc); 1652 1653 debugfs_file = debugfs_create_file("fadump_region", 0444, 1654 powerpc_debugfs_root, NULL, 1655 &fadump_region_fops); 1656 if (!debugfs_file) 1657 printk(KERN_ERR "fadump: unable to create debugfs file" 1658 " fadump_region\n"); 1659 1660 if (fw_dump.dump_active) { 1661 rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr); 1662 if (rc) 1663 printk(KERN_ERR "fadump: unable to create sysfs file" 1664 " fadump_release_mem (%d)\n", rc); 1665 } 1666 return; 1667 } 1668 1669 /* 1670 * Prepare for firmware-assisted dump. 1671 */ 1672 int __init setup_fadump(void) 1673 { 1674 if (!fw_dump.fadump_enabled) 1675 return 0; 1676 1677 if (!fw_dump.fadump_supported) { 1678 printk(KERN_ERR "Firmware-assisted dump is not supported on" 1679 " this hardware\n"); 1680 return 0; 1681 } 1682 1683 fadump_show_config(); 1684 /* 1685 * If dump data is available then see if it is valid and prepare for 1686 * saving it to the disk. 1687 */ 1688 if (fw_dump.dump_active) { 1689 /* 1690 * if dump process fails then invalidate the registration 1691 * and release memory before proceeding for re-registration. 1692 */ 1693 if (process_fadump(fdm_active) < 0) 1694 fadump_invalidate_release_mem(); 1695 } 1696 /* Initialize the kernel dump memory structure for FAD registration. */ 1697 else if (fw_dump.reserve_dump_area_size) 1698 init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start); 1699 fadump_init_files(); 1700 1701 return 1; 1702 } 1703 subsys_initcall(setup_fadump); 1704