/*
 * Firmware Assisted dump: A robust mechanism to get reliable kernel crash
 * dump with assistance from firmware. This approach does not use kexec,
 * instead firmware assists in booting the kdump kernel while preserving
 * memory contents. Most of the code implementation has been adapted
 * from phyp assisted dump implementation written by Linas Vepstas and
 * Manish Ahuja
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright 2011 IBM Corporation
 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
 */

#undef DEBUG
#define pr_fmt(fmt) "fadump: " fmt

#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>

#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/fadump.h>
#include <asm/setup.h>

/* Global fadump configuration/state, populated during early boot. */
static struct fw_dump fw_dump;
/* Memory structure passed to firmware when registering for fadump. */
static struct fadump_mem_struct fdm;
/* Points at the 'ibm,kernel-dump' DT property when a dump is active. */
static const struct fadump_mem_struct *fdm_active;

static DEFINE_MUTEX(fadump_mutex);
struct fad_crash_memory_ranges *crash_memory_ranges;
int crash_memory_ranges_size;
int crash_mem_ranges;
int max_crash_mem_ranges;

/*
 * Scan the Firmware Assisted dump configuration details.
 *
 * Flat device-tree scan callback: inspects the /rtas node for fadump
 * support ("ibm,configure-kernel-dump" token), an active dump from the
 * previous boot ("ibm,kernel-dump"), and the per-section size hints
 * ("ibm,configure-kernel-dump-sizes"). Returns 1 once the rtas node has
 * been examined (stops the scan), 0 to continue scanning other nodes.
 */
int __init early_init_dt_scan_fw_dump(unsigned long node,
			const char *uname, int depth, void *data)
{
	const __be32 *sections;
	int i, num_sections;
	int size;
	const __be32 *token;

	if (depth != 1 || strcmp(uname, "rtas") != 0)
		return 0;

	/*
	 * Check if Firmware Assisted dump is supported. if yes, check
	 * if dump has been initiated on last reboot.
	 */
	token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
	if (!token)
		return 1;

	fw_dump.fadump_supported = 1;
	fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);

	/*
	 * The 'ibm,kernel-dump' rtas node is present only if there is
	 * dump data waiting for us.
	 */
	fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
	if (fdm_active)
		fw_dump.dump_active = 1;

	/* Get the sizes required to store dump data for the firmware provided
	 * dump sections.
	 * For each dump section type supported, a 32bit cell which defines
	 * the ID of a supported section followed by two 32 bit cells which
	 * gives the size of the section in bytes.
	 */
	sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
					&size);

	if (!sections)
		return 1;

	/* Each entry is a (section-id, size-hi, size-lo) triple of cells. */
	num_sections = size / (3 * sizeof(u32));

	for (i = 0; i < num_sections; i++, sections += 3) {
		u32 type = (u32)of_read_number(sections, 1);

		switch (type) {
		case FADUMP_CPU_STATE_DATA:
			fw_dump.cpu_state_data_size =
					of_read_ulong(&sections[1], 2);
			break;
		case FADUMP_HPTE_REGION:
			fw_dump.hpte_region_size =
					of_read_ulong(&sections[1], 2);
			break;
		}
	}

	return 1;
}

/*
 * If fadump is registered, check if the memory provided
 * falls within boot memory area.
 */
int is_fadump_boot_memory_area(u64 addr, ulong size)
{
	if (!fw_dump.dump_registered)
		return 0;

	return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
}

/*
 * Returns 1 if it is safe to trigger fadump on crash, i.e. fadump is
 * registered and the crash-info header address has been set up.
 */
int should_fadump_crash(void)
{
	if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
		return 0;
	return 1;
}

/* Returns non-zero if a firmware-assisted dump from the previous boot is active. */
int is_fadump_active(void)
{
	return fw_dump.dump_active;
}

/*
 * Returns 1, if there are no holes in boot memory area,
 * 0 otherwise.
 */
static int is_boot_memory_area_contiguous(void)
{
	struct memblock_region *reg;
	unsigned long tstart, tend;
	unsigned long start_pfn = PHYS_PFN(RMA_START);
	unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
	unsigned int ret = 0;

	/* Walk memblock regions, checking they tile [RMA_START, boot_memory_size). */
	for_each_memblock(memory, reg) {
		tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
		tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
		if (tstart < tend) {
			/* Memory hole from start_pfn to tstart */
			if (tstart > start_pfn)
				break;

			if (tend == end_pfn) {
				ret = 1;
				break;
			}

			start_pfn = tend + 1;
		}
	}

	return ret;
}

/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
	pr_debug("Support for firmware-assisted dump (fadump): %s\n",
			(fw_dump.fadump_supported ? "present" : "no support"));

	if (!fw_dump.fadump_supported)
		return;

	pr_debug("Fadump enabled : %s\n",
				(fw_dump.fadump_enabled ? "yes" : "no"));
	pr_debug("Dump Active : %s\n",
				(fw_dump.dump_active ? "yes" : "no"));
	pr_debug("Dump section sizes:\n");
	pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
	pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
	pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
}

/*
 * Initialize the fadump memory structure that is handed to firmware on
 * registration: three dump sections (CPU state, HPTE region, RMA region)
 * whose destinations are laid out back-to-back starting at 'addr'.
 * Returns the address immediately past the last destination region,
 * or 0 if fdm is NULL.
 */
static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
				unsigned long addr)
{
	if (!fdm)
		return 0;

	memset(fdm, 0, sizeof(struct fadump_mem_struct));
	addr = addr & PAGE_MASK;

	fdm->header.dump_format_version = cpu_to_be32(0x00000001);
	fdm->header.dump_num_sections = cpu_to_be16(3);
	fdm->header.dump_status_flag = 0;
	fdm->header.offset_first_dump_section =
		cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));

	/*
	 * Fields for disk dump option.
	 * We are not using disk dump option, hence set these fields to 0.
	 */
	fdm->header.dd_block_size = 0;
	fdm->header.dd_block_offset = 0;
	fdm->header.dd_num_blocks = 0;
	fdm->header.dd_offset_disk_path = 0;

	/* set 0 to disable an automatic dump-reboot. */
	fdm->header.max_time_auto = 0;

	/* Kernel dump sections */
	/* cpu state data section. */
	fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
	fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
	fdm->cpu_state_data.source_address = 0;
	fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
	fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
	addr += fw_dump.cpu_state_data_size;

	/* hpte region section */
	fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
	fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
	fdm->hpte_region.source_address = 0;
	fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
	fdm->hpte_region.destination_address = cpu_to_be64(addr);
	addr += fw_dump.hpte_region_size;

	/* RMA region section */
	fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
	fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
	fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
	fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
	fdm->rmr_region.destination_address = cpu_to_be64(addr);
	addr += fw_dump.boot_memory_size;

	return addr;
}

/**
 * fadump_calculate_reserve_size(): reserve variable boot area 5% of System RAM
 *
 * Function to find the largest memory size we need to reserve during early
 * boot process. This will be the size of the memory that is required for a
 * kernel to boot successfully.
 *
 * This function has been taken from phyp-assisted dump feature implementation.
 *
 * returns larger of 256MB or 5% rounded down to multiples of 256MB.
 *
 * TODO: Come up with better approach to find out more accurate memory size
 * that is required for a kernel to boot successfully.
 *
 */
static inline unsigned long fadump_calculate_reserve_size(void)
{
	int ret;
	unsigned long long base, size;

	if (fw_dump.reserve_bootvar)
		pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");

	/*
	 * Check if the size is specified through crashkernel= cmdline
	 * option. If yes, then use that but ignore base as fadump reserves
	 * memory at a predefined offset.
	 */
	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
				&size, &base);
	if (ret == 0 && size > 0) {
		unsigned long max_size;

		/* 'crashkernel=' wins over the deprecated 'fadump_reserve_mem='. */
		if (fw_dump.reserve_bootvar)
			pr_info("Using 'crashkernel=' parameter for memory reservation.\n");

		fw_dump.reserve_bootvar = (unsigned long)size;

		/*
		 * Adjust if the boot memory size specified is above
		 * the upper limit.
		 */
		max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
		if (fw_dump.reserve_bootvar > max_size) {
			fw_dump.reserve_bootvar = max_size;
			pr_info("Adjusted boot memory size to %luMB\n",
				(fw_dump.reserve_bootvar >> 20));
		}

		return fw_dump.reserve_bootvar;
	} else if (fw_dump.reserve_bootvar) {
		/*
		 * 'fadump_reserve_mem=' is being used to reserve memory
		 * for firmware-assisted dump.
		 */
		return fw_dump.reserve_bootvar;
	}

	/* divide by 20 to get 5% of value */
	size = memblock_phys_mem_size() / 20;

	/* round it down in multiples of 256 */
	size = size & ~0x0FFFFFFFUL;

	/* Truncate to memory_limit. We don't want to over reserve the memory.*/
	if (memory_limit && size > memory_limit)
		size = memory_limit;

	return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
}

/*
 * Calculate the total memory size required to be reserved for
 * firmware-assisted dump registration.
 */
static unsigned long get_fadump_area_size(void)
{
	unsigned long size = 0;

	size += fw_dump.cpu_state_data_size;
	size += fw_dump.hpte_region_size;
	size += fw_dump.boot_memory_size;
	size += sizeof(struct fadump_crash_info_header);
	size += sizeof(struct elfhdr); /* ELF core header.*/
	size += sizeof(struct elf_phdr); /* place holder for cpu notes */
	/* Program headers for crash memory regions. */
	size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);

	size = PAGE_ALIGN(size);
	return size;
}

/*
 * Reserve, region by region, the intersection of [base, base + size)
 * with actual memblock memory. Skips any holes in that address range.
 */
static void __init fadump_reserve_crash_area(unsigned long base,
					     unsigned long size)
{
	struct memblock_region *reg;
	unsigned long mstart, mend, msize;

	for_each_memblock(memory, reg) {
		mstart = max_t(unsigned long, base, reg->base);
		mend = reg->base + reg->size;
		mend = min(base + size, mend);

		if (mstart < mend) {
			msize = mend - mstart;
			memblock_reserve(mstart, msize);
			pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
				(msize >> 20), mstart);
		}
	}
}

/*
 * Reserve the memory needed by fadump. If a dump from the previous boot
 * is active, reserve everything above boot_memory_size so the dump data
 * is not touched; otherwise find and reserve a fresh area low in RAM for
 * a future registration. Returns 1 when memory was reserved, 0 otherwise.
 */
int __init fadump_reserve_mem(void)
{
	unsigned long base, size, memory_boundary;

	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		printk(KERN_INFO "Firmware-assisted dump is not supported on"
				" this hardware\n");
		fw_dump.fadump_enabled = 0;
		return 0;
	}
	/*
	 * Initialize boot memory size
	 * If dump is active then we have already calculated the size during
	 * first kernel.
	 */
	if (fdm_active)
		fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
	else
		fw_dump.boot_memory_size = fadump_calculate_reserve_size();

	/*
	 * Calculate the memory boundary.
	 * If memory_limit is less than actual memory boundary then reserve
	 * the memory for fadump beyond the memory_limit and adjust the
	 * memory_limit accordingly, so that the running kernel can run with
	 * specified memory_limit.
	 */
	if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
		size = get_fadump_area_size();
		if ((memory_limit + size) < memblock_end_of_DRAM())
			memory_limit += size;
		else
			memory_limit = memblock_end_of_DRAM();
		printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
				" dump, now %#016llx\n", memory_limit);
	}
	if (memory_limit)
		memory_boundary = memory_limit;
	else
		memory_boundary = memblock_end_of_DRAM();

	if (fw_dump.dump_active) {
		pr_info("Firmware-assisted dump is active.\n");

#ifdef CONFIG_HUGETLB_PAGE
		/*
		 * FADump capture kernel doesn't care much about hugepages.
		 * In fact, handling hugepages in capture kernel is asking for
		 * trouble. So, disable HugeTLB support when fadump is active.
		 */
		hugetlb_disabled = true;
#endif
		/*
		 * If last boot has crashed then reserve all the memory
		 * above boot_memory_size so that we don't touch it until
		 * dump is written to disk by userspace tool. This memory
		 * will be released for general use once the dump is saved.
		 */
		base = fw_dump.boot_memory_size;
		size = memory_boundary - base;
		fadump_reserve_crash_area(base, size);

		/* Crash-info header lives right after the relocated RMA contents. */
		fw_dump.fadumphdr_addr =
				be64_to_cpu(fdm_active->rmr_region.destination_address) +
				be64_to_cpu(fdm_active->rmr_region.source_len);
		pr_debug("fadumphdr_addr = %p\n",
				(void *) fw_dump.fadumphdr_addr);
	} else {
		size = get_fadump_area_size();

		/*
		 * Reserve memory at an offset closer to bottom of the RAM to
		 * minimize the impact of memory hot-remove operation. We can't
		 * use memblock_find_in_range() here since it doesn't allocate
		 * from bottom to top.
		 */
		for (base = fw_dump.boot_memory_size;
		     base <= (memory_boundary - size);
		     base += size) {
			if (memblock_is_region_memory(base, size) &&
			    !memblock_is_region_reserved(base, size))
				break;
		}
		if ((base > (memory_boundary - size)) ||
		    memblock_reserve(base, size)) {
			pr_err("Failed to reserve memory\n");
			return 0;
		}

		pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
			"assisted dump (System RAM: %ldMB)\n",
			(unsigned long)(size >> 20),
			(unsigned long)(base >> 20),
			(unsigned long)(memblock_phys_mem_size() >> 20));
	}

	fw_dump.reserve_dump_area_start = base;
	fw_dump.reserve_dump_area_size = size;
	return 1;
}

/* Pages reserved so far; lets the core account for fadump's reservation. */
unsigned long __init arch_reserved_kernel_pages(void)
{
	return memblock_reserved_size() / PAGE_SIZE;
}

/* Look for fadump= cmdline option. */
static int __init early_fadump_param(char *p)
{
	if (!p)
		return 1;

	/* NOTE(review): strncmp matches prefixes, so "onfoo" also enables — confirm intended. */
	if (strncmp(p, "on", 2) == 0)
		fw_dump.fadump_enabled = 1;
	else if (strncmp(p, "off", 3) == 0)
		fw_dump.fadump_enabled = 0;

	return 0;
}
early_param("fadump", early_fadump_param);

/*
 * Look for fadump_reserve_mem= cmdline option
 * TODO: Remove references to 'fadump_reserve_mem=' parameter,
 *       the sooner 'crashkernel=' parameter is accustomed to.
488 */ 489 static int __init early_fadump_reserve_mem(char *p) 490 { 491 if (p) 492 fw_dump.reserve_bootvar = memparse(p, &p); 493 return 0; 494 } 495 early_param("fadump_reserve_mem", early_fadump_reserve_mem); 496 497 static int register_fw_dump(struct fadump_mem_struct *fdm) 498 { 499 int rc, err; 500 unsigned int wait_time; 501 502 pr_debug("Registering for firmware-assisted kernel dump...\n"); 503 504 /* TODO: Add upper time limit for the delay */ 505 do { 506 rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, 507 FADUMP_REGISTER, fdm, 508 sizeof(struct fadump_mem_struct)); 509 510 wait_time = rtas_busy_delay_time(rc); 511 if (wait_time) 512 mdelay(wait_time); 513 514 } while (wait_time); 515 516 err = -EIO; 517 switch (rc) { 518 default: 519 pr_err("Failed to register. Unknown Error(%d).\n", rc); 520 break; 521 case -1: 522 printk(KERN_ERR "Failed to register firmware-assisted kernel" 523 " dump. Hardware Error(%d).\n", rc); 524 break; 525 case -3: 526 if (!is_boot_memory_area_contiguous()) 527 pr_err("Can't have holes in boot memory area while " 528 "registering fadump\n"); 529 530 printk(KERN_ERR "Failed to register firmware-assisted kernel" 531 " dump. Parameter Error(%d).\n", rc); 532 err = -EINVAL; 533 break; 534 case -9: 535 printk(KERN_ERR "firmware-assisted kernel dump is already " 536 " registered."); 537 fw_dump.dump_registered = 1; 538 err = -EEXIST; 539 break; 540 case 0: 541 printk(KERN_INFO "firmware-assisted kernel dump registration" 542 " is successful\n"); 543 fw_dump.dump_registered = 1; 544 err = 0; 545 break; 546 } 547 return err; 548 } 549 550 void crash_fadump(struct pt_regs *regs, const char *str) 551 { 552 struct fadump_crash_info_header *fdh = NULL; 553 int old_cpu, this_cpu; 554 555 if (!should_fadump_crash()) 556 return; 557 558 /* 559 * old_cpu == -1 means this is the first CPU which has come here, 560 * go ahead and trigger fadump. 
561 * 562 * old_cpu != -1 means some other CPU has already on it's way 563 * to trigger fadump, just keep looping here. 564 */ 565 this_cpu = smp_processor_id(); 566 old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu); 567 568 if (old_cpu != -1) { 569 /* 570 * We can't loop here indefinitely. Wait as long as fadump 571 * is in force. If we race with fadump un-registration this 572 * loop will break and then we go down to normal panic path 573 * and reboot. If fadump is in force the first crashing 574 * cpu will definitely trigger fadump. 575 */ 576 while (fw_dump.dump_registered) 577 cpu_relax(); 578 return; 579 } 580 581 fdh = __va(fw_dump.fadumphdr_addr); 582 fdh->crashing_cpu = crashing_cpu; 583 crash_save_vmcoreinfo(); 584 585 if (regs) 586 fdh->regs = *regs; 587 else 588 ppc_save_regs(&fdh->regs); 589 590 fdh->online_mask = *cpu_online_mask; 591 592 /* Call ibm,os-term rtas call to trigger firmware assisted dump */ 593 rtas_os_term((char *)str); 594 } 595 596 #define GPR_MASK 0xffffff0000000000 597 static inline int fadump_gpr_index(u64 id) 598 { 599 int i = -1; 600 char str[3]; 601 602 if ((id & GPR_MASK) == REG_ID("GPR")) { 603 /* get the digits at the end */ 604 id &= ~GPR_MASK; 605 id >>= 24; 606 str[2] = '\0'; 607 str[1] = id & 0xff; 608 str[0] = (id >> 8) & 0xff; 609 sscanf(str, "%d", &i); 610 if (i > 31) 611 i = -1; 612 } 613 return i; 614 } 615 616 static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id, 617 u64 reg_val) 618 { 619 int i; 620 621 i = fadump_gpr_index(reg_id); 622 if (i >= 0) 623 regs->gpr[i] = (unsigned long)reg_val; 624 else if (reg_id == REG_ID("NIA")) 625 regs->nip = (unsigned long)reg_val; 626 else if (reg_id == REG_ID("MSR")) 627 regs->msr = (unsigned long)reg_val; 628 else if (reg_id == REG_ID("CTR")) 629 regs->ctr = (unsigned long)reg_val; 630 else if (reg_id == REG_ID("LR")) 631 regs->link = (unsigned long)reg_val; 632 else if (reg_id == REG_ID("XER")) 633 regs->xer = (unsigned long)reg_val; 634 else if (reg_id == 
REG_ID("CR")) 635 regs->ccr = (unsigned long)reg_val; 636 else if (reg_id == REG_ID("DAR")) 637 regs->dar = (unsigned long)reg_val; 638 else if (reg_id == REG_ID("DSISR")) 639 regs->dsisr = (unsigned long)reg_val; 640 } 641 642 static struct fadump_reg_entry* 643 fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs) 644 { 645 memset(regs, 0, sizeof(struct pt_regs)); 646 647 while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) { 648 fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id), 649 be64_to_cpu(reg_entry->reg_value)); 650 reg_entry++; 651 } 652 reg_entry++; 653 return reg_entry; 654 } 655 656 static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) 657 { 658 struct elf_prstatus prstatus; 659 660 memset(&prstatus, 0, sizeof(prstatus)); 661 /* 662 * FIXME: How do i get PID? Do I really need it? 663 * prstatus.pr_pid = ???? 664 */ 665 elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); 666 buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, 667 &prstatus, sizeof(prstatus)); 668 return buf; 669 } 670 671 static void fadump_update_elfcore_header(char *bufp) 672 { 673 struct elfhdr *elf; 674 struct elf_phdr *phdr; 675 676 elf = (struct elfhdr *)bufp; 677 bufp += sizeof(struct elfhdr); 678 679 /* First note is a place holder for cpu notes info. 
*/ 680 phdr = (struct elf_phdr *)bufp; 681 682 if (phdr->p_type == PT_NOTE) { 683 phdr->p_paddr = fw_dump.cpu_notes_buf; 684 phdr->p_offset = phdr->p_paddr; 685 phdr->p_filesz = fw_dump.cpu_notes_buf_size; 686 phdr->p_memsz = fw_dump.cpu_notes_buf_size; 687 } 688 return; 689 } 690 691 static void *fadump_cpu_notes_buf_alloc(unsigned long size) 692 { 693 void *vaddr; 694 struct page *page; 695 unsigned long order, count, i; 696 697 order = get_order(size); 698 vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order); 699 if (!vaddr) 700 return NULL; 701 702 count = 1 << order; 703 page = virt_to_page(vaddr); 704 for (i = 0; i < count; i++) 705 SetPageReserved(page + i); 706 return vaddr; 707 } 708 709 static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size) 710 { 711 struct page *page; 712 unsigned long order, count, i; 713 714 order = get_order(size); 715 count = 1 << order; 716 page = virt_to_page(vaddr); 717 for (i = 0; i < count; i++) 718 ClearPageReserved(page + i); 719 __free_pages(page, order); 720 } 721 722 /* 723 * Read CPU state dump data and convert it into ELF notes. 724 * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be 725 * used to access the data to allow for additional fields to be added without 726 * affecting compatibility. Each list of registers for a CPU starts with 727 * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes, 728 * 8 Byte ASCII identifier and 8 Byte register value. The register entry 729 * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part 730 * of register value. For more details refer to PAPR document. 731 * 732 * Only for the crashing cpu we ignore the CPU dump data and get exact 733 * state from fadump crash info structure populated by first kernel at the 734 * time of crash. 
 */
static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
{
	struct fadump_reg_save_area_header *reg_header;
	struct fadump_reg_entry *reg_entry;
	struct fadump_crash_info_header *fdh = NULL;
	void *vaddr;
	unsigned long addr;
	u32 num_cpus, *note_buf;
	struct pt_regs regs;
	int i, rc = 0, cpu = 0;

	if (!fdm->cpu_state_data.bytes_dumped)
		return -EINVAL;

	addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
	vaddr = __va(addr);

	reg_header = vaddr;
	if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
		printk(KERN_ERR "Unable to read register save area.\n");
		return -ENOENT;
	}
	pr_debug("--------CPU State Data------------\n");
	pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
	pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));

	/* Number of CPUs is stored at num_cpu_offset, followed by the entries. */
	vaddr += be32_to_cpu(reg_header->num_cpu_offset);
	num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
	pr_debug("NumCpus : %u\n", num_cpus);
	vaddr += sizeof(u32);
	reg_entry = (struct fadump_reg_entry *)vaddr;

	/* Allocate buffer to hold cpu crash notes. */
	fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
	fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
	note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
	if (!note_buf) {
		printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
			"cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
		return -ENOMEM;
	}
	fw_dump.cpu_notes_buf = __pa(note_buf);

	pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
			(num_cpus * sizeof(note_buf_t)), note_buf);

	if (fw_dump.fadumphdr_addr)
		fdh = __va(fw_dump.fadumphdr_addr);

	for (i = 0; i < num_cpus; i++) {
		if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
			printk(KERN_ERR "Unable to read CPU state data\n");
			rc = -ENOENT;
			goto error_out;
		}
		/* Lower 4 bytes of reg_value contains logical cpu id */
		cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
		if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
			/* CPU was offline at crash time: no useful state to convert. */
			SKIP_TO_NEXT_CPU(reg_entry);
			continue;
		}
		pr_debug("Reading register data for cpu %d...\n", cpu);
		if (fdh && fdh->crashing_cpu == cpu) {
			/* Crashing CPU: use the exact state saved by crash_fadump(). */
			regs = fdh->regs;
			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
			SKIP_TO_NEXT_CPU(reg_entry);
		} else {
			reg_entry++;
			reg_entry = fadump_read_registers(reg_entry, &regs);
			note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
		}
	}
	final_note(note_buf);

	if (fdh) {
		pr_debug("Updating elfcore header (%llx) with cpu notes\n",
							fdh->elfcorehdr_addr);
		fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
	}
	return 0;

error_out:
	fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
					fw_dump.cpu_notes_buf_size);
	fw_dump.cpu_notes_buf = 0;
	fw_dump.cpu_notes_buf_size = 0;
	return rc;

}

/*
 * Validate and process the dump data stored by firmware before exporting
 * it through '/proc/vmcore'.
 */
static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
{
	struct fadump_crash_info_header *fdh;
	int rc = 0;

	if (!fdm_active || !fw_dump.fadumphdr_addr)
		return -EINVAL;

	/* Check if the dump data is valid. */
	if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
			(fdm_active->cpu_state_data.error_flags != 0) ||
			(fdm_active->rmr_region.error_flags != 0)) {
		printk(KERN_ERR "Dump taken by platform is not valid\n");
		return -EINVAL;
	}
	if ((fdm_active->rmr_region.bytes_dumped !=
			fdm_active->rmr_region.source_len) ||
			!fdm_active->cpu_state_data.bytes_dumped) {
		printk(KERN_ERR "Dump taken by platform is incomplete\n");
		return -EINVAL;
	}

	/* Validate the fadump crash info header */
	fdh = __va(fw_dump.fadumphdr_addr);
	if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
		printk(KERN_ERR "Crash info header is not valid.\n");
		return -EINVAL;
	}

	rc = fadump_build_cpu_notes(fdm_active);
	if (rc)
		return rc;

	/*
	 * We are done validating dump info and elfcore header is now ready
	 * to be exported. set elfcorehdr_addr so that vmcore module will
	 * export the elfcore header through '/proc/vmcore'.
	 */
	elfcorehdr_addr = fdh->elfcorehdr_addr;

	return 0;
}

/* Free the crash memory ranges array and reset its bookkeeping. */
static void free_crash_memory_ranges(void)
{
	kfree(crash_memory_ranges);
	crash_memory_ranges = NULL;
	crash_memory_ranges_size = 0;
	max_crash_mem_ranges = 0;
}

/*
 * Allocate or reallocate crash memory ranges array in incremental units
 * of PAGE_SIZE.
 */
static int allocate_crash_memory_ranges(void)
{
	struct fad_crash_memory_ranges *new_array;
	u64 new_size;

	new_size = crash_memory_ranges_size + PAGE_SIZE;
	pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
		 new_size);

	new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
	if (new_array == NULL) {
		pr_err("Insufficient memory for setting up crash memory ranges\n");
		free_crash_memory_ranges();
		return -ENOMEM;
	}

	crash_memory_ranges = new_array;
	crash_memory_ranges_size = new_size;
	max_crash_mem_ranges = (new_size /
				sizeof(struct fad_crash_memory_ranges));
	return 0;
}

/*
 * Append [base, end) to the crash memory ranges array, merging with the
 * previous range when adjacent and growing the array when full.
 * Returns 0 on success or a negative errno from reallocation.
 */
static inline int fadump_add_crash_memory(unsigned long long base,
					  unsigned long long end)
{
	u64  start, size;
	bool is_adjacent = false;

	if (base == end)
		return 0;

	/*
	 * Fold adjacent memory ranges to bring down the memory ranges/
	 * PT_LOAD segments count.
	 */
	if (crash_mem_ranges) {
		start = crash_memory_ranges[crash_mem_ranges - 1].base;
		size = crash_memory_ranges[crash_mem_ranges - 1].size;

		if ((start + size) == base)
			is_adjacent = true;
	}
	if (!is_adjacent) {
		/* resize the array on reaching the limit */
		if (crash_mem_ranges == max_crash_mem_ranges) {
			int ret;

			ret = allocate_crash_memory_ranges();
			if (ret)
				return ret;
		}

		start = base;
		crash_memory_ranges[crash_mem_ranges].base = start;
		crash_mem_ranges++;
	}

	crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
	pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
		(crash_mem_ranges - 1), start, end - 1, (end - start));
	return 0;
}

/*
 * Add [start, end) to the crash ranges, carving out any overlap with the
 * reserved dump area so the dump does not include its own reservation.
 */
static int fadump_exclude_reserved_area(unsigned long long start,
					unsigned long long end)
{
	unsigned long long ra_start, ra_end;
	int ret = 0;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	if ((ra_start < end) && (ra_end > start)) {
		if ((start < ra_start) && (end > ra_end)) {
			/* Reserved area lies fully inside: split into two ranges. */
			ret = fadump_add_crash_memory(start, ra_start);
			if (ret)
				return ret;

			ret = fadump_add_crash_memory(ra_end, end);
		} else if (start < ra_start) {
			ret = fadump_add_crash_memory(start, ra_start);
		} else if (ra_end < end) {
			ret = fadump_add_crash_memory(ra_end, end);
		}
	} else
		ret = fadump_add_crash_memory(start, end);

	return ret;
}

/* Fill in a bare ELF core-file header (ET_CORE, zero program headers so far). */
static int fadump_init_elfcore_header(char *bufp)
{
	struct elfhdr *elf;

	elf = (struct elfhdr *) bufp;
	bufp += sizeof(struct elfhdr);
	memcpy(elf->e_ident, ELFMAG, SELFMAG);
	elf->e_ident[EI_CLASS] = ELF_CLASS;
	elf->e_ident[EI_DATA] = ELF_DATA;
	elf->e_ident[EI_VERSION] = EV_CURRENT;
	elf->e_ident[EI_OSABI] = ELF_OSABI;
	memset(elf->e_ident+EI_PAD, 0, EI_NIDENT-EI_PAD);
	elf->e_type = ET_CORE;
	elf->e_machine = ELF_ARCH;
	elf->e_version = EV_CURRENT;
	elf->e_entry = 0;
	elf->e_phoff = sizeof(struct elfhdr);
	elf->e_shoff = 0;
#if defined(_CALL_ELF)
	elf->e_flags = _CALL_ELF;
#else
	elf->e_flags = 0;
#endif
	elf->e_ehsize = sizeof(struct elfhdr);
	elf->e_phentsize = sizeof(struct elf_phdr);
	elf->e_phnum = 0;
	elf->e_shentsize = 0;
	elf->e_shnum = 0;
	elf->e_shstrndx = 0;

	return 0;
}

/*
 * Traverse through memblock structure and setup crash memory ranges. These
 * ranges will be used create PT_LOAD program headers in elfcore header.
 */
static int fadump_setup_crash_memory_ranges(void)
{
	struct memblock_region *reg;
	unsigned long long start, end;
	int ret;

	pr_debug("Setup crash memory ranges.\n");
	crash_mem_ranges = 0;

	/* allocate memory for crash memory ranges for the first time */
	if (!max_crash_mem_ranges) {
		ret = allocate_crash_memory_ranges();
		if (ret)
			return ret;
	}

	/*
	 * add the first memory chunk (RMA_START through boot_memory_size) as
	 * a separate memory chunk. The reason is, at the time crash firmware
	 * will move the content of this memory chunk to different location
	 * specified during fadump registration. We need to create a separate
	 * program header for this chunk with the correct offset.
	 */
	ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
	if (ret)
		return ret;

	for_each_memblock(memory, reg) {
		start = (unsigned long long)reg->base;
		end = start + (unsigned long long)reg->size;

		/*
		 * skip the first memory chunk that is already added (RMA_START
		 * through boot_memory_size). This logic needs a relook if and
		 * when RMA_START changes to a non-zero value.
		 */
		BUILD_BUG_ON(RMA_START != 0);
		if (start < fw_dump.boot_memory_size) {
			if (end > fw_dump.boot_memory_size)
				start = fw_dump.boot_memory_size;
			else
				continue;
		}

		/* add this range excluding the reserved dump area. */
		ret = fadump_exclude_reserved_area(start, end);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * If the given physical address falls within the boot memory region then
 * return the relocated address that points to the dump region reserved
 * for saving initial boot memory contents.
 */
static inline unsigned long fadump_relocate(unsigned long paddr)
{
	/*
	 * Boot memory is copied by firmware to rmr_region's
	 * destination_address at crash time, so addresses inside it must be
	 * translated to the preserved copy.
	 */
	if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
		return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
	else
		return paddr;
}

/*
 * Build the complete ELF core header at @bufp: the fixed ELF header, two
 * PT_NOTE placeholders (CPU notes and vmcoreinfo) and one PT_LOAD entry
 * per crash memory range.  e_phnum is bumped as headers are appended.
 * Always returns 0.
 */
static int fadump_create_elfcore_headers(char *bufp)
{
	struct elfhdr *elf;
	struct elf_phdr *phdr;
	int i;

	fadump_init_elfcore_header(bufp);
	elf = (struct elfhdr *)bufp;
	bufp += sizeof(struct elfhdr);

	/*
	 * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
	 * will be populated during second kernel boot after crash. Hence
	 * this PT_NOTE will always be the first elf note.
	 *
	 * NOTE: Any new ELF note addition should be placed after this note.
	 */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;

	/* All zero for now; filled in by the kdump kernel after the crash. */
	phdr->p_offset = 0;
	phdr->p_paddr = 0;
	phdr->p_filesz = 0;
	phdr->p_memsz = 0;

	(elf->e_phnum)++;

	/* setup ELF PT_NOTE for vmcoreinfo */
	phdr = (struct elf_phdr *)bufp;
	bufp += sizeof(struct elf_phdr);
	phdr->p_type = PT_NOTE;
	phdr->p_flags = 0;
	phdr->p_vaddr = 0;
	phdr->p_align = 0;

	/* vmcoreinfo may live in boot memory; point at its preserved copy. */
	phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
	phdr->p_offset = phdr->p_paddr;
	phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE;

	/* Increment number of program headers. */
	(elf->e_phnum)++;

	/* setup PT_LOAD sections. */

	for (i = 0; i < crash_mem_ranges; i++) {
		unsigned long long mbase, msize;
		mbase = crash_memory_ranges[i].base;
		msize = crash_memory_ranges[i].size;

		/* Skip empty ranges left behind by the range setup. */
		if (!msize)
			continue;

		phdr = (struct elf_phdr *)bufp;
		bufp += sizeof(struct elf_phdr);
		phdr->p_type = PT_LOAD;
		phdr->p_flags = PF_R|PF_W|PF_X;
		phdr->p_offset = mbase;

		if (mbase == RMA_START) {
			/*
			 * The entire RMA region will be moved by firmware
			 * to the specified destination_address. Hence set
			 * the correct offset.
			 */
			phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
		}

		phdr->p_paddr = mbase;
		phdr->p_vaddr = (unsigned long)__va(mbase);
		phdr->p_filesz = msize;
		phdr->p_memsz = msize;
		phdr->p_align = 0;

		/* Increment number of program headers. */
		(elf->e_phnum)++;
	}
	return 0;
}

/*
 * Zero-fill and initialize the fadump crash info header at physical @addr
 * and record its location in fw_dump.fadumphdr_addr.
 *
 * Returns the physical address just past the header (where the ELF core
 * header goes, as stored in fdh->elfcorehdr_addr), or 0 if @addr is 0.
 */
static unsigned long init_fadump_header(unsigned long addr)
{
	struct fadump_crash_info_header *fdh;

	if (!addr)
		return 0;

	fw_dump.fadumphdr_addr = addr;
	fdh = __va(addr);
	addr += sizeof(struct fadump_crash_info_header);

	memset(fdh, 0, sizeof(struct fadump_crash_info_header));
	fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
	fdh->elfcorehdr_addr = addr;
	/* We will set the crashing cpu id in crash_fadump() during crash. */
	fdh->crashing_cpu = CPU_UNKNOWN;

	return addr;
}

/*
 * Prepare the crash memory ranges, the crash info header and the ELF core
 * header, then register the future kernel dump with firmware.
 *
 * Returns 0 on success, -ENODEV if no memory was reserved, or a negative
 * error from range setup or the registration call.
 */
static int register_fadump(void)
{
	unsigned long addr;
	void *vaddr;
	int ret;

	/*
	 * If no memory is reserved then we can not register for firmware-
	 * assisted dump.
	 */
	if (!fw_dump.reserve_dump_area_size)
		return -ENODEV;

	ret = fadump_setup_crash_memory_ranges();
	if (ret)
		return ret;

	/* Headers live right after the relocated boot memory region. */
	addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
	/* Initialize fadump crash info header. */
	addr = init_fadump_header(addr);
	vaddr = __va(addr);

	pr_debug("Creating ELF core headers at %#016lx\n", addr);
	fadump_create_elfcore_headers(vaddr);

	/* register the future kernel dump with firmware. */
	return register_fw_dump(&fdm);
}

/*
 * Ask firmware to drop the current dump registration, retrying while RTAS
 * reports busy.  Clears fw_dump.dump_registered on success; returns the
 * RTAS status on failure.
 */
static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
{
	int rc = 0;
	unsigned int wait_time;

	pr_debug("Un-register firmware-assisted dump\n");

	/* TODO: Add upper time limit for the delay */
	do {
		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
			FADUMP_UNREGISTER, fdm,
			sizeof(struct fadump_mem_struct));

		/* A non-zero wait_time means RTAS asked us to retry later. */
		wait_time = rtas_busy_delay_time(rc);
		if (wait_time)
			mdelay(wait_time);
	} while (wait_time);

	if (rc) {
		printk(KERN_ERR "Failed to un-register firmware-assisted dump."
			" unexpected error(%d).\n", rc);
		return rc;
	}
	fw_dump.dump_registered = 0;
	return 0;
}

/*
 * Ask firmware to invalidate the dump data it is holding, retrying while
 * RTAS reports busy.  On success the dump is no longer active and
 * fdm_active is cleared.
 */
static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
{
	int rc = 0;
	unsigned int wait_time;

	pr_debug("Invalidating firmware-assisted dump registration\n");

	/* TODO: Add upper time limit for the delay */
	do {
		rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
			FADUMP_INVALIDATE, fdm,
			sizeof(struct fadump_mem_struct));

		wait_time = rtas_busy_delay_time(rc);
		if (wait_time)
			mdelay(wait_time);
	} while (wait_time);

	if (rc) {
		pr_err("Failed to invalidate firmware-assisted dump registration."
			" Unexpected error (%d).\n", rc);
		return rc;
	}
	fw_dump.dump_active = 0;
	fdm_active = NULL;
	return 0;
}

/*
 * Undo whatever state firmware currently holds: invalidate an active dump,
 * or un-register a pending registration (freeing the crash range list in
 * that case).
 */
void fadump_cleanup(void)
{
	/* Invalidate the registration only if dump is active. */
	if (fw_dump.dump_active) {
		init_fadump_mem_struct(&fdm,
			be64_to_cpu(fdm_active->cpu_state_data.destination_address));
		fadump_invalidate_dump(&fdm);
	} else if (fw_dump.dump_registered) {
		/* Un-register Firmware-assisted dump if it was registered. */
		fadump_unregister_dump(&fdm);
		free_crash_memory_ranges();
	}
}

/*
 * Return the page frames [start_pfn, end_pfn) to the page allocator,
 * yielding the CPU roughly once per HZ jiffies so large releases do not
 * monopolize it.
 */
static void fadump_free_reserved_memory(unsigned long start_pfn,
					unsigned long end_pfn)
{
	unsigned long pfn;
	unsigned long time_limit = jiffies + HZ;

	pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
		PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		free_reserved_page(pfn_to_page(pfn));

		if (time_after(jiffies, time_limit)) {
			cond_resched();
			time_limit = jiffies + HZ;
		}
	}
}

/*
 * Skip memory holes and free memory that was actually reserved.
 */
static void fadump_release_reserved_area(unsigned long start, unsigned long end)
{
	struct memblock_region *reg;
	unsigned long tstart, tend;
	unsigned long start_pfn = PHYS_PFN(start);
	unsigned long end_pfn = PHYS_PFN(end);

	/* Free the intersection of [start, end) with each memblock region. */
	for_each_memblock(memory, reg) {
		tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
		tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
		if (tstart < tend) {
			fadump_free_reserved_memory(tstart, tend);

			if (tend == end_pfn)
				break;

			start_pfn = tend + 1;
		}
	}
}

/*
 * Release the memory that was reserved in early boot to preserve the memory
 * contents. The released memory will be available for general use.
 */
static void fadump_release_memory(unsigned long begin, unsigned long end)
{
	unsigned long ra_start, ra_end;

	ra_start = fw_dump.reserve_dump_area_start;
	ra_end = ra_start + fw_dump.reserve_dump_area_size;

	/*
	 * exclude the dump reserve area. Will reuse it for next
	 * fadump registration.
	 */
	if (begin < ra_end && end > ra_start) {
		/* Free only the parts of [begin, end) outside the reserve. */
		if (begin < ra_start)
			fadump_release_reserved_area(begin, ra_start);
		if (end > ra_end)
			fadump_release_reserved_area(ra_end, end);
	} else
		fadump_release_reserved_area(begin, end);
}

/*
 * Invalidate the active dump with firmware, release the preserved memory
 * for general use and re-point the reserved area at the dump destination
 * so it can be reused for re-registration.  Also drops the CPU notes
 * buffer if one was allocated.
 */
static void fadump_invalidate_release_mem(void)
{
	unsigned long reserved_area_start, reserved_area_end;
	unsigned long destination_address;

	mutex_lock(&fadump_mutex);
	if (!fw_dump.dump_active) {
		mutex_unlock(&fadump_mutex);
		return;
	}

	/* Snapshot this before fadump_cleanup() clears fdm_active. */
	destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
	fadump_cleanup();
	mutex_unlock(&fadump_mutex);

	/*
	 * Save the current reserved memory bounds we will require them
	 * later for releasing the memory for general use.
	 */
	reserved_area_start = fw_dump.reserve_dump_area_start;
	reserved_area_end = reserved_area_start +
			fw_dump.reserve_dump_area_size;
	/*
	 * Setup reserve_dump_area_start and its size so that we can
	 * reuse this reserved memory for Re-registration.
	 */
	fw_dump.reserve_dump_area_start = destination_address;
	fw_dump.reserve_dump_area_size = get_fadump_area_size();

	fadump_release_memory(reserved_area_start, reserved_area_end);
	if (fw_dump.cpu_notes_buf) {
		fadump_cpu_notes_buf_free(
				(unsigned long)__va(fw_dump.cpu_notes_buf),
				fw_dump.cpu_notes_buf_size);
		fw_dump.cpu_notes_buf = 0;
		fw_dump.cpu_notes_buf_size = 0;
	}
	/* Initialize the kernel dump memory structure for FAD registration. */
	init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
}

/*
 * sysfs store: echo 1 > /sys/kernel/fadump_release_mem releases the dump
 * memory after the dump has been saved.  /proc/vmcore is torn down first
 * since it would no longer be valid.  Only valid while a dump is active.
 */
static ssize_t fadump_release_memory_store(struct kobject *kobj,
					struct kobj_attribute *attr,
					const char *buf, size_t count)
{
	int input = -1;

	if (!fw_dump.dump_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	if (input == 1) {
		/*
		 * Take away the '/proc/vmcore'. We are releasing the dump
		 * memory, hence it will not be valid anymore.
		 */
#ifdef CONFIG_PROC_VMCORE
		vmcore_cleanup();
#endif
		fadump_invalidate_release_mem();

	} else
		return -EINVAL;
	return count;
}

/* sysfs show: whether fadump is enabled. */
static ssize_t fadump_enabled_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}

/* sysfs show: whether a dump is currently registered with firmware. */
static ssize_t fadump_register_show(struct kobject *kobj,
					struct kobj_attribute *attr,
					char *buf)
{
	return sprintf(buf, "%d\n", fw_dump.dump_registered);
}

/*
 * sysfs store: echo 0/1 > /sys/kernel/fadump_registered to un-register or
 * register the dump with firmware.  Refused while a previous dump is
 * still active (fdm_active set) or fadump is disabled.
 */
static ssize_t fadump_register_store(struct kobject *kobj,
					struct kobj_attribute *attr,
					const char *buf, size_t count)
{
	int ret = 0;
	int input = -1;

	if (!fw_dump.fadump_enabled || fdm_active)
		return -EPERM;

	if (kstrtoint(buf, 0, &input))
		return -EINVAL;

	mutex_lock(&fadump_mutex);

	switch (input) {
	case 0:
		if (fw_dump.dump_registered == 0) {
			/* Already un-registered: nothing to do. */
			goto unlock_out;
		}
		/* Un-register Firmware-assisted dump */
		fadump_unregister_dump(&fdm);
		break;
	case 1:
		if (fw_dump.dump_registered == 1) {
			ret = -EEXIST;
			goto unlock_out;
		}
		/* Register Firmware-assisted dump */
		ret = register_fadump();
		break;
	default:
		ret = -EINVAL;
		break;
	}

unlock_out:
	mutex_unlock(&fadump_mutex);
	return ret < 0 ? ret : count;
}

/*
 * debugfs show: print the dump region layout (CPU state, HPTE and boot
 * memory regions, plus the remaining reserved area when a dump is active).
 *
 * The mutex stays held across the printing only when fdm_active is being
 * read, since fadump_invalidate_dump() can clear it concurrently; the
 * inactive &fdm copy is printed after dropping the lock.
 */
static int fadump_region_show(struct seq_file *m, void *private)
{
	const struct fadump_mem_struct *fdm_ptr;

	if (!fw_dump.fadump_enabled)
		return 0;

	mutex_lock(&fadump_mutex);
	if (fdm_active)
		fdm_ptr = fdm_active;
	else {
		mutex_unlock(&fadump_mutex);
		fdm_ptr = &fdm;
	}

	seq_printf(m,
			"CPU : [%#016llx-%#016llx] %#llx bytes, "
			"Dumped: %#llx\n",
			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
			be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
			be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
			be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
	seq_printf(m,
			"HPTE: [%#016llx-%#016llx] %#llx bytes, "
			"Dumped: %#llx\n",
			be64_to_cpu(fdm_ptr->hpte_region.destination_address),
			be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
			be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
			be64_to_cpu(fdm_ptr->hpte_region.source_len),
			be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
	seq_printf(m,
			"DUMP: [%#016llx-%#016llx] %#llx bytes, "
			"Dumped: %#llx\n",
			be64_to_cpu(fdm_ptr->rmr_region.destination_address),
			be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
			be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
			be64_to_cpu(fdm_ptr->rmr_region.source_len),
			be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));

	/* Nothing more to show unless the reserved area extends below the
	 * CPU state region of an active dump. */
	if (!fdm_active ||
		(fw_dump.reserve_dump_area_start ==
		be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
		goto out;

	/* Dump is active. Show reserved memory region. */
	seq_printf(m,
			"    : [%#016llx-%#016llx] %#llx bytes, "
			"Dumped: %#llx\n",
			(unsigned long long)fw_dump.reserve_dump_area_start,
			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
			fw_dump.reserve_dump_area_start,
			be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
			fw_dump.reserve_dump_area_start);
out:
	if (fdm_active)
		mutex_unlock(&fadump_mutex);
	return 0;
}

/* sysfs attributes exposed under /sys/kernel. */
static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
						0200, NULL,
						fadump_release_memory_store);
static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
						0444, fadump_enabled_show,
						NULL);
static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
						0644, fadump_register_show,
						fadump_register_store);

/* debugfs open: hook fadump_region_show into the seq_file machinery. */
static int fadump_region_open(struct inode *inode, struct file *file)
{
	return single_open(file, fadump_region_show, inode->i_private);
}

static const struct file_operations fadump_region_fops = {
	.open = fadump_region_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/*
 * Create the fadump sysfs and debugfs control files.  Failures are only
 * logged, never fatal.  fadump_release_mem is exposed only while a dump
 * is active.
 */
static void fadump_init_files(void)
{
	struct dentry *debugfs_file;
	int rc = 0;

	rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
	if (rc)
		printk(KERN_ERR "fadump: unable to create sysfs file"
			" fadump_enabled (%d)\n", rc);

	rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
	if (rc)
		printk(KERN_ERR "fadump: unable to create sysfs file"
			" fadump_registered (%d)\n", rc);

	debugfs_file = debugfs_create_file("fadump_region", 0444,
					powerpc_debugfs_root, NULL,
					&fadump_region_fops);
	if (!debugfs_file)
		printk(KERN_ERR "fadump: unable to create debugfs file"
				" fadump_region\n");

	if (fw_dump.dump_active) {
		rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
		if (rc)
			printk(KERN_ERR "fadump: unable to create sysfs file"
				" fadump_release_mem (%d)\n", rc);
	}
	return;
}

/*
 * Prepare for firmware-assisted dump.
 */
int __init setup_fadump(void)
{
	if (!fw_dump.fadump_enabled)
		return 0;

	if (!fw_dump.fadump_supported) {
		printk(KERN_ERR "Firmware-assisted dump is not supported on"
			" this hardware\n");
		return 0;
	}

	fadump_show_config();
	/*
	 * If dump data is available then see if it is valid and prepare for
	 * saving it to the disk.
	 */
	if (fw_dump.dump_active) {
		/*
		 * if dump process fails then invalidate the registration
		 * and release memory before proceeding for re-registration.
		 */
		if (process_fadump(fdm_active) < 0)
			fadump_invalidate_release_mem();
	}
	/* Initialize the kernel dump memory structure for FAD registration. */
	else if (fw_dump.reserve_dump_area_size)
		init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
	fadump_init_files();

	return 1;
}
subsys_initcall(setup_fadump);