// SPDX-License-Identifier: GPL-2.0-only
/*
 * Architecture specific (PPC64) functions for kexec based crash dumps.
 *
 * Copyright (C) 2005, IBM Corp.
 *
 * Created by: Haren Myneni
 */

#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/reboot.h>
#include <linux/kexec.h>
#include <linux/export.h>
#include <linux/crash_dump.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>
#include <linux/libfdt.h>
#include <linux/memory.h>

#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
#include <asm/smp.h>
#include <asm/setjmp.h>
#include <asm/debug.h>
#include <asm/interrupt.h>
#include <asm/kexec_ranges.h>

/*
 * The primary CPU waits a while for all secondary CPUs to enter. This is to
 * avoid sending an IPI if the secondary CPUs are entering
 * crash_kexec_secondary on their own (eg via a system reset).
 *
 * The secondary timeout has to be longer than the primary. Both timeouts are
 * in milliseconds.
 */
#define PRIMARY_TIMEOUT		500
#define SECONDARY_TIMEOUT	1000

#define IPI_TIMEOUT		10000
#define REAL_MODE_TIMEOUT	10000

static int time_to_dump;

/*
 * In case of system reset, secondary CPUs enter crash_kexec_secondary without
 * having to send an IPI explicitly. So, indicate if the crash is via
 * system reset to avoid sending another IPI.
 */
static int is_via_system_reset;

/*
 * crash_wake_offline should be set to 1 by platforms that intend to wake
 * up offline cpus prior to jumping to a kdump kernel. Currently powernv
 * sets it to 1, since we want to avoid unexpected behaviour when an
 * offline CPU wakes up due to something like an HMI (malfunction error),
 * which propagates to all threads.
 */
int crash_wake_offline;

#define CRASH_HANDLER_MAX 3
/* List of shutdown handles */
static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
static DEFINE_SPINLOCK(crash_handlers_lock);

static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
static int crash_shutdown_cpu = -1;

/*
 * Fault handler installed while a registered crash shutdown handler or the
 * system-reset wait loop runs: if the crashing CPU faults, longjmp back to
 * the setjmp point instead of taking down the dump path.
 */
static int handle_fault(struct pt_regs *regs)
{
	if (crash_shutdown_cpu == smp_processor_id())
		longjmp(crash_shutdown_buf, 1);
	return 0;
}

#ifdef CONFIG_SMP

static atomic_t cpus_in_crash;
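/*
 * Secondary CPUs enter here during a crash, either via the IPI sent from
 * crash_kexec_prepare_cpus() or via crash_kexec_secondary(). Each CPU saves
 * its register state only once (a system reset may bring a CPU back through
 * here), reports in via cpus_in_crash, and then spins until the crashing
 * CPU sets time_to_dump before dropping into the kexec wait loop.
 */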
void crash_ipi_callback(struct pt_regs *regs)
{
	static cpumask_t cpus_state_saved = CPU_MASK_NONE;

	int cpu = smp_processor_id();

	hard_irq_disable();
	if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
		crash_save_cpu(regs, cpu);
		cpumask_set_cpu(cpu, &cpus_state_saved);
	}

	atomic_inc(&cpus_in_crash);
	smp_mb__after_atomic();

	/*
	 * Spin until the crashing CPU signals that the kdump boot is
	 * starting; this keeps all secondary CPUs stopped until then.
	 */
	while (!time_to_dump)
		cpu_relax();

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 1);

#ifdef CONFIG_PPC64
	kexec_smp_wait();
#else
	for (;;);	/* FIXME */
#endif

	/* NOTREACHED */
}
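/*
 * Bring all other CPUs into the crash path before the kdump kernel is
 * started: give secondaries a chance to enter on their own (system reset)
 * or send them an IPI, then wait up to IPI_TIMEOUT for them to check in.
 * If some CPUs still have not responded, ask the operator for a system
 * reset and wait for everyone to re-enter the crash path once.
 */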
static void crash_kexec_prepare_cpus(void)
{
	unsigned int msecs;
	volatile unsigned int ncpus = num_online_cpus() - 1; /* Excluding the panic cpu */
	volatile int tries = 0;
	int (*old_handler)(struct pt_regs *regs);

	printk(KERN_EMERG "Sending IPI to other CPUs\n");

	if (crash_wake_offline)
		ncpus = num_present_cpus() - 1;

	/*
	 * If we came in via system reset, secondaries enter via
	 * crash_kexec_secondary(), so wait a while for them to show up.
	 * Otherwise, send an IPI to all other CPUs.
	 */
	if (is_via_system_reset)
		mdelay(PRIMARY_TIMEOUT);
	else
		crash_send_ipi(crash_ipi_callback);
	smp_wmb();

again:
	/*
	 * FIXME: Until we have a way to stop other CPUs reliably, the
	 * crash CPU sends an IPI and waits for the other CPUs to respond.
	 */
	msecs = IPI_TIMEOUT;
	while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
		mdelay(1);

	/* Would it be better to replace the trap vector here? */

	if (atomic_read(&cpus_in_crash) >= ncpus) {
		printk(KERN_EMERG "IPI complete\n");
		return;
	}

	printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
		ncpus - atomic_read(&cpus_in_crash));

	/*
	 * If we have a panic timeout set then we can't wait indefinitely
	 * for someone to activate system reset. We also give up on the
	 * second time through if system reset fails to work.
	 */
	if ((panic_timeout > 0) || (tries > 0))
		return;

	/*
	 * A system reset will cause all CPUs to take an 0x100 exception.
	 * The primary CPU returns here via setjmp, and the secondary
	 * CPUs reexecute the crash_kexec_secondary path.
	 */
	old_handler = __debugger;
	__debugger = handle_fault;
	crash_shutdown_cpu = smp_processor_id();

	if (setjmp(crash_shutdown_buf) == 0) {
		printk(KERN_EMERG "Activate system reset (dumprestart) to stop other cpu(s)\n");

		/*
		 * A system reset will force all CPUs to execute the
		 * crash code again. We need to reset cpus_in_crash so we
		 * wait for everyone to do this.
		 */
		atomic_set(&cpus_in_crash, 0);
		smp_mb();

		while (atomic_read(&cpus_in_crash) < ncpus)
			cpu_relax();
	}

	crash_shutdown_cpu = -1;
	__debugger = old_handler;

	tries++;
	goto again;
}

/*
 * This function will be called by secondary cpus.
 */
void crash_kexec_secondary(struct pt_regs *regs)
{
	unsigned long flags;
	int msecs = SECONDARY_TIMEOUT;

	local_irq_save(flags);

	/* Wait for the primary crash CPU to signal its progress */
	while (crashing_cpu < 0) {
		if (--msecs < 0) {
			/* No response, kdump image may not have been loaded */
			local_irq_restore(flags);
			return;
		}

		mdelay(1);
	}

	crash_ipi_callback(regs);
}

#else	/* ! CONFIG_SMP */

static void crash_kexec_prepare_cpus(void)
{
	/*
	 * move the secondaries to us so that we can copy
	 * the new kernel 0-0x100 safely
	 *
	 * do this if kexec in setup.c ?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}

void crash_kexec_secondary(struct pt_regs *regs)
{
}
#endif	/* CONFIG_SMP */

/* wait for all the CPUs to hit real mode but timeout if they don't come in */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
	unsigned int msecs;
	int i;

	msecs = REAL_MODE_TIMEOUT;
	for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
		if (i == cpu)
			continue;

		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
			barrier();
			if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
				break;
			msecs--;
			mdelay(1);
		}
	}
	mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC64 */

void crash_kexec_prepare(void)
{
	/* Avoid hardlocking with an unresponsive CPU holding logbuf_lock */
	printk_deferred_enter();

	/*
	 * This function is only called after the system
	 * has panicked or is otherwise in a critical state.
	 * The minimum amount of code to allow a kexec'd kernel
	 * to run successfully needs to happen here.
	 *
	 * In practice this means stopping other cpus in
	 * an SMP system.
	 * The kernel is broken so disable interrupts.
	 */
	hard_irq_disable();

	/*
	 * Make a note of the crashing cpu. Will be used in machine_kexec
	 * such that another IPI will not be sent.
	 */
	crashing_cpu = smp_processor_id();

	crash_kexec_prepare_cpus();
}
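/*
 * A minimal, hypothetical usage sketch for the handler API below (the
 * handler name and the device it quiesces are illustrative only):
 *
 *	static void mydev_crash_quiesce(void)
 *	{
 *		... mask the device's interrupts and halt in-flight DMA ...
 *	}
 *
 *	rc = crash_shutdown_register(mydev_crash_quiesce);
 *	...
 *	rc = crash_shutdown_unregister(mydev_crash_quiesce);
 */
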
/*
 * Register a function to be called on shutdown. Only use this if you
 * can't reset your device in the second kernel.
 */
int crash_shutdown_register(crash_shutdown_t handler)
{
	unsigned int i, rc;

	spin_lock(&crash_handlers_lock);
	for (i = 0; i < CRASH_HANDLER_MAX; i++)
		if (!crash_shutdown_handles[i]) {
			/* Insert handle at first empty entry */
			crash_shutdown_handles[i] = handler;
			rc = 0;
			break;
		}

	if (i == CRASH_HANDLER_MAX) {
		printk(KERN_ERR "Crash shutdown handles full, not registered.\n");
		rc = 1;
	}

	spin_unlock(&crash_handlers_lock);
	return rc;
}
EXPORT_SYMBOL(crash_shutdown_register);

int crash_shutdown_unregister(crash_shutdown_t handler)
{
	unsigned int i, rc;

	spin_lock(&crash_handlers_lock);
	for (i = 0; i < CRASH_HANDLER_MAX; i++)
		if (crash_shutdown_handles[i] == handler)
			break;

	if (i == CRASH_HANDLER_MAX) {
		printk(KERN_ERR "Crash shutdown handle not found\n");
		rc = 1;
	} else {
		/* Shift handles down */
		for (; i < (CRASH_HANDLER_MAX - 1); i++)
			crash_shutdown_handles[i] =
				crash_shutdown_handles[i+1];
		/*
		 * Reset the last entry to NULL now that it has been shifted
		 * down, so that a new handle can be added there.
		 */
		crash_shutdown_handles[i] = NULL;
		rc = 0;
	}

	spin_unlock(&crash_handlers_lock);
	return rc;
}
EXPORT_SYMBOL(crash_shutdown_unregister);

void default_machine_crash_shutdown(struct pt_regs *regs)
{
	volatile unsigned int i;
	int (*old_handler)(struct pt_regs *regs);

	if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
		is_via_system_reset = 1;

	crash_smp_send_stop();

	crash_save_cpu(regs, crashing_cpu);

	time_to_dump = 1;

	crash_kexec_wait_realmode(crashing_cpu);

	machine_kexec_mask_interrupts();

	/*
	 * Call registered shutdown routines safely. Swap out
	 * __debugger_fault_handler, and replace on exit.
	 */
	old_handler = __debugger_fault_handler;
	__debugger_fault_handler = handle_fault;
	crash_shutdown_cpu = smp_processor_id();
	for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
		if (setjmp(crash_shutdown_buf) == 0) {
			/*
			 * Insert syncs and delay to ensure
			 * instructions in the dangerous region don't
			 * leak away from this protected region.
			 */
			asm volatile("sync; isync");
			/* dangerous region */
			crash_shutdown_handles[i]();
			asm volatile("sync; isync");
		}
	}
	crash_shutdown_cpu = -1;
	__debugger_fault_handler = old_handler;

	if (ppc_md.kexec_cpu_down)
		ppc_md.kexec_cpu_down(1, 0);
}
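
/*
 * Crash hotplug support: keep the loaded kdump image in sync with CPU and
 * memory hotplug. A CPU add updates the cpus node of the FDT segment;
 * memory add/remove regenerates the elfcorehdr segment from the current
 * crash memory ranges. While a segment is being rewritten,
 * kexec_crash_image is temporarily set to NULL so that a crash racing with
 * the update cannot use a half-updated image.
 */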
#ifdef CONFIG_CRASH_HOTPLUG
#undef pr_fmt
#define pr_fmt(fmt) "crash hp: " fmt

/*
 * Advertise the preferred elfcorehdr size to userspace via the
 * /sys/kernel/crash_elfcorehdr_size sysfs interface.
 */
unsigned int arch_crash_get_elfcorehdr_size(void)
{
	unsigned long phdr_cnt;

	/* One program header per possible CPU, plus one for vmcoreinfo */
	phdr_cnt = num_possible_cpus() + 1;
	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
		phdr_cnt += CONFIG_CRASH_MAX_MEMORY_RANGES;

	return sizeof(struct elfhdr) + (phdr_cnt * sizeof(Elf64_Phdr));
}

/**
 * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace the old
 *                             one in the kexec segment array.
 * @image: the active struct kimage
 * @mn: struct memory_notify data for the memory hotplug event
 */
static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
{
	int ret;
	struct crash_mem *cmem = NULL;
	struct kexec_segment *ksegment;
	void *ptr, *mem, *elfbuf = NULL;
	unsigned long elfsz, memsz, base_addr, size;

	ksegment = &image->segment[image->elfcorehdr_index];
	mem = (void *) ksegment->mem;
	memsz = ksegment->memsz;

	ret = get_crash_memory_ranges(&cmem);
	if (ret) {
		pr_err("Failed to get crash mem range\n");
		return;
	}

	/*
	 * The hot-unplugged memory is still part of the crash memory
	 * ranges; remove it here.
	 */
	if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
		base_addr = PFN_PHYS(mn->start_pfn);
		size = mn->nr_pages * PAGE_SIZE;
		ret = remove_mem_range(&cmem, base_addr, size);
		if (ret) {
			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
			goto out;
		}
	}

	ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
	if (ret) {
		pr_err("Failed to prepare elf header\n");
		goto out;
	}

	/*
	 * It is unlikely that the kernel hits this because the elfcorehdr
	 * kexec segment (memsz) is built with additional space to
	 * accommodate a growing number of crash memory ranges while loading
	 * the kdump kernel. This check is just to avoid any unforeseen case.
	 */
	if (elfsz > memsz) {
		pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu\n", elfsz, memsz);
		goto out;
	}

	ptr = __va(mem);
	if (ptr) {
		/* Temporarily invalidate the crash image while it is replaced */
		xchg(&kexec_crash_image, NULL);

		/* Replace the old elfcorehdr with the newly prepared one */
		memcpy((void *)ptr, elfbuf, elfsz);

		/* The crash image is now valid once again */
		xchg(&kexec_crash_image, image);
	}
out:
	kvfree(cmem);
	kvfree(elfbuf);
}

/**
 * get_fdt_index - Loop through the kexec segment array and find
 *                 the index of the FDT segment.
 * @image: a pointer to kexec_crash_image
 *
 * Returns the index of the FDT segment in the kexec segment array
 * if found; otherwise -1.
 */
static int get_fdt_index(struct kimage *image)
{
	void *ptr;
	unsigned long mem;
	int i, fdt_index = -1;

	/* Find the FDT segment index in the kexec segment array. */
	for (i = 0; i < image->nr_segments; i++) {
		mem = image->segment[i].mem;
		ptr = __va(mem);

		if (ptr && fdt_magic(ptr) == FDT_MAGIC) {
			fdt_index = i;
			break;
		}
	}

	return fdt_index;
}

/**
 * update_crash_fdt - Update the cpus node of the crash FDT.
 *
 * @image: a pointer to kexec_crash_image
 */
static void update_crash_fdt(struct kimage *image)
{
	void *fdt;
	int fdt_index;

	fdt_index = get_fdt_index(image);
	if (fdt_index < 0) {
		pr_err("Unable to locate FDT segment.\n");
		return;
	}

	fdt = __va((void *)image->segment[fdt_index].mem);

	/* Temporarily invalidate the crash image while it is replaced */
	xchg(&kexec_crash_image, NULL);

	/* Update the FDT to reflect changes in CPU resources */
	if (update_cpus_node(fdt))
		pr_err("Failed to update crash FDT\n");

	/* The crash image is now valid once again */
	xchg(&kexec_crash_image, image);
}

int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
{
#ifdef CONFIG_KEXEC_FILE
	if (image->file_mode)
		return 1;
#endif
	return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
}

/**
 * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events
 *                                   to update the necessary kexec segments
 *                                   based on the hotplug event.
 * @image: a pointer to kexec_crash_image
 * @arg: struct memory_notify data for the memory hotplug case; NULL for the
 *       CPU hotplug case.
 *
 * Update the kdump image based on the type of hotplug event, represented by
 * image->hp_action.
 * CPU add: Update the FDT segment to include the newly added CPU.
 * CPU remove: No action is needed, with the assumption that it's okay to
 *             have offline CPUs as part of the FDT.
 * Memory add/remove: Recreate the elfcorehdr segment to reflect the updated
 *                    crash memory ranges.
 */
void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
{
	struct memory_notify *mn;

	switch (image->hp_action) {
	case KEXEC_CRASH_HP_REMOVE_CPU:
		return;

	case KEXEC_CRASH_HP_ADD_CPU:
		update_crash_fdt(image);
		break;

	case KEXEC_CRASH_HP_REMOVE_MEMORY:
	case KEXEC_CRASH_HP_ADD_MEMORY:
		mn = (struct memory_notify *)arg;
		update_crash_elfcorehdr(image, mn);
		return;
	default:
		pr_warn_once("Unknown hotplug action\n");
	}
}
#endif /* CONFIG_CRASH_HOTPLUG */