xref: /linux/arch/powerpc/platforms/powernv/opal-fadump.c (revision c532de5a67a70f8533d495f8f2aaa9a0491c3ad0)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Firmware-Assisted Dump support on POWER platform (OPAL).
4  *
5  * Copyright 2019, Hari Bathini, IBM Corporation.
6  */
7 
8 #define pr_fmt(fmt) "opal fadump: " fmt
9 
10 #include <linux/string.h>
11 #include <linux/seq_file.h>
12 #include <linux/of.h>
13 #include <linux/of_fdt.h>
14 #include <linux/libfdt.h>
15 #include <linux/mm.h>
16 #include <linux/crash_dump.h>
17 
18 #include <asm/page.h>
19 #include <asm/opal.h>
20 #include <asm/fadump-internal.h>
21 
22 #include "opal-fadump.h"
23 
24 
25 #ifdef CONFIG_PRESERVE_FA_DUMP
26 /*
27  * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
28  * ensure crash data is preserved in hope that the subsequent memory
29  * preserving kernel boot is going to process this crash data.
30  */
31 void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
32 {
33 	const struct opal_fadump_mem_struct *opal_fdm_active;
34 	const __be32 *prop;
35 	unsigned long dn;
36 	u64 addr = 0;
37 	s64 ret;
38 
39 	dn = of_get_flat_dt_subnode_by_name(node, "dump");
40 	if (dn == -FDT_ERR_NOTFOUND)
41 		return;
42 
43 	/*
44 	 * Check if dump has been initiated on last reboot.
45 	 */
46 	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
47 	if (!prop)
48 		return;
49 
50 	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &addr);
51 	if ((ret != OPAL_SUCCESS) || !addr) {
52 		pr_debug("Could not get Kernel metadata (%lld)\n", ret);
53 		return;
54 	}
55 
56 	/*
57 	 * Preserve memory only if kernel memory regions are registered
58 	 * with f/w for MPIPL.
59 	 */
60 	addr = be64_to_cpu(addr);
61 	pr_debug("Kernel metadata addr: %llx\n", addr);
62 	opal_fdm_active = (void *)addr;
63 	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0)
64 		return;
65 
66 	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_BOOT_MEM, &addr);
67 	if ((ret != OPAL_SUCCESS) || !addr) {
68 		pr_err("Failed to get boot memory tag (%lld)\n", ret);
69 		return;
70 	}
71 
72 	/*
73 	 * Memory below this address can be used for booting a
74 	 * capture kernel or petitboot kernel. Preserve everything
75 	 * above this address for processing crashdump.
76 	 */
77 	fadump_conf->boot_mem_top = be64_to_cpu(addr);
78 	pr_debug("Preserve everything above %llx\n", fadump_conf->boot_mem_top);
79 
80 	pr_info("Firmware-assisted dump is active.\n");
81 	fadump_conf->dump_active = 1;
82 }
83 
84 #else /* CONFIG_PRESERVE_FA_DUMP */
/* Needed early: opal_fadump_register() calls it to unwind on failure. */
static int opal_fadump_unregister(struct fw_dump *fadump_conf);

#ifdef CONFIG_OPAL_CORE
/* Defined outside this file; set to true in opal_fadump_process(). */
extern bool kernel_initiated;
#endif

/* Kernel metadata of the running kernel, located via fadump_conf->kernel_metadata. */
static struct opal_fadump_mem_struct *opal_fdm;
/* Kernel metadata found through the OPAL_MPIPL_TAG_KERNEL tag while a dump is active. */
static const struct opal_fadump_mem_struct *opal_fdm_active;
/* CPU state metadata found through the OPAL_MPIPL_TAG_CPU tag, if any. */
static const struct opal_mpipl_fadump *opal_cpu_metadata;
94 
95 static void opal_fadump_update_config(struct fw_dump *fadump_conf,
96 				      const struct opal_fadump_mem_struct *fdm)
97 {
98 	pr_debug("Boot memory regions count: %d\n", be16_to_cpu(fdm->region_cnt));
99 
100 	/*
101 	 * The destination address of the first boot memory region is the
102 	 * destination address of boot memory regions.
103 	 */
104 	fadump_conf->boot_mem_dest_addr = be64_to_cpu(fdm->rgn[0].dest);
105 	pr_debug("Destination address of boot memory regions: %#016llx\n",
106 		 fadump_conf->boot_mem_dest_addr);
107 
108 	fadump_conf->fadumphdr_addr = be64_to_cpu(fdm->fadumphdr_addr);
109 }
110 
111 /*
112  * This function is called in the capture kernel to get configuration details
113  * from metadata setup by the first kernel.
114  */
115 static void __init opal_fadump_get_config(struct fw_dump *fadump_conf,
116 				   const struct opal_fadump_mem_struct *fdm)
117 {
118 	unsigned long base, size, last_end, hole_size;
119 	int i;
120 
121 	if (!fadump_conf->dump_active)
122 		return;
123 
124 	last_end = 0;
125 	hole_size = 0;
126 	fadump_conf->boot_memory_size = 0;
127 
128 	pr_debug("Boot memory regions:\n");
129 	for (i = 0; i < be16_to_cpu(fdm->region_cnt); i++) {
130 		base = be64_to_cpu(fdm->rgn[i].src);
131 		size = be64_to_cpu(fdm->rgn[i].size);
132 		pr_debug("\t[%03d] base: 0x%lx, size: 0x%lx\n", i, base, size);
133 
134 		fadump_conf->boot_mem_addr[i] = base;
135 		fadump_conf->boot_mem_sz[i] = size;
136 		fadump_conf->boot_memory_size += size;
137 		hole_size += (base - last_end);
138 
139 		last_end = base + size;
140 	}
141 
142 	/*
143 	 * Start address of reserve dump area (permanent reservation) for
144 	 * re-registering FADump after dump capture.
145 	 */
146 	fadump_conf->reserve_dump_area_start = be64_to_cpu(fdm->rgn[0].dest);
147 
148 	/*
149 	 * Rarely, but it can so happen that system crashes before all
150 	 * boot memory regions are registered for MPIPL. In such
151 	 * cases, warn that the vmcore may not be accurate and proceed
152 	 * anyway as that is the best bet considering free pages, cache
153 	 * pages, user pages, etc are usually filtered out.
154 	 *
155 	 * Hope the memory that could not be preserved only has pages
156 	 * that are usually filtered out while saving the vmcore.
157 	 */
158 	if (be16_to_cpu(fdm->region_cnt) > be16_to_cpu(fdm->registered_regions)) {
159 		pr_warn("Not all memory regions were saved!!!\n");
160 		pr_warn("  Unsaved memory regions:\n");
161 		i = be16_to_cpu(fdm->registered_regions);
162 		while (i < be16_to_cpu(fdm->region_cnt)) {
163 			pr_warn("\t[%03d] base: 0x%llx, size: 0x%llx\n",
164 				i, be64_to_cpu(fdm->rgn[i].src),
165 				be64_to_cpu(fdm->rgn[i].size));
166 			i++;
167 		}
168 
169 		pr_warn("If the unsaved regions only contain pages that are filtered out (eg. free/user pages), the vmcore should still be usable.\n");
170 		pr_warn("WARNING: If the unsaved regions contain kernel pages, the vmcore will be corrupted.\n");
171 	}
172 
173 	fadump_conf->boot_mem_top = (fadump_conf->boot_memory_size + hole_size);
174 	fadump_conf->boot_mem_regs_cnt = be16_to_cpu(fdm->region_cnt);
175 	opal_fadump_update_config(fadump_conf, fdm);
176 }
177 
178 /* Initialize kernel metadata */
179 static void opal_fadump_init_metadata(struct opal_fadump_mem_struct *fdm)
180 {
181 	fdm->version = OPAL_FADUMP_VERSION;
182 	fdm->region_cnt = cpu_to_be16(0);
183 	fdm->registered_regions = cpu_to_be16(0);
184 	fdm->fadumphdr_addr = cpu_to_be64(0);
185 }
186 
187 static u64 opal_fadump_init_mem_struct(struct fw_dump *fadump_conf)
188 {
189 	u64 addr = fadump_conf->reserve_dump_area_start;
190 	u16 reg_cnt;
191 	int i;
192 
193 	opal_fdm = __va(fadump_conf->kernel_metadata);
194 	opal_fadump_init_metadata(opal_fdm);
195 
196 	/* Boot memory regions */
197 	reg_cnt = be16_to_cpu(opal_fdm->region_cnt);
198 	for (i = 0; i < fadump_conf->boot_mem_regs_cnt; i++) {
199 		opal_fdm->rgn[i].src	= cpu_to_be64(fadump_conf->boot_mem_addr[i]);
200 		opal_fdm->rgn[i].dest	= cpu_to_be64(addr);
201 		opal_fdm->rgn[i].size	= cpu_to_be64(fadump_conf->boot_mem_sz[i]);
202 
203 		reg_cnt++;
204 		addr += fadump_conf->boot_mem_sz[i];
205 	}
206 	opal_fdm->region_cnt = cpu_to_be16(reg_cnt);
207 
208 	/*
209 	 * Kernel metadata is passed to f/w and retrieved in capture kernel.
210 	 * So, use it to save fadump header address instead of calculating it.
211 	 */
212 	opal_fdm->fadumphdr_addr = cpu_to_be64(be64_to_cpu(opal_fdm->rgn[0].dest) +
213 					       fadump_conf->boot_memory_size);
214 
215 	opal_fadump_update_config(fadump_conf, opal_fdm);
216 
217 	return addr;
218 }
219 
220 static u64 opal_fadump_get_metadata_size(void)
221 {
222 	return PAGE_ALIGN(sizeof(struct opal_fadump_mem_struct));
223 }
224 
225 static int opal_fadump_setup_metadata(struct fw_dump *fadump_conf)
226 {
227 	int err = 0;
228 	s64 ret;
229 
230 	/*
231 	 * Use the last page(s) in FADump memory reservation for
232 	 * kernel metadata.
233 	 */
234 	fadump_conf->kernel_metadata = (fadump_conf->reserve_dump_area_start +
235 					fadump_conf->reserve_dump_area_size -
236 					opal_fadump_get_metadata_size());
237 	pr_info("Kernel metadata addr: %llx\n", fadump_conf->kernel_metadata);
238 
239 	/* Initialize kernel metadata before registering the address with f/w */
240 	opal_fdm = __va(fadump_conf->kernel_metadata);
241 	opal_fadump_init_metadata(opal_fdm);
242 
243 	/*
244 	 * Register metadata address with f/w. Can be retrieved in
245 	 * the capture kernel.
246 	 */
247 	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL,
248 				      fadump_conf->kernel_metadata);
249 	if (ret != OPAL_SUCCESS) {
250 		pr_err("Failed to set kernel metadata tag!\n");
251 		err = -EPERM;
252 	}
253 
254 	/*
255 	 * Register boot memory top address with f/w. Should be retrieved
256 	 * by a kernel that intends to preserve crash'ed kernel's memory.
257 	 */
258 	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_BOOT_MEM,
259 				      fadump_conf->boot_mem_top);
260 	if (ret != OPAL_SUCCESS) {
261 		pr_err("Failed to set boot memory tag!\n");
262 		err = -EPERM;
263 	}
264 
265 	return err;
266 }
267 
268 static u64 opal_fadump_get_bootmem_min(void)
269 {
270 	return OPAL_FADUMP_MIN_BOOT_MEM;
271 }
272 
273 static int opal_fadump_register(struct fw_dump *fadump_conf)
274 {
275 	s64 rc = OPAL_PARAMETER;
276 	u16 registered_regs;
277 	int i, err = -EIO;
278 
279 	registered_regs = be16_to_cpu(opal_fdm->registered_regions);
280 	for (i = 0; i < be16_to_cpu(opal_fdm->region_cnt); i++) {
281 		rc = opal_mpipl_update(OPAL_MPIPL_ADD_RANGE,
282 				       be64_to_cpu(opal_fdm->rgn[i].src),
283 				       be64_to_cpu(opal_fdm->rgn[i].dest),
284 				       be64_to_cpu(opal_fdm->rgn[i].size));
285 		if (rc != OPAL_SUCCESS)
286 			break;
287 
288 		registered_regs++;
289 	}
290 	opal_fdm->registered_regions = cpu_to_be16(registered_regs);
291 
292 	switch (rc) {
293 	case OPAL_SUCCESS:
294 		pr_info("Registration is successful!\n");
295 		fadump_conf->dump_registered = 1;
296 		err = 0;
297 		break;
298 	case OPAL_RESOURCE:
299 		/* If MAX regions limit in f/w is hit, warn and proceed. */
300 		pr_warn("%d regions could not be registered for MPIPL as MAX limit is reached!\n",
301 			(be16_to_cpu(opal_fdm->region_cnt) -
302 			 be16_to_cpu(opal_fdm->registered_regions)));
303 		fadump_conf->dump_registered = 1;
304 		err = 0;
305 		break;
306 	case OPAL_PARAMETER:
307 		pr_err("Failed to register. Parameter Error(%lld).\n", rc);
308 		break;
309 	case OPAL_HARDWARE:
310 		pr_err("Support not available.\n");
311 		fadump_conf->fadump_supported = 0;
312 		fadump_conf->fadump_enabled = 0;
313 		break;
314 	default:
315 		pr_err("Failed to register. Unknown Error(%lld).\n", rc);
316 		break;
317 	}
318 
319 	/*
320 	 * If some regions were registered before OPAL_MPIPL_ADD_RANGE
321 	 * OPAL call failed, unregister all regions.
322 	 */
323 	if ((err < 0) && (be16_to_cpu(opal_fdm->registered_regions) > 0))
324 		opal_fadump_unregister(fadump_conf);
325 
326 	return err;
327 }
328 
329 static int opal_fadump_unregister(struct fw_dump *fadump_conf)
330 {
331 	s64 rc;
332 
333 	rc = opal_mpipl_update(OPAL_MPIPL_REMOVE_ALL, 0, 0, 0);
334 	if (rc) {
335 		pr_err("Failed to un-register - unexpected Error(%lld).\n", rc);
336 		return -EIO;
337 	}
338 
339 	opal_fdm->registered_regions = cpu_to_be16(0);
340 	fadump_conf->dump_registered = 0;
341 	return 0;
342 }
343 
344 static int opal_fadump_invalidate(struct fw_dump *fadump_conf)
345 {
346 	s64 rc;
347 
348 	rc = opal_mpipl_update(OPAL_MPIPL_FREE_PRESERVED_MEMORY, 0, 0, 0);
349 	if (rc) {
350 		pr_err("Failed to invalidate - unexpected Error(%lld).\n", rc);
351 		return -EIO;
352 	}
353 
354 	fadump_conf->dump_active = 0;
355 	opal_fdm_active = NULL;
356 	return 0;
357 }
358 
359 static void opal_fadump_cleanup(struct fw_dump *fadump_conf)
360 {
361 	s64 ret;
362 
363 	ret = opal_mpipl_register_tag(OPAL_MPIPL_TAG_KERNEL, 0);
364 	if (ret != OPAL_SUCCESS)
365 		pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
366 }
367 
368 /*
369  * Verify if CPU state data is available. If available, do a bit of sanity
370  * checking before processing this data.
371  */
372 static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
373 {
374 	if (!opal_cpu_metadata)
375 		return false;
376 
377 	fadump_conf->cpu_state_data_version =
378 		be32_to_cpu(opal_cpu_metadata->cpu_data_version);
379 	fadump_conf->cpu_state_entry_size =
380 		be32_to_cpu(opal_cpu_metadata->cpu_data_size);
381 	fadump_conf->cpu_state_dest_vaddr =
382 		(u64)__va(be64_to_cpu(opal_cpu_metadata->region[0].dest));
383 	fadump_conf->cpu_state_data_size =
384 		be64_to_cpu(opal_cpu_metadata->region[0].size);
385 
386 	if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
387 		pr_warn("Supported CPU state data version: %u, found: %d!\n",
388 			HDAT_FADUMP_CPU_DATA_VER,
389 			fadump_conf->cpu_state_data_version);
390 		pr_warn("WARNING: F/W using newer CPU state data format!!\n");
391 	}
392 
393 	if ((fadump_conf->cpu_state_dest_vaddr == 0) ||
394 	    (fadump_conf->cpu_state_entry_size == 0) ||
395 	    (fadump_conf->cpu_state_entry_size >
396 	     fadump_conf->cpu_state_data_size)) {
397 		pr_err("CPU state data is invalid. Ignoring!\n");
398 		return false;
399 	}
400 
401 	return true;
402 }
403 
404 /*
405  * Convert CPU state data saved at the time of crash into ELF notes.
406  *
407  * While the crashing CPU's register data is saved by the kernel, CPU state
408  * data for all CPUs is saved by f/w. In CPU state data provided by f/w,
409  * each register entry is of 16 bytes, a numerical identifier along with
410  * a GPR/SPR flag in the first 8 bytes and the register value in the next
411  * 8 bytes. For more details refer to F/W documentation. If this data is
412  * missing or in unsupported format, append crashing CPU's register data
413  * saved by the kernel in the PT_NOTE, to have something to work with in
414  * the vmcore file.
415  */
416 static int __init
417 opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
418 			    struct fadump_crash_info_header *fdh)
419 {
420 	u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
421 	struct hdat_fadump_thread_hdr *thdr;
422 	bool is_cpu_data_valid = false;
423 	u32 num_cpus = 1, *note_buf;
424 	struct pt_regs regs;
425 	char *bufp;
426 	int rc, i;
427 
428 	if (is_opal_fadump_cpu_data_valid(fadump_conf)) {
429 		size_per_thread = fadump_conf->cpu_state_entry_size;
430 		num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread);
431 		bufp = __va(fadump_conf->cpu_state_dest_vaddr);
432 		is_cpu_data_valid = true;
433 	}
434 
435 	rc = fadump_setup_cpu_notes_buf(num_cpus);
436 	if (rc != 0)
437 		return rc;
438 
439 	note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
440 	if (!is_cpu_data_valid)
441 		goto out;
442 
443 	/*
444 	 * Offset for register entries, entry size and registers count is
445 	 * duplicated in every thread header in keeping with HDAT format.
446 	 * Use these values from the first thread header.
447 	 */
448 	thdr = (struct hdat_fadump_thread_hdr *)bufp;
449 	regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
450 		       be32_to_cpu(thdr->offset));
451 	reg_esize = be32_to_cpu(thdr->esize);
452 	regs_cnt  = be32_to_cpu(thdr->ecnt);
453 
454 	pr_debug("--------CPU State Data------------\n");
455 	pr_debug("NumCpus     : %u\n", num_cpus);
456 	pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
457 		 regs_offset, reg_esize, regs_cnt);
458 
459 	for (i = 0; i < num_cpus; i++, bufp += size_per_thread) {
460 		thdr = (struct hdat_fadump_thread_hdr *)bufp;
461 
462 		thread_pir = be32_to_cpu(thdr->pir);
463 		pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
464 			 i, thread_pir, thdr->core_state);
465 
466 		/*
467 		 * If this is kernel initiated crash, crashing_cpu would be set
468 		 * appropriately and register data of the crashing CPU saved by
469 		 * crashing kernel. Add this saved register data of crashing CPU
470 		 * to elf notes and populate the pt_regs for the remaining CPUs
471 		 * from register state data provided by firmware.
472 		 */
473 		if (fdh->crashing_cpu == thread_pir) {
474 			note_buf = fadump_regs_to_elf_notes(note_buf,
475 							    &fdh->regs);
476 			pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
477 				 fdh->crashing_cpu, fdh->regs.gpr[1],
478 				 fdh->regs.nip);
479 			continue;
480 		}
481 
482 		/*
483 		 * Register state data of MAX cores is provided by firmware,
484 		 * but some of this cores may not be active. So, while
485 		 * processing register state data, check core state and
486 		 * skip threads that belong to inactive cores.
487 		 */
488 		if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
489 			continue;
490 
491 		opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
492 				      reg_esize, true, &regs);
493 		note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
494 		pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
495 			 thread_pir, regs.gpr[1], regs.nip);
496 	}
497 
498 out:
499 	/*
500 	 * CPU state data is invalid/unsupported. Try appending crashing CPU's
501 	 * register data, if it is saved by the kernel.
502 	 */
503 	if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) {
504 		if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
505 			fadump_free_cpu_notes_buf();
506 			return -ENODEV;
507 		}
508 
509 		pr_warn("WARNING: appending only crashing CPU's register data\n");
510 		note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
511 	}
512 
513 	final_note(note_buf);
514 
515 	pr_debug("Updating elfcore header (%llx) with cpu notes\n",
516 		 fadump_conf->elfcorehdr_addr);
517 	fadump_update_elfcore_header((char *)fadump_conf->elfcorehdr_addr);
518 	return 0;
519 }
520 
521 static int __init opal_fadump_process(struct fw_dump *fadump_conf)
522 {
523 	struct fadump_crash_info_header *fdh;
524 	int rc = -EINVAL;
525 
526 	if (!opal_fdm_active || !fadump_conf->fadumphdr_addr)
527 		return rc;
528 
529 	fdh = __va(fadump_conf->fadumphdr_addr);
530 
531 #ifdef CONFIG_OPAL_CORE
532 	/*
533 	 * If this is a kernel initiated crash, crashing_cpu would be set
534 	 * appropriately and register data of the crashing CPU saved by
535 	 * crashing kernel. Add this saved register data of crashing CPU
536 	 * to elf notes and populate the pt_regs for the remaining CPUs
537 	 * from register state data provided by firmware.
538 	 */
539 	if (fdh->crashing_cpu != FADUMP_CPU_UNKNOWN)
540 		kernel_initiated = true;
541 #endif
542 
543 	return opal_fadump_build_cpu_notes(fadump_conf, fdh);
544 }
545 
546 static void opal_fadump_region_show(struct fw_dump *fadump_conf,
547 				    struct seq_file *m)
548 {
549 	const struct opal_fadump_mem_struct *fdm_ptr;
550 	u64 dumped_bytes = 0;
551 	int i;
552 
553 	if (fadump_conf->dump_active)
554 		fdm_ptr = opal_fdm_active;
555 	else
556 		fdm_ptr = opal_fdm;
557 
558 	for (i = 0; i < be16_to_cpu(fdm_ptr->region_cnt); i++) {
559 		/*
560 		 * Only regions that are registered for MPIPL
561 		 * would have dump data.
562 		 */
563 		if ((fadump_conf->dump_active) &&
564 		    (i < be16_to_cpu(fdm_ptr->registered_regions)))
565 			dumped_bytes = be64_to_cpu(fdm_ptr->rgn[i].size);
566 
567 		seq_printf(m, "DUMP: Src: %#016llx, Dest: %#016llx, ",
568 			   be64_to_cpu(fdm_ptr->rgn[i].src),
569 			   be64_to_cpu(fdm_ptr->rgn[i].dest));
570 		seq_printf(m, "Size: %#llx, Dumped: %#llx bytes\n",
571 			   be64_to_cpu(fdm_ptr->rgn[i].size), dumped_bytes);
572 	}
573 
574 	/* Dump is active. Show preserved area start address. */
575 	if (fadump_conf->dump_active) {
576 		seq_printf(m, "\nMemory above %#016llx is reserved for saving crash dump\n",
577 			   fadump_conf->boot_mem_top);
578 	}
579 }
580 
581 static void opal_fadump_trigger(struct fadump_crash_info_header *fdh,
582 				const char *msg)
583 {
584 	int rc;
585 
586 	/*
587 	 * Unlike on pSeries platform, logical CPU number is not provided
588 	 * with architected register state data. So, store the crashing
589 	 * CPU's PIR instead to plug the appropriate register data for
590 	 * crashing CPU in the vmcore file.
591 	 */
592 	fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
593 
594 	rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
595 	if (rc == OPAL_UNSUPPORTED) {
596 		pr_emerg("Reboot type %d not supported.\n",
597 			 OPAL_REBOOT_MPIPL);
598 	} else if (rc == OPAL_HARDWARE)
599 		pr_emerg("No backend support for MPIPL!\n");
600 }
601 
602 /* FADUMP_MAX_MEM_REGS or lower */
603 static int opal_fadump_max_boot_mem_rgns(void)
604 {
605 	return FADUMP_MAX_MEM_REGS;
606 }
607 
608 static struct fadump_ops opal_fadump_ops = {
609 	.fadump_init_mem_struct		= opal_fadump_init_mem_struct,
610 	.fadump_get_metadata_size	= opal_fadump_get_metadata_size,
611 	.fadump_setup_metadata		= opal_fadump_setup_metadata,
612 	.fadump_get_bootmem_min		= opal_fadump_get_bootmem_min,
613 	.fadump_register		= opal_fadump_register,
614 	.fadump_unregister		= opal_fadump_unregister,
615 	.fadump_invalidate		= opal_fadump_invalidate,
616 	.fadump_cleanup			= opal_fadump_cleanup,
617 	.fadump_process			= opal_fadump_process,
618 	.fadump_region_show		= opal_fadump_region_show,
619 	.fadump_trigger			= opal_fadump_trigger,
620 	.fadump_max_boot_mem_rgns	= opal_fadump_max_boot_mem_rgns,
621 };
622 
623 void __init opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node)
624 {
625 	const __be32 *prop;
626 	unsigned long dn;
627 	__be64 be_addr;
628 	u64 addr = 0;
629 	int i, len;
630 	s64 ret;
631 
632 	/*
633 	 * Check if Firmware-Assisted Dump is supported. if yes, check
634 	 * if dump has been initiated on last reboot.
635 	 */
636 	dn = of_get_flat_dt_subnode_by_name(node, "dump");
637 	if (dn == -FDT_ERR_NOTFOUND) {
638 		pr_debug("FADump support is missing!\n");
639 		return;
640 	}
641 
642 	if (!of_flat_dt_is_compatible(dn, "ibm,opal-dump")) {
643 		pr_err("Support missing for this f/w version!\n");
644 		return;
645 	}
646 
647 	prop = of_get_flat_dt_prop(dn, "fw-load-area", &len);
648 	if (prop) {
649 		/*
650 		 * Each f/w load area is an (address,size) pair,
651 		 * 2 cells each, totalling 4 cells per range.
652 		 */
653 		for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
654 			u64 base, end;
655 
656 			base = of_read_number(prop + (i * 4) + 0, 2);
657 			end = base;
658 			end += of_read_number(prop + (i * 4) + 2, 2);
659 			if (end > OPAL_FADUMP_MIN_BOOT_MEM) {
660 				pr_err("F/W load area: 0x%llx-0x%llx\n",
661 				       base, end);
662 				pr_err("F/W version not supported!\n");
663 				return;
664 			}
665 		}
666 	}
667 
668 	fadump_conf->ops			= &opal_fadump_ops;
669 	fadump_conf->fadump_supported		= 1;
670 	/* TODO: Add support to pass additional parameters */
671 	fadump_conf->param_area_supported	= 0;
672 
673 	/*
674 	 * Firmware supports 32-bit field for size. Align it to PAGE_SIZE
675 	 * and request firmware to copy multiple kernel boot memory regions.
676 	 */
677 	fadump_conf->max_copy_size = ALIGN_DOWN(U32_MAX, PAGE_SIZE);
678 
679 	/*
680 	 * Check if dump has been initiated on last reboot.
681 	 */
682 	prop = of_get_flat_dt_prop(dn, "mpipl-boot", NULL);
683 	if (!prop)
684 		return;
685 
686 	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_KERNEL, &be_addr);
687 	if ((ret != OPAL_SUCCESS) || !be_addr) {
688 		pr_err("Failed to get Kernel metadata (%lld)\n", ret);
689 		return;
690 	}
691 
692 	addr = be64_to_cpu(be_addr);
693 	pr_debug("Kernel metadata addr: %llx\n", addr);
694 
695 	opal_fdm_active = __va(addr);
696 	if (opal_fdm_active->version != OPAL_FADUMP_VERSION) {
697 		pr_warn("Supported kernel metadata version: %u, found: %d!\n",
698 			OPAL_FADUMP_VERSION, opal_fdm_active->version);
699 		pr_warn("WARNING: Kernel metadata format mismatch identified! Core file maybe corrupted..\n");
700 	}
701 
702 	/* Kernel regions not registered with f/w for MPIPL */
703 	if (be16_to_cpu(opal_fdm_active->registered_regions) == 0) {
704 		opal_fdm_active = NULL;
705 		return;
706 	}
707 
708 	ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &be_addr);
709 	if (be_addr) {
710 		addr = be64_to_cpu(be_addr);
711 		pr_debug("CPU metadata addr: %llx\n", addr);
712 		opal_cpu_metadata = __va(addr);
713 	}
714 
715 	pr_info("Firmware-assisted dump is active.\n");
716 	fadump_conf->dump_active = 1;
717 	opal_fadump_get_config(fadump_conf, opal_fdm_active);
718 }
719 #endif /* !CONFIG_PRESERVE_FA_DUMP */
720