1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * powerpc code to implement the kexec_file_load syscall
4 *
5 * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
6 * Copyright (C) 2004 IBM Corp.
7 * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
8 * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
9 * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
10 * Copyright (C) 2020 IBM Corporation
11 *
12 * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
13 * Heavily modified for the kernel by
14 * Hari Bathini, IBM Corporation.
15 */
16
17 #define pr_fmt(fmt) "kexec ranges: " fmt
18
19 #include <linux/sort.h>
20 #include <linux/kexec.h>
21 #include <linux/of.h>
22 #include <linux/slab.h>
23 #include <linux/memblock.h>
24 #include <linux/crash_core.h>
25 #include <asm/sections.h>
26 #include <asm/kexec_ranges.h>
27 #include <asm/crashdump-ppc64.h>
28
29 #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
30 /**
31 * get_max_nr_ranges - Get the max no. of ranges crash_mem structure
32 * could hold, given the size allocated for it.
33 * @size: Allocation size of crash_mem structure.
34 *
35 * Returns the maximum no. of ranges.
36 */
get_max_nr_ranges(size_t size)37 static inline unsigned int get_max_nr_ranges(size_t size)
38 {
39 return ((size - sizeof(struct crash_mem)) /
40 sizeof(struct range));
41 }
42
43 /**
44 * get_mem_rngs_size - Get the allocated size of mem_rngs based on
45 * max_nr_ranges and chunk size.
46 * @mem_rngs: Memory ranges.
47 *
48 * Returns the maximum size of @mem_rngs.
49 */
get_mem_rngs_size(struct crash_mem * mem_rngs)50 static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
51 {
52 size_t size;
53
54 if (!mem_rngs)
55 return 0;
56
57 size = (sizeof(struct crash_mem) +
58 (mem_rngs->max_nr_ranges * sizeof(struct range)));
59
60 /*
61 * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ.
62 * So, align to get the actual length.
63 */
64 return ALIGN(size, MEM_RANGE_CHUNK_SZ);
65 }
66
67 /**
68 * __add_mem_range - add a memory range to memory ranges list.
69 * @mem_ranges: Range list to add the memory range to.
70 * @base: Base address of the range to add.
71 * @size: Size of the memory range to add.
72 *
73 * (Re)allocates memory, if needed.
74 *
75 * Returns 0 on success, negative errno on error.
76 */
__add_mem_range(struct crash_mem ** mem_ranges,u64 base,u64 size)77 static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
78 {
79 struct crash_mem *mem_rngs = *mem_ranges;
80
81 if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) {
82 mem_rngs = realloc_mem_ranges(mem_ranges);
83 if (!mem_rngs)
84 return -ENOMEM;
85 }
86
87 mem_rngs->ranges[mem_rngs->nr_ranges].start = base;
88 mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1;
89 pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
90 base, base + size - 1, mem_rngs->nr_ranges);
91 mem_rngs->nr_ranges++;
92 return 0;
93 }
94
95 /**
96 * __merge_memory_ranges - Merges the given memory ranges list.
97 * @mem_rngs: Range list to merge.
98 *
99 * Assumes a sorted range list.
100 *
101 * Returns nothing.
102 */
__merge_memory_ranges(struct crash_mem * mem_rngs)103 static void __merge_memory_ranges(struct crash_mem *mem_rngs)
104 {
105 struct range *ranges;
106 int i, idx;
107
108 if (!mem_rngs)
109 return;
110
111 idx = 0;
112 ranges = &(mem_rngs->ranges[0]);
113 for (i = 1; i < mem_rngs->nr_ranges; i++) {
114 if (ranges[i].start <= (ranges[i-1].end + 1))
115 ranges[idx].end = ranges[i].end;
116 else {
117 idx++;
118 if (i == idx)
119 continue;
120
121 ranges[idx] = ranges[i];
122 }
123 }
124 mem_rngs->nr_ranges = idx + 1;
125 }
126
127 /* cmp_func_t callback to sort ranges with sort() */
rngcmp(const void * _x,const void * _y)128 static int rngcmp(const void *_x, const void *_y)
129 {
130 const struct range *x = _x, *y = _y;
131
132 if (x->start > y->start)
133 return 1;
134 if (x->start < y->start)
135 return -1;
136 return 0;
137 }
138
139 /**
140 * sort_memory_ranges - Sorts the given memory ranges list.
141 * @mem_rngs: Range list to sort.
142 * @merge: If true, merge the list after sorting.
143 *
144 * Returns nothing.
145 */
sort_memory_ranges(struct crash_mem * mem_rngs,bool merge)146 void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
147 {
148 int i;
149
150 if (!mem_rngs)
151 return;
152
153 /* Sort the ranges in-place */
154 sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges,
155 sizeof(mem_rngs->ranges[0]), rngcmp, NULL);
156
157 if (merge)
158 __merge_memory_ranges(mem_rngs);
159
160 /* For debugging purpose */
161 pr_debug("Memory ranges:\n");
162 for (i = 0; i < mem_rngs->nr_ranges; i++) {
163 pr_debug("\t[%03d][%#016llx - %#016llx]\n", i,
164 mem_rngs->ranges[i].start,
165 mem_rngs->ranges[i].end);
166 }
167 }
168
169 /**
170 * realloc_mem_ranges - reallocate mem_ranges with size incremented
171 * by MEM_RANGE_CHUNK_SZ. Frees up the old memory,
172 * if memory allocation fails.
173 * @mem_ranges: Memory ranges to reallocate.
174 *
175 * Returns pointer to reallocated memory on success, NULL otherwise.
176 */
realloc_mem_ranges(struct crash_mem ** mem_ranges)177 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
178 {
179 struct crash_mem *mem_rngs = *mem_ranges;
180 unsigned int nr_ranges;
181 size_t size;
182
183 size = get_mem_rngs_size(mem_rngs);
184 nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0;
185
186 size += MEM_RANGE_CHUNK_SZ;
187 mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL);
188 if (!mem_rngs) {
189 kfree(*mem_ranges);
190 *mem_ranges = NULL;
191 return NULL;
192 }
193
194 mem_rngs->nr_ranges = nr_ranges;
195 mem_rngs->max_nr_ranges = get_max_nr_ranges(size);
196 *mem_ranges = mem_rngs;
197
198 return mem_rngs;
199 }
200
201 /**
202 * add_mem_range - Updates existing memory range, if there is an overlap.
203 * Else, adds a new memory range.
204 * @mem_ranges: Range list to add the memory range to.
205 * @base: Base address of the range to add.
206 * @size: Size of the memory range to add.
207 *
208 * (Re)allocates memory, if needed.
209 *
210 * Returns 0 on success, negative errno on error.
211 */
add_mem_range(struct crash_mem ** mem_ranges,u64 base,u64 size)212 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
213 {
214 struct crash_mem *mem_rngs = *mem_ranges;
215 u64 mstart, mend, end;
216 unsigned int i;
217
218 if (!size)
219 return 0;
220
221 end = base + size - 1;
222
223 if (!mem_rngs || !(mem_rngs->nr_ranges))
224 return __add_mem_range(mem_ranges, base, size);
225
226 for (i = 0; i < mem_rngs->nr_ranges; i++) {
227 mstart = mem_rngs->ranges[i].start;
228 mend = mem_rngs->ranges[i].end;
229 if (base < mend && end > mstart) {
230 if (base < mstart)
231 mem_rngs->ranges[i].start = base;
232 if (end > mend)
233 mem_rngs->ranges[i].end = end;
234 return 0;
235 }
236 }
237
238 return __add_mem_range(mem_ranges, base, size);
239 }
240
241 #endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
242
243 #ifdef CONFIG_KEXEC_FILE
244 /**
245 * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
246 * @mem_ranges: Range list to add the memory range(s) to.
247 *
248 * Returns 0 on success, negative errno on error.
249 */
add_tce_mem_ranges(struct crash_mem ** mem_ranges)250 static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
251 {
252 struct device_node *dn = NULL;
253 int ret = 0;
254
255 for_each_node_by_type(dn, "pci") {
256 u64 base;
257 u32 size;
258
259 ret = of_property_read_u64(dn, "linux,tce-base", &base);
260 ret |= of_property_read_u32(dn, "linux,tce-size", &size);
261 if (ret) {
262 /*
263 * It is ok to have pci nodes without tce. So, ignore
264 * property does not exist error.
265 */
266 if (ret == -EINVAL) {
267 ret = 0;
268 continue;
269 }
270 break;
271 }
272
273 ret = add_mem_range(mem_ranges, base, size);
274 if (ret)
275 break;
276 }
277
278 of_node_put(dn);
279 return ret;
280 }
281
282 /**
283 * add_initrd_mem_range - Adds initrd range to the given memory ranges list,
284 * if the initrd was retained.
285 * @mem_ranges: Range list to add the memory range to.
286 *
287 * Returns 0 on success, negative errno on error.
288 */
add_initrd_mem_range(struct crash_mem ** mem_ranges)289 static int add_initrd_mem_range(struct crash_mem **mem_ranges)
290 {
291 u64 base, end;
292 int ret;
293
294 /* This range means something, only if initrd was retained */
295 if (!strstr(saved_command_line, "retain_initrd"))
296 return 0;
297
298 ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
299 ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);
300 if (!ret)
301 ret = add_mem_range(mem_ranges, base, end - base + 1);
302
303 return ret;
304 }
305
/**
 * add_htab_mem_range - Adds htab range to the given memory ranges list,
 *                      if it exists.
 * @mem_ranges:         Range list to add the memory range to.
 *
 * Only does work when CONFIG_PPC_64S_HASH_MMU is enabled and
 * htab_address is set; otherwise it is a no-op.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_htab_mem_range(struct crash_mem **mem_ranges)
{
#ifdef CONFIG_PPC_64S_HASH_MMU
	if (htab_address)
		return add_mem_range(mem_ranges, __pa(htab_address),
				     htab_size_bytes);
#endif
	return 0;
}
325
326 /**
327 * add_kernel_mem_range - Adds kernel text region to the given
328 * memory ranges list.
329 * @mem_ranges: Range list to add the memory range to.
330 *
331 * Returns 0 on success, negative errno on error.
332 */
add_kernel_mem_range(struct crash_mem ** mem_ranges)333 static int add_kernel_mem_range(struct crash_mem **mem_ranges)
334 {
335 return add_mem_range(mem_ranges, 0, __pa(_end));
336 }
337 #endif /* CONFIG_KEXEC_FILE */
338
339 #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
340 /**
341 * add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
342 * @mem_ranges: Range list to add the memory range to.
343 *
344 * Returns 0 on success, negative errno on error.
345 */
add_rtas_mem_range(struct crash_mem ** mem_ranges)346 static int add_rtas_mem_range(struct crash_mem **mem_ranges)
347 {
348 struct device_node *dn;
349 u32 base, size;
350 int ret = 0;
351
352 dn = of_find_node_by_path("/rtas");
353 if (!dn)
354 return 0;
355
356 ret = of_property_read_u32(dn, "linux,rtas-base", &base);
357 ret |= of_property_read_u32(dn, "rtas-size", &size);
358 if (!ret)
359 ret = add_mem_range(mem_ranges, base, size);
360
361 of_node_put(dn);
362 return ret;
363 }
364
365 /**
366 * add_opal_mem_range - Adds OPAL region to the given memory ranges list.
367 * @mem_ranges: Range list to add the memory range to.
368 *
369 * Returns 0 on success, negative errno on error.
370 */
add_opal_mem_range(struct crash_mem ** mem_ranges)371 static int add_opal_mem_range(struct crash_mem **mem_ranges)
372 {
373 struct device_node *dn;
374 u64 base, size;
375 int ret;
376
377 dn = of_find_node_by_path("/ibm,opal");
378 if (!dn)
379 return 0;
380
381 ret = of_property_read_u64(dn, "opal-base-address", &base);
382 ret |= of_property_read_u64(dn, "opal-runtime-size", &size);
383 if (!ret)
384 ret = add_mem_range(mem_ranges, base, size);
385
386 of_node_put(dn);
387 return ret;
388 }
389 #endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
390
391 #ifdef CONFIG_KEXEC_FILE
392 /**
393 * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
394 * to the given memory ranges list.
395 * @mem_ranges: Range list to add the memory ranges to.
396 *
397 * Returns 0 on success, negative errno on error.
398 */
add_reserved_mem_ranges(struct crash_mem ** mem_ranges)399 static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
400 {
401 int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
402 struct device_node *root = of_find_node_by_path("/");
403 const __be32 *prop;
404
405 prop = of_get_property(root, "reserved-ranges", &len);
406 n_mem_addr_cells = of_n_addr_cells(root);
407 n_mem_size_cells = of_n_size_cells(root);
408 of_node_put(root);
409 if (!prop)
410 return 0;
411
412 cells = n_mem_addr_cells + n_mem_size_cells;
413
414 /* Each reserved range is an (address,size) pair */
415 for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {
416 u64 base, size;
417
418 base = of_read_number(prop + (i * cells), n_mem_addr_cells);
419 size = of_read_number(prop + (i * cells) + n_mem_addr_cells,
420 n_mem_size_cells);
421
422 ret = add_mem_range(mem_ranges, base, size);
423 if (ret)
424 break;
425 }
426
427 return ret;
428 }
429
/**
 * get_reserved_memory_ranges - Get reserve memory ranges. This list includes
 *                              memory regions that should be added to the
 *                              memory reserve map to ensure the region is
 *                              protected from any mischief.
 * @mem_ranges:                 Range list to add the memory ranges to.
 *
 * Collects, in order, the RTAS region, the tce-table ranges and the
 * firmware "reserved-ranges". Stops at the first failure and logs it.
 *
 * Returns 0 on success, negative errno on error.
 */
int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	ret = add_rtas_mem_range(mem_ranges);
	if (!ret)
		ret = add_tce_mem_ranges(mem_ranges);
	if (!ret)
		ret = add_reserved_mem_ranges(mem_ranges);

	if (ret)
		pr_err("Failed to setup reserved memory ranges\n");

	return ret;
}
457
458 /**
459 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
460 * regions like opal/rtas, tce-table, initrd,
461 * kernel, htab which should be avoided while
462 * setting up kexec load segments.
463 * @mem_ranges: Range list to add the memory ranges to.
464 *
465 * Returns 0 on success, negative errno on error.
466 */
get_exclude_memory_ranges(struct crash_mem ** mem_ranges)467 int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
468 {
469 int ret;
470
471 ret = add_tce_mem_ranges(mem_ranges);
472 if (ret)
473 goto out;
474
475 ret = add_initrd_mem_range(mem_ranges);
476 if (ret)
477 goto out;
478
479 ret = add_htab_mem_range(mem_ranges);
480 if (ret)
481 goto out;
482
483 ret = add_kernel_mem_range(mem_ranges);
484 if (ret)
485 goto out;
486
487 ret = add_rtas_mem_range(mem_ranges);
488 if (ret)
489 goto out;
490
491 ret = add_opal_mem_range(mem_ranges);
492 if (ret)
493 goto out;
494
495 ret = add_reserved_mem_ranges(mem_ranges);
496 if (ret)
497 goto out;
498
499 /* exclude memory ranges should be sorted for easy lookup */
500 sort_memory_ranges(*mem_ranges, true);
501 out:
502 if (ret)
503 pr_err("Failed to setup exclude memory ranges\n");
504 return ret;
505 }
506
507 #ifdef CONFIG_CRASH_DUMP
508 /**
509 * get_usable_memory_ranges - Get usable memory ranges. This list includes
510 * regions like crashkernel, opal/rtas & tce-table,
511 * that kdump kernel could use.
512 * @mem_ranges: Range list to add the memory ranges to.
513 *
514 * Returns 0 on success, negative errno on error.
515 */
get_usable_memory_ranges(struct crash_mem ** mem_ranges)516 int get_usable_memory_ranges(struct crash_mem **mem_ranges)
517 {
518 int ret;
519
520 /*
521 * Early boot failure observed on guests when low memory (first memory
522 * block?) is not added to usable memory. So, add [0, crashk_res.end]
523 * instead of [crashk_res.start, crashk_res.end] to workaround it.
524 * Also, crashed kernel's memory must be added to reserve map to
525 * avoid kdump kernel from using it.
526 */
527 ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
528 if (ret)
529 goto out;
530
531 ret = add_rtas_mem_range(mem_ranges);
532 if (ret)
533 goto out;
534
535 ret = add_opal_mem_range(mem_ranges);
536 if (ret)
537 goto out;
538
539 ret = add_tce_mem_ranges(mem_ranges);
540 out:
541 if (ret)
542 pr_err("Failed to setup usable memory ranges\n");
543 return ret;
544 }
545 #endif /* CONFIG_CRASH_DUMP */
546 #endif /* CONFIG_KEXEC_FILE */
547
548 #ifdef CONFIG_CRASH_DUMP
549 /**
550 * get_crash_memory_ranges - Get crash memory ranges. This list includes
551 * first/crashing kernel's memory regions that
552 * would be exported via an elfcore.
553 * @mem_ranges: Range list to add the memory ranges to.
554 *
555 * Returns 0 on success, negative errno on error.
556 */
get_crash_memory_ranges(struct crash_mem ** mem_ranges)557 int get_crash_memory_ranges(struct crash_mem **mem_ranges)
558 {
559 phys_addr_t base, end;
560 struct crash_mem *tmem;
561 u64 i;
562 int ret;
563
564 for_each_mem_range(i, &base, &end) {
565 u64 size = end - base;
566
567 /* Skip backup memory region, which needs a separate entry */
568 if (base == BACKUP_SRC_START) {
569 if (size > BACKUP_SRC_SIZE) {
570 base = BACKUP_SRC_END + 1;
571 size -= BACKUP_SRC_SIZE;
572 } else
573 continue;
574 }
575
576 ret = add_mem_range(mem_ranges, base, size);
577 if (ret)
578 goto out;
579
580 /* Try merging adjacent ranges before reallocation attempt */
581 if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
582 sort_memory_ranges(*mem_ranges, true);
583 }
584
585 /* Reallocate memory ranges if there is no space to split ranges */
586 tmem = *mem_ranges;
587 if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
588 tmem = realloc_mem_ranges(mem_ranges);
589 if (!tmem)
590 goto out;
591 }
592
593 /* Exclude crashkernel region */
594 ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
595 if (ret)
596 goto out;
597
598 /*
599 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
600 * regions are exported to save their context at the time of
601 * crash, they should actually be backed up just like the
602 * first 64K bytes of memory.
603 */
604 ret = add_rtas_mem_range(mem_ranges);
605 if (ret)
606 goto out;
607
608 ret = add_opal_mem_range(mem_ranges);
609 if (ret)
610 goto out;
611
612 /* create a separate program header for the backup region */
613 ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
614 if (ret)
615 goto out;
616
617 sort_memory_ranges(*mem_ranges, false);
618 out:
619 if (ret)
620 pr_err("Failed to setup crash memory ranges\n");
621 return ret;
622 }
623
624 /**
625 * remove_mem_range - Removes the given memory range from the range list.
626 * @mem_ranges: Range list to remove the memory range to.
627 * @base: Base address of the range to remove.
628 * @size: Size of the memory range to remove.
629 *
630 * (Re)allocates memory, if needed.
631 *
632 * Returns 0 on success, negative errno on error.
633 */
remove_mem_range(struct crash_mem ** mem_ranges,u64 base,u64 size)634 int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
635 {
636 u64 end;
637 int ret = 0;
638 unsigned int i;
639 u64 mstart, mend;
640 struct crash_mem *mem_rngs = *mem_ranges;
641
642 if (!size)
643 return 0;
644
645 /*
646 * Memory range are stored as start and end address, use
647 * the same format to do remove operation.
648 */
649 end = base + size - 1;
650
651 for (i = 0; i < mem_rngs->nr_ranges; i++) {
652 mstart = mem_rngs->ranges[i].start;
653 mend = mem_rngs->ranges[i].end;
654
655 /*
656 * Memory range to remove is not part of this range entry
657 * in the memory range list
658 */
659 if (!(base >= mstart && end <= mend))
660 continue;
661
662 /*
663 * Memory range to remove is equivalent to this entry in the
664 * memory range list. Remove the range entry from the list.
665 */
666 if (base == mstart && end == mend) {
667 for (; i < mem_rngs->nr_ranges - 1; i++) {
668 mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start;
669 mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end;
670 }
671 mem_rngs->nr_ranges--;
672 goto out;
673 }
674 /*
675 * Start address of the memory range to remove and the
676 * current memory range entry in the list is same. Just
677 * move the start address of the current memory range
678 * entry in the list to end + 1.
679 */
680 else if (base == mstart) {
681 mem_rngs->ranges[i].start = end + 1;
682 goto out;
683 }
684 /*
685 * End address of the memory range to remove and the
686 * current memory range entry in the list is same.
687 * Just move the end address of the current memory
688 * range entry in the list to base - 1.
689 */
690 else if (end == mend) {
691 mem_rngs->ranges[i].end = base - 1;
692 goto out;
693 }
694 /*
695 * Memory range to remove is not at the edge of current
696 * memory range entry. Split the current memory entry into
697 * two half.
698 */
699 else {
700 mem_rngs->ranges[i].end = base - 1;
701 size = mem_rngs->ranges[i].end - end;
702 ret = add_mem_range(mem_ranges, end + 1, size);
703 }
704 }
705 out:
706 return ret;
707 }
708 #endif /* CONFIG_CRASH_DUMP */
709