1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * powerpc code to implement the kexec_file_load syscall 4 * 5 * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) 6 * Copyright (C) 2004 IBM Corp. 7 * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation 8 * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) 9 * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) 10 * Copyright (C) 2020 IBM Corporation 11 * 12 * Based on kexec-tools' kexec-ppc64.c, fs2dt.c. 13 * Heavily modified for the kernel by 14 * Hari Bathini, IBM Corporation. 15 */ 16 17 #define pr_fmt(fmt) "kexec ranges: " fmt 18 19 #include <linux/sort.h> 20 #include <linux/kexec.h> 21 #include <linux/of.h> 22 #include <linux/slab.h> 23 #include <linux/memblock.h> 24 #include <linux/crash_core.h> 25 #include <asm/sections.h> 26 #include <asm/kexec_ranges.h> 27 #include <asm/crashdump-ppc64.h> 28 29 #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) 30 /** 31 * get_max_nr_ranges - Get the max no. of ranges crash_mem structure 32 * could hold, given the size allocated for it. 33 * @size: Allocation size of crash_mem structure. 34 * 35 * Returns the maximum no. of ranges. 36 */ 37 static inline unsigned int get_max_nr_ranges(size_t size) 38 { 39 return ((size - sizeof(struct crash_mem)) / 40 sizeof(struct range)); 41 } 42 43 /** 44 * get_mem_rngs_size - Get the allocated size of mem_rngs based on 45 * max_nr_ranges and chunk size. 46 * @mem_rngs: Memory ranges. 47 * 48 * Returns the maximum size of @mem_rngs. 49 */ 50 static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs) 51 { 52 size_t size; 53 54 if (!mem_rngs) 55 return 0; 56 57 size = (sizeof(struct crash_mem) + 58 (mem_rngs->max_nr_ranges * sizeof(struct range))); 59 60 /* 61 * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ. 62 * So, align to get the actual length. 63 */ 64 return ALIGN(size, MEM_RANGE_CHUNK_SZ); 65 } 66 67 /** 68 * __add_mem_range - add a memory range to memory ranges list. 69 * @mem_ranges: Range list to add the memory range to. 70 * @base: Base address of the range to add. 71 * @size: Size of the memory range to add. 72 * 73 * (Re)allocates memory, if needed. 74 * 75 * Returns 0 on success, negative errno on error. 76 */ 77 static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) 78 { 79 struct crash_mem *mem_rngs = *mem_ranges; 80 81 if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) { 82 mem_rngs = realloc_mem_ranges(mem_ranges); 83 if (!mem_rngs) 84 return -ENOMEM; 85 } 86 87 mem_rngs->ranges[mem_rngs->nr_ranges].start = base; 88 mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1; 89 pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n", 90 base, base + size - 1, mem_rngs->nr_ranges); 91 mem_rngs->nr_ranges++; 92 return 0; 93 } 94 95 /** 96 * __merge_memory_ranges - Merges the given memory ranges list. 97 * @mem_rngs: Range list to merge. 98 * 99 * Assumes a sorted range list. 100 * 101 * Returns nothing. 102 */ 103 static void __merge_memory_ranges(struct crash_mem *mem_rngs) 104 { 105 struct range *ranges; 106 int i, idx; 107 108 if (!mem_rngs) 109 return; 110 111 idx = 0; 112 ranges = &(mem_rngs->ranges[0]); 113 for (i = 1; i < mem_rngs->nr_ranges; i++) { 114 if (ranges[i].start <= (ranges[i-1].end + 1)) 115 ranges[idx].end = ranges[i].end; 116 else { 117 idx++; 118 if (i == idx) 119 continue; 120 121 ranges[idx] = ranges[i]; 122 } 123 } 124 mem_rngs->nr_ranges = idx + 1; 125 } 126 127 /* cmp_func_t callback to sort ranges with sort() */ 128 static int rngcmp(const void *_x, const void *_y) 129 { 130 const struct range *x = _x, *y = _y; 131 132 if (x->start > y->start) 133 return 1; 134 if (x->start < y->start) 135 return -1; 136 return 0; 137 } 138 139 /** 140 * sort_memory_ranges - Sorts the given memory ranges list. 141 * @mem_rngs: Range list to sort. 142 * @merge: If true, merge the list after sorting. 143 * 144 * Returns nothing. 145 */ 146 void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge) 147 { 148 int i; 149 150 if (!mem_rngs) 151 return; 152 153 /* Sort the ranges in-place */ 154 sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges, 155 sizeof(mem_rngs->ranges[0]), rngcmp, NULL); 156 157 if (merge) 158 __merge_memory_ranges(mem_rngs); 159 160 /* For debugging purpose */ 161 pr_debug("Memory ranges:\n"); 162 for (i = 0; i < mem_rngs->nr_ranges; i++) { 163 pr_debug("\t[%03d][%#016llx - %#016llx]\n", i, 164 mem_rngs->ranges[i].start, 165 mem_rngs->ranges[i].end); 166 } 167 } 168 169 /** 170 * realloc_mem_ranges - reallocate mem_ranges with size incremented 171 * by MEM_RANGE_CHUNK_SZ. Frees up the old memory, 172 * if memory allocation fails. 173 * @mem_ranges: Memory ranges to reallocate. 174 * 175 * Returns pointer to reallocated memory on success, NULL otherwise. 176 */ 177 struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges) 178 { 179 struct crash_mem *mem_rngs = *mem_ranges; 180 unsigned int nr_ranges; 181 size_t size; 182 183 size = get_mem_rngs_size(mem_rngs); 184 nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0; 185 186 size += MEM_RANGE_CHUNK_SZ; 187 mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL); 188 if (!mem_rngs) { 189 kfree(*mem_ranges); 190 *mem_ranges = NULL; 191 return NULL; 192 } 193 194 mem_rngs->nr_ranges = nr_ranges; 195 mem_rngs->max_nr_ranges = get_max_nr_ranges(size); 196 *mem_ranges = mem_rngs; 197 198 return mem_rngs; 199 } 200 201 /** 202 * add_mem_range - Updates existing memory range, if there is an overlap. 203 * Else, adds a new memory range. 204 * @mem_ranges: Range list to add the memory range to. 205 * @base: Base address of the range to add. 206 * @size: Size of the memory range to add. 207 * 208 * (Re)allocates memory, if needed. 209 * 210 * Returns 0 on success, negative errno on error. 211 */ 212 int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) 213 { 214 struct crash_mem *mem_rngs = *mem_ranges; 215 u64 mstart, mend, end; 216 unsigned int i; 217 218 if (!size) 219 return 0; 220 221 end = base + size - 1; 222 223 if (!mem_rngs || !(mem_rngs->nr_ranges)) 224 return __add_mem_range(mem_ranges, base, size); 225 226 for (i = 0; i < mem_rngs->nr_ranges; i++) { 227 mstart = mem_rngs->ranges[i].start; 228 mend = mem_rngs->ranges[i].end; 229 if (base < mend && end > mstart) { 230 if (base < mstart) 231 mem_rngs->ranges[i].start = base; 232 if (end > mend) 233 mem_rngs->ranges[i].end = end; 234 return 0; 235 } 236 } 237 238 return __add_mem_range(mem_ranges, base, size); 239 } 240 241 #endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ 242 243 #ifdef CONFIG_KEXEC_FILE 244 /** 245 * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list. 246 * @mem_ranges: Range list to add the memory range(s) to. 247 * 248 * Returns 0 on success, negative errno on error. 249 */ 250 static int add_tce_mem_ranges(struct crash_mem **mem_ranges) 251 { 252 struct device_node *dn = NULL; 253 int ret = 0; 254 255 for_each_node_by_type(dn, "pci") { 256 u64 base; 257 u32 size; 258 259 ret = of_property_read_u64(dn, "linux,tce-base", &base); 260 ret |= of_property_read_u32(dn, "linux,tce-size", &size); 261 if (ret) { 262 /* 263 * It is ok to have pci nodes without tce. So, ignore 264 * property does not exist error. 265 */ 266 if (ret == -EINVAL) { 267 ret = 0; 268 continue; 269 } 270 break; 271 } 272 273 ret = add_mem_range(mem_ranges, base, size); 274 if (ret) 275 break; 276 } 277 278 of_node_put(dn); 279 return ret; 280 } 281 282 /** 283 * add_initrd_mem_range - Adds initrd range to the given memory ranges list, 284 * if the initrd was retained. 285 * @mem_ranges: Range list to add the memory range to. 286 * 287 * Returns 0 on success, negative errno on error. 288 */ 289 static int add_initrd_mem_range(struct crash_mem **mem_ranges) 290 { 291 u64 base, end; 292 int ret; 293 294 /* This range means something, only if initrd was retained */ 295 if (!strstr(saved_command_line, "retain_initrd")) 296 return 0; 297 298 ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base); 299 ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end); 300 if (!ret) 301 ret = add_mem_range(mem_ranges, base, end - base + 1); 302 303 return ret; 304 } 305 306 /** 307 * add_htab_mem_range - Adds htab range to the given memory ranges list, 308 * if it exists 309 * @mem_ranges: Range list to add the memory range to. 310 * 311 * Returns 0 on success, negative errno on error. 312 */ 313 static int add_htab_mem_range(struct crash_mem **mem_ranges) 314 { 315 316 #ifdef CONFIG_PPC_64S_HASH_MMU 317 if (!htab_address) 318 return 0; 319 320 return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes); 321 #else 322 return 0; 323 #endif 324 } 325 326 /** 327 * add_kernel_mem_range - Adds kernel text region to the given 328 * memory ranges list. 329 * @mem_ranges: Range list to add the memory range to. 330 * 331 * Returns 0 on success, negative errno on error. 332 */ 333 static int add_kernel_mem_range(struct crash_mem **mem_ranges) 334 { 335 return add_mem_range(mem_ranges, 0, __pa(_end)); 336 } 337 #endif /* CONFIG_KEXEC_FILE */ 338 339 #if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP) 340 /** 341 * add_rtas_mem_range - Adds RTAS region to the given memory ranges list. 342 * @mem_ranges: Range list to add the memory range to. 343 * 344 * Returns 0 on success, negative errno on error. 345 */ 346 static int add_rtas_mem_range(struct crash_mem **mem_ranges) 347 { 348 struct device_node *dn; 349 u32 base, size; 350 int ret = 0; 351 352 dn = of_find_node_by_path("/rtas"); 353 if (!dn) 354 return 0; 355 356 ret = of_property_read_u32(dn, "linux,rtas-base", &base); 357 ret |= of_property_read_u32(dn, "rtas-size", &size); 358 if (!ret) 359 ret = add_mem_range(mem_ranges, base, size); 360 361 of_node_put(dn); 362 return ret; 363 } 364 365 /** 366 * add_opal_mem_range - Adds OPAL region to the given memory ranges list. 367 * @mem_ranges: Range list to add the memory range to. 368 * 369 * Returns 0 on success, negative errno on error. 370 */ 371 static int add_opal_mem_range(struct crash_mem **mem_ranges) 372 { 373 struct device_node *dn; 374 u64 base, size; 375 int ret; 376 377 dn = of_find_node_by_path("/ibm,opal"); 378 if (!dn) 379 return 0; 380 381 ret = of_property_read_u64(dn, "opal-base-address", &base); 382 ret |= of_property_read_u64(dn, "opal-runtime-size", &size); 383 if (!ret) 384 ret = add_mem_range(mem_ranges, base, size); 385 386 of_node_put(dn); 387 return ret; 388 } 389 #endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */ 390 391 #ifdef CONFIG_KEXEC_FILE 392 /** 393 * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w 394 * to the given memory ranges list. 395 * @mem_ranges: Range list to add the memory ranges to. 396 * 397 * Returns 0 on success, negative errno on error. 398 */ 399 static int add_reserved_mem_ranges(struct crash_mem **mem_ranges) 400 { 401 int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0; 402 struct device_node *root = of_find_node_by_path("/"); 403 const __be32 *prop; 404 405 prop = of_get_property(root, "reserved-ranges", &len); 406 n_mem_addr_cells = of_n_addr_cells(root); 407 n_mem_size_cells = of_n_size_cells(root); 408 of_node_put(root); 409 if (!prop) 410 return 0; 411 412 cells = n_mem_addr_cells + n_mem_size_cells; 413 414 /* Each reserved range is an (address,size) pair */ 415 for (i = 0; i < (len / (sizeof(u32) * cells)); i++) { 416 u64 base, size; 417 418 base = of_read_number(prop + (i * cells), n_mem_addr_cells); 419 size = of_read_number(prop + (i * cells) + n_mem_addr_cells, 420 n_mem_size_cells); 421 422 ret = add_mem_range(mem_ranges, base, size); 423 if (ret) 424 break; 425 } 426 427 return ret; 428 } 429 430 /** 431 * get_reserved_memory_ranges - Get reserve memory ranges. This list includes 432 * memory regions that should be added to the 433 * memory reserve map to ensure the region is 434 * protected from any mischief. 435 * @mem_ranges: Range list to add the memory ranges to. 436 * 437 * Returns 0 on success, negative errno on error. 438 */ 439 int get_reserved_memory_ranges(struct crash_mem **mem_ranges) 440 { 441 int ret; 442 443 ret = add_rtas_mem_range(mem_ranges); 444 if (ret) 445 goto out; 446 447 ret = add_tce_mem_ranges(mem_ranges); 448 if (ret) 449 goto out; 450 451 ret = add_reserved_mem_ranges(mem_ranges); 452 out: 453 if (ret) 454 pr_err("Failed to setup reserved memory ranges\n"); 455 return ret; 456 } 457 458 /** 459 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes 460 * regions like opal/rtas, tce-table, initrd, 461 * kernel, htab which should be avoided while 462 * setting up kexec load segments. 463 * @mem_ranges: Range list to add the memory ranges to. 464 * 465 * Returns 0 on success, negative errno on error. 466 */ 467 int get_exclude_memory_ranges(struct crash_mem **mem_ranges) 468 { 469 int ret; 470 471 ret = add_tce_mem_ranges(mem_ranges); 472 if (ret) 473 goto out; 474 475 ret = add_initrd_mem_range(mem_ranges); 476 if (ret) 477 goto out; 478 479 ret = add_htab_mem_range(mem_ranges); 480 if (ret) 481 goto out; 482 483 ret = add_kernel_mem_range(mem_ranges); 484 if (ret) 485 goto out; 486 487 ret = add_rtas_mem_range(mem_ranges); 488 if (ret) 489 goto out; 490 491 ret = add_opal_mem_range(mem_ranges); 492 if (ret) 493 goto out; 494 495 ret = add_reserved_mem_ranges(mem_ranges); 496 if (ret) 497 goto out; 498 499 /* exclude memory ranges should be sorted for easy lookup */ 500 sort_memory_ranges(*mem_ranges, true); 501 out: 502 if (ret) 503 pr_err("Failed to setup exclude memory ranges\n"); 504 return ret; 505 } 506 507 #ifdef CONFIG_CRASH_DUMP 508 /** 509 * get_usable_memory_ranges - Get usable memory ranges. This list includes 510 * regions like crashkernel, opal/rtas & tce-table, 511 * that kdump kernel could use. 512 * @mem_ranges: Range list to add the memory ranges to. 513 * 514 * Returns 0 on success, negative errno on error. 515 */ 516 int get_usable_memory_ranges(struct crash_mem **mem_ranges) 517 { 518 int ret; 519 520 /* 521 * Early boot failure observed on guests when low memory (first memory 522 * block?) is not added to usable memory. So, add [0, crashk_res.end] 523 * instead of [crashk_res.start, crashk_res.end] to workaround it. 524 * Also, crashed kernel's memory must be added to reserve map to 525 * avoid kdump kernel from using it. 526 */ 527 ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); 528 if (ret) 529 goto out; 530 531 ret = add_rtas_mem_range(mem_ranges); 532 if (ret) 533 goto out; 534 535 ret = add_opal_mem_range(mem_ranges); 536 if (ret) 537 goto out; 538 539 ret = add_tce_mem_ranges(mem_ranges); 540 out: 541 if (ret) 542 pr_err("Failed to setup usable memory ranges\n"); 543 return ret; 544 } 545 #endif /* CONFIG_CRASH_DUMP */ 546 #endif /* CONFIG_KEXEC_FILE */ 547 548 #ifdef CONFIG_CRASH_DUMP 549 /** 550 * get_crash_memory_ranges - Get crash memory ranges. This list includes 551 * first/crashing kernel's memory regions that 552 * would be exported via an elfcore. 553 * @mem_ranges: Range list to add the memory ranges to. 554 * 555 * Returns 0 on success, negative errno on error. 556 */ 557 int get_crash_memory_ranges(struct crash_mem **mem_ranges) 558 { 559 phys_addr_t base, end; 560 struct crash_mem *tmem; 561 u64 i; 562 int ret; 563 564 for_each_mem_range(i, &base, &end) { 565 u64 size = end - base; 566 567 /* Skip backup memory region, which needs a separate entry */ 568 if (base == BACKUP_SRC_START) { 569 if (size > BACKUP_SRC_SIZE) { 570 base = BACKUP_SRC_END + 1; 571 size -= BACKUP_SRC_SIZE; 572 } else 573 continue; 574 } 575 576 ret = add_mem_range(mem_ranges, base, size); 577 if (ret) 578 goto out; 579 580 /* Try merging adjacent ranges before reallocation attempt */ 581 if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) 582 sort_memory_ranges(*mem_ranges, true); 583 } 584 585 /* Reallocate memory ranges if there is no space to split ranges */ 586 tmem = *mem_ranges; 587 if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { 588 tmem = realloc_mem_ranges(mem_ranges); 589 if (!tmem) 590 goto out; 591 } 592 593 /* Exclude crashkernel region */ 594 ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); 595 if (ret) 596 goto out; 597 598 /* 599 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL 600 * regions are exported to save their context at the time of 601 * crash, they should actually be backed up just like the 602 * first 64K bytes of memory. 603 */ 604 ret = add_rtas_mem_range(mem_ranges); 605 if (ret) 606 goto out; 607 608 ret = add_opal_mem_range(mem_ranges); 609 if (ret) 610 goto out; 611 612 /* create a separate program header for the backup region */ 613 ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); 614 if (ret) 615 goto out; 616 617 sort_memory_ranges(*mem_ranges, false); 618 out: 619 if (ret) 620 pr_err("Failed to setup crash memory ranges\n"); 621 return ret; 622 } 623 624 /** 625 * remove_mem_range - Removes the given memory range from the range list. 626 * @mem_ranges: Range list to remove the memory range to. 627 * @base: Base address of the range to remove. 628 * @size: Size of the memory range to remove. 629 * 630 * (Re)allocates memory, if needed. 631 * 632 * Returns 0 on success, negative errno on error. 633 */ 634 int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size) 635 { 636 u64 end; 637 int ret = 0; 638 unsigned int i; 639 u64 mstart, mend; 640 struct crash_mem *mem_rngs = *mem_ranges; 641 642 if (!size) 643 return 0; 644 645 /* 646 * Memory range are stored as start and end address, use 647 * the same format to do remove operation. 648 */ 649 end = base + size - 1; 650 651 for (i = 0; i < mem_rngs->nr_ranges; i++) { 652 mstart = mem_rngs->ranges[i].start; 653 mend = mem_rngs->ranges[i].end; 654 655 /* 656 * Memory range to remove is not part of this range entry 657 * in the memory range list 658 */ 659 if (!(base >= mstart && end <= mend)) 660 continue; 661 662 /* 663 * Memory range to remove is equivalent to this entry in the 664 * memory range list. Remove the range entry from the list. 665 */ 666 if (base == mstart && end == mend) { 667 for (; i < mem_rngs->nr_ranges - 1; i++) { 668 mem_rngs->ranges[i].start = mem_rngs->ranges[i+1].start; 669 mem_rngs->ranges[i].end = mem_rngs->ranges[i+1].end; 670 } 671 mem_rngs->nr_ranges--; 672 goto out; 673 } 674 /* 675 * Start address of the memory range to remove and the 676 * current memory range entry in the list is same. Just 677 * move the start address of the current memory range 678 * entry in the list to end + 1. 679 */ 680 else if (base == mstart) { 681 mem_rngs->ranges[i].start = end + 1; 682 goto out; 683 } 684 /* 685 * End address of the memory range to remove and the 686 * current memory range entry in the list is same. 687 * Just move the end address of the current memory 688 * range entry in the list to base - 1. 689 */ 690 else if (end == mend) { 691 mem_rngs->ranges[i].end = base - 1; 692 goto out; 693 } 694 /* 695 * Memory range to remove is not at the edge of current 696 * memory range entry. Split the current memory entry into 697 * two half. 698 */ 699 else { 700 mem_rngs->ranges[i].end = base - 1; 701 size = mem_rngs->ranges[i].end - end; 702 ret = add_mem_range(mem_ranges, end + 1, size); 703 } 704 } 705 out: 706 return ret; 707 } 708 #endif /* CONFIG_CRASH_DUMP */ 709