// SPDX-License-Identifier: GPL-2.0-only
/*
 * ppc64 code to implement the kexec_file_load syscall
 *
 * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
 * Copyright (C) 2004  IBM Corp.
 * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
 * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
 * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
 * Copyright (C) 2020  IBM Corporation
 *
 * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
 * Heavily modified for the kernel by
 * Hari Bathini, IBM Corporation.
 */

#include <linux/kexec.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/of.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/setup.h>
#include <asm/drmem.h>
#include <asm/firmware.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>
#include <asm/mmzone.h>
#include <asm/iommu.h>
#include <asm/prom.h>
#include <asm/plpks.h>

/*
 * struct umem_info - scratch state used while building the
 * linux,usable-memory / linux,drconf-usable-memory FDT properties
 * for the kdump kernel.
 *
 * The buffer holds big-endian (base, size) u64 pairs as the FDT expects;
 * it is grown on demand by check_realloc_usable_mem().
 */
struct umem_info {
	__be64 *buf;		/* data buffer for usable-memory property */
	u32 size;		/* size allocated for the data buffer */
	u32 max_entries;	/* maximum no. of entries */
	u32 idx;		/* index of current entry */

	/* usable memory ranges to look up */
	unsigned int nr_ranges;
	const struct range *ranges;
};

/* Image loaders supported by kexec_file_load() on ppc64: ELF64 only. */
const struct kexec_file_ops * const kexec_file_loaders[] = {
	&kexec_elf64_ops,
	NULL
};

/**
 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
 *                             regions like opal/rtas, tce-table, initrd,
 *                             kernel, htab which should be avoided while
 *                             setting up kexec load segments.
 * @mem_ranges:                Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
58 */ 59 static int get_exclude_memory_ranges(struct crash_mem **mem_ranges) 60 { 61 int ret; 62 63 ret = add_tce_mem_ranges(mem_ranges); 64 if (ret) 65 goto out; 66 67 ret = add_initrd_mem_range(mem_ranges); 68 if (ret) 69 goto out; 70 71 ret = add_htab_mem_range(mem_ranges); 72 if (ret) 73 goto out; 74 75 ret = add_kernel_mem_range(mem_ranges); 76 if (ret) 77 goto out; 78 79 ret = add_rtas_mem_range(mem_ranges); 80 if (ret) 81 goto out; 82 83 ret = add_opal_mem_range(mem_ranges); 84 if (ret) 85 goto out; 86 87 ret = add_reserved_mem_ranges(mem_ranges); 88 if (ret) 89 goto out; 90 91 /* exclude memory ranges should be sorted for easy lookup */ 92 sort_memory_ranges(*mem_ranges, true); 93 out: 94 if (ret) 95 pr_err("Failed to setup exclude memory ranges\n"); 96 return ret; 97 } 98 99 /** 100 * get_reserved_memory_ranges - Get reserve memory ranges. This list includes 101 * memory regions that should be added to the 102 * memory reserve map to ensure the region is 103 * protected from any mischief. 104 * @mem_ranges: Range list to add the memory ranges to. 105 * 106 * Returns 0 on success, negative errno on error. 107 */ 108 static int get_reserved_memory_ranges(struct crash_mem **mem_ranges) 109 { 110 int ret; 111 112 ret = add_rtas_mem_range(mem_ranges); 113 if (ret) 114 goto out; 115 116 ret = add_tce_mem_ranges(mem_ranges); 117 if (ret) 118 goto out; 119 120 ret = add_reserved_mem_ranges(mem_ranges); 121 out: 122 if (ret) 123 pr_err("Failed to setup reserved memory ranges\n"); 124 return ret; 125 } 126 127 /** 128 * __locate_mem_hole_top_down - Looks top down for a large enough memory hole 129 * in the memory regions between buf_min & buf_max 130 * for the buffer. If found, sets kbuf->mem. 131 * @kbuf: Buffer contents and memory parameters. 132 * @buf_min: Minimum address for the buffer. 133 * @buf_max: Maximum address for the buffer. 134 * 135 * Returns 0 on success, negative errno on error. 
 */
static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
				      u64 buf_min, u64 buf_max)
{
	int ret = -EADDRNOTAVAIL;
	phys_addr_t start, end;
	u64 i;

	/* Walk memblock regions from the highest address downwards. */
	for_each_mem_range_rev(i, &start, &end) {
		/*
		 * memblock uses [start, end) convention while it is
		 * [start, end] here. Fix the off-by-one to have the
		 * same convention.
		 */
		end -= 1;

		if (start > buf_max)
			continue;

		/* Memory hole not found */
		if (end < buf_min)
			break;

		/* Adjust memory region based on the given range */
		if (start < buf_min)
			start = buf_min;
		if (end > buf_max)
			end = buf_max;

		start = ALIGN(start, kbuf->buf_align);
		if (start < end && (end - start + 1) >= kbuf->memsz) {
			/* Suitable memory range found. Set kbuf->mem */
			kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
					       kbuf->buf_align);
			ret = 0;
			break;
		}
	}

	return ret;
}

/**
 * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
 *                                  suitable buffer with top down approach.
 * @kbuf:                           Buffer contents and memory parameters.
 * @buf_min:                        Minimum address for the buffer.
 * @buf_max:                        Maximum address for the buffer.
 * @emem:                           Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
					  u64 buf_min, u64 buf_max,
					  const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	/*
	 * Walk the (sorted) exclude ranges from the highest one down,
	 * probing each gap between consecutive exclude ranges with
	 * __locate_mem_hole_top_down() until a hole fits.
	 */
	tmax = buf_max;
	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		if (start > tmax)
			continue;

		if (end < tmax) {
			/* Gap above this exclude range: [end+1, tmax] */
			tmin = (end < buf_min ? buf_min : end + 1);
			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		/* Continue searching below this exclude range. */
		tmax = start - 1;

		if (tmax < buf_min) {
			ret = err;
			break;
		}
		ret = 0;
	}

	/* Finally try the region below the lowest exclude range. */
	if (!ret) {
		tmin = buf_min;
		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
	}
	return ret;
}

/**
 * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
 *                               in the memory regions between buf_min & buf_max
 *                               for the buffer. If found, sets kbuf->mem.
 * @kbuf:                        Buffer contents and memory parameters.
 * @buf_min:                     Minimum address for the buffer.
 * @buf_max:                     Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
				       u64 buf_min, u64 buf_max)
{
	int ret = -EADDRNOTAVAIL;
	phys_addr_t start, end;
	u64 i;

	/* Walk memblock regions from the lowest address upwards. */
	for_each_mem_range(i, &start, &end) {
		/*
		 * memblock uses [start, end) convention while it is
		 * [start, end] here. Fix the off-by-one to have the
		 * same convention.
		 */
		end -= 1;

		if (end < buf_min)
			continue;

		/* Memory hole not found */
		if (start > buf_max)
			break;

		/* Adjust memory region based on the given range */
		if (start < buf_min)
			start = buf_min;
		if (end > buf_max)
			end = buf_max;

		start = ALIGN(start, kbuf->buf_align);
		if (start < end && (end - start + 1) >= kbuf->memsz) {
			/* Suitable memory range found. Set kbuf->mem */
			kbuf->mem = start;
			ret = 0;
			break;
		}
	}

	return ret;
}

/**
 * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
 *                                   suitable buffer with bottom up approach.
 * @kbuf:                            Buffer contents and memory parameters.
 * @buf_min:                         Minimum address for the buffer.
 * @buf_max:                         Maximum address for the buffer.
 * @emem:                            Exclude memory ranges.
283 * 284 * Returns 0 on success, negative errno on error. 285 */ 286 static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf, 287 u64 buf_min, u64 buf_max, 288 const struct crash_mem *emem) 289 { 290 int i, ret = 0, err = -EADDRNOTAVAIL; 291 u64 start, end, tmin, tmax; 292 293 tmin = buf_min; 294 for (i = 0; i < emem->nr_ranges; i++) { 295 start = emem->ranges[i].start; 296 end = emem->ranges[i].end; 297 298 if (end < tmin) 299 continue; 300 301 if (start > tmin) { 302 tmax = (start > buf_max ? buf_max : start - 1); 303 ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); 304 if (!ret) 305 return 0; 306 } 307 308 tmin = end + 1; 309 310 if (tmin > buf_max) { 311 ret = err; 312 break; 313 } 314 ret = 0; 315 } 316 317 if (!ret) { 318 tmax = buf_max; 319 ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax); 320 } 321 return ret; 322 } 323 324 #ifdef CONFIG_CRASH_DUMP 325 /** 326 * get_usable_memory_ranges - Get usable memory ranges. This list includes 327 * regions like crashkernel, opal/rtas & tce-table, 328 * that kdump kernel could use. 329 * @mem_ranges: Range list to add the memory ranges to. 330 * 331 * Returns 0 on success, negative errno on error. 332 */ 333 static int get_usable_memory_ranges(struct crash_mem **mem_ranges) 334 { 335 int ret; 336 337 /* 338 * Early boot failure observed on guests when low memory (first memory 339 * block?) is not added to usable memory. So, add [0, crashk_res.end] 340 * instead of [crashk_res.start, crashk_res.end] to workaround it. 341 * Also, crashed kernel's memory must be added to reserve map to 342 * avoid kdump kernel from using it. 
343 */ 344 ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1); 345 if (ret) 346 goto out; 347 348 ret = add_rtas_mem_range(mem_ranges); 349 if (ret) 350 goto out; 351 352 ret = add_opal_mem_range(mem_ranges); 353 if (ret) 354 goto out; 355 356 ret = add_tce_mem_ranges(mem_ranges); 357 out: 358 if (ret) 359 pr_err("Failed to setup usable memory ranges\n"); 360 return ret; 361 } 362 363 /** 364 * get_crash_memory_ranges - Get crash memory ranges. This list includes 365 * first/crashing kernel's memory regions that 366 * would be exported via an elfcore. 367 * @mem_ranges: Range list to add the memory ranges to. 368 * 369 * Returns 0 on success, negative errno on error. 370 */ 371 static int get_crash_memory_ranges(struct crash_mem **mem_ranges) 372 { 373 phys_addr_t base, end; 374 struct crash_mem *tmem; 375 u64 i; 376 int ret; 377 378 for_each_mem_range(i, &base, &end) { 379 u64 size = end - base; 380 381 /* Skip backup memory region, which needs a separate entry */ 382 if (base == BACKUP_SRC_START) { 383 if (size > BACKUP_SRC_SIZE) { 384 base = BACKUP_SRC_END + 1; 385 size -= BACKUP_SRC_SIZE; 386 } else 387 continue; 388 } 389 390 ret = add_mem_range(mem_ranges, base, size); 391 if (ret) 392 goto out; 393 394 /* Try merging adjacent ranges before reallocation attempt */ 395 if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges) 396 sort_memory_ranges(*mem_ranges, true); 397 } 398 399 /* Reallocate memory ranges if there is no space to split ranges */ 400 tmem = *mem_ranges; 401 if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) { 402 tmem = realloc_mem_ranges(mem_ranges); 403 if (!tmem) 404 goto out; 405 } 406 407 /* Exclude crashkernel region */ 408 ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end); 409 if (ret) 410 goto out; 411 412 /* 413 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL 414 * regions are exported to save their context at the time of 415 * crash, they should actually be backed up just like the 
416 * first 64K bytes of memory. 417 */ 418 ret = add_rtas_mem_range(mem_ranges); 419 if (ret) 420 goto out; 421 422 ret = add_opal_mem_range(mem_ranges); 423 if (ret) 424 goto out; 425 426 /* create a separate program header for the backup region */ 427 ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE); 428 if (ret) 429 goto out; 430 431 sort_memory_ranges(*mem_ranges, false); 432 out: 433 if (ret) 434 pr_err("Failed to setup crash memory ranges\n"); 435 return ret; 436 } 437 438 /** 439 * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries 440 * @um_info: Usable memory buffer and ranges info. 441 * @cnt: No. of entries to accommodate. 442 * 443 * Frees up the old buffer if memory reallocation fails. 444 * 445 * Returns buffer on success, NULL on error. 446 */ 447 static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt) 448 { 449 u32 new_size; 450 __be64 *tbuf; 451 452 if ((um_info->idx + cnt) <= um_info->max_entries) 453 return um_info->buf; 454 455 new_size = um_info->size + MEM_RANGE_CHUNK_SZ; 456 tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL); 457 if (tbuf) { 458 um_info->buf = tbuf; 459 um_info->size = new_size; 460 um_info->max_entries = (um_info->size / sizeof(u64)); 461 } 462 463 return tbuf; 464 } 465 466 /** 467 * add_usable_mem - Add the usable memory ranges within the given memory range 468 * to the buffer 469 * @um_info: Usable memory buffer and ranges info. 470 * @base: Base address of memory range to look for. 471 * @end: End address of memory range to look for. 472 * 473 * Returns 0 on success, negative errno on error. 
474 */ 475 static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end) 476 { 477 u64 loc_base, loc_end; 478 bool add; 479 int i; 480 481 for (i = 0; i < um_info->nr_ranges; i++) { 482 add = false; 483 loc_base = um_info->ranges[i].start; 484 loc_end = um_info->ranges[i].end; 485 if (loc_base >= base && loc_end <= end) 486 add = true; 487 else if (base < loc_end && end > loc_base) { 488 if (loc_base < base) 489 loc_base = base; 490 if (loc_end > end) 491 loc_end = end; 492 add = true; 493 } 494 495 if (add) { 496 if (!check_realloc_usable_mem(um_info, 2)) 497 return -ENOMEM; 498 499 um_info->buf[um_info->idx++] = cpu_to_be64(loc_base); 500 um_info->buf[um_info->idx++] = 501 cpu_to_be64(loc_end - loc_base + 1); 502 } 503 } 504 505 return 0; 506 } 507 508 /** 509 * kdump_setup_usable_lmb - This is a callback function that gets called by 510 * walk_drmem_lmbs for every LMB to set its 511 * usable memory ranges. 512 * @lmb: LMB info. 513 * @usm: linux,drconf-usable-memory property value. 514 * @data: Pointer to usable memory buffer and ranges info. 515 * 516 * Returns 0 on success, negative errno on error. 517 */ 518 static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm, 519 void *data) 520 { 521 struct umem_info *um_info; 522 int tmp_idx, ret; 523 u64 base, end; 524 525 /* 526 * kdump load isn't supported on kernels already booted with 527 * linux,drconf-usable-memory property. 528 */ 529 if (*usm) { 530 pr_err("linux,drconf-usable-memory property already exists!"); 531 return -EINVAL; 532 } 533 534 um_info = data; 535 tmp_idx = um_info->idx; 536 if (!check_realloc_usable_mem(um_info, 1)) 537 return -ENOMEM; 538 539 um_info->idx++; 540 base = lmb->base_addr; 541 end = base + drmem_lmb_size() - 1; 542 ret = add_usable_mem(um_info, base, end); 543 if (!ret) { 544 /* 545 * Update the no. of ranges added. Two entries (base & size) 546 * for every range added. 
547 */ 548 um_info->buf[tmp_idx] = 549 cpu_to_be64((um_info->idx - tmp_idx - 1) / 2); 550 } 551 552 return ret; 553 } 554 555 #define NODE_PATH_LEN 256 556 /** 557 * add_usable_mem_property - Add usable memory property for the given 558 * memory node. 559 * @fdt: Flattened device tree for the kdump kernel. 560 * @dn: Memory node. 561 * @um_info: Usable memory buffer and ranges info. 562 * 563 * Returns 0 on success, negative errno on error. 564 */ 565 static int add_usable_mem_property(void *fdt, struct device_node *dn, 566 struct umem_info *um_info) 567 { 568 int n_mem_addr_cells, n_mem_size_cells, node; 569 char path[NODE_PATH_LEN]; 570 int i, len, ranges, ret; 571 const __be32 *prop; 572 u64 base, end; 573 574 of_node_get(dn); 575 576 if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) { 577 pr_err("Buffer (%d) too small for memory node: %pOF\n", 578 NODE_PATH_LEN, dn); 579 return -EOVERFLOW; 580 } 581 kexec_dprintk("Memory node path: %s\n", path); 582 583 /* Now that we know the path, find its offset in kdump kernel's fdt */ 584 node = fdt_path_offset(fdt, path); 585 if (node < 0) { 586 pr_err("Malformed device tree: error reading %s\n", path); 587 ret = -EINVAL; 588 goto out; 589 } 590 591 /* Get the address & size cells */ 592 n_mem_addr_cells = of_n_addr_cells(dn); 593 n_mem_size_cells = of_n_size_cells(dn); 594 kexec_dprintk("address cells: %d, size cells: %d\n", n_mem_addr_cells, 595 n_mem_size_cells); 596 597 um_info->idx = 0; 598 if (!check_realloc_usable_mem(um_info, 2)) { 599 ret = -ENOMEM; 600 goto out; 601 } 602 603 prop = of_get_property(dn, "reg", &len); 604 if (!prop || len <= 0) { 605 ret = 0; 606 goto out; 607 } 608 609 /* 610 * "reg" property represents sequence of (addr,size) tuples 611 * each representing a memory range. 
612 */ 613 ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells); 614 615 for (i = 0; i < ranges; i++) { 616 base = of_read_number(prop, n_mem_addr_cells); 617 prop += n_mem_addr_cells; 618 end = base + of_read_number(prop, n_mem_size_cells) - 1; 619 prop += n_mem_size_cells; 620 621 ret = add_usable_mem(um_info, base, end); 622 if (ret) 623 goto out; 624 } 625 626 /* 627 * No kdump kernel usable memory found in this memory node. 628 * Write (0,0) tuple in linux,usable-memory property for 629 * this region to be ignored. 630 */ 631 if (um_info->idx == 0) { 632 um_info->buf[0] = 0; 633 um_info->buf[1] = 0; 634 um_info->idx = 2; 635 } 636 637 ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf, 638 (um_info->idx * sizeof(u64))); 639 640 out: 641 of_node_put(dn); 642 return ret; 643 } 644 645 646 /** 647 * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory 648 * and linux,drconf-usable-memory DT properties as 649 * appropriate to restrict its memory usage. 650 * @fdt: Flattened device tree for the kdump kernel. 651 * @usable_mem: Usable memory ranges for kdump kernel. 652 * 653 * Returns 0 on success, negative errno on error. 
 */
static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
{
	struct umem_info um_info;
	struct device_node *dn;
	int node, ret = 0;

	if (!usable_mem) {
		pr_err("Usable memory ranges for kdump kernel not found\n");
		return -ENOENT;
	}

	/* drconf node is optional; only a malformed tree is fatal here. */
	node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
	if (node == -FDT_ERR_NOTFOUND)
		kexec_dprintk("No dynamic reconfiguration memory found\n");
	else if (node < 0) {
		pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
		return -EINVAL;
	}

	um_info.buf  = NULL;
	um_info.size = 0;
	um_info.max_entries = 0;
	um_info.idx  = 0;
	/* Memory ranges to look up */
	um_info.ranges = &(usable_mem->ranges[0]);
	um_info.nr_ranges = usable_mem->nr_ranges;

	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (dn) {
		/* Build per-LMB usable-memory entries into um_info.buf. */
		ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
		of_node_put(dn);

		if (ret) {
			pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
			goto out;
		}

		ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
				  um_info.buf, (um_info.idx * sizeof(u64)));
		if (ret) {
			pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s",
			       fdt_strerror(ret));
			goto out;
		}
	}

	/*
	 * Walk through each memory node and set linux,usable-memory property
	 * for the corresponding node in kdump kernel's fdt.
	 */
	for_each_node_by_type(dn, "memory") {
		ret = add_usable_mem_property(fdt, dn, &um_info);
		if (ret) {
			pr_err("Failed to set linux,usable-memory property for %s node",
			       dn->full_name);
			/* drop the iterator's reference before bailing out */
			of_node_put(dn);
			goto out;
		}
	}

out:
	kfree(um_info.buf);
	return ret;
}

/**
 * load_backup_segment - Locate a memory hole to place the backup region.
 * @image:               Kexec image.
723 * @kbuf: Buffer contents and memory parameters. 724 * 725 * Returns 0 on success, negative errno on error. 726 */ 727 static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf) 728 { 729 void *buf; 730 int ret; 731 732 /* 733 * Setup a source buffer for backup segment. 734 * 735 * A source buffer has no meaning for backup region as data will 736 * be copied from backup source, after crash, in the purgatory. 737 * But as load segment code doesn't recognize such segments, 738 * setup a dummy source buffer to keep it happy for now. 739 */ 740 buf = vzalloc(BACKUP_SRC_SIZE); 741 if (!buf) 742 return -ENOMEM; 743 744 kbuf->buffer = buf; 745 kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; 746 kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE; 747 kbuf->top_down = false; 748 749 ret = kexec_add_buffer(kbuf); 750 if (ret) { 751 vfree(buf); 752 return ret; 753 } 754 755 image->arch.backup_buf = buf; 756 image->arch.backup_start = kbuf->mem; 757 return 0; 758 } 759 760 /** 761 * update_backup_region_phdr - Update backup region's offset for the core to 762 * export the region appropriately. 763 * @image: Kexec image. 764 * @ehdr: ELF core header. 765 * 766 * Assumes an exclusive program header is setup for the backup region 767 * in the ELF headers 768 * 769 * Returns nothing. 770 */ 771 static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr) 772 { 773 Elf64_Phdr *phdr; 774 unsigned int i; 775 776 phdr = (Elf64_Phdr *)(ehdr + 1); 777 for (i = 0; i < ehdr->e_phnum; i++) { 778 if (phdr->p_paddr == BACKUP_SRC_START) { 779 phdr->p_offset = image->arch.backup_start; 780 kexec_dprintk("Backup region offset updated to 0x%lx\n", 781 image->arch.backup_start); 782 return; 783 } 784 } 785 } 786 787 /** 788 * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr 789 * segment needed to load kdump kernel. 790 * @image: Kexec image. 791 * @kbuf: Buffer contents and memory parameters. 
792 * 793 * Returns 0 on success, negative errno on error. 794 */ 795 static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf) 796 { 797 struct crash_mem *cmem = NULL; 798 unsigned long headers_sz; 799 void *headers = NULL; 800 int ret; 801 802 ret = get_crash_memory_ranges(&cmem); 803 if (ret) 804 goto out; 805 806 /* Setup elfcorehdr segment */ 807 ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz); 808 if (ret) { 809 pr_err("Failed to prepare elf headers for the core\n"); 810 goto out; 811 } 812 813 /* Fix the offset for backup region in the ELF header */ 814 update_backup_region_phdr(image, headers); 815 816 kbuf->buffer = headers; 817 kbuf->mem = KEXEC_BUF_MEM_UNKNOWN; 818 kbuf->bufsz = kbuf->memsz = headers_sz; 819 kbuf->top_down = false; 820 821 ret = kexec_add_buffer(kbuf); 822 if (ret) { 823 vfree(headers); 824 goto out; 825 } 826 827 image->elf_load_addr = kbuf->mem; 828 image->elf_headers_sz = headers_sz; 829 image->elf_headers = headers; 830 out: 831 kfree(cmem); 832 return ret; 833 } 834 835 /** 836 * load_crashdump_segments_ppc64 - Initialize the additional segements needed 837 * to load kdump kernel. 838 * @image: Kexec image. 839 * @kbuf: Buffer contents and memory parameters. 840 * 841 * Returns 0 on success, negative errno on error. 
842 */ 843 int load_crashdump_segments_ppc64(struct kimage *image, 844 struct kexec_buf *kbuf) 845 { 846 int ret; 847 848 /* Load backup segment - first 64K bytes of the crashing kernel */ 849 ret = load_backup_segment(image, kbuf); 850 if (ret) { 851 pr_err("Failed to load backup segment\n"); 852 return ret; 853 } 854 kexec_dprintk("Loaded the backup region at 0x%lx\n", kbuf->mem); 855 856 /* Load elfcorehdr segment - to export crashing kernel's vmcore */ 857 ret = load_elfcorehdr_segment(image, kbuf); 858 if (ret) { 859 pr_err("Failed to load elfcorehdr segment\n"); 860 return ret; 861 } 862 kexec_dprintk("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n", 863 image->elf_load_addr, kbuf->bufsz, kbuf->memsz); 864 865 return 0; 866 } 867 #endif 868 869 /** 870 * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global 871 * variables and call setup_purgatory() to initialize 872 * common global variable. 873 * @image: kexec image. 874 * @slave_code: Slave code for the purgatory. 875 * @fdt: Flattened device tree for the next kernel. 876 * @kernel_load_addr: Address where the kernel is loaded. 877 * @fdt_load_addr: Address where the flattened device tree is loaded. 878 * 879 * Returns 0 on success, negative errno on error. 880 */ 881 int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, 882 const void *fdt, unsigned long kernel_load_addr, 883 unsigned long fdt_load_addr) 884 { 885 struct device_node *dn = NULL; 886 int ret; 887 888 ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, 889 fdt_load_addr); 890 if (ret) 891 goto out; 892 893 if (image->type == KEXEC_TYPE_CRASH) { 894 u32 my_run_at_load = 1; 895 896 /* 897 * Tell relocatable kernel to run at load address 898 * via the word meant for that at 0x5c. 
899 */ 900 ret = kexec_purgatory_get_set_symbol(image, "run_at_load", 901 &my_run_at_load, 902 sizeof(my_run_at_load), 903 false); 904 if (ret) 905 goto out; 906 } 907 908 /* Tell purgatory where to look for backup region */ 909 ret = kexec_purgatory_get_set_symbol(image, "backup_start", 910 &image->arch.backup_start, 911 sizeof(image->arch.backup_start), 912 false); 913 if (ret) 914 goto out; 915 916 /* Setup OPAL base & entry values */ 917 dn = of_find_node_by_path("/ibm,opal"); 918 if (dn) { 919 u64 val; 920 921 of_property_read_u64(dn, "opal-base-address", &val); 922 ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val, 923 sizeof(val), false); 924 if (ret) 925 goto out; 926 927 of_property_read_u64(dn, "opal-entry-address", &val); 928 ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val, 929 sizeof(val), false); 930 } 931 out: 932 if (ret) 933 pr_err("Failed to setup purgatory symbols"); 934 of_node_put(dn); 935 return ret; 936 } 937 938 /** 939 * cpu_node_size - Compute the size of a CPU node in the FDT. 940 * This should be done only once and the value is stored in 941 * a static variable. 942 * Returns the max size of a CPU node in the FDT. 943 */ 944 static unsigned int cpu_node_size(void) 945 { 946 static unsigned int size; 947 struct device_node *dn; 948 struct property *pp; 949 950 /* 951 * Don't compute it twice, we are assuming that the per CPU node size 952 * doesn't change during the system's life. 953 */ 954 if (size) 955 return size; 956 957 dn = of_find_node_by_type(NULL, "cpu"); 958 if (WARN_ON_ONCE(!dn)) { 959 // Unlikely to happen 960 return 0; 961 } 962 963 /* 964 * We compute the sub node size for a CPU node, assuming it 965 * will be the same for all. 
966 */ 967 size += strlen(dn->name) + 5; 968 for_each_property_of_node(dn, pp) { 969 size += strlen(pp->name); 970 size += pp->length; 971 } 972 973 of_node_put(dn); 974 return size; 975 } 976 977 static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image) 978 { 979 unsigned int cpu_nodes, extra_size = 0; 980 struct device_node *dn; 981 u64 usm_entries; 982 983 if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH) 984 return 0; 985 986 /* 987 * For kdump kernel, account for linux,usable-memory and 988 * linux,drconf-usable-memory properties. Get an approximate on the 989 * number of usable memory entries and use for FDT size estimation. 990 */ 991 if (drmem_lmb_size()) { 992 usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) + 993 (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); 994 extra_size += (unsigned int)(usm_entries * sizeof(u64)); 995 } 996 997 /* 998 * Get the number of CPU nodes in the current DT. This allows to 999 * reserve places for CPU nodes added since the boot time. 1000 */ 1001 cpu_nodes = 0; 1002 for_each_node_by_type(dn, "cpu") { 1003 cpu_nodes++; 1004 } 1005 1006 if (cpu_nodes > boot_cpu_node_count) 1007 extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size(); 1008 1009 return extra_size; 1010 } 1011 1012 /** 1013 * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to 1014 * setup FDT for kexec/kdump kernel. 1015 * @image: kexec image being loaded. 1016 * 1017 * Returns the estimated extra size needed for kexec/kdump kernel FDT. 1018 */ 1019 unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image) 1020 { 1021 unsigned int extra_size = 0; 1022 1023 // Budget some space for the password blob. 
There's already extra space 1024 // for the key name 1025 if (plpks_is_available()) 1026 extra_size += (unsigned int)plpks_get_passwordlen(); 1027 1028 return extra_size + kdump_extra_fdt_size_ppc64(image); 1029 } 1030 1031 /** 1032 * add_node_props - Reads node properties from device node structure and add 1033 * them to fdt. 1034 * @fdt: Flattened device tree of the kernel 1035 * @node_offset: offset of the node to add a property at 1036 * @dn: device node pointer 1037 * 1038 * Returns 0 on success, negative errno on error. 1039 */ 1040 static int add_node_props(void *fdt, int node_offset, const struct device_node *dn) 1041 { 1042 int ret = 0; 1043 struct property *pp; 1044 1045 if (!dn) 1046 return -EINVAL; 1047 1048 for_each_property_of_node(dn, pp) { 1049 ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length); 1050 if (ret < 0) { 1051 pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret)); 1052 return ret; 1053 } 1054 } 1055 return ret; 1056 } 1057 1058 /** 1059 * update_cpus_node - Update cpus node of flattened device tree using of_root 1060 * device node. 1061 * @fdt: Flattened device tree of the kernel. 1062 * 1063 * Returns 0 on success, negative errno on error. 
 */
static int update_cpus_node(void *fdt)
{
	struct device_node *cpus_node, *dn;
	int cpus_offset, cpus_subnode_offset, ret = 0;

	cpus_offset = fdt_path_offset(fdt, "/cpus");
	/* A missing /cpus node is fine (rebuilt below); other errors are fatal. */
	if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
		pr_err("Malformed device tree: error reading /cpus node: %s\n",
		       fdt_strerror(cpus_offset));
		return cpus_offset;
	}

	/* Drop the stale /cpus node so it can be rebuilt from the live DT. */
	if (cpus_offset > 0) {
		ret = fdt_del_node(fdt, cpus_offset);
		if (ret < 0) {
			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
			return -EINVAL;
		}
	}

	/* Add cpus node to fdt */
	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
	if (cpus_offset < 0) {
		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
		return -EINVAL;
	}

	/* Add cpus node properties */
	cpus_node = of_find_node_by_path("/cpus");
	ret = add_node_props(fdt, cpus_offset, cpus_node);
	of_node_put(cpus_node);
	if (ret < 0)
		return ret;

	/* Loop through all subnodes of cpus and add them to fdt */
	for_each_node_by_type(dn, "cpu") {
		cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
		if (cpus_subnode_offset < 0) {
			pr_err("Unable to add %s subnode: %s\n", dn->full_name,
			       fdt_strerror(cpus_subnode_offset));
			ret = cpus_subnode_offset;
			goto out;
		}

		ret = add_node_props(fdt, cpus_subnode_offset, dn);
		if (ret < 0)
			goto out;
	}
out:
	/* Balances the iterator's reference on early exit; NULL-safe otherwise. */
	of_node_put(dn);
	return ret;
}

/*
 * copy_property - Mirror one property from the live device tree into the fdt:
 * copies it when present, deletes a stale fdt copy when gone.
 */
static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
			 const char *propname)
{
	const void *prop, *fdtprop;
	int len = 0, fdtlen = 0;

	prop = of_get_property(dn, propname, &len);
	fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);

	if (fdtprop && !prop)
		return fdt_delprop(fdt, node_offset, propname);
	else if
(prop)
		return fdt_setprop(fdt, node_offset, propname, prop, len);
	else
		return -FDT_ERR_NOTFOUND;
}

/*
 * update_pci_dma_nodes - For each node carrying @dmapropname, refresh the
 * ibm,dma-window and @dmapropname properties in the new fdt from the live
 * device tree. LPAR-only; a no-op elsewhere.
 */
static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
{
	struct device_node *dn;
	int pci_offset, root_offset, ret = 0;

	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	root_offset = fdt_path_offset(fdt, "/");
	for_each_node_with_property(dn, dmapropname) {
		pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
		if (pci_offset < 0)
			continue;

		ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
		if (ret < 0) {
			/* drop the iterator's reference before bailing out */
			of_node_put(dn);
			break;
		}
		ret = copy_property(fdt, pci_offset, dn, dmapropname);
		if (ret < 0) {
			of_node_put(dn);
			break;
		}
	}

	return ret;
}

/**
 * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel
 *                       being loaded.
 * @image:               kexec image being loaded.
 * @fdt:                 Flattened device tree for the next kernel.
 * @initrd_load_addr:    Address where the next initrd will be loaded.
 * @initrd_len:          Size of the next initrd, or 0 if there will be none.
 * @cmdline:             Command line for the next kernel, or NULL if there
 *                       will be none.
 *
 * Returns 0 on success, negative errno on error.
 */
int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
			unsigned long initrd_load_addr,
			unsigned long initrd_len, const char *cmdline)
{
	struct crash_mem *umem = NULL, *rmem = NULL;
	int i, nr_ranges, ret;

#ifdef CONFIG_CRASH_DUMP
	/*
	 * Restrict memory usage for kdump kernel by setting up
	 * usable memory ranges and memory reserve map.
	 */
	if (image->type == KEXEC_TYPE_CRASH) {
		ret = get_usable_memory_ranges(&umem);
		if (ret)
			goto out;

		ret = update_usable_mem_fdt(fdt, umem);
		if (ret) {
			pr_err("Error setting up usable-memory property for kdump kernel\n");
			goto out;
		}

		/*
		 * Ensure we don't touch crashed kernel's memory except the
		 * first 64K of RAM, which will be backed up.
		 */
		ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
				      crashk_res.start - BACKUP_SRC_SIZE);
		if (ret) {
			pr_err("Error reserving crash memory: %s\n",
			       fdt_strerror(ret));
			goto out;
		}

		/* Ensure backup region is not used by kdump/capture kernel */
		ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
				      BACKUP_SRC_SIZE);
		if (ret) {
			pr_err("Error reserving memory for backup: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}
#endif

	/* Update cpus nodes information to account hotplug CPUs. */
	ret = update_cpus_node(fdt);
	if (ret < 0)
		goto out;

	ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
	if (ret < 0)
		goto out;

	ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
	if (ret < 0)
		goto out;

	/* Update memory reserve map */
	ret = get_reserved_memory_ranges(&rmem);
	if (ret)
		goto out;

	nr_ranges = rmem ? rmem->nr_ranges : 0;
	for (i = 0; i < nr_ranges; i++) {
		u64 base, size;

		base = rmem->ranges[i].start;
		size = rmem->ranges[i].end - base + 1;
		ret = fdt_add_mem_rsv(fdt, base, size);
		if (ret) {
			pr_err("Error updating memory reserve map: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}

	// If we have PLPKS active, we need to provide the password to the new kernel
	if (plpks_is_available())
		ret = plpks_populate_fdt(fdt);

out:
	kfree(rmem);
	kfree(umem);
	return ret;
}

/**
 * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
 *                              tce-table, reserved-ranges & such (exclude
 *                              memory ranges) as they can't be used for kexec
 *                              segment buffer. Sets kbuf->mem when a suitable
 *                              memory hole is found.
 * @kbuf:                       Buffer contents and memory parameters.
 *
 * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
	struct crash_mem **emem;
	u64 buf_min, buf_max;
	int ret;

	/* Look up the exclude ranges list while locating the memory hole */
	emem = &(kbuf->image->arch.exclude_ranges);
	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
		pr_warn("No exclude range list. Using the default locate mem hole method\n");
		return kexec_locate_mem_hole(kbuf);
	}

	buf_min = kbuf->buf_min;
	buf_max = kbuf->buf_max;
	/* Segments for kdump kernel should be within crashkernel region */
	if (IS_ENABLED(CONFIG_CRASH_DUMP) && kbuf->image->type == KEXEC_TYPE_CRASH) {
		buf_min = (buf_min < crashk_res.start ?
			   crashk_res.start : buf_min);
		buf_max = (buf_max > crashk_res.end ?
1296 crashk_res.end : buf_max); 1297 } 1298 1299 if (buf_min > buf_max) { 1300 pr_err("Invalid buffer min and/or max values\n"); 1301 return -EINVAL; 1302 } 1303 1304 if (kbuf->top_down) 1305 ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max, 1306 *emem); 1307 else 1308 ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max, 1309 *emem); 1310 1311 /* Add the buffer allocated to the exclude list for the next lookup */ 1312 if (!ret) { 1313 add_mem_range(emem, kbuf->mem, kbuf->memsz); 1314 sort_memory_ranges(*emem, true); 1315 } else { 1316 pr_err("Failed to locate memory buffer of size %lu\n", 1317 kbuf->memsz); 1318 } 1319 return ret; 1320 } 1321 1322 /** 1323 * arch_kexec_kernel_image_probe - Does additional handling needed to setup 1324 * kexec segments. 1325 * @image: kexec image being loaded. 1326 * @buf: Buffer pointing to elf data. 1327 * @buf_len: Length of the buffer. 1328 * 1329 * Returns 0 on success, negative errno on error. 1330 */ 1331 int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, 1332 unsigned long buf_len) 1333 { 1334 int ret; 1335 1336 /* Get exclude memory ranges needed for setting up kexec segments */ 1337 ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges)); 1338 if (ret) { 1339 pr_err("Failed to setup exclude memory ranges for buffer lookup\n"); 1340 return ret; 1341 } 1342 1343 return kexec_image_probe_default(image, buf, buf_len); 1344 } 1345 1346 /** 1347 * arch_kimage_file_post_load_cleanup - Frees up all the allocations done 1348 * while loading the image. 1349 * @image: kexec image being loaded. 1350 * 1351 * Returns 0 on success, negative errno on error. 
1352 */ 1353 int arch_kimage_file_post_load_cleanup(struct kimage *image) 1354 { 1355 kfree(image->arch.exclude_ranges); 1356 image->arch.exclude_ranges = NULL; 1357 1358 vfree(image->arch.backup_buf); 1359 image->arch.backup_buf = NULL; 1360 1361 vfree(image->elf_headers); 1362 image->elf_headers = NULL; 1363 image->elf_headers_sz = 0; 1364 1365 kvfree(image->arch.fdt); 1366 image->arch.fdt = NULL; 1367 1368 return kexec_image_post_load_cleanup_default(image); 1369 } 1370