// SPDX-License-Identifier: GPL-2.0-only
/*
 * ppc64 code to implement the kexec_file_load syscall
 *
 * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
 * Copyright (C) 2004 IBM Corp.
 * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
 * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
 * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
 * Copyright (C) 2020 IBM Corporation
 *
 * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
 * Heavily modified for the kernel by
 * Hari Bathini, IBM Corporation.
 */

#include <linux/kexec.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
#include <linux/of.h>
#include <linux/memblock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <asm/setup.h>
#include <asm/drmem.h>
#include <asm/firmware.h>
#include <asm/kexec_ranges.h>
#include <asm/crashdump-ppc64.h>
#include <asm/mmzone.h>
#include <asm/iommu.h>
#include <asm/prom.h>
#include <asm/plpks.h>

struct umem_info {
	__be64 *buf;		/* data buffer for usable-memory property */
	u32 size;		/* size allocated for the data buffer */
	u32 max_entries;	/* maximum no. of entries */
	u32 idx;		/* index of current entry */

	/* usable memory ranges to look up */
	unsigned int nr_ranges;
	const struct range *ranges;
};

const struct kexec_file_ops * const kexec_file_loaders[] = {
	&kexec_elf64_ops,
	NULL
};

/**
 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
 *                             regions like opal/rtas, tce-table, initrd,
 *                             kernel and htab, which should be avoided while
 *                             setting up kexec load segments.
 * @mem_ranges:                Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	ret = add_tce_mem_ranges(mem_ranges);
	if (ret)
		goto out;

	ret = add_initrd_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_htab_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_kernel_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_rtas_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_opal_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_reserved_mem_ranges(mem_ranges);
	if (ret)
		goto out;

	/* exclude memory ranges should be sorted for easy lookup */
	sort_memory_ranges(*mem_ranges, true);
out:
	if (ret)
		pr_err("Failed to setup exclude memory ranges\n");
	return ret;
}

/**
 * get_usable_memory_ranges - Get usable memory ranges. This list includes
 *                            regions like crashkernel, opal/rtas & tce-table
 *                            that the kdump kernel could use.
 * @mem_ranges:               Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	/*
	 * Early boot failure observed on guests when low memory (first memory
	 * block?) is not added to usable memory. So, add [0, crashk_res.end]
	 * instead of [crashk_res.start, crashk_res.end] to work around it.
	 * Also, crashed kernel's memory must be added to the reserve map to
	 * keep the kdump kernel from using it.
	 */
	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
	if (ret)
		goto out;

	ret = add_rtas_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_opal_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_tce_mem_ranges(mem_ranges);
out:
	if (ret)
		pr_err("Failed to setup usable memory ranges\n");
	return ret;
}

/**
 * get_crash_memory_ranges - Get crash memory ranges. This list includes
 *                           first/crashing kernel's memory regions that
 *                           would be exported via an elfcore.
 * @mem_ranges:              Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
{
	phys_addr_t base, end;
	struct crash_mem *tmem;
	u64 i;
	int ret;

	for_each_mem_range(i, &base, &end) {
		u64 size = end - base;

		/* Skip backup memory region, which needs a separate entry */
		if (base == BACKUP_SRC_START) {
			if (size > BACKUP_SRC_SIZE) {
				base = BACKUP_SRC_END + 1;
				size -= BACKUP_SRC_SIZE;
			} else
				continue;
		}

		ret = add_mem_range(mem_ranges, base, size);
		if (ret)
			goto out;

		/* Try merging adjacent ranges before reallocation attempt */
		if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
			sort_memory_ranges(*mem_ranges, true);
	}

	/* Reallocate memory ranges if there is no space to split ranges */
	tmem = *mem_ranges;
	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
		tmem = realloc_mem_ranges(mem_ranges);
		if (!tmem) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Exclude crashkernel region */
	ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
	if (ret)
		goto out;

	/*
	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
	 *        regions are exported to save their context at the time of
	 *        crash, they should actually be backed up just like the
	 *        first 64K bytes of memory.
	 */
	ret = add_rtas_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_opal_mem_range(mem_ranges);
	if (ret)
		goto out;

	/* create a separate program header for the backup region */
	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
	if (ret)
		goto out;

	sort_memory_ranges(*mem_ranges, false);
out:
	if (ret)
		pr_err("Failed to setup crash memory ranges\n");
	return ret;
}

/**
 * get_reserved_memory_ranges - Get reserved memory ranges. This list includes
 *                              memory regions that should be added to the
 *                              memory reserve map to ensure the region is
 *                              protected from any mischief.
 * @mem_ranges:                 Range list to add the memory ranges to.
 *
 * Returns 0 on success, negative errno on error.
 */
static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
{
	int ret;

	ret = add_rtas_mem_range(mem_ranges);
	if (ret)
		goto out;

	ret = add_tce_mem_ranges(mem_ranges);
	if (ret)
		goto out;

	ret = add_reserved_mem_ranges(mem_ranges);
out:
	if (ret)
		pr_err("Failed to setup reserved memory ranges\n");
	return ret;
}

/**
 * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
 *                              in the memory regions between buf_min & buf_max
 *                              for the buffer. If found, sets kbuf->mem.
 * @kbuf:                       Buffer contents and memory parameters.
 * @buf_min:                    Minimum address for the buffer.
 * @buf_max:                    Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
				      u64 buf_min, u64 buf_max)
{
	int ret = -EADDRNOTAVAIL;
	phys_addr_t start, end;
	u64 i;

	for_each_mem_range_rev(i, &start, &end) {
		/*
		 * memblock uses [start, end) convention while it is
		 * [start, end] here. Fix the off-by-one to have the
		 * same convention.
		 */
		end -= 1;

		if (start > buf_max)
			continue;

		/* Memory hole not found */
		if (end < buf_min)
			break;

		/* Adjust memory region based on the given range */
		if (start < buf_min)
			start = buf_min;
		if (end > buf_max)
			end = buf_max;

		start = ALIGN(start, kbuf->buf_align);
		if (start < end && (end - start + 1) >= kbuf->memsz) {
			/* Suitable memory range found. Set kbuf->mem */
			kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
					       kbuf->buf_align);
			ret = 0;
			break;
		}
	}

	return ret;
}

/**
 * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
 *                                  suitable buffer with top down approach.
 * @kbuf:                           Buffer contents and memory parameters.
 * @buf_min:                        Minimum address for the buffer.
 * @buf_max:                        Maximum address for the buffer.
 * @emem:                           Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
					  u64 buf_min, u64 buf_max,
					  const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	tmax = buf_max;
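	/*
	 * Walk the sorted exclude ranges from highest to lowest, probing the
	 * gap above each range (clamped to [buf_min, buf_max]) until a hole
	 * large enough for the buffer is found.
	 */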
	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		if (start > tmax)
			continue;

		if (end < tmax) {
			tmin = (end < buf_min ? buf_min : end + 1);
			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		tmax = start - 1;

		if (tmax < buf_min) {
			ret = err;
			break;
		}
		ret = 0;
	}

	if (!ret) {
		tmin = buf_min;
		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
	}
	return ret;
}

/**
 * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
 *                               in the memory regions between buf_min & buf_max
 *                               for the buffer. If found, sets kbuf->mem.
 * @kbuf:                        Buffer contents and memory parameters.
 * @buf_min:                     Minimum address for the buffer.
 * @buf_max:                     Maximum address for the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
				       u64 buf_min, u64 buf_max)
{
	int ret = -EADDRNOTAVAIL;
	phys_addr_t start, end;
	u64 i;

	for_each_mem_range(i, &start, &end) {
		/*
		 * memblock uses [start, end) convention while it is
		 * [start, end] here. Fix the off-by-one to have the
		 * same convention.
		 */
		end -= 1;

		if (end < buf_min)
			continue;

		/* Memory hole not found */
		if (start > buf_max)
			break;

		/* Adjust memory region based on the given range */
		if (start < buf_min)
			start = buf_min;
		if (end > buf_max)
			end = buf_max;

		start = ALIGN(start, kbuf->buf_align);
		if (start < end && (end - start + 1) >= kbuf->memsz) {
			/* Suitable memory range found. Set kbuf->mem */
			kbuf->mem = start;
			ret = 0;
			break;
		}
	}

	return ret;
}

/**
 * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
 *                                   suitable buffer with bottom up approach.
 * @kbuf:                            Buffer contents and memory parameters.
 * @buf_min:                         Minimum address for the buffer.
 * @buf_max:                         Maximum address for the buffer.
 * @emem:                            Exclude memory ranges.
 *
 * Returns 0 on success, negative errno on error.
 */
static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
					   u64 buf_min, u64 buf_max,
					   const struct crash_mem *emem)
{
	int i, ret = 0, err = -EADDRNOTAVAIL;
	u64 start, end, tmin, tmax;

	tmin = buf_min;
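	/*
	 * Mirror image of the top-down walk: probe the gap below each exclude
	 * range, moving from the lowest range upwards, again clamped to
	 * [buf_min, buf_max].
	 */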
	for (i = 0; i < emem->nr_ranges; i++) {
		start = emem->ranges[i].start;
		end = emem->ranges[i].end;

		if (end < tmin)
			continue;

		if (start > tmin) {
			tmax = (start > buf_max ? buf_max : start - 1);
			ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
			if (!ret)
				return 0;
		}

		tmin = end + 1;

		if (tmin > buf_max) {
			ret = err;
			break;
		}
		ret = 0;
	}

	if (!ret) {
		tmax = buf_max;
		ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
	}
	return ret;
}

/**
 * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
 * @um_info:                  Usable memory buffer and ranges info.
 * @cnt:                      No. of entries to accommodate.
 *
 * On reallocation failure, the old buffer is left intact for the caller
 * to free.
 *
 * Returns buffer on success, NULL on error.
 */
static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
{
	u32 new_size;
	__be64 *tbuf;

	if ((um_info->idx + cnt) <= um_info->max_entries)
		return um_info->buf;

	new_size = um_info->size + MEM_RANGE_CHUNK_SZ;
	tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL);
	if (tbuf) {
		um_info->buf = tbuf;
		um_info->size = new_size;
		um_info->max_entries = (um_info->size / sizeof(u64));
	}

	return tbuf;
}

/**
 * add_usable_mem - Add the usable memory ranges within the given memory range
 *                  to the buffer
 * @um_info:        Usable memory buffer and ranges info.
 * @base:           Base address of memory range to look for.
 * @end:            End address of memory range to look for.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
{
	u64 loc_base, loc_end;
	bool add;
	int i;

	for (i = 0; i < um_info->nr_ranges; i++) {
		add = false;
		loc_base = um_info->ranges[i].start;
		loc_end = um_info->ranges[i].end;
		if (loc_base >= base && loc_end <= end)
			add = true;
		else if (base < loc_end && end > loc_base) {
			if (loc_base < base)
				loc_base = base;
			if (loc_end > end)
				loc_end = end;
			add = true;
		}

		if (add) {
			if (!check_realloc_usable_mem(um_info, 2))
				return -ENOMEM;

			um_info->buf[um_info->idx++] = cpu_to_be64(loc_base);
			um_info->buf[um_info->idx++] =
					cpu_to_be64(loc_end - loc_base + 1);
		}
	}

	return 0;
}

/**
 * kdump_setup_usable_lmb - This is a callback function that gets called by
 *                          walk_drmem_lmbs for every LMB to set its
 *                          usable memory ranges.
 * @lmb:                    LMB info.
 * @usm:                    linux,drconf-usable-memory property value.
 * @data:                   Pointer to usable memory buffer and ranges info.
 *
 * Returns 0 on success, negative errno on error.
 */
static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
				  void *data)
{
	struct umem_info *um_info;
	int tmp_idx, ret;
	u64 base, end;

	/*
	 * kdump load isn't supported on kernels already booted with
	 * linux,drconf-usable-memory property.
	 */
	if (*usm) {
		pr_err("linux,drconf-usable-memory property already exists!\n");
		return -EINVAL;
	}

	um_info = data;
	tmp_idx = um_info->idx;
	if (!check_realloc_usable_mem(um_info, 1))
		return -ENOMEM;

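	/*
	 * Reserve one cell at tmp_idx for the number of usable ranges in
	 * this LMB; it is filled in below once the ranges are added.
	 */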
	um_info->idx++;
	base = lmb->base_addr;
	end = base + drmem_lmb_size() - 1;
	ret = add_usable_mem(um_info, base, end);
	if (!ret) {
		/*
		 * Update the no. of ranges added. Two entries (base & size)
		 * for every range added.
		 */
		um_info->buf[tmp_idx] =
				cpu_to_be64((um_info->idx - tmp_idx - 1) / 2);
	}

	return ret;
}

#define NODE_PATH_LEN		256
/**
 * add_usable_mem_property - Add usable memory property for the given
 *                           memory node.
 * @fdt:                     Flattened device tree for the kdump kernel.
 * @dn:                      Memory node.
 * @um_info:                 Usable memory buffer and ranges info.
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_usable_mem_property(void *fdt, struct device_node *dn,
				   struct umem_info *um_info)
{
	int n_mem_addr_cells, n_mem_size_cells, node;
	char path[NODE_PATH_LEN];
	int i, len, ranges, ret;
	const __be32 *prop;
	u64 base, end;

	of_node_get(dn);

	if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
		pr_err("Buffer (%d) too small for memory node: %pOF\n",
		       NODE_PATH_LEN, dn);
		ret = -EOVERFLOW;
		goto out;
	}
	kexec_dprintk("Memory node path: %s\n", path);

	/* Now that we know the path, find its offset in kdump kernel's fdt */
	node = fdt_path_offset(fdt, path);
	if (node < 0) {
		pr_err("Malformed device tree: error reading %s\n", path);
		ret = -EINVAL;
		goto out;
	}

	/* Get the address & size cells */
	n_mem_addr_cells = of_n_addr_cells(dn);
	n_mem_size_cells = of_n_size_cells(dn);
	kexec_dprintk("address cells: %d, size cells: %d\n", n_mem_addr_cells,
		      n_mem_size_cells);

	um_info->idx = 0;
	if (!check_realloc_usable_mem(um_info, 2)) {
		ret = -ENOMEM;
		goto out;
	}

	prop = of_get_property(dn, "reg", &len);
	if (!prop || len <= 0) {
		ret = 0;
		goto out;
	}

	/*
	 * "reg" property represents sequence of (addr,size) tuples
	 * each representing a memory range.
	 */
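	/* len is in bytes and each cell is 4 bytes wide, hence len >> 2 */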
	ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);

	for (i = 0; i < ranges; i++) {
		base = of_read_number(prop, n_mem_addr_cells);
		prop += n_mem_addr_cells;
		end = base + of_read_number(prop, n_mem_size_cells) - 1;
		prop += n_mem_size_cells;

		ret = add_usable_mem(um_info, base, end);
		if (ret)
			goto out;
	}

	/*
	 * No kdump kernel usable memory found in this memory node.
	 * Write (0,0) tuple in linux,usable-memory property for
	 * this region to be ignored.
	 */
	if (um_info->idx == 0) {
		um_info->buf[0] = 0;
		um_info->buf[1] = 0;
		um_info->idx = 2;
	}

	ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
			  (um_info->idx * sizeof(u64)));

out:
	of_node_put(dn);
	return ret;
}

/**
 * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
 *                         and linux,drconf-usable-memory DT properties as
 *                         appropriate to restrict its memory usage.
 * @fdt:                   Flattened device tree for the kdump kernel.
 * @usable_mem:            Usable memory ranges for kdump kernel.
 *
 * Returns 0 on success, negative errno on error.
 */
static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
{
	struct umem_info um_info;
	struct device_node *dn;
	int node, ret = 0;

	if (!usable_mem) {
		pr_err("Usable memory ranges for kdump kernel not found\n");
		return -ENOENT;
	}

	node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
	if (node == -FDT_ERR_NOTFOUND)
		kexec_dprintk("No dynamic reconfiguration memory found\n");
	else if (node < 0) {
		pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
		return -EINVAL;
	}

	um_info.buf = NULL;
	um_info.size = 0;
	um_info.max_entries = 0;
	um_info.idx = 0;
	/* Memory ranges to look up */
	um_info.ranges = &(usable_mem->ranges[0]);
	um_info.nr_ranges = usable_mem->nr_ranges;

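	/*
	 * With dynamic reconfiguration memory (pseries), usable ranges for
	 * all the LMBs are described by a single linux,drconf-usable-memory
	 * property, built here by walking every LMB.
	 */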
	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (dn) {
		ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
		of_node_put(dn);

		if (ret) {
			pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
			goto out;
		}

		ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
				  um_info.buf, (um_info.idx * sizeof(u64)));
		if (ret) {
			pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}

	/*
	 * Walk through each memory node and set linux,usable-memory property
	 * for the corresponding node in kdump kernel's fdt.
	 */
	for_each_node_by_type(dn, "memory") {
		ret = add_usable_mem_property(fdt, dn, &um_info);
		if (ret) {
			pr_err("Failed to set linux,usable-memory property for %s node\n",
			       dn->full_name);
			of_node_put(dn);
			goto out;
		}
	}

out:
	kfree(um_info.buf);
	return ret;
}

/**
 * load_backup_segment - Locate a memory hole to place the backup region.
 * @image:               Kexec image.
 * @kbuf:                Buffer contents and memory parameters.
 *
 * Returns 0 on success, negative errno on error.
 */
static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
{
	void *buf;
	int ret;

	/*
	 * Setup a source buffer for backup segment.
	 *
	 * A source buffer has no meaning for backup region as data will
	 * be copied from backup source, after crash, in the purgatory.
	 * But as load segment code doesn't recognize such segments,
	 * setup a dummy source buffer to keep it happy for now.
	 */
	buf = vzalloc(BACKUP_SRC_SIZE);
	if (!buf)
		return -ENOMEM;

	kbuf->buffer = buf;
	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
	kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
	kbuf->top_down = false;

	ret = kexec_add_buffer(kbuf);
	if (ret) {
		vfree(buf);
		return ret;
	}

	image->arch.backup_buf = buf;
	image->arch.backup_start = kbuf->mem;
	return 0;
}

/**
 * update_backup_region_phdr - Update backup region's offset for the core to
 *                             export the region appropriately.
 * @image:                     Kexec image.
 * @ehdr:                      ELF core header.
 *
 * Assumes an exclusive program header is set up for the backup region
 * in the ELF headers.
 *
 * Returns nothing.
 */
static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
{
	Elf64_Phdr *phdr;
	unsigned int i;

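	/*
	 * Program headers follow the ELF header; scan them for the one
	 * whose physical address matches the backup region.
	 */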
	phdr = (Elf64_Phdr *)(ehdr + 1);
	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
		if (phdr->p_paddr == BACKUP_SRC_START) {
			phdr->p_offset = image->arch.backup_start;
			kexec_dprintk("Backup region offset updated to 0x%lx\n",
				      image->arch.backup_start);
			return;
		}
	}
}

/**
 * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
 *                           segment needed to load kdump kernel.
 * @image:                   Kexec image.
 * @kbuf:                    Buffer contents and memory parameters.
 *
 * Returns 0 on success, negative errno on error.
 */
static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
{
	struct crash_mem *cmem = NULL;
	unsigned long headers_sz;
	void *headers = NULL;
	int ret;

	ret = get_crash_memory_ranges(&cmem);
	if (ret)
		goto out;

	/* Setup elfcorehdr segment */
	ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz);
	if (ret) {
		pr_err("Failed to prepare elf headers for the core\n");
		goto out;
	}

	/* Fix the offset for backup region in the ELF header */
	update_backup_region_phdr(image, headers);

	kbuf->buffer = headers;
	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
	kbuf->bufsz = kbuf->memsz = headers_sz;
	kbuf->top_down = false;

	ret = kexec_add_buffer(kbuf);
	if (ret) {
		vfree(headers);
		goto out;
	}

	image->elf_load_addr = kbuf->mem;
	image->elf_headers_sz = headers_sz;
	image->elf_headers = headers;
out:
	kfree(cmem);
	return ret;
}

/**
 * load_crashdump_segments_ppc64 - Initialize the additional segments needed
 *                                 to load kdump kernel.
 * @image:                         Kexec image.
 * @kbuf:                          Buffer contents and memory parameters.
 *
 * Returns 0 on success, negative errno on error.
 */
int load_crashdump_segments_ppc64(struct kimage *image,
				  struct kexec_buf *kbuf)
{
	int ret;

	/* Load backup segment - first 64K bytes of the crashing kernel */
	ret = load_backup_segment(image, kbuf);
	if (ret) {
		pr_err("Failed to load backup segment\n");
		return ret;
	}
	kexec_dprintk("Loaded the backup region at 0x%lx\n", kbuf->mem);

	/* Load elfcorehdr segment - to export crashing kernel's vmcore */
	ret = load_elfcorehdr_segment(image, kbuf);
	if (ret) {
		pr_err("Failed to load elfcorehdr segment\n");
		return ret;
	}
	kexec_dprintk("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
		      image->elf_load_addr, kbuf->bufsz, kbuf->memsz);

	return 0;
}

/**
 * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
 *                         variables and call setup_purgatory() to initialize
 *                         common global variables.
 * @image:                 kexec image.
 * @slave_code:            Slave code for the purgatory.
 * @fdt:                   Flattened device tree for the next kernel.
 * @kernel_load_addr:      Address where the kernel is loaded.
 * @fdt_load_addr:         Address where the flattened device tree is loaded.
 *
 * Returns 0 on success, negative errno on error.
 */
int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
			  const void *fdt, unsigned long kernel_load_addr,
			  unsigned long fdt_load_addr)
{
	struct device_node *dn = NULL;
	int ret;

	ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
			      fdt_load_addr);
	if (ret)
		goto out;

	if (image->type == KEXEC_TYPE_CRASH) {
		u32 my_run_at_load = 1;

		/*
		 * Tell relocatable kernel to run at load address
		 * via the word meant for that at 0x5c.
		 */
		ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
						     &my_run_at_load,
						     sizeof(my_run_at_load),
						     false);
		if (ret)
			goto out;
	}

	/* Tell purgatory where to look for backup region */
	ret = kexec_purgatory_get_set_symbol(image, "backup_start",
					     &image->arch.backup_start,
					     sizeof(image->arch.backup_start),
					     false);
	if (ret)
		goto out;

	/* Setup OPAL base & entry values */
	dn = of_find_node_by_path("/ibm,opal");
	if (dn) {
		u64 val;

		of_property_read_u64(dn, "opal-base-address", &val);
		ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
						     sizeof(val), false);
		if (ret)
			goto out;

		of_property_read_u64(dn, "opal-entry-address", &val);
		ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
						     sizeof(val), false);
	}
out:
	if (ret)
		pr_err("Failed to setup purgatory symbols\n");
	of_node_put(dn);
	return ret;
}

/**
 * cpu_node_size - Compute the size of a CPU node in the FDT.
 *                 This should be done only once and the value is stored in
 *                 a static variable.
 * Returns the max size of a CPU node in the FDT.
 */
static unsigned int cpu_node_size(void)
{
	static unsigned int size;
	struct device_node *dn;
	struct property *pp;

	/*
	 * Don't compute it twice, we are assuming that the per CPU node size
	 * doesn't change during the system's life.
	 */
	if (size)
		return size;

	dn = of_find_node_by_type(NULL, "cpu");
	if (WARN_ON_ONCE(!dn)) {
		// Unlikely to happen
		return 0;
	}

	/*
	 * We compute the sub node size for a CPU node, assuming it
	 * will be the same for all.
	 */
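	/*
	 * Room for the node name plus a little FDT structure overhead,
	 * presumably what the +5 below accounts for.
	 */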
	size += strlen(dn->name) + 5;
	for_each_property_of_node(dn, pp) {
		size += strlen(pp->name);
		size += pp->length;
	}

	of_node_put(dn);
	return size;
}

/**
 * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
 *                              setup FDT for kexec/kdump kernel.
 * @image:                      kexec image being loaded.
 *
 * Returns the estimated extra size needed for kexec/kdump kernel FDT.
 */
unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
{
	unsigned int cpu_nodes, extra_size = 0;
	struct device_node *dn;
	u64 usm_entries;

	// Budget some space for the password blob. There's already extra space
	// for the key name
	if (plpks_is_available())
		extra_size += (unsigned int)plpks_get_passwordlen();

	if (image->type != KEXEC_TYPE_CRASH)
		return extra_size;

	/*
	 * For kdump kernel, account for linux,usable-memory and
	 * linux,drconf-usable-memory properties. Get an approximation of the
	 * number of usable memory entries and use it for FDT size estimation.
	 */
	if (drmem_lmb_size()) {
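		/*
		 * Roughly one cell per LMB for the range counts in
		 * linux,drconf-usable-memory, plus a (base, size) pair for
		 * every LMB the crashkernel region may intersect.
		 */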
		usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
			       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
		extra_size += (unsigned int)(usm_entries * sizeof(u64));
	}

	/*
	 * Get the number of CPU nodes in the current DT. This allows us to
	 * reserve space for CPU nodes added since boot time.
	 */
	cpu_nodes = 0;
	for_each_node_by_type(dn, "cpu") {
		cpu_nodes++;
	}

	if (cpu_nodes > boot_cpu_node_count)
		extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();

	return extra_size;
}

/**
 * add_node_props - Reads node properties from device node structure and add
 *                  them to fdt.
 * @fdt:            Flattened device tree of the kernel
 * @node_offset:    offset of the node to add a property at
 * @dn:             device node pointer
 *
 * Returns 0 on success, negative errno on error.
 */
static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
{
	int ret = 0;
	struct property *pp;

	if (!dn)
		return -EINVAL;

	for_each_property_of_node(dn, pp) {
		ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
		if (ret < 0) {
			pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
			return ret;
		}
	}
	return ret;
}

/**
 * update_cpus_node - Update cpus node of flattened device tree using of_root
 *                    device node.
 * @fdt:              Flattened device tree of the kernel.
 *
 * Returns 0 on success, negative errno on error.
 */
static int update_cpus_node(void *fdt)
{
	struct device_node *cpus_node, *dn;
	int cpus_offset, cpus_subnode_offset, ret = 0;

	cpus_offset = fdt_path_offset(fdt, "/cpus");
	if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
		pr_err("Malformed device tree: error reading /cpus node: %s\n",
		       fdt_strerror(cpus_offset));
		return cpus_offset;
	}

	if (cpus_offset > 0) {
		ret = fdt_del_node(fdt, cpus_offset);
		if (ret < 0) {
			pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
			return -EINVAL;
		}
	}

	/* Add cpus node to fdt */
	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
	if (cpus_offset < 0) {
		pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
		return -EINVAL;
	}

	/* Add cpus node properties */
	cpus_node = of_find_node_by_path("/cpus");
	ret = add_node_props(fdt, cpus_offset, cpus_node);
	of_node_put(cpus_node);
	if (ret < 0)
		return ret;

	/* Loop through all subnodes of cpus and add them to fdt */
	for_each_node_by_type(dn, "cpu") {
		cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
		if (cpus_subnode_offset < 0) {
			pr_err("Unable to add %s subnode: %s\n", dn->full_name,
			       fdt_strerror(cpus_subnode_offset));
			ret = cpus_subnode_offset;
			goto out;
		}

		ret = add_node_props(fdt, cpus_subnode_offset, dn);
		if (ret < 0)
			goto out;
	}
out:
	of_node_put(dn);
	return ret;
}

static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
			 const char *propname)
{
	const void *prop, *fdtprop;
	int len = 0, fdtlen = 0;

	prop = of_get_property(dn, propname, &len);
	fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);

	if (fdtprop && !prop)
		return fdt_delprop(fdt, node_offset, propname);
	else if (prop)
		return fdt_setprop(fdt, node_offset, propname, prop, len);
	else
		return -FDT_ERR_NOTFOUND;
}

static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
{
	struct device_node *dn;
	int pci_offset, root_offset, ret = 0;

	if (!firmware_has_feature(FW_FEATURE_LPAR))
		return 0;

	root_offset = fdt_path_offset(fdt, "/");
	for_each_node_with_property(dn, dmapropname) {
		pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
		if (pci_offset < 0)
			continue;

		ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
		if (ret < 0) {
			of_node_put(dn);
			break;
		}
		ret = copy_property(fdt, pci_offset, dn, dmapropname);
		if (ret < 0) {
			of_node_put(dn);
			break;
		}
	}

	return ret;
}

/**
 * setup_new_fdt_ppc64 - Update the flattened device tree of the kernel
 *                       being loaded.
 * @image:               kexec image being loaded.
 * @fdt:                 Flattened device tree for the next kernel.
 * @initrd_load_addr:    Address where the next initrd will be loaded.
 * @initrd_len:          Size of the next initrd, or 0 if there will be none.
 * @cmdline:             Command line for the next kernel, or NULL if there
 *                       will be none.
 *
 * Returns 0 on success, negative errno on error.
 */
int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
			unsigned long initrd_load_addr,
			unsigned long initrd_len, const char *cmdline)
{
	struct crash_mem *umem = NULL, *rmem = NULL;
	int i, nr_ranges, ret;

	/*
	 * Restrict memory usage for kdump kernel by setting up
	 * usable memory ranges and memory reserve map.
	 */
	if (image->type == KEXEC_TYPE_CRASH) {
		ret = get_usable_memory_ranges(&umem);
		if (ret)
			goto out;

		ret = update_usable_mem_fdt(fdt, umem);
		if (ret) {
			pr_err("Error setting up usable-memory property for kdump kernel\n");
			goto out;
		}

		/*
		 * Ensure we don't touch crashed kernel's memory except the
		 * first 64K of RAM, which will be backed up.
		 */
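		/*
		 * Reserve [BACKUP_SRC_END + 1, crashk_res.start): since the
		 * backup region covers the first 64K, the reservation size
		 * works out to crashk_res.start - BACKUP_SRC_SIZE.
		 */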
		ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
				      crashk_res.start - BACKUP_SRC_SIZE);
		if (ret) {
			pr_err("Error reserving crash memory: %s\n",
			       fdt_strerror(ret));
			goto out;
		}

		/* Ensure backup region is not used by kdump/capture kernel */
		ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
				      BACKUP_SRC_SIZE);
		if (ret) {
			pr_err("Error reserving memory for backup: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}

	/* Update cpus nodes information to account for hotplug CPUs. */
	ret = update_cpus_node(fdt);
	if (ret < 0)
		goto out;

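	/*
	 * Copy the direct and dynamic 64-bit DMA window properties (and
	 * ibm,dma-window) so the next kernel inherits the DMA windows
	 * already configured by this one (LPAR only).
	 */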
	ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
	if (ret < 0)
		goto out;

	ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
	if (ret < 0)
		goto out;

	/* Update memory reserve map */
	ret = get_reserved_memory_ranges(&rmem);
	if (ret)
		goto out;

	nr_ranges = rmem ? rmem->nr_ranges : 0;
	for (i = 0; i < nr_ranges; i++) {
		u64 base, size;

		base = rmem->ranges[i].start;
		size = rmem->ranges[i].end - base + 1;
		ret = fdt_add_mem_rsv(fdt, base, size);
		if (ret) {
			pr_err("Error updating memory reserve map: %s\n",
			       fdt_strerror(ret));
			goto out;
		}
	}

	// If we have PLPKS active, we need to provide the password to the new kernel
	if (plpks_is_available())
		ret = plpks_populate_fdt(fdt);

out:
	kfree(rmem);
	kfree(umem);
	return ret;
}

/**
 * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
 *                              tce-table, reserved-ranges & such (exclude
 *                              memory ranges) as they can't be used for kexec
 *                              segment buffer. Sets kbuf->mem when a suitable
 *                              memory hole is found.
 * @kbuf:                       Buffer contents and memory parameters.
 *
 * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
{
	struct crash_mem **emem;
	u64 buf_min, buf_max;
	int ret;

	/* Look up the exclude ranges list while locating the memory hole */
	emem = &(kbuf->image->arch.exclude_ranges);
	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
		pr_warn("No exclude range list. Using the default locate mem hole method\n");
		return kexec_locate_mem_hole(kbuf);
	}

	buf_min = kbuf->buf_min;
	buf_max = kbuf->buf_max;
	/* Segments for kdump kernel should be within crashkernel region */
	if (kbuf->image->type == KEXEC_TYPE_CRASH) {
		buf_min = (buf_min < crashk_res.start ?
			   crashk_res.start : buf_min);
		buf_max = (buf_max > crashk_res.end ?
			   crashk_res.end : buf_max);
	}

	if (buf_min > buf_max) {
		pr_err("Invalid buffer min and/or max values\n");
		return -EINVAL;
	}

	if (kbuf->top_down)
		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
						     *emem);
	else
		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
						      *emem);

	/* Add the buffer allocated to the exclude list for the next lookup */
	if (!ret) {
		add_mem_range(emem, kbuf->mem, kbuf->memsz);
		sort_memory_ranges(*emem, true);
	} else {
		pr_err("Failed to locate memory buffer of size %lu\n",
		       kbuf->memsz);
	}
	return ret;
}

/**
 * arch_kexec_kernel_image_probe - Does additional handling needed to setup
 *                                 kexec segments.
 * @image:                         kexec image being loaded.
 * @buf:                           Buffer pointing to elf data.
 * @buf_len:                       Length of the buffer.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
				  unsigned long buf_len)
{
	int ret;

	/* Get exclude memory ranges needed for setting up kexec segments */
	ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
	if (ret) {
		pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
		return ret;
	}

	return kexec_image_probe_default(image, buf, buf_len);
}

/**
 * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
 *                                      while loading the image.
 * @image:                              kexec image being loaded.
 *
 * Returns 0 on success, negative errno on error.
 */
int arch_kimage_file_post_load_cleanup(struct kimage *image)
{
	kfree(image->arch.exclude_ranges);
	image->arch.exclude_ranges = NULL;

	vfree(image->arch.backup_buf);
	image->arch.backup_buf = NULL;

	vfree(image->elf_headers);
	image->elf_headers = NULL;
	image->elf_headers_sz = 0;

	kvfree(image->arch.fdt);
	image->arch.fdt = NULL;

	return kexec_image_post_load_cleanup_default(image);
}