1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * S390 kdump implementation 4 * 5 * Copyright IBM Corp. 2011 6 * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> 7 */ 8 9 #include <linux/crash_dump.h> 10 #include <asm/lowcore.h> 11 #include <linux/kernel.h> 12 #include <linux/init.h> 13 #include <linux/mm.h> 14 #include <linux/gfp.h> 15 #include <linux/slab.h> 16 #include <linux/memblock.h> 17 #include <linux/elf.h> 18 #include <linux/uio.h> 19 #include <asm/asm-offsets.h> 20 #include <asm/os_info.h> 21 #include <asm/elf.h> 22 #include <asm/ipl.h> 23 #include <asm/sclp.h> 24 #include <asm/maccess.h> 25 26 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) 27 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) 28 #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) 29 30 static struct memblock_region oldmem_region; 31 32 static struct memblock_type oldmem_type = { 33 .cnt = 1, 34 .max = 1, 35 .total_size = 0, 36 .regions = &oldmem_region, 37 .name = "oldmem", 38 }; 39 40 struct save_area { 41 struct list_head list; 42 u64 psw[2]; 43 u64 ctrs[16]; 44 u64 gprs[16]; 45 u32 acrs[16]; 46 u64 fprs[16]; 47 u32 fpc; 48 u32 prefix; 49 u32 todpreg; 50 u64 timer; 51 u64 todcmp; 52 u64 vxrs_low[16]; 53 __vector128 vxrs_high[16]; 54 }; 55 56 static LIST_HEAD(dump_save_areas); 57 58 /* 59 * Allocate a save area 60 */ 61 struct save_area * __init save_area_alloc(bool is_boot_cpu) 62 { 63 struct save_area *sa; 64 65 sa = memblock_alloc(sizeof(*sa), 8); 66 if (!sa) 67 return NULL; 68 69 if (is_boot_cpu) 70 list_add(&sa->list, &dump_save_areas); 71 else 72 list_add_tail(&sa->list, &dump_save_areas); 73 return sa; 74 } 75 76 /* 77 * Return the address of the save area for the boot CPU 78 */ 79 struct save_area * __init save_area_boot_cpu(void) 80 { 81 return list_first_entry_or_null(&dump_save_areas, struct save_area, list); 82 } 83 84 /* 85 * Copy CPU registers into the save area 86 */ 87 void __init save_area_add_regs(struct save_area *sa, void *regs) 88 { 89 struct lowcore *lc; 90 91 lc = (struct lowcore *)(regs - __LC_FPREGS_SAVE_AREA); 92 memcpy(&sa->psw, &lc->psw_save_area, sizeof(sa->psw)); 93 memcpy(&sa->ctrs, &lc->cregs_save_area, sizeof(sa->ctrs)); 94 memcpy(&sa->gprs, &lc->gpregs_save_area, sizeof(sa->gprs)); 95 memcpy(&sa->acrs, &lc->access_regs_save_area, sizeof(sa->acrs)); 96 memcpy(&sa->fprs, &lc->floating_pt_save_area, sizeof(sa->fprs)); 97 memcpy(&sa->fpc, &lc->fpt_creg_save_area, sizeof(sa->fpc)); 98 memcpy(&sa->prefix, &lc->prefixreg_save_area, sizeof(sa->prefix)); 99 memcpy(&sa->todpreg, &lc->tod_progreg_save_area, sizeof(sa->todpreg)); 100 memcpy(&sa->timer, &lc->cpu_timer_save_area, sizeof(sa->timer)); 101 memcpy(&sa->todcmp, &lc->clock_comp_save_area, sizeof(sa->todcmp)); 102 } 103 104 /* 105 * Copy vector registers into the save area 106 */ 107 void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) 108 { 109 int i; 110 111 /* Copy lower halves of vector registers 0-15 */ 112 for (i = 0; i < 16; i++) 113 sa->vxrs_low[i] = vxrs[i].low; 114 /* Copy vector registers 16-31 */ 115 memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); 116 } 117 118 static size_t copy_oldmem_iter(struct iov_iter *iter, unsigned long src, size_t count) 119 { 120 size_t len, copied, res = 0; 121 122 while (count) { 123 if (!oldmem_data.start && src < sclp.hsa_size) { 124 /* Copy from zfcp/nvme dump HSA area */ 125 len = min(count, sclp.hsa_size - src); 126 copied = memcpy_hsa_iter(iter, src, len); 127 } else { 128 /* Check for swapped kdump oldmem areas */ 129 if (oldmem_data.start && src - oldmem_data.start < oldmem_data.size) { 130 src -= oldmem_data.start; 131 len = min(count, oldmem_data.size - src); 132 } else if (oldmem_data.start && src < oldmem_data.size) { 133 len = min(count, oldmem_data.size - src); 134 src += oldmem_data.start; 135 } else { 136 len = count; 137 } 138 copied = memcpy_real_iter(iter, src, len); 139 } 140 count -= copied; 141 src += copied; 142 res += copied; 143 if (copied < len) 144 break; 145 } 146 return res; 147 } 148 149 int copy_oldmem_kernel(void *dst, unsigned long src, size_t count) 150 { 151 struct iov_iter iter; 152 struct kvec kvec; 153 154 kvec.iov_base = dst; 155 kvec.iov_len = count; 156 iov_iter_kvec(&iter, ITER_DEST, &kvec, 1, count); 157 if (copy_oldmem_iter(&iter, src, count) < count) 158 return -EFAULT; 159 return 0; 160 } 161 162 /* 163 * Copy one page from "oldmem" 164 */ 165 ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, size_t csize, 166 unsigned long offset) 167 { 168 unsigned long src; 169 170 src = pfn_to_phys(pfn) + offset; 171 return copy_oldmem_iter(iter, src, csize); 172 } 173 174 /* 175 * Remap "oldmem" for kdump 176 * 177 * For the kdump reserved memory this functions performs a swap operation: 178 * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] 179 */ 180 static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, 181 unsigned long from, unsigned long pfn, 182 unsigned long size, pgprot_t prot) 183 { 184 unsigned long size_old; 185 int rc; 186 187 if (pfn < oldmem_data.size >> PAGE_SHIFT) { 188 size_old = min(size, oldmem_data.size - (pfn << PAGE_SHIFT)); 189 rc = remap_pfn_range(vma, from, 190 pfn + (oldmem_data.start >> PAGE_SHIFT), 191 size_old, prot); 192 if (rc || size == size_old) 193 return rc; 194 size -= size_old; 195 from += size_old; 196 pfn += size_old >> PAGE_SHIFT; 197 } 198 return remap_pfn_range(vma, from, pfn, size, prot); 199 } 200 201 /* 202 * Remap "oldmem" for zfcp/nvme dump 203 * 204 * We only map available memory above HSA size. Memory below HSA size 205 * is read on demand using the copy_oldmem_page() function. 206 */ 207 static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, 208 unsigned long from, 209 unsigned long pfn, 210 unsigned long size, pgprot_t prot) 211 { 212 unsigned long hsa_end = sclp.hsa_size; 213 unsigned long size_hsa; 214 215 if (pfn < hsa_end >> PAGE_SHIFT) { 216 size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); 217 if (size == size_hsa) 218 return 0; 219 size -= size_hsa; 220 from += size_hsa; 221 pfn += size_hsa >> PAGE_SHIFT; 222 } 223 return remap_pfn_range(vma, from, pfn, size, prot); 224 } 225 226 /* 227 * Remap "oldmem" for kdump or zfcp/nvme dump 228 */ 229 int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, 230 unsigned long pfn, unsigned long size, pgprot_t prot) 231 { 232 if (oldmem_data.start) 233 return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); 234 else 235 return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, 236 prot); 237 } 238 239 static const char *nt_name(Elf64_Word type) 240 { 241 const char *name = "LINUX"; 242 243 if (type == NT_PRPSINFO || type == NT_PRSTATUS || type == NT_PRFPREG) 244 name = KEXEC_CORE_NOTE_NAME; 245 return name; 246 } 247 248 /* 249 * Initialize ELF note 250 */ 251 static void *nt_init_name(void *buf, Elf64_Word type, void *desc, int d_len, 252 const char *name) 253 { 254 Elf64_Nhdr *note; 255 u64 len; 256 257 note = (Elf64_Nhdr *)buf; 258 note->n_namesz = strlen(name) + 1; 259 note->n_descsz = d_len; 260 note->n_type = type; 261 len = sizeof(Elf64_Nhdr); 262 263 memcpy(buf + len, name, note->n_namesz); 264 len = roundup(len + note->n_namesz, 4); 265 266 memcpy(buf + len, desc, note->n_descsz); 267 len = roundup(len + note->n_descsz, 4); 268 269 return PTR_ADD(buf, len); 270 } 271 272 static inline void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len) 273 { 274 return nt_init_name(buf, type, desc, d_len, nt_name(type)); 275 } 276 277 /* 278 * Calculate the size of ELF note 279 */ 280 static size_t nt_size_name(int d_len, const char *name) 281 { 282 size_t size; 283 284 size = sizeof(Elf64_Nhdr); 285 size += roundup(strlen(name) + 1, 4); 286 size += roundup(d_len, 4); 287 288 return size; 289 } 290 291 static inline size_t nt_size(Elf64_Word type, int d_len) 292 { 293 return nt_size_name(d_len, nt_name(type)); 294 } 295 296 /* 297 * Fill ELF notes for one CPU with save area registers 298 */ 299 static void *fill_cpu_elf_notes(void *ptr, int cpu, struct save_area *sa) 300 { 301 struct elf_prstatus nt_prstatus; 302 elf_fpregset_t nt_fpregset; 303 304 /* Prepare prstatus note */ 305 memset(&nt_prstatus, 0, sizeof(nt_prstatus)); 306 memcpy(&nt_prstatus.pr_reg.gprs, sa->gprs, sizeof(sa->gprs)); 307 memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); 308 memcpy(&nt_prstatus.pr_reg.acrs, sa->acrs, sizeof(sa->acrs)); 309 nt_prstatus.common.pr_pid = cpu; 310 /* Prepare fpregset (floating point) note */ 311 memset(&nt_fpregset, 0, sizeof(nt_fpregset)); 312 memcpy(&nt_fpregset.fpc, &sa->fpc, sizeof(sa->fpc)); 313 memcpy(&nt_fpregset.fprs, &sa->fprs, sizeof(sa->fprs)); 314 /* Create ELF notes for the CPU */ 315 ptr = nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus)); 316 ptr = nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset)); 317 ptr = nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer)); 318 ptr = nt_init(ptr, NT_S390_TODCMP, &sa->todcmp, sizeof(sa->todcmp)); 319 ptr = nt_init(ptr, NT_S390_TODPREG, &sa->todpreg, sizeof(sa->todpreg)); 320 ptr = nt_init(ptr, NT_S390_CTRS, &sa->ctrs, sizeof(sa->ctrs)); 321 ptr = nt_init(ptr, NT_S390_PREFIX, &sa->prefix, sizeof(sa->prefix)); 322 if (MACHINE_HAS_VX) { 323 ptr = nt_init(ptr, NT_S390_VXRS_HIGH, 324 &sa->vxrs_high, sizeof(sa->vxrs_high)); 325 ptr = nt_init(ptr, NT_S390_VXRS_LOW, 326 &sa->vxrs_low, sizeof(sa->vxrs_low)); 327 } 328 return ptr; 329 } 330 331 /* 332 * Calculate size of ELF notes per cpu 333 */ 334 static size_t get_cpu_elf_notes_size(void) 335 { 336 struct save_area *sa = NULL; 337 size_t size; 338 339 size = nt_size(NT_PRSTATUS, sizeof(struct elf_prstatus)); 340 size += nt_size(NT_PRFPREG, sizeof(elf_fpregset_t)); 341 size += nt_size(NT_S390_TIMER, sizeof(sa->timer)); 342 size += nt_size(NT_S390_TODCMP, sizeof(sa->todcmp)); 343 size += nt_size(NT_S390_TODPREG, sizeof(sa->todpreg)); 344 size += nt_size(NT_S390_CTRS, sizeof(sa->ctrs)); 345 size += nt_size(NT_S390_PREFIX, sizeof(sa->prefix)); 346 if (MACHINE_HAS_VX) { 347 size += nt_size(NT_S390_VXRS_HIGH, sizeof(sa->vxrs_high)); 348 size += nt_size(NT_S390_VXRS_LOW, sizeof(sa->vxrs_low)); 349 } 350 351 return size; 352 } 353 354 /* 355 * Initialize prpsinfo note (new kernel) 356 */ 357 static void *nt_prpsinfo(void *ptr) 358 { 359 struct elf_prpsinfo prpsinfo; 360 361 memset(&prpsinfo, 0, sizeof(prpsinfo)); 362 prpsinfo.pr_sname = 'R'; 363 strcpy(prpsinfo.pr_fname, "vmlinux"); 364 return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo)); 365 } 366 367 /* 368 * Get vmcoreinfo using lowcore->vmcore_info (new kernel) 369 */ 370 static void *get_vmcoreinfo_old(unsigned long *size) 371 { 372 char nt_name[11], *vmcoreinfo; 373 unsigned long addr; 374 Elf64_Nhdr note; 375 376 if (copy_oldmem_kernel(&addr, __LC_VMCORE_INFO, sizeof(addr))) 377 return NULL; 378 memset(nt_name, 0, sizeof(nt_name)); 379 if (copy_oldmem_kernel(¬e, addr, sizeof(note))) 380 return NULL; 381 if (copy_oldmem_kernel(nt_name, addr + sizeof(note), 382 sizeof(nt_name) - 1)) 383 return NULL; 384 if (strcmp(nt_name, VMCOREINFO_NOTE_NAME) != 0) 385 return NULL; 386 vmcoreinfo = kzalloc(note.n_descsz, GFP_KERNEL); 387 if (!vmcoreinfo) 388 return NULL; 389 if (copy_oldmem_kernel(vmcoreinfo, addr + 24, note.n_descsz)) { 390 kfree(vmcoreinfo); 391 return NULL; 392 } 393 *size = note.n_descsz; 394 return vmcoreinfo; 395 } 396 397 /* 398 * Initialize vmcoreinfo note (new kernel) 399 */ 400 static void *nt_vmcoreinfo(void *ptr) 401 { 402 const char *name = VMCOREINFO_NOTE_NAME; 403 unsigned long size; 404 void *vmcoreinfo; 405 406 vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); 407 if (vmcoreinfo) 408 return nt_init_name(ptr, 0, vmcoreinfo, size, name); 409 410 vmcoreinfo = get_vmcoreinfo_old(&size); 411 if (!vmcoreinfo) 412 return ptr; 413 ptr = nt_init_name(ptr, 0, vmcoreinfo, size, name); 414 kfree(vmcoreinfo); 415 return ptr; 416 } 417 418 static size_t nt_vmcoreinfo_size(void) 419 { 420 const char *name = VMCOREINFO_NOTE_NAME; 421 unsigned long size; 422 void *vmcoreinfo; 423 424 vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); 425 if (vmcoreinfo) 426 return nt_size_name(size, name); 427 428 vmcoreinfo = get_vmcoreinfo_old(&size); 429 if (!vmcoreinfo) 430 return 0; 431 432 kfree(vmcoreinfo); 433 return nt_size_name(size, name); 434 } 435 436 /* 437 * Initialize final note (needed for /proc/vmcore code) 438 */ 439 static void *nt_final(void *ptr) 440 { 441 Elf64_Nhdr *note; 442 443 note = (Elf64_Nhdr *) ptr; 444 note->n_namesz = 0; 445 note->n_descsz = 0; 446 note->n_type = 0; 447 return PTR_ADD(ptr, sizeof(Elf64_Nhdr)); 448 } 449 450 /* 451 * Initialize ELF header (new kernel) 452 */ 453 static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) 454 { 455 memset(ehdr, 0, sizeof(*ehdr)); 456 memcpy(ehdr->e_ident, ELFMAG, SELFMAG); 457 ehdr->e_ident[EI_CLASS] = ELFCLASS64; 458 ehdr->e_ident[EI_DATA] = ELFDATA2MSB; 459 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 460 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); 461 ehdr->e_type = ET_CORE; 462 ehdr->e_machine = EM_S390; 463 ehdr->e_version = EV_CURRENT; 464 ehdr->e_phoff = sizeof(Elf64_Ehdr); 465 ehdr->e_ehsize = sizeof(Elf64_Ehdr); 466 ehdr->e_phentsize = sizeof(Elf64_Phdr); 467 ehdr->e_phnum = mem_chunk_cnt + 1; 468 return ehdr + 1; 469 } 470 471 /* 472 * Return CPU count for ELF header (new kernel) 473 */ 474 static int get_cpu_cnt(void) 475 { 476 struct save_area *sa; 477 int cpus = 0; 478 479 list_for_each_entry(sa, &dump_save_areas, list) 480 if (sa->prefix != 0) 481 cpus++; 482 return cpus; 483 } 484 485 /* 486 * Return memory chunk count for ELF header (new kernel) 487 */ 488 static int get_mem_chunk_cnt(void) 489 { 490 int cnt = 0; 491 u64 idx; 492 493 for_each_physmem_range(idx, &oldmem_type, NULL, NULL) 494 cnt++; 495 return cnt; 496 } 497 498 /* 499 * Initialize ELF loads (new kernel) 500 */ 501 static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) 502 { 503 phys_addr_t start, end; 504 u64 idx; 505 506 for_each_physmem_range(idx, &oldmem_type, &start, &end) { 507 phdr->p_filesz = end - start; 508 phdr->p_type = PT_LOAD; 509 phdr->p_offset = start; 510 phdr->p_vaddr = start; 511 phdr->p_paddr = start; 512 phdr->p_memsz = end - start; 513 phdr->p_flags = PF_R | PF_W | PF_X; 514 phdr->p_align = PAGE_SIZE; 515 phdr++; 516 } 517 } 518 519 /* 520 * Initialize notes (new kernel) 521 */ 522 static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) 523 { 524 struct save_area *sa; 525 void *ptr_start = ptr; 526 int cpu; 527 528 ptr = nt_prpsinfo(ptr); 529 530 cpu = 1; 531 list_for_each_entry(sa, &dump_save_areas, list) 532 if (sa->prefix != 0) 533 ptr = fill_cpu_elf_notes(ptr, cpu++, sa); 534 ptr = nt_vmcoreinfo(ptr); 535 ptr = nt_final(ptr); 536 memset(phdr, 0, sizeof(*phdr)); 537 phdr->p_type = PT_NOTE; 538 phdr->p_offset = notes_offset; 539 phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); 540 phdr->p_memsz = phdr->p_filesz; 541 return ptr; 542 } 543 544 static size_t get_elfcorehdr_size(int mem_chunk_cnt) 545 { 546 size_t size; 547 548 size = sizeof(Elf64_Ehdr); 549 /* PT_NOTES */ 550 size += sizeof(Elf64_Phdr); 551 /* nt_prpsinfo */ 552 size += nt_size(NT_PRPSINFO, sizeof(struct elf_prpsinfo)); 553 /* regsets */ 554 size += get_cpu_cnt() * get_cpu_elf_notes_size(); 555 /* nt_vmcoreinfo */ 556 size += nt_vmcoreinfo_size(); 557 /* nt_final */ 558 size += sizeof(Elf64_Nhdr); 559 /* PT_LOADS */ 560 size += mem_chunk_cnt * sizeof(Elf64_Phdr); 561 562 return size; 563 } 564 565 /* 566 * Create ELF core header (new kernel) 567 */ 568 int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) 569 { 570 Elf64_Phdr *phdr_notes, *phdr_loads; 571 size_t alloc_size; 572 int mem_chunk_cnt; 573 void *ptr, *hdr; 574 u64 hdr_off; 575 576 /* If we are not in kdump or zfcp/nvme dump mode return */ 577 if (!oldmem_data.start && !is_ipl_type_dump()) 578 return 0; 579 /* If we cannot get HSA size for zfcp/nvme dump return error */ 580 if (is_ipl_type_dump() && !sclp.hsa_size) 581 return -ENODEV; 582 583 /* For kdump, exclude previous crashkernel memory */ 584 if (oldmem_data.start) { 585 oldmem_region.base = oldmem_data.start; 586 oldmem_region.size = oldmem_data.size; 587 oldmem_type.total_size = oldmem_data.size; 588 } 589 590 mem_chunk_cnt = get_mem_chunk_cnt(); 591 592 alloc_size = get_elfcorehdr_size(mem_chunk_cnt); 593 594 hdr = kzalloc(alloc_size, GFP_KERNEL); 595 596 /* Without elfcorehdr /proc/vmcore cannot be created. Thus creating 597 * a dump with this crash kernel will fail. Panic now to allow other 598 * dump mechanisms to take over. 599 */ 600 if (!hdr) 601 panic("s390 kdump allocating elfcorehdr failed"); 602 603 /* Init elf header */ 604 ptr = ehdr_init(hdr, mem_chunk_cnt); 605 /* Init program headers */ 606 phdr_notes = ptr; 607 ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); 608 phdr_loads = ptr; 609 ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); 610 /* Init notes */ 611 hdr_off = PTR_DIFF(ptr, hdr); 612 ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); 613 /* Init loads */ 614 hdr_off = PTR_DIFF(ptr, hdr); 615 loads_init(phdr_loads, hdr_off); 616 *addr = (unsigned long long) hdr; 617 *size = (unsigned long long) hdr_off; 618 BUG_ON(elfcorehdr_size > alloc_size); 619 return 0; 620 } 621 622 /* 623 * Free ELF core header (new kernel) 624 */ 625 void elfcorehdr_free(unsigned long long addr) 626 { 627 kfree((void *)(unsigned long)addr); 628 } 629 630 /* 631 * Read from ELF header 632 */ 633 ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) 634 { 635 void *src = (void *)(unsigned long)*ppos; 636 637 memcpy(buf, src, count); 638 *ppos += count; 639 return count; 640 } 641 642 /* 643 * Read from ELF notes data 644 */ 645 ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) 646 { 647 void *src = (void *)(unsigned long)*ppos; 648 649 memcpy(buf, src, count); 650 *ppos += count; 651 return count; 652 } 653