1 /* 2 * S390 kdump implementation 3 * 4 * Copyright IBM Corp. 2011 5 * Author(s): Michael Holzheu <holzheu@linux.vnet.ibm.com> 6 */ 7 8 #include <linux/crash_dump.h> 9 #include <asm/lowcore.h> 10 #include <linux/kernel.h> 11 #include <linux/module.h> 12 #include <linux/gfp.h> 13 #include <linux/slab.h> 14 #include <linux/bootmem.h> 15 #include <linux/elf.h> 16 #include <linux/memblock.h> 17 #include <asm/os_info.h> 18 #include <asm/elf.h> 19 #include <asm/ipl.h> 20 #include <asm/sclp.h> 21 22 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y))) 23 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y))) 24 #define PTR_DIFF(x, y) ((unsigned long)(((char *) (x)) - ((unsigned long) (y)))) 25 26 static struct memblock_region oldmem_region; 27 28 static struct memblock_type oldmem_type = { 29 .cnt = 1, 30 .max = 1, 31 .total_size = 0, 32 .regions = &oldmem_region, 33 }; 34 35 #define for_each_dump_mem_range(i, nid, p_start, p_end, p_nid) \ 36 for (i = 0, __next_mem_range(&i, nid, MEMBLOCK_NONE, \ 37 &memblock.physmem, \ 38 &oldmem_type, p_start, \ 39 p_end, p_nid); \ 40 i != (u64)ULLONG_MAX; \ 41 __next_mem_range(&i, nid, MEMBLOCK_NONE, &memblock.physmem,\ 42 &oldmem_type, \ 43 p_start, p_end, p_nid)) 44 45 struct dump_save_areas dump_save_areas; 46 47 /* 48 * Return physical address for virtual address 49 */ 50 static inline void *load_real_addr(void *addr) 51 { 52 unsigned long real_addr; 53 54 asm volatile( 55 " lra %0,0(%1)\n" 56 " jz 0f\n" 57 " la %0,0\n" 58 "0:" 59 : "=a" (real_addr) : "a" (addr) : "cc"); 60 return (void *)real_addr; 61 } 62 63 /* 64 * Copy real to virtual or real memory 65 */ 66 static int copy_from_realmem(void *dest, void *src, size_t count) 67 { 68 unsigned long size; 69 70 if (!count) 71 return 0; 72 if (!is_vmalloc_or_module_addr(dest)) 73 return memcpy_real(dest, src, count); 74 do { 75 size = min(count, PAGE_SIZE - (__pa(dest) & ~PAGE_MASK)); 76 if (memcpy_real(load_real_addr(dest), src, size)) 77 return -EFAULT; 78 count -= size; 79 dest += size; 80 src += size; 81 } while (count); 82 return 0; 83 } 84 85 /* 86 * Pointer to ELF header in new kernel 87 */ 88 static void *elfcorehdr_newmem; 89 90 /* 91 * Copy one page from zfcpdump "oldmem" 92 * 93 * For pages below HSA size memory from the HSA is copied. Otherwise 94 * real memory copy is used. 95 */ 96 static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize, 97 unsigned long src, int userbuf) 98 { 99 int rc; 100 101 if (src < sclp.hsa_size) { 102 rc = memcpy_hsa(buf, src, csize, userbuf); 103 } else { 104 if (userbuf) 105 rc = copy_to_user_real((void __force __user *) buf, 106 (void *) src, csize); 107 else 108 rc = memcpy_real(buf, (void *) src, csize); 109 } 110 return rc ? rc : csize; 111 } 112 113 /* 114 * Copy one page from kdump "oldmem" 115 * 116 * For the kdump reserved memory this functions performs a swap operation: 117 * - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE]. 118 * - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] 119 */ 120 static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize, 121 unsigned long src, int userbuf) 122 123 { 124 int rc; 125 126 if (src < OLDMEM_SIZE) 127 src += OLDMEM_BASE; 128 else if (src > OLDMEM_BASE && 129 src < OLDMEM_BASE + OLDMEM_SIZE) 130 src -= OLDMEM_BASE; 131 if (userbuf) 132 rc = copy_to_user_real((void __force __user *) buf, 133 (void *) src, csize); 134 else 135 rc = copy_from_realmem(buf, (void *) src, csize); 136 return (rc == 0) ? rc : csize; 137 } 138 139 /* 140 * Copy one page from "oldmem" 141 */ 142 ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize, 143 unsigned long offset, int userbuf) 144 { 145 unsigned long src; 146 147 if (!csize) 148 return 0; 149 src = (pfn << PAGE_SHIFT) + offset; 150 if (OLDMEM_BASE) 151 return copy_oldmem_page_kdump(buf, csize, src, userbuf); 152 else 153 return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf); 154 } 155 156 /* 157 * Remap "oldmem" for kdump 158 * 159 * For the kdump reserved memory this functions performs a swap operation: 160 * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] 161 */ 162 static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma, 163 unsigned long from, unsigned long pfn, 164 unsigned long size, pgprot_t prot) 165 { 166 unsigned long size_old; 167 int rc; 168 169 if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) { 170 size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT)); 171 rc = remap_pfn_range(vma, from, 172 pfn + (OLDMEM_BASE >> PAGE_SHIFT), 173 size_old, prot); 174 if (rc || size == size_old) 175 return rc; 176 size -= size_old; 177 from += size_old; 178 pfn += size_old >> PAGE_SHIFT; 179 } 180 return remap_pfn_range(vma, from, pfn, size, prot); 181 } 182 183 /* 184 * Remap "oldmem" for zfcpdump 185 * 186 * We only map available memory above HSA size. Memory below HSA size 187 * is read on demand using the copy_oldmem_page() function. 188 */ 189 static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma, 190 unsigned long from, 191 unsigned long pfn, 192 unsigned long size, pgprot_t prot) 193 { 194 unsigned long hsa_end = sclp.hsa_size; 195 unsigned long size_hsa; 196 197 if (pfn < hsa_end >> PAGE_SHIFT) { 198 size_hsa = min(size, hsa_end - (pfn << PAGE_SHIFT)); 199 if (size == size_hsa) 200 return 0; 201 size -= size_hsa; 202 from += size_hsa; 203 pfn += size_hsa >> PAGE_SHIFT; 204 } 205 return remap_pfn_range(vma, from, pfn, size, prot); 206 } 207 208 /* 209 * Remap "oldmem" for kdump or zfcpdump 210 */ 211 int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from, 212 unsigned long pfn, unsigned long size, pgprot_t prot) 213 { 214 if (OLDMEM_BASE) 215 return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot); 216 else 217 return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size, 218 prot); 219 } 220 221 /* 222 * Copy memory from old kernel 223 */ 224 int copy_from_oldmem(void *dest, void *src, size_t count) 225 { 226 unsigned long copied = 0; 227 int rc; 228 229 if (OLDMEM_BASE) { 230 if ((unsigned long) src < OLDMEM_SIZE) { 231 copied = min(count, OLDMEM_SIZE - (unsigned long) src); 232 rc = copy_from_realmem(dest, src + OLDMEM_BASE, copied); 233 if (rc) 234 return rc; 235 } 236 } else { 237 unsigned long hsa_end = sclp.hsa_size; 238 if ((unsigned long) src < hsa_end) { 239 copied = min(count, hsa_end - (unsigned long) src); 240 rc = memcpy_hsa(dest, (unsigned long) src, copied, 0); 241 if (rc) 242 return rc; 243 } 244 } 245 return copy_from_realmem(dest + copied, src + copied, count - copied); 246 } 247 248 /* 249 * Alloc memory and panic in case of ENOMEM 250 */ 251 static void *kzalloc_panic(int len) 252 { 253 void *rc; 254 255 rc = kzalloc(len, GFP_KERNEL); 256 if (!rc) 257 panic("s390 kdump kzalloc (%d) failed", len); 258 return rc; 259 } 260 261 /* 262 * Initialize ELF note 263 */ 264 static void *nt_init(void *buf, Elf64_Word type, void *desc, int d_len, 265 const char *name) 266 { 267 Elf64_Nhdr *note; 268 u64 len; 269 270 note = (Elf64_Nhdr *)buf; 271 note->n_namesz = strlen(name) + 1; 272 note->n_descsz = d_len; 273 note->n_type = type; 274 len = sizeof(Elf64_Nhdr); 275 276 memcpy(buf + len, name, note->n_namesz); 277 len = roundup(len + note->n_namesz, 4); 278 279 memcpy(buf + len, desc, note->n_descsz); 280 len = roundup(len + note->n_descsz, 4); 281 282 return PTR_ADD(buf, len); 283 } 284 285 /* 286 * Initialize prstatus note 287 */ 288 static void *nt_prstatus(void *ptr, struct save_area *sa) 289 { 290 struct elf_prstatus nt_prstatus; 291 static int cpu_nr = 1; 292 293 memset(&nt_prstatus, 0, sizeof(nt_prstatus)); 294 memcpy(&nt_prstatus.pr_reg.gprs, sa->gp_regs, sizeof(sa->gp_regs)); 295 memcpy(&nt_prstatus.pr_reg.psw, sa->psw, sizeof(sa->psw)); 296 memcpy(&nt_prstatus.pr_reg.acrs, sa->acc_regs, sizeof(sa->acc_regs)); 297 nt_prstatus.pr_pid = cpu_nr; 298 cpu_nr++; 299 300 return nt_init(ptr, NT_PRSTATUS, &nt_prstatus, sizeof(nt_prstatus), 301 "CORE"); 302 } 303 304 /* 305 * Initialize fpregset (floating point) note 306 */ 307 static void *nt_fpregset(void *ptr, struct save_area *sa) 308 { 309 elf_fpregset_t nt_fpregset; 310 311 memset(&nt_fpregset, 0, sizeof(nt_fpregset)); 312 memcpy(&nt_fpregset.fpc, &sa->fp_ctrl_reg, sizeof(sa->fp_ctrl_reg)); 313 memcpy(&nt_fpregset.fprs, &sa->fp_regs, sizeof(sa->fp_regs)); 314 315 return nt_init(ptr, NT_PRFPREG, &nt_fpregset, sizeof(nt_fpregset), 316 "CORE"); 317 } 318 319 /* 320 * Initialize timer note 321 */ 322 static void *nt_s390_timer(void *ptr, struct save_area *sa) 323 { 324 return nt_init(ptr, NT_S390_TIMER, &sa->timer, sizeof(sa->timer), 325 KEXEC_CORE_NOTE_NAME); 326 } 327 328 /* 329 * Initialize TOD clock comparator note 330 */ 331 static void *nt_s390_tod_cmp(void *ptr, struct save_area *sa) 332 { 333 return nt_init(ptr, NT_S390_TODCMP, &sa->clk_cmp, 334 sizeof(sa->clk_cmp), KEXEC_CORE_NOTE_NAME); 335 } 336 337 /* 338 * Initialize TOD programmable register note 339 */ 340 static void *nt_s390_tod_preg(void *ptr, struct save_area *sa) 341 { 342 return nt_init(ptr, NT_S390_TODPREG, &sa->tod_reg, 343 sizeof(sa->tod_reg), KEXEC_CORE_NOTE_NAME); 344 } 345 346 /* 347 * Initialize control register note 348 */ 349 static void *nt_s390_ctrs(void *ptr, struct save_area *sa) 350 { 351 return nt_init(ptr, NT_S390_CTRS, &sa->ctrl_regs, 352 sizeof(sa->ctrl_regs), KEXEC_CORE_NOTE_NAME); 353 } 354 355 /* 356 * Initialize prefix register note 357 */ 358 static void *nt_s390_prefix(void *ptr, struct save_area *sa) 359 { 360 return nt_init(ptr, NT_S390_PREFIX, &sa->pref_reg, 361 sizeof(sa->pref_reg), KEXEC_CORE_NOTE_NAME); 362 } 363 364 /* 365 * Initialize vxrs high note (full 128 bit VX registers 16-31) 366 */ 367 static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs) 368 { 369 return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16], 370 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME); 371 } 372 373 /* 374 * Initialize vxrs low note (lower halves of VX registers 0-15) 375 */ 376 static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs) 377 { 378 Elf64_Nhdr *note; 379 u64 len; 380 int i; 381 382 note = (Elf64_Nhdr *)ptr; 383 note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1; 384 note->n_descsz = 16 * 8; 385 note->n_type = NT_S390_VXRS_LOW; 386 len = sizeof(Elf64_Nhdr); 387 388 memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz); 389 len = roundup(len + note->n_namesz, 4); 390 391 ptr += len; 392 /* Copy lower halves of SIMD registers 0-15 */ 393 for (i = 0; i < 16; i++) { 394 memcpy(ptr, &vx_regs[i].u[2], 8); 395 ptr += 8; 396 } 397 return ptr; 398 } 399 400 /* 401 * Fill ELF notes for one CPU with save area registers 402 */ 403 void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs) 404 { 405 ptr = nt_prstatus(ptr, sa); 406 ptr = nt_fpregset(ptr, sa); 407 ptr = nt_s390_timer(ptr, sa); 408 ptr = nt_s390_tod_cmp(ptr, sa); 409 ptr = nt_s390_tod_preg(ptr, sa); 410 ptr = nt_s390_ctrs(ptr, sa); 411 ptr = nt_s390_prefix(ptr, sa); 412 if (MACHINE_HAS_VX && vx_regs) { 413 ptr = nt_s390_vx_low(ptr, vx_regs); 414 ptr = nt_s390_vx_high(ptr, vx_regs); 415 } 416 return ptr; 417 } 418 419 /* 420 * Initialize prpsinfo note (new kernel) 421 */ 422 static void *nt_prpsinfo(void *ptr) 423 { 424 struct elf_prpsinfo prpsinfo; 425 426 memset(&prpsinfo, 0, sizeof(prpsinfo)); 427 prpsinfo.pr_sname = 'R'; 428 strcpy(prpsinfo.pr_fname, "vmlinux"); 429 return nt_init(ptr, NT_PRPSINFO, &prpsinfo, sizeof(prpsinfo), 430 KEXEC_CORE_NOTE_NAME); 431 } 432 433 /* 434 * Get vmcoreinfo using lowcore->vmcore_info (new kernel) 435 */ 436 static void *get_vmcoreinfo_old(unsigned long *size) 437 { 438 char nt_name[11], *vmcoreinfo; 439 Elf64_Nhdr note; 440 void *addr; 441 442 if (copy_from_oldmem(&addr, &S390_lowcore.vmcore_info, sizeof(addr))) 443 return NULL; 444 memset(nt_name, 0, sizeof(nt_name)); 445 if (copy_from_oldmem(¬e, addr, sizeof(note))) 446 return NULL; 447 if (copy_from_oldmem(nt_name, addr + sizeof(note), sizeof(nt_name) - 1)) 448 return NULL; 449 if (strcmp(nt_name, "VMCOREINFO") != 0) 450 return NULL; 451 vmcoreinfo = kzalloc_panic(note.n_descsz); 452 if (copy_from_oldmem(vmcoreinfo, addr + 24, note.n_descsz)) 453 return NULL; 454 *size = note.n_descsz; 455 return vmcoreinfo; 456 } 457 458 /* 459 * Initialize vmcoreinfo note (new kernel) 460 */ 461 static void *nt_vmcoreinfo(void *ptr) 462 { 463 unsigned long size; 464 void *vmcoreinfo; 465 466 vmcoreinfo = os_info_old_entry(OS_INFO_VMCOREINFO, &size); 467 if (!vmcoreinfo) 468 vmcoreinfo = get_vmcoreinfo_old(&size); 469 if (!vmcoreinfo) 470 return ptr; 471 return nt_init(ptr, 0, vmcoreinfo, size, "VMCOREINFO"); 472 } 473 474 /* 475 * Initialize ELF header (new kernel) 476 */ 477 static void *ehdr_init(Elf64_Ehdr *ehdr, int mem_chunk_cnt) 478 { 479 memset(ehdr, 0, sizeof(*ehdr)); 480 memcpy(ehdr->e_ident, ELFMAG, SELFMAG); 481 ehdr->e_ident[EI_CLASS] = ELFCLASS64; 482 ehdr->e_ident[EI_DATA] = ELFDATA2MSB; 483 ehdr->e_ident[EI_VERSION] = EV_CURRENT; 484 memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); 485 ehdr->e_type = ET_CORE; 486 ehdr->e_machine = EM_S390; 487 ehdr->e_version = EV_CURRENT; 488 ehdr->e_phoff = sizeof(Elf64_Ehdr); 489 ehdr->e_ehsize = sizeof(Elf64_Ehdr); 490 ehdr->e_phentsize = sizeof(Elf64_Phdr); 491 ehdr->e_phnum = mem_chunk_cnt + 1; 492 return ehdr + 1; 493 } 494 495 /* 496 * Return CPU count for ELF header (new kernel) 497 */ 498 static int get_cpu_cnt(void) 499 { 500 int i, cpus = 0; 501 502 for (i = 0; i < dump_save_areas.count; i++) { 503 if (dump_save_areas.areas[i]->sa.pref_reg == 0) 504 continue; 505 cpus++; 506 } 507 return cpus; 508 } 509 510 /* 511 * Return memory chunk count for ELF header (new kernel) 512 */ 513 static int get_mem_chunk_cnt(void) 514 { 515 int cnt = 0; 516 u64 idx; 517 518 for_each_dump_mem_range(idx, NUMA_NO_NODE, NULL, NULL, NULL) 519 cnt++; 520 return cnt; 521 } 522 523 /* 524 * Initialize ELF loads (new kernel) 525 */ 526 static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) 527 { 528 phys_addr_t start, end; 529 u64 idx; 530 531 for_each_dump_mem_range(idx, NUMA_NO_NODE, &start, &end, NULL) { 532 phdr->p_filesz = end - start; 533 phdr->p_type = PT_LOAD; 534 phdr->p_offset = start; 535 phdr->p_vaddr = start; 536 phdr->p_paddr = start; 537 phdr->p_memsz = end - start; 538 phdr->p_flags = PF_R | PF_W | PF_X; 539 phdr->p_align = PAGE_SIZE; 540 phdr++; 541 } 542 } 543 544 /* 545 * Initialize notes (new kernel) 546 */ 547 static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset) 548 { 549 struct save_area_ext *sa_ext; 550 void *ptr_start = ptr; 551 int i; 552 553 ptr = nt_prpsinfo(ptr); 554 555 for (i = 0; i < dump_save_areas.count; i++) { 556 sa_ext = dump_save_areas.areas[i]; 557 if (sa_ext->sa.pref_reg == 0) 558 continue; 559 ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs); 560 } 561 ptr = nt_vmcoreinfo(ptr); 562 memset(phdr, 0, sizeof(*phdr)); 563 phdr->p_type = PT_NOTE; 564 phdr->p_offset = notes_offset; 565 phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start); 566 phdr->p_memsz = phdr->p_filesz; 567 return ptr; 568 } 569 570 /* 571 * Create ELF core header (new kernel) 572 */ 573 int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size) 574 { 575 Elf64_Phdr *phdr_notes, *phdr_loads; 576 int mem_chunk_cnt; 577 void *ptr, *hdr; 578 u32 alloc_size; 579 u64 hdr_off; 580 581 /* If we are not in kdump or zfcpdump mode return */ 582 if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP) 583 return 0; 584 /* If elfcorehdr= has been passed via cmdline, we use that one */ 585 if (elfcorehdr_addr != ELFCORE_ADDR_MAX) 586 return 0; 587 /* If we cannot get HSA size for zfcpdump return error */ 588 if (ipl_info.type == IPL_TYPE_FCP_DUMP && !sclp.hsa_size) 589 return -ENODEV; 590 591 /* For kdump, exclude previous crashkernel memory */ 592 if (OLDMEM_BASE) { 593 oldmem_region.base = OLDMEM_BASE; 594 oldmem_region.size = OLDMEM_SIZE; 595 oldmem_type.total_size = OLDMEM_SIZE; 596 } 597 598 mem_chunk_cnt = get_mem_chunk_cnt(); 599 600 alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 + 601 mem_chunk_cnt * sizeof(Elf64_Phdr); 602 hdr = kzalloc_panic(alloc_size); 603 /* Init elf header */ 604 ptr = ehdr_init(hdr, mem_chunk_cnt); 605 /* Init program headers */ 606 phdr_notes = ptr; 607 ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr)); 608 phdr_loads = ptr; 609 ptr = PTR_ADD(ptr, sizeof(Elf64_Phdr) * mem_chunk_cnt); 610 /* Init notes */ 611 hdr_off = PTR_DIFF(ptr, hdr); 612 ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off); 613 /* Init loads */ 614 hdr_off = PTR_DIFF(ptr, hdr); 615 loads_init(phdr_loads, hdr_off); 616 *addr = (unsigned long long) hdr; 617 elfcorehdr_newmem = hdr; 618 *size = (unsigned long long) hdr_off; 619 BUG_ON(elfcorehdr_size > alloc_size); 620 return 0; 621 } 622 623 /* 624 * Free ELF core header (new kernel) 625 */ 626 void elfcorehdr_free(unsigned long long addr) 627 { 628 if (!elfcorehdr_newmem) 629 return; 630 kfree((void *)(unsigned long)addr); 631 } 632 633 /* 634 * Read from ELF header 635 */ 636 ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) 637 { 638 void *src = (void *)(unsigned long)*ppos; 639 640 src = elfcorehdr_newmem ? src : src - OLDMEM_BASE; 641 memcpy(buf, src, count); 642 *ppos += count; 643 return count; 644 } 645 646 /* 647 * Read from ELF notes data 648 */ 649 ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos) 650 { 651 void *src = (void *)(unsigned long)*ppos; 652 int rc; 653 654 if (elfcorehdr_newmem) { 655 memcpy(buf, src, count); 656 } else { 657 rc = copy_from_oldmem(buf, src, count); 658 if (rc) 659 return rc; 660 } 661 *ppos += count; 662 return count; 663 } 664