/*
 * kexec.c - kexec system call core code.
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/hash.h>
#include <crypto/sha.h>
#include "kexec_internal.h"

DEFINE_MUTEX(kexec_mutex);

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;


/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in do_exit() path, each of which
	 * corresponds to each of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the new
 * kernel is placed in the control_code_buffer, whose size is given
 * by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single page
 * of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

/*
 * KIMAGE_NO_DEST is an impossible destination address, used for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static struct page *kimage_alloc_page(struct kimage *image,
				       gfp_t gfp_mask,
				       unsigned long dest);
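
/*
 * sanity_check_segment_list - validate the segments of a loaded image.
 *
 * The checks below are purely about whether the kernel can safely copy
 * the segments: page alignment, architecture address limits, no overlap
 * between segments, bufsz never larger than memsz, and (for crash
 * images) containment within the reserved crashk_res window.
 */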
int sanity_check_segment_list(struct kimage *image)
{
	int result, i;
	unsigned long nr_segments = image->nr_segments;

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			return result;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			return result;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stomps on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;

			pstart = image->segment[j].mem;
			pend   = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				return result;
		}
	}

	/* Ensure our buffer sizes are no larger than
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			return result;
	}

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of ram.  We must ensure the addresses
	 * are in the reserved area otherwise preloading the
	 * kernel could corrupt things.
	 */

	if (image->type == KEXEC_TYPE_CRASH) {
		result = -EADDRNOTAVAIL;
		for (i = 0; i < nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			/* Ensure we are within the crash kernel limits */
			if ((mstart < crashk_res.start) ||
			    (mend > crashk_res.end))
				return result;
		}
	}

	return 0;
}
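
/*
 * do_kimage_alloc_init - allocate a zeroed struct kimage and set up the
 * entry pointers and the three page lists (control, destination and
 * unusable) that the loading code appends to later.
 */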
struct kimage *do_kimage_alloc_init(void)
{
	struct kimage *image;

	/* Allocate a controlling structure */
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		return NULL;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unusable_pages);

	return image;
}

int kimage_is_destination_range(struct kimage *image,
					unsigned long start,
					unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}
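
/*
 * kimage_alloc_pages/kimage_free_pages are a matched pair: the
 * allocation order is stashed in page_private() and every page in the
 * block is marked PageReserved, so the free path can undo both before
 * handing the pages back to the page allocator.
 */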
static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;

		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

void kimage_free_page_list(struct list_head *list)
{
	struct page *page, *next;

	list_for_each_entry_safe(page, next, list, lru) {
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
		if (!pages)
			break;
		pfn   = page_to_pfn(pages);
		epfn  = pfn + count;
		addr  = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end   = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend   = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end   = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			image->control_page = hole_end;
			break;
		}
	}

	return pages;
}


struct page *kimage_alloc_control_pages(struct kimage *image,
					 unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}
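
/*
 * The image is described to the relocation stub as a flat list of
 * kimage_entry_t values: each entry is a page-aligned physical address
 * ORed with an IND_* flag.  IND_DESTINATION sets the current copy
 * target, IND_SOURCE names a page to copy there (advancing the target
 * by PAGE_SIZE), IND_INDIRECTION chains to the next page of entries,
 * and IND_DONE terminates the list.  kimage_add_entry() below grows
 * the list one entry at a time, allocating a fresh indirection page
 * whenever the current one fills up.
 */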
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				   unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);
}

void kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;
}

#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}
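
/*
 * kimage_free - tear down a kimage.
 *
 * Walks the entry list freeing every source page, freeing each
 * indirection page one step behind the walk (it is still being read
 * while the walk is inside it), then releases the control pages, any
 * architecture specific state and finally the kimage itself.
 */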
void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		} else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);

	/*
	 * Free up any temporary buffers allocated.  This might hit if
	 * error occurred much later after buffer allocation.
	 */
	if (image->file_mode)
		kimage_file_post_load_cleanup(image);

	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
					unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want, use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		/* Place the page on the destination list, to be used later */
		list_add(&page->lru, &image->dest_pages);
	}

	return page;
}
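
/*
 * Segment loading: each destination page is obtained through
 * kimage_alloc_page() so the copy honours the invariants above; the
 * first bufsz bytes come from the user buffer (or, for file based
 * kexec, the kernel buffer) and the remainder of memsz is zero filled.
 */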
static int kimage_load_normal_segment(struct kimage *image,
					 struct kexec_segment *segment)
{
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);
		if (mchunk > uchunk) {
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}
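
/*
 * kexec_image is the image started by a normal kexec reboot and
 * kexec_crash_image is the one jumped to from the panic path; both are
 * protected by kexec_mutex.  kexec_load_disabled, once set, prevents
 * any further image from being loaded.
 */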
struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;

/*
 * No panic_cpu check version of crash_kexec().  This function is called
 * only when panic_cpu holds the current CPU number; this is the only CPU
 * which processes crash_kexec routines.
 */
void __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other.  To exclude them, we use panic_cpu here too.
	 */
	this_cpu = raw_smp_processor_id();
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
	if (old_cpu == PANIC_CPU_INVALID) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;

	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}
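
/*
 * crash_shrink_memory - give part of the crashkernel reservation back.
 *
 * Shrinks crashk_res to new_size (rounded to KEXEC_CRASH_MEM_ALIGN),
 * frees the released pages and re-registers them as "System RAM".
 * Only allowed while no crash image is loaded.
 */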
int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;
	unsigned long old_size;
	struct resource *ram_res;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;
	old_size = (end == 0) ? 0 : end - start + 1;
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res) {
		ret = -ENOMEM;
		goto unlock;
	}

	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);

	crash_map_reserved_pages();
	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);

	ram_res->start = end;
	ram_res->end = crashk_res.end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
	ram_res->name = "System RAM";

	crashk_res.end = end - 1;

	insert_resource(&iomem_resource, ram_res);
	crash_unmap_reserved_pages();

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}
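
/*
 * ELF note layout as emitted below: an elf_note header (n_namesz,
 * n_descsz, n_type) followed by the name and then the descriptor data,
 * each padded to a 4-byte boundary - hence the (len + 3)/4 advances of
 * the u32 buffer pointer.
 */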
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based.  vmalloc doesn't guarantee that 2 contiguous
	 * vmalloc pages are also on 2 contiguous physical pages.  In that
	 * case the 2nd part of crash_notes in the 2nd page could be lost,
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs.  Here round up the size of crash_notes
	 * to the nearest power of two and pass it to __alloc_percpu as
	 * the align value.  This makes sure crash_notes is allocated
	 * inside one physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break compile if size is bigger than PAGE_SIZE since crash_notes
	 * definitely will be in 2 pages with that.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);


/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */
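
/*
 * Three forms of the option are accepted, for example (values here are
 * only illustrative):
 *
 *   crashkernel=512M-2G:64M,2G-:128M	range-dependent sizes
 *   crashkernel=128M@16M		fixed size at a fixed offset
 *   crashkernel=256M,high		suffix form (",high"/",low")
 */
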
/*
 * This function parses command lines in the format
 *
 *   crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *   crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *   crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}
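
/*
 * get_last_crashkernel - find the crashkernel= option that should win.
 *
 * If the command line contains several matching options the last one
 * is used.  Options carrying a known ",high"/",low" suffix are skipped
 * when a plain (suffix-less) option is being looked up; when a suffix
 * is requested, only options ending in that suffix are accepted.
 */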
static __init char *get_last_crashkernel(char *cmdline,
					 const char *name,
					 const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	if (!ck_cmdline)
		return NULL;

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
				      unsigned long long system_ram,
				      unsigned long long *crash_size,
				      unsigned long long *crash_base,
				      const char *name,
				      const char *suffix)
{
	char *first_colon, *first_space;
	char *ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
				suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
					"crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
				  unsigned long long system_ram,
				  unsigned long long *crash_size,
				  unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
				 unsigned long long system_ram,
				 unsigned long long *crash_size,
				 unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}
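
/*
 * vmcoreinfo: the symbols, structure sizes, member offsets and constants
 * registered below are packaged into an ELF note so that user space
 * dump tools can interpret the memory image of the crashed kernel.
 */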
static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

unsigned long __weak paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLBFS
	VMCOREINFO_SYMBOL(free_huge_page);
#endif

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end().  We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case).  However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}

/*
 * Add and remove page tables for crashkernel memory
 *
 * Provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak crash_map_reserved_pages(void)
{}

void __weak crash_unmap_reserved_pages(void)
{}