/*
 * kexec.c - kexec system call core code.
 * Copyright (C) 2002-2004 Eric Biederman  <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/hash.h>
#include <crypto/sha.h>
#include "kexec_internal.h"

DEFINE_MUTEX(kexec_mutex);

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;


/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
	.desc  = IORES_DESC_CRASH_KERNEL
};
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
	.desc  = IORES_DESC_CRASH_KERNEL
};

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in do_exit() path, each of which
	 * corresponds to each of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

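/*
 * An illustrative sketch of the descriptor list described above: the
 * stream of kimage_entry_t values built up by kimage_add_entry() reads
 * roughly as
 *
 *	dst  | IND_DESTINATION	- set the running destination address
 *	src0 | IND_SOURCE	- copy this page to dst
 *	src1 | IND_SOURCE	- copy this page to dst + PAGE_SIZE
 *	next | IND_INDIRECTION	- continue reading entries at "next"
 *	...
 *	IND_DONE		- end of the list
 *
 * Each IND_SOURCE entry advances the implied destination by PAGE_SIZE;
 * kimage_dst_used() below walks the list with exactly this assumption.
 */
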
/*
 * KIMAGE_NO_DEST is an impossible destination address..., for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static struct page *kimage_alloc_page(struct kimage *image,
				       gfp_t gfp_mask,
				       unsigned long dest);

int sanity_check_segment_list(struct kimage *image)
{
	int result, i;
	unsigned long nr_segments = image->nr_segments;

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			return result;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			return result;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stops on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;

			pstart = image->segment[j].mem;
			pend   = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				return result;
		}
	}

	/* Ensure our buffer sizes do not exceed
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			return result;
	}

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of RAM.  We must ensure the addresses
	 * are in the reserved area otherwise preloading the
	 * kernel could corrupt things.
	 */

	if (image->type == KEXEC_TYPE_CRASH) {
		result = -EADDRNOTAVAIL;
		for (i = 0; i < nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			/* Ensure we are within the crash kernel limits */
			if ((mstart < crashk_res.start) ||
			    (mend > crashk_res.end))
				return result;
		}
	}

	return 0;
}

struct kimage *do_kimage_alloc_init(void)
{
	struct kimage *image;

	/* Allocate a controlling structure */
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		return NULL;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unusable_pages);

	return image;
}

int kimage_is_destination_range(struct kimage *image,
					unsigned long start,
					unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;

		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

void kimage_free_page_list(struct list_head *list)
{
	struct page *page, *next;

	list_for_each_entry_safe(page, next, list, lru) {
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
		if (!pages)
			break;
		pfn   = page_to_pfn(pages);
		epfn  = pfn + count;
		addr  = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end   = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend   = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end   = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			image->control_page = hole_end;
			break;
		}
	}

	return pages;
}


struct page *kimage_alloc_control_pages(struct kimage *image,
					 unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				   unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);

}
void kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;
}

#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		} else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);

	/*
	 * Free up any temporary buffers allocated.  This might hit if
	 * an error occurred much later after buffer allocation.
	 */
	if (image->file_mode)
		kimage_file_post_load_cleanup(image);

	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
					unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		/* Place the page on the destination list, to be used later */
		list_add(&page->lru, &image->dest_pages);
	}

	return page;
}

static int kimage_load_normal_segment(struct kimage *image,
					 struct kexec_segment *segment)
{
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);
		if (mchunk > uchunk) {
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;

/*
 * No panic_cpu check version of crash_kexec().  This function is called
 * only when panic_cpu holds the current CPU number; this is the only CPU
 * which processes crash_kexec routines.
 */
void __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other.  To exclude them, we use panic_cpu here too.
	 */
	this_cpu = raw_smp_processor_id();
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
	if (old_cpu == PANIC_CPU_INVALID) {
		/* This is the 1st CPU which comes here, so go ahead. */
		printk_nmi_flush_on_panic();
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;

	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}

int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;
	unsigned long old_size;
	struct resource *ram_res;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;
	old_size = (end == 0) ? 0 : end - start + 1;
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res) {
		ret = -ENOMEM;
		goto unlock;
	}

	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);

	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);

	ram_res->start = end;
	ram_res->end = crashk_res.end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
	ram_res->name = "System RAM";

	crashk_res.end = end - 1;

	insert_resource(&iomem_resource, ram_res);

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}

static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}
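
/*
 * Note layout produced by the two helpers above: append_elf_note()
 * writes the elf_note header (n_namesz, n_descsz, n_type) followed by
 * the name and then the descriptor data, each padded up to a 4-byte
 * boundary, and final_note() terminates the note buffer with an
 * all-zero header.
 */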

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based.  vmalloc doesn't guarantee that 2 contiguous
	 * vmalloc pages are also on 2 contiguous physical pages.  In this
	 * case the 2nd part of crash_notes in the 2nd page could be lost
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs.  Here round up the size of crash_notes
	 * to the nearest power of two and pass it to __alloc_percpu as
	 * the align value.  This can make sure crash_notes is allocated
	 * inside one physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break compile if size is bigger than PAGE_SIZE since crash_notes
	 * definitely will be in 2 pages with that.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);


/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */


/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}
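
/*
 * Example of the extended syntax handled above (illustrative values):
 * with "crashkernel=512M-2G:64M,2G-:128M@16M" a system whose RAM size
 * falls in the 512M-2G range reserves 64M, a system with 2G or more
 * reserves 128M, and the optional "@16M" asks for the reservation to
 * start at physical address 16M.
 */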

/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

static __init char *get_last_crashkernel(char *cmdline,
			     const char *name,
			     const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	if (!ck_cmdline)
		return NULL;

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base,
			     const char *name,
			     const char *suffix)
{
	char	*first_colon, *first_space;
	char	*ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
				suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}
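
/*
 * Typical arch-side use of the parse_crashkernel*() entry points below
 * (an illustrative sketch only; the exact reservation step differs per
 * architecture):
 *
 *	unsigned long long crash_size, crash_base;
 *	int ret;
 *
 *	ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 *				&crash_size, &crash_base);
 *	if (ret == 0 && crash_size > 0) {
 *		// reserve [crash_base, crash_base + crash_size) and
 *		// update crashk_res.start/end accordingly
 *	}
 */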

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
					"crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}

static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

unsigned long __weak paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}
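
/*
 * The vmcoreinfo data filled in below is a flat list of "KEY=value\n"
 * strings; illustrative (made-up) sample entries:
 *
 *	OSRELEASE=4.7.0
 *	PAGESIZE=4096
 *	SYMBOL(init_uts_ns)=ffffffff81a0e3c0
 *	OFFSET(page.flags)=0
 *	CRASHTIME=1466000000
 *
 * Dump tools such as makedumpfile locate this data through the note
 * whose physical address paddr_vmcoreinfo_note() returns.
 */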

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _refcount);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(page, compound_dtor);
	VMCOREINFO_OFFSET(page, compound_order);
	VMCOREINFO_OFFSET(page, compound_head);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLB_PAGE
	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
#endif

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end().  We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case).  However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}

/*
 * Protection mechanism for crashkernel reserved memory after
 * the kdump kernel is loaded.
 *
 * Provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_kexec_protect_crashkres(void)
{}

void __weak arch_kexec_unprotect_crashkres(void)
{}