// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2012, Microsoft Corporation.
 *
 * Author:
 *   K. Y. Srinivasan <kys@microsoft.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/mman.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/completion.h>
#include <linux/count_zeros.h>
#include <linux/memory_hotplug.h>
#include <linux/memory.h>
#include <linux/notifier.h>
#include <linux/percpu_counter.h>
#include <linux/page_reporting.h>

#include <linux/hyperv.h>
#include <asm/hyperv-tlfs.h>

#include <asm/mshyperv.h>

#define CREATE_TRACE_POINTS
#include "hv_trace_balloon.h"

/*
 * We begin with definitions supporting the Dynamic Memory protocol
 * with the host.
 *
 * Begin protocol definitions.
 */


/*
 * Protocol versions. The low word is the minor version, the high word the
 * major version.
 *
 * History:
 *	Initial version 1.0
 *	Changed to 0.1 on 2009/03/25
 *	Changed to 0.2 on 2009/05/14
 *	Changed to 0.3 on 2009/12/03
 *	Changed to 1.0 on 2011/04/05
 */

#define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor)))
#define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16)
#define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff)

enum {
	DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3),
	DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0),
	DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0),

	DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1,
	DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2,
	DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3,

	DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10
};


/*
 * Message Types
 */

enum dm_message_type {
	/*
	 * Version 0.3
	 */
	DM_ERROR = 0,
	DM_VERSION_REQUEST = 1,
	DM_VERSION_RESPONSE = 2,
	DM_CAPABILITIES_REPORT = 3,
	DM_CAPABILITIES_RESPONSE = 4,
	DM_STATUS_REPORT = 5,
	DM_BALLOON_REQUEST = 6,
	DM_BALLOON_RESPONSE = 7,
	DM_UNBALLOON_REQUEST = 8,
	DM_UNBALLOON_RESPONSE = 9,
	DM_MEM_HOT_ADD_REQUEST = 10,
	DM_MEM_HOT_ADD_RESPONSE = 11,
	DM_VERSION_03_MAX = 11,
	/*
	 * Version 1.0.
	 */
	DM_INFO_MESSAGE = 12,
	DM_VERSION_1_MAX = 12
};


/*
 * Structures defining the dynamic memory management
 * protocol.
 */

union dm_version {
	struct {
		__u16 minor_version;
		__u16 major_version;
	};
	__u32 version;
} __packed;


union dm_caps {
	struct {
		__u64 balloon:1;
		__u64 hot_add:1;
		/*
		 * To support guests that may have alignment
		 * limitations on hot-add, the guest can specify
		 * its alignment requirements; a value of n
		 * represents an alignment of 2^n in megabytes.
		 */
		__u64 hot_add_alignment:4;
		__u64 reservedz:58;
	} cap_bits;
	__u64 caps;
} __packed;

union dm_mem_page_range {
	struct {
		/*
		 * The PFN number of the first page in the range.
		 * 40 bits is the architectural limit of a PFN
		 * number for AMD64.
		 */
		__u64 start_page:40;
		/*
		 * The number of pages in the range.
		 */
		__u64 page_cnt:24;
	} finfo;
	__u64 page_range;
} __packed;



/*
 * The header for all dynamic memory messages:
 *
 * type: Type of the message.
 * size: Size of the message in bytes; including the header.
 * trans_id: The guest is responsible for manufacturing this ID.
 */

struct dm_header {
	__u16 type;
	__u16 size;
	__u32 trans_id;
} __packed;

/*
 * A generic message format for dynamic memory.
 * Specific message formats are defined later in the file.
 */

struct dm_message {
	struct dm_header hdr;
	__u8 data[]; /* enclosed message */
} __packed;


/*
 * Specific message types supporting the dynamic memory protocol.
 */

/*
 * Version negotiation message. Sent from the guest to the host.
 * The guest is free to try different versions until the host
 * accepts the version.
 *
 * dm_version: The protocol version requested.
 * is_last_attempt: If TRUE, this is the last version the guest will request.
 * reservedz: Reserved field, set to zero.
 */

struct dm_version_request {
	struct dm_header hdr;
	union dm_version version;
	__u32 is_last_attempt:1;
	__u32 reservedz:31;
} __packed;

/*
 * Version response message; sent from the host to the guest and
 * indicates whether the host has accepted the version sent by the guest.
 *
 * is_accepted: If TRUE, host has accepted the version and the guest
 * should proceed to the next stage of the protocol. FALSE indicates that
 * guest should re-try with a different version.
 *
 * reservedz: Reserved field, set to zero.
 */

struct dm_version_response {
	struct dm_header hdr;
	__u64 is_accepted:1;
	__u64 reservedz:63;
} __packed;

/*
 * Message reporting capabilities. This is sent from the guest to the
 * host.
 */

struct dm_capabilities {
	struct dm_header hdr;
	union dm_caps caps;
	__u64 min_page_cnt;
	__u64 max_page_number;
} __packed;

/*
 * Response to the capabilities message. This is sent from the host to the
 * guest. This message notifies if the host has accepted the guest's
 * capabilities. If the host has not accepted, the guest must shut down
 * the service.
 *
 * is_accepted: Indicates if the host has accepted guest's capabilities.
 * reservedz: Must be 0.
 */

struct dm_capabilities_resp_msg {
	struct dm_header hdr;
	__u64 is_accepted:1;
	__u64 reservedz:63;
} __packed;

/*
 * This message is used to report memory pressure from the guest.
 * This message is not part of any transaction and there is no
 * response to this message.
 *
 * num_avail: Available memory in pages.
 * num_committed: Committed memory in pages.
 * page_file_size: The accumulated size of all page files
 *		   in the system in pages.
 * zero_free: The number of zero and free pages.
 * page_file_writes: The writes to the page file in pages.
 * io_diff: An indicator of file cache efficiency or page file activity,
 *	    calculated as File Cache Page Fault Count - Page Read Count.
 *	    This value is in pages.
 *
 * Some of these metrics are Windows specific and fortunately
 * the algorithm on the host side that computes the guest memory
 * pressure only uses num_committed value.
 */

struct dm_status {
	struct dm_header hdr;
	__u64 num_avail;
	__u64 num_committed;
	__u64 page_file_size;
	__u64 zero_free;
	__u32 page_file_writes;
	__u32 io_diff;
} __packed;


/*
 * Message to ask the guest to allocate memory - balloon up message.
 * This message is sent from the host to the guest. The guest may not be
 * able to allocate as much memory as requested.
 *
 * num_pages: number of pages to allocate.
 */

struct dm_balloon {
	struct dm_header hdr;
	__u32 num_pages;
	__u32 reservedz;
} __packed;


/*
 * Balloon response message; this message is sent from the guest
 * to the host in response to the balloon message.
 *
 * reservedz: Reserved; must be set to zero.
 * more_pages: If FALSE, this is the last message of the transaction.
 * If TRUE, there will be at least one more message from the guest.
 *
 * range_count: The number of ranges in the range array.
 *
 * range_array: An array of page ranges returned to the host.
 *
 */

struct dm_balloon_response {
	struct dm_header hdr;
	__u32 reservedz;
	__u32 more_pages:1;
	__u32 range_count:31;
	union dm_mem_page_range range_array[];
} __packed;

/*
 * Un-balloon message; this message is sent from the host
 * to the guest to give the guest more memory.
 *
 * more_pages: If FALSE, this is the last message of the transaction.
 * If TRUE, there will be at least one more message from the guest.
 *
 * reservedz: Reserved; must be set to zero.
 *
 * range_count: The number of ranges in the range array.
 *
 * range_array: An array of page ranges returned to the host.
 *
 */

struct dm_unballoon_request {
	struct dm_header hdr;
	__u32 more_pages:1;
	__u32 reservedz:31;
	__u32 range_count;
	union dm_mem_page_range range_array[];
} __packed;

/*
 * Un-balloon response message; this message is sent from the guest
 * to the host in response to an unballoon request.
 *
 */

struct dm_unballoon_response {
	struct dm_header hdr;
} __packed;


/*
 * Hot add request message. Message sent from the host to the guest.
 *
 * mem_range: Memory range to hot add.
 *
 */

struct dm_hot_add {
	struct dm_header hdr;
	union dm_mem_page_range range;
} __packed;

/*
 * Hot add response message.
 * This message is sent by the guest to report the status of a hot add request.
 * If page_count is less than the requested page count, then the host should
 * assume all further hot add requests will fail, since this indicates that
 * the guest has hit an upper physical memory barrier.
 *
 * Hot adds may also fail due to low resources; in this case, the guest must
 * not complete this message until the hot add can succeed, and the host must
 * not send a new hot add request until the response is sent.
 * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS
 * times it fails the request.
 *
 *
 * page_count: number of pages that were successfully hot added.
 *
 * result: result of the operation 1: success, 0: failure.
 *
 */

struct dm_hot_add_response {
	struct dm_header hdr;
	__u32 page_count;
	__u32 result;
} __packed;

/*
 * Types of information sent from host to the guest.
 */

enum dm_info_type {
	INFO_TYPE_MAX_PAGE_CNT = 0,
	MAX_INFO_TYPE
};


/*
 * Header for the information message.
 */

struct dm_info_header {
	enum dm_info_type type;
	__u32 data_size;
} __packed;

/*
 * This message is sent from the host to the guest to pass
 * some relevant information (win8 addition).
 *
 * reserved: not used.
 * info_size: size of the information blob.
 * info: information blob.
 */

struct dm_info_msg {
	struct dm_header hdr;
	__u32 reserved;
	__u32 info_size;
	__u8  info[];
};

/*
 * End protocol definitions.
 */

/*
 * State to manage hot adding memory into the guest.
 * The range start_pfn : end_pfn specifies the range
 * that the host has asked us to hot add. The range
 * start_pfn : ha_end_pfn specifies the range that we have
 * currently hot added. We hot add in multiples of 128M
 * chunks; it is possible that we may not be able to bring
 * online all the pages in the region. The range
 * covered_start_pfn:covered_end_pfn defines the pages that can
 * be brought online.
 */

struct hv_hotadd_state {
	struct list_head list;
	unsigned long start_pfn;
	unsigned long covered_start_pfn;
	unsigned long covered_end_pfn;
	unsigned long ha_end_pfn;
	unsigned long end_pfn;
	/*
	 * A list of gaps.
	 */
	struct list_head gap_list;
};

struct hv_hotadd_gap {
	struct list_head list;
	unsigned long start_pfn;
	unsigned long end_pfn;
};

struct balloon_state {
	__u32 num_pages;
	struct work_struct wrk;
};

struct hot_add_wrk {
	union dm_mem_page_range ha_page_range;
	union dm_mem_page_range ha_region_range;
	struct work_struct wrk;
};

static bool allow_hibernation;
static bool hot_add = true;
static bool do_hot_add;
/*
 * Delay reporting memory pressure by
 * the specified number of seconds.
 */
static uint pressure_report_delay = 45;
extern unsigned int page_reporting_order;
#define HV_MAX_FAILURES	2

/*
 * The last time we posted a pressure report to host.
 */
static unsigned long last_post_time;

static int hv_hypercall_multi_failure;

module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");

module_param(pressure_report_delay, uint, (S_IRUGO | S_IWUSR));
MODULE_PARM_DESC(pressure_report_delay, "Delay in secs in reporting pressure");
static atomic_t trans_id = ATOMIC_INIT(0);

static int dm_ring_size = VMBUS_RING_SIZE(16 * 1024);

/*
 * Driver specific state.
 */

enum hv_dm_state {
	DM_INITIALIZING = 0,
	DM_INITIALIZED,
	DM_BALLOON_UP,
	DM_BALLOON_DOWN,
	DM_HOT_ADD,
	DM_INIT_ERROR
};


static __u8 recv_buffer[HV_HYP_PAGE_SIZE];
static __u8 balloon_up_send_buffer[HV_HYP_PAGE_SIZE];
#define PAGES_IN_2M (2 * 1024 * 1024 / PAGE_SIZE)
#define HA_CHUNK (128 * 1024 * 1024 / PAGE_SIZE)

struct hv_dynmem_device {
	struct hv_device *dev;
	enum hv_dm_state state;
	struct completion host_event;
	struct completion config_event;

	/*
	 * Number of pages we have currently ballooned out.
	 */
	unsigned int num_pages_ballooned;
	unsigned int num_pages_onlined;
	unsigned int num_pages_added;

	/*
	 * State to manage the ballooning (up) operation.
	 */
	struct balloon_state balloon_wrk;

	/*
	 * State to execute the "hot-add" operation.
	 */
	struct hot_add_wrk ha_wrk;

	/*
	 * This state tracks if the host has specified a hot-add
	 * region.
	 */
	bool host_specified_ha_region;

	/*
	 * State to synchronize hot-add.
	 */
	struct completion ol_waitevent;
	/*
	 * This thread handles hot-add
	 * requests from the host as well as notifying
	 * the host with regards to memory pressure in
	 * the guest.
	 */
	struct task_struct *thread;

	/*
	 * Protects ha_region_list, num_pages_onlined counter and individual
	 * regions from ha_region_list.
	 */
	spinlock_t ha_lock;

	/*
	 * A list of hot-add regions.
	 */
	struct list_head ha_region_list;

	/*
	 * We start with the highest version we can support
	 * and downgrade based on the host; we save here the
	 * next version to try.
	 */
	__u32 next_version;

	/*
	 * The negotiated version agreed by host.
	 */
	__u32 version;

	struct page_reporting_dev_info pr_dev_info;

	/*
	 * Maximum number of pages that can be hot_add-ed
	 */
	__u64 max_dynamic_page_count;
};

static struct hv_dynmem_device dm_device;

static void post_status(struct hv_dynmem_device *dm);

static void enable_page_reporting(void);

static void disable_page_reporting(void);

#ifdef CONFIG_MEMORY_HOTPLUG
static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
				     unsigned long pfn)
{
	struct hv_hotadd_gap *gap;

	/* The page is not backed. */
	if ((pfn < has->covered_start_pfn) || (pfn >= has->covered_end_pfn))
		return false;

	/* Check for gaps. */
	list_for_each_entry(gap, &has->gap_list, list) {
		if ((pfn >= gap->start_pfn) && (pfn < gap->end_pfn))
			return false;
	}

	return true;
}

static unsigned long hv_page_offline_check(unsigned long start_pfn,
					   unsigned long nr_pages)
{
	unsigned long pfn = start_pfn, count = 0;
	struct hv_hotadd_state *has;
	bool found;

	while (pfn < start_pfn + nr_pages) {
		/*
		 * Search for HAS which covers the pfn and when we find one
		 * count how many consecutive PFNs are covered.
		 */
		found = false;
		list_for_each_entry(has, &dm_device.ha_region_list, list) {
			while ((pfn >= has->start_pfn) &&
			       (pfn < has->end_pfn) &&
			       (pfn < start_pfn + nr_pages)) {
				found = true;
				if (has_pfn_is_backed(has, pfn))
					count++;
				pfn++;
			}
		}

		/*
		 * This PFN is not in any HAS (e.g. we're offlining a region
		 * which was present at boot), no need to account for it. Go
		 * to the next one.
		 */
		if (!found)
			pfn++;
	}

	return count;
}

static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
			      void *v)
{
	struct memory_notify *mem = (struct memory_notify *)v;
	unsigned long flags, pfn_count;

	switch (val) {
	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		complete(&dm_device.ol_waitevent);
		break;

	case MEM_OFFLINE:
		spin_lock_irqsave(&dm_device.ha_lock, flags);
		pfn_count = hv_page_offline_check(mem->start_pfn,
						  mem->nr_pages);
		if (pfn_count <= dm_device.num_pages_onlined) {
			dm_device.num_pages_onlined -= pfn_count;
		} else {
			/*
			 * We're offlining more pages than we managed to online.
			 * This is unexpected. In any case don't let
			 * num_pages_onlined wrap around zero.
			 */
			WARN_ON_ONCE(1);
			dm_device.num_pages_onlined = 0;
		}
		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
		break;
	case MEM_GOING_ONLINE:
	case MEM_GOING_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block hv_memory_nb = {
	.notifier_call = hv_memory_notifier,
	.priority = 0
};

/* Check if the particular page is backed and can be onlined and online it. */
static void hv_page_online_one(struct hv_hotadd_state *has, struct page *pg)
{
	if (!has_pfn_is_backed(has, page_to_pfn(pg))) {
		if (!PageOffline(pg))
			__SetPageOffline(pg);
		return;
	}
	if (PageOffline(pg))
		__ClearPageOffline(pg);

	/* This frame is currently backed; online the page. */
	generic_online_page(pg, 0);

	lockdep_assert_held(&dm_device.ha_lock);
	dm_device.num_pages_onlined++;
}

static void hv_bring_pgs_online(struct hv_hotadd_state *has,
				unsigned long start_pfn, unsigned long size)
{
	int i;

	pr_debug("Online %lu pages starting at pfn 0x%lx\n", size, start_pfn);
	for (i = 0; i < size; i++)
		hv_page_online_one(has, pfn_to_page(start_pfn + i));
}

static void hv_mem_hot_add(unsigned long start, unsigned long size,
			   unsigned long pfn_count,
			   struct hv_hotadd_state *has)
{
	int ret = 0;
	int i, nid;
	unsigned long start_pfn;
	unsigned long processed_pfn;
	unsigned long total_pfn = pfn_count;
	unsigned long flags;

	for (i = 0; i < (size/HA_CHUNK); i++) {
		start_pfn = start + (i * HA_CHUNK);

		spin_lock_irqsave(&dm_device.ha_lock, flags);
		has->ha_end_pfn += HA_CHUNK;

		if (total_pfn > HA_CHUNK) {
			processed_pfn = HA_CHUNK;
			total_pfn -= HA_CHUNK;
		} else {
			processed_pfn = total_pfn;
			total_pfn = 0;
		}

		has->covered_end_pfn += processed_pfn;
		spin_unlock_irqrestore(&dm_device.ha_lock, flags);

		reinit_completion(&dm_device.ol_waitevent);

		nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
		ret = add_memory(nid, PFN_PHYS((start_pfn)),
				(HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);

		if (ret) {
			pr_err("hot_add memory failed error is %d\n", ret);
			if (ret == -EEXIST) {
				/*
				 * This error indicates that the failure
				 * is not a transient one. This is the
				 * case where the guest's physical address map
				 * precludes hot adding memory. Stop all further
				 * memory hot-add.
				 */
				do_hot_add = false;
			}
			spin_lock_irqsave(&dm_device.ha_lock, flags);
			has->ha_end_pfn -= HA_CHUNK;
			has->covered_end_pfn -= processed_pfn;
			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
			break;
		}

		/*
		 * Wait for memory to get onlined. If the kernel onlined the
		 * memory when adding it, this will return directly. Otherwise,
		 * it will wait for user space to online the memory. This helps
		 * to avoid adding memory faster than it is getting onlined. As
		 * adding succeeded, it is ok to proceed even if the memory was
		 * not onlined in time.
		 */
		wait_for_completion_timeout(&dm_device.ol_waitevent, 5 * HZ);
		post_status(&dm_device);
	}
}

static void hv_online_page(struct page *pg, unsigned int order)
{
	struct hv_hotadd_state *has;
	unsigned long flags;
	unsigned long pfn = page_to_pfn(pg);

	spin_lock_irqsave(&dm_device.ha_lock, flags);
	list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/* The page belongs to a different HAS. */
		if ((pfn < has->start_pfn) ||
		    (pfn + (1UL << order) > has->end_pfn))
			continue;

		hv_bring_pgs_online(has, pfn, 1UL << order);
		break;
	}
	spin_unlock_irqrestore(&dm_device.ha_lock, flags);
}

static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
{
	struct hv_hotadd_state *has;
	struct hv_hotadd_gap *gap;
	unsigned long residual, new_inc;
	int ret = 0;
	unsigned long flags;

	spin_lock_irqsave(&dm_device.ha_lock, flags);
	list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/*
		 * If the pfn range we are dealing with is not in the current
		 * "hot add block", move on.
		 */
		if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
			continue;

		/*
		 * If the current start pfn is not where the covered_end
		 * is, create a gap and update covered_end_pfn.
		 */
		if (has->covered_end_pfn != start_pfn) {
			gap = kzalloc(sizeof(struct hv_hotadd_gap), GFP_ATOMIC);
			if (!gap) {
				ret = -ENOMEM;
				break;
			}

			INIT_LIST_HEAD(&gap->list);
			gap->start_pfn = has->covered_end_pfn;
			gap->end_pfn = start_pfn;
			list_add_tail(&gap->list, &has->gap_list);

			has->covered_end_pfn = start_pfn;
		}

		/*
		 * If the current hot-add request extends beyond
		 * our current limit; extend it.
		 */
		if ((start_pfn + pfn_cnt) > has->end_pfn) {
			residual = (start_pfn + pfn_cnt - has->end_pfn);
			/*
			 * Extend the region by multiples of HA_CHUNK.
			 */
			new_inc = (residual / HA_CHUNK) * HA_CHUNK;
			if (residual % HA_CHUNK)
				new_inc += HA_CHUNK;

			has->end_pfn += new_inc;
		}

		ret = 1;
		break;
	}
	spin_unlock_irqrestore(&dm_device.ha_lock, flags);

	return ret;
}

static unsigned long handle_pg_range(unsigned long pg_start,
				     unsigned long pg_count)
{
	unsigned long start_pfn = pg_start;
	unsigned long pfn_cnt = pg_count;
	unsigned long size;
	struct hv_hotadd_state *has;
	unsigned long pgs_ol = 0;
	unsigned long old_covered_state;
	unsigned long res = 0, flags;

	pr_debug("Hot adding %lu pages starting at pfn 0x%lx.\n", pg_count,
		 pg_start);

	spin_lock_irqsave(&dm_device.ha_lock, flags);
	list_for_each_entry(has, &dm_device.ha_region_list, list) {
		/*
		 * If the pfn range we are dealing with is not in the current
		 * "hot add block", move on.
		 */
		if (start_pfn < has->start_pfn || start_pfn >= has->end_pfn)
			continue;

		old_covered_state = has->covered_end_pfn;

		if (start_pfn < has->ha_end_pfn) {
			/*
			 * This is the case where we are backing pages
			 * in an already hot added region. Bring
			 * these pages online first.
			 */
			pgs_ol = has->ha_end_pfn - start_pfn;
			if (pgs_ol > pfn_cnt)
				pgs_ol = pfn_cnt;

			has->covered_end_pfn += pgs_ol;
			pfn_cnt -= pgs_ol;
			/*
			 * Check if the corresponding memory block is already
			 * online. It is possible to observe struct pages still
			 * being uninitialized here so check section instead.
			 * In case the section is online we need to bring the
			 * rest of pfns (which were not backed previously)
			 * online too.
			 */
			if (start_pfn > has->start_pfn &&
			    online_section_nr(pfn_to_section_nr(start_pfn)))
				hv_bring_pgs_online(has, start_pfn, pgs_ol);

		}

		if ((has->ha_end_pfn < has->end_pfn) && (pfn_cnt > 0)) {
			/*
			 * We have some residual hot add range
			 * that needs to be hot added; hot add
			 * it now. Hot add a multiple of
			 * HA_CHUNK that fully covers the pages
			 * we have.
			 */
			size = (has->end_pfn - has->ha_end_pfn);
			if (pfn_cnt <= size) {
				size = ((pfn_cnt / HA_CHUNK) * HA_CHUNK);
				if (pfn_cnt % HA_CHUNK)
					size += HA_CHUNK;
			} else {
				pfn_cnt = size;
			}
			spin_unlock_irqrestore(&dm_device.ha_lock, flags);
			hv_mem_hot_add(has->ha_end_pfn, size, pfn_cnt, has);
			spin_lock_irqsave(&dm_device.ha_lock, flags);
		}
		/*
		 * If we managed to online any pages that were given to us,
		 * we declare success.
		 */
		res = has->covered_end_pfn - old_covered_state;
		break;
	}
	spin_unlock_irqrestore(&dm_device.ha_lock, flags);

	return res;
}

static unsigned long process_hot_add(unsigned long pg_start,
				     unsigned long pfn_cnt,
				     unsigned long rg_start,
				     unsigned long rg_size)
{
	struct hv_hotadd_state *ha_region = NULL;
	int covered;
	unsigned long flags;

	if (pfn_cnt == 0)
		return 0;

	if (!dm_device.host_specified_ha_region) {
		covered = pfn_covered(pg_start, pfn_cnt);
		if (covered < 0)
			return 0;

		if (covered)
			goto do_pg_range;
	}

	/*
	 * If the host has specified a hot-add range; deal with it first.
	 */

	if (rg_size != 0) {
		ha_region = kzalloc(sizeof(struct hv_hotadd_state), GFP_KERNEL);
		if (!ha_region)
			return 0;

		INIT_LIST_HEAD(&ha_region->list);
		INIT_LIST_HEAD(&ha_region->gap_list);

		ha_region->start_pfn = rg_start;
		ha_region->ha_end_pfn = rg_start;
		ha_region->covered_start_pfn = pg_start;
		ha_region->covered_end_pfn = pg_start;
		ha_region->end_pfn = rg_start + rg_size;

		spin_lock_irqsave(&dm_device.ha_lock, flags);
		list_add_tail(&ha_region->list, &dm_device.ha_region_list);
		spin_unlock_irqrestore(&dm_device.ha_lock, flags);
	}

do_pg_range:
	/*
	 * Process the page range specified; bringing them
	 * online if possible.
	 */
	return handle_pg_range(pg_start, pfn_cnt);
}

#endif

static void hot_add_req(struct work_struct *dummy)
{
	struct dm_hot_add_response resp;
#ifdef CONFIG_MEMORY_HOTPLUG
	unsigned long pg_start, pfn_cnt;
	unsigned long rg_start, rg_sz;
#endif
	struct hv_dynmem_device *dm = &dm_device;

	memset(&resp, 0, sizeof(struct dm_hot_add_response));
	resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE;
	resp.hdr.size = sizeof(struct dm_hot_add_response);

#ifdef CONFIG_MEMORY_HOTPLUG
	pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
	pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;

	rg_start = dm->ha_wrk.ha_region_range.finfo.start_page;
	rg_sz = dm->ha_wrk.ha_region_range.finfo.page_cnt;

	if ((rg_start == 0) && (!dm->host_specified_ha_region)) {
		unsigned long region_size;
		unsigned long region_start;

		/*
		 * The host has not specified the hot-add region.
		 * Based on the hot-add page range being specified,
		 * compute a hot-add region that can cover the pages
		 * that need to be hot-added while ensuring the alignment
		 * and size requirements of Linux as it relates to hot-add.
		 */
		region_size = (pfn_cnt / HA_CHUNK) * HA_CHUNK;
		if (pfn_cnt % HA_CHUNK)
			region_size += HA_CHUNK;

		region_start = (pg_start / HA_CHUNK) * HA_CHUNK;

		rg_start = region_start;
		rg_sz = region_size;
	}

	if (do_hot_add)
		resp.page_count = process_hot_add(pg_start, pfn_cnt,
						  rg_start, rg_sz);

	dm->num_pages_added += resp.page_count;
#endif
	/*
	 * The result field of the response structure has the
	 * following semantics:
	 *
	 * 1. If all or some pages hot-added: Guest should return success.
	 *
	 * 2. If no pages could be hot-added:
	 *
	 * If the guest returns success, then the host
	 * will not attempt any further hot-add operations. This
	 * signifies a permanent failure.
	 *
	 * If the guest returns failure, then this failure will be
	 * treated as a transient failure and the host may retry the
	 * hot-add operation after some delay.
	 */
	if (resp.page_count > 0)
		resp.result = 1;
	else if (!do_hot_add)
		resp.result = 1;
	else
		resp.result = 0;

	if (!do_hot_add || resp.page_count == 0) {
		if (!allow_hibernation)
			pr_err("Memory hot add failed\n");
		else
			pr_info("Ignore hot-add request!\n");
	}

	dm->state = DM_INITIALIZED;
	resp.hdr.trans_id = atomic_inc_return(&trans_id);
	vmbus_sendpacket(dm->dev->channel, &resp,
			 sizeof(struct dm_hot_add_response),
			 (unsigned long)NULL,
			 VM_PKT_DATA_INBAND, 0);
}

static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg)
{
	struct dm_info_header *info_hdr;

	info_hdr = (struct dm_info_header *)msg->info;

	switch (info_hdr->type) {
	case INFO_TYPE_MAX_PAGE_CNT:
		if (info_hdr->data_size == sizeof(__u64)) {
			__u64 *max_page_count = (__u64 *)&info_hdr[1];

			pr_info("Max. dynamic memory size: %llu MB\n",
				(*max_page_count) >> (20 - HV_HYP_PAGE_SHIFT));
			dm->max_dynamic_page_count = *max_page_count;
		}

		break;
	default:
		pr_warn("Received Unknown type: %d\n", info_hdr->type);
	}
}

static unsigned long compute_balloon_floor(void)
{
	unsigned long min_pages;
	unsigned long nr_pages = totalram_pages();
#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
	/* Simple continuous piecewise linear function:
	 *  max MiB -> min MiB	gradient
	 *       0	   0
	 *      16	  16
	 *      32	  24
	 *     128	  72	(1/2)
	 *     512	 168	(1/4)
	 *    2048	 360	(1/8)
	 *    8192	 744	(1/16)
	 *   32768	1512	(1/32)
	 */
	if (nr_pages < MB2PAGES(128))
		min_pages = MB2PAGES(8) + (nr_pages >> 1);
	else if (nr_pages < MB2PAGES(512))
		min_pages = MB2PAGES(40) + (nr_pages >> 2);
	else if (nr_pages < MB2PAGES(2048))
		min_pages = MB2PAGES(104) + (nr_pages >> 3);
	else if (nr_pages < MB2PAGES(8192))
		min_pages = MB2PAGES(232) + (nr_pages >> 4);
	else
		min_pages = MB2PAGES(488) + (nr_pages >> 5);
#undef MB2PAGES
	return min_pages;
}

/*
 * Compute total committed memory pages
 */

static unsigned long get_pages_committed(struct hv_dynmem_device *dm)
{
	return vm_memory_committed() +
		dm->num_pages_ballooned +
		(dm->num_pages_added > dm->num_pages_onlined ?
		 dm->num_pages_added - dm->num_pages_onlined : 0) +
		compute_balloon_floor();
}

/*
 * Post our status as it relates to memory pressure to the
 * host. Host expects the guests to post this status
 * periodically at 1 second intervals.
 *
 * The metrics specified in this protocol are very Windows
 * specific and so we cook up numbers here to convey our memory
 * pressure.
 */

static void post_status(struct hv_dynmem_device *dm)
{
	struct dm_status status;
	unsigned long now = jiffies;
	unsigned long last_post = last_post_time;
	unsigned long num_pages_avail, num_pages_committed;

	if (pressure_report_delay > 0) {
		--pressure_report_delay;
		return;
	}

	if (!time_after(now, (last_post_time + HZ)))
		return;

	memset(&status, 0, sizeof(struct dm_status));
	status.hdr.type = DM_STATUS_REPORT;
	status.hdr.size = sizeof(struct dm_status);
	status.hdr.trans_id = atomic_inc_return(&trans_id);

	/*
	 * The host expects the guest to report free and committed memory.
	 * Furthermore, the host expects the pressure information to include
	 * the ballooned out pages. For a given amount of memory that we are
	 * managing we need to compute a floor below which we should not
	 * balloon. Compute this and add it to the pressure report.
	 * We also need to report all offline pages (num_pages_added -
	 * num_pages_onlined) as committed to the host, otherwise it can try
	 * asking us to balloon them out.
	 */
	num_pages_avail = si_mem_available();
	num_pages_committed = get_pages_committed(dm);

	trace_balloon_status(num_pages_avail, num_pages_committed,
			     vm_memory_committed(), dm->num_pages_ballooned,
			     dm->num_pages_added, dm->num_pages_onlined);

	/* Convert numbers of pages into numbers of HV_HYP_PAGEs. */
	status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;
	status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE;

	/*
	 * If our transaction ID is no longer current, just don't
	 * send the status. This can happen if we were interrupted
	 * after we picked our transaction ID.
	 */
	if (status.hdr.trans_id != atomic_read(&trans_id))
		return;

	/*
	 * If the last post time that we sampled has changed,
	 * we have raced, don't post the status.
	 */
	if (last_post != last_post_time)
		return;

	last_post_time = jiffies;
	vmbus_sendpacket(dm->dev->channel, &status,
			 sizeof(struct dm_status),
			 (unsigned long)NULL,
			 VM_PKT_DATA_INBAND, 0);

}

static void free_balloon_pages(struct hv_dynmem_device *dm,
			       union dm_mem_page_range *range_array)
{
	int num_pages = range_array->finfo.page_cnt;
	__u64 start_frame = range_array->finfo.start_page;
	struct page *pg;
	int i;

	for (i = 0; i < num_pages; i++) {
		pg = pfn_to_page(i + start_frame);
		__ClearPageOffline(pg);
		__free_page(pg);
		dm->num_pages_ballooned--;
		adjust_managed_page_count(pg, 1);
	}
}



static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
					unsigned int num_pages,
					struct dm_balloon_response *bl_resp,
					int alloc_unit)
{
	unsigned int i, j;
	struct page *pg;

	for (i = 0; i < num_pages / alloc_unit; i++) {
		if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
			HV_HYP_PAGE_SIZE)
			return i * alloc_unit;

		/*
		 * We execute this code in a thread context. Furthermore,
		 * we don't want the kernel to try too hard.
		 */
		pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
				__GFP_NOMEMALLOC | __GFP_NOWARN,
				get_order(alloc_unit << PAGE_SHIFT));

		if (!pg)
			return i * alloc_unit;

		dm->num_pages_ballooned += alloc_unit;

		/*
		 * If we allocated 2M pages; split them so we
		 * can free them in any order we get.
		 */

		if (alloc_unit != 1)
			split_page(pg, get_order(alloc_unit << PAGE_SHIFT));

		/* mark all pages offline */
		for (j = 0; j < alloc_unit; j++) {
			__SetPageOffline(pg + j);
			adjust_managed_page_count(pg + j, -1);
		}

		bl_resp->range_count++;
		bl_resp->range_array[i].finfo.start_page =
			page_to_pfn(pg);
		bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
		bl_resp->hdr.size += sizeof(union dm_mem_page_range);

	}

	return i * alloc_unit;
}

static void balloon_up(struct work_struct *dummy)
{
	unsigned int num_pages = dm_device.balloon_wrk.num_pages;
	unsigned int num_ballooned = 0;
	struct dm_balloon_response *bl_resp;
	int alloc_unit;
	int ret;
	bool done = false;
	int i;
	long avail_pages;
	unsigned long floor;

	/*
	 * We will attempt 2M allocations. However, if we fail to
	 * allocate 2M chunks, we will go back to PAGE_SIZE allocations.
	 */
	alloc_unit = PAGES_IN_2M;

	avail_pages = si_mem_available();
	floor = compute_balloon_floor();

	/* Refuse to balloon below the floor. */
	if (avail_pages < num_pages || avail_pages - num_pages < floor) {
		pr_info("Balloon request will be partially fulfilled. %s\n",
			avail_pages < num_pages ? "Not enough memory." :
			"Balloon floor reached.");

		num_pages = avail_pages > floor ? (avail_pages - floor) : 0;
	}

	while (!done) {
		memset(balloon_up_send_buffer, 0, HV_HYP_PAGE_SIZE);
		bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
		bl_resp->hdr.type = DM_BALLOON_RESPONSE;
		bl_resp->hdr.size = sizeof(struct dm_balloon_response);
		bl_resp->more_pages = 1;

		num_pages -= num_ballooned;
		num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
						    bl_resp, alloc_unit);

		if (alloc_unit != 1 && num_ballooned == 0) {
			alloc_unit = 1;
			continue;
		}

		if (num_ballooned == 0 || num_ballooned == num_pages) {
			pr_debug("Ballooned %u out of %u requested pages.\n",
				 num_pages, dm_device.balloon_wrk.num_pages);

			bl_resp->more_pages = 0;
			done = true;
			dm_device.state = DM_INITIALIZED;
		}

		/*
		 * We are pushing a lot of data through the channel;
		 * deal with transient failures caused by the lack of
		 * space in the ring buffer.
		 */

		do {
			bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
			ret = vmbus_sendpacket(dm_device.dev->channel,
					       bl_resp,
					       bl_resp->hdr.size,
					       (unsigned long)NULL,
					       VM_PKT_DATA_INBAND, 0);

			if (ret == -EAGAIN)
				msleep(20);
			post_status(&dm_device);
		} while (ret == -EAGAIN);

		if (ret) {
			/*
			 * Free up the memory we allocated.
			 */
			pr_err("Balloon response failed\n");

			for (i = 0; i < bl_resp->range_count; i++)
				free_balloon_pages(&dm_device,
						   &bl_resp->range_array[i]);

			done = true;
		}
	}

}

static void balloon_down(struct hv_dynmem_device *dm,
			 struct dm_unballoon_request *req)
{
	union dm_mem_page_range *range_array = req->range_array;
	int range_count = req->range_count;
	struct dm_unballoon_response resp;
	int i;
	unsigned int prev_pages_ballooned = dm->num_pages_ballooned;

	for (i = 0; i < range_count; i++) {
		free_balloon_pages(dm, &range_array[i]);
		complete(&dm_device.config_event);
	}

	pr_debug("Freed %u ballooned pages.\n",
		 prev_pages_ballooned - dm->num_pages_ballooned);

	if (req->more_pages == 1)
		return;

	memset(&resp, 0, sizeof(struct dm_unballoon_response));
	resp.hdr.type = DM_UNBALLOON_RESPONSE;
	resp.hdr.trans_id = atomic_inc_return(&trans_id);
	resp.hdr.size = sizeof(struct dm_unballoon_response);

	vmbus_sendpacket(dm_device.dev->channel, &resp,
			 sizeof(struct dm_unballoon_response),
			 (unsigned long)NULL,
			 VM_PKT_DATA_INBAND, 0);

	dm->state = DM_INITIALIZED;
}

static void balloon_onchannelcallback(void *context);

static int dm_thread_func(void *dm_dev)
{
	struct hv_dynmem_device *dm = dm_dev;

	while (!kthread_should_stop()) {
		wait_for_completion_interruptible_timeout(
						&dm_device.config_event, 1*HZ);
		/*
		 * The host expects us to post information on the memory
		 * pressure every second.
		 */
		reinit_completion(&dm_device.config_event);
		post_status(dm);
		/*
		 * Disable free page reporting if the multiple-hypercall-
		 * failure flag is set. It is not done in the page_reporting
		 * callback context as that causes a deadlock between
		 * page_reporting_process() and page_reporting_unregister().
		 */
		if (hv_hypercall_multi_failure >= HV_MAX_FAILURES) {
			pr_err("Multiple failures in cold memory discard hypercall, disabling page reporting\n");
			disable_page_reporting();
			/* Reset the flag after disabling reporting */
			hv_hypercall_multi_failure = 0;
		}
	}

	return 0;
}


static void version_resp(struct hv_dynmem_device *dm,
			 struct dm_version_response *vresp)
{
	struct dm_version_request version_req;
	int ret;

	if (vresp->is_accepted) {
		/*
		 * We are done; wake up the
		 * context waiting for version
		 * negotiation.
		 */
		complete(&dm->host_event);
		return;
	}
	/*
	 * If there are more versions to try, continue
	 * with negotiations; if not,
	 * shut down the service since we are not able
	 * to negotiate a suitable version number
	 * with the host.
	 */
	if (dm->next_version == 0)
		goto version_error;

	memset(&version_req, 0, sizeof(struct dm_version_request));
	version_req.hdr.type = DM_VERSION_REQUEST;
	version_req.hdr.size = sizeof(struct dm_version_request);
	version_req.hdr.trans_id = atomic_inc_return(&trans_id);
	version_req.version.version = dm->next_version;
	dm->version = version_req.version.version;

	/*
	 * Set the next version to try in case current version fails.
	 * Win7 protocol ought to be the last one to try.
	 */
	switch (version_req.version.version) {
	case DYNMEM_PROTOCOL_VERSION_WIN8:
		dm->next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
		version_req.is_last_attempt = 0;
		break;
	default:
		dm->next_version = 0;
		version_req.is_last_attempt = 1;
	}

	ret = vmbus_sendpacket(dm->dev->channel, &version_req,
			       sizeof(struct dm_version_request),
			       (unsigned long)NULL,
			       VM_PKT_DATA_INBAND, 0);

	if (ret)
		goto version_error;

	return;

version_error:
	dm->state = DM_INIT_ERROR;
	complete(&dm->host_event);
}

static void cap_resp(struct hv_dynmem_device *dm,
		     struct dm_capabilities_resp_msg *cap_resp)
{
	if (!cap_resp->is_accepted) {
		pr_err("Capabilities not accepted by host\n");
		dm->state = DM_INIT_ERROR;
	}
	complete(&dm->host_event);
}

static void balloon_onchannelcallback(void *context)
{
	struct hv_device *dev = context;
	u32 recvlen;
	u64 requestid;
	struct dm_message *dm_msg;
	struct dm_header *dm_hdr;
	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
	struct dm_balloon *bal_msg;
	struct dm_hot_add *ha_msg;
	union dm_mem_page_range *ha_pg_range;
	union dm_mem_page_range *ha_region;

	memset(recv_buffer, 0, sizeof(recv_buffer));
	vmbus_recvpacket(dev->channel, recv_buffer,
			 HV_HYP_PAGE_SIZE, &recvlen, &requestid);

	if (recvlen > 0) {
		dm_msg = (struct dm_message *)recv_buffer;
		dm_hdr = &dm_msg->hdr;

		switch (dm_hdr->type) {
		case DM_VERSION_RESPONSE:
			version_resp(dm,
				     (struct dm_version_response *)dm_msg);
			break;

		case DM_CAPABILITIES_RESPONSE:
			cap_resp(dm,
				 (struct dm_capabilities_resp_msg *)dm_msg);
			break;

		case DM_BALLOON_REQUEST:
			if (allow_hibernation) {
				pr_info("Ignore balloon-up request!\n");
				break;
			}

			if (dm->state == DM_BALLOON_UP)
				pr_warn("Currently ballooning\n");
			bal_msg = (struct dm_balloon *)recv_buffer;
			dm->state = DM_BALLOON_UP;
			dm_device.balloon_wrk.num_pages = bal_msg->num_pages;
			schedule_work(&dm_device.balloon_wrk.wrk);
			break;

		case DM_UNBALLOON_REQUEST:
			if (allow_hibernation) {
				pr_info("Ignore balloon-down request!\n");
				break;
			}

			dm->state = DM_BALLOON_DOWN;
			balloon_down(dm,
				     (struct dm_unballoon_request *)recv_buffer);
			break;

		case DM_MEM_HOT_ADD_REQUEST:
			if (dm->state == DM_HOT_ADD)
				pr_warn("Currently hot-adding\n");
			dm->state = DM_HOT_ADD;
			ha_msg = (struct dm_hot_add *)recv_buffer;
			if (ha_msg->hdr.size == sizeof(struct dm_hot_add)) {
				/*
				 * This is a normal hot-add request specifying
				 * hot-add memory.
				 */
				dm->host_specified_ha_region = false;
				ha_pg_range = &ha_msg->range;
				dm->ha_wrk.ha_page_range = *ha_pg_range;
				dm->ha_wrk.ha_region_range.page_range = 0;
			} else {
				/*
				 * Host is specifying that we first hot-add
				 * a region and then partially populate this
				 * region.
				 */
				dm->host_specified_ha_region = true;
				ha_pg_range = &ha_msg->range;
				ha_region = &ha_pg_range[1];
				dm->ha_wrk.ha_page_range = *ha_pg_range;
				dm->ha_wrk.ha_region_range = *ha_region;
			}
			schedule_work(&dm_device.ha_wrk.wrk);
			break;

		case DM_INFO_MESSAGE:
			process_info(dm, (struct dm_info_msg *)dm_msg);
			break;

		default:
			pr_warn_ratelimited("Unhandled message: type: %d\n", dm_hdr->type);

		}
	}

}

#define HV_LARGE_REPORTING_ORDER	9
#define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \
		HV_LARGE_REPORTING_ORDER)
static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
			       struct scatterlist *sgl, unsigned int nents)
{
	unsigned long flags;
	struct hv_memory_hint *hint;
	int i, order;
	u64 status;
	struct scatterlist *sg;

	WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
	WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order));
	local_irq_save(flags);
	hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
	if (!hint) {
		local_irq_restore(flags);
		return -ENOSPC;
	}

	hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD;
	hint->reserved = 0;
	for_each_sg(sgl, sg, nents, i) {
		union hv_gpa_page_range *range;

		range = &hint->ranges[i];
		range->address_space = 0;
		order = get_order(sg->length);
		/*
		 * Hyper-V expects the additional_pages field in the units
		 * of one of these 3 sizes: 4Kbytes, 2Mbytes or 1Gbytes.
		 * This is dictated by the values of the fields page.largesize
		 * and page_size.
		 * This code, however, only uses the 4Kbytes and 2Mbytes units
		 * and not the 1Gbytes unit.
		 */

		/* page reporting for pages 2MB or higher */
		if (order >= HV_LARGE_REPORTING_ORDER) {
			range->page.largepage = 1;
			range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
			range->base_large_pfn = page_to_hvpfn(
					sg_page(sg)) >> HV_LARGE_REPORTING_ORDER;
			range->page.additional_pages =
				(sg->length / HV_LARGE_REPORTING_LEN) - 1;
		} else {
			/* Page reporting for pages below 2MB */
			range->page.basepfn = page_to_hvpfn(sg_page(sg));
			range->page.largepage = false;
			range->page.additional_pages =
				(sg->length / HV_HYP_PAGE_SIZE) - 1;
		}

	}

	status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0,
				     hint, NULL);
	local_irq_restore(flags);
	if (!hv_result_success(status)) {

		pr_err("Cold memory discard hypercall failed with status %llx\n",
		       status);
		if (hv_hypercall_multi_failure > 0)
			hv_hypercall_multi_failure++;

		if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) {
			pr_err("Underlying Hyper-V does not support order less than 9. Hypercall failed\n");
			pr_err("Defaulting to page_reporting_order %d\n",
			       pageblock_order);
			page_reporting_order = pageblock_order;
			hv_hypercall_multi_failure++;
			return -EINVAL;
		}

		return -EINVAL;
	}

	return 0;
}

static void enable_page_reporting(void)
{
	int ret;

	if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) {
		pr_debug("Cold memory discard hint not supported by Hyper-V\n");
		return;
	}

	BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
	dm_device.pr_dev_info.report = hv_free_page_report;
	/*
	 * We let the page_reporting_order parameter decide the order
	 * in the page_reporting code.
	 */
	dm_device.pr_dev_info.order = 0;
	ret = page_reporting_register(&dm_device.pr_dev_info);
	if (ret < 0) {
		dm_device.pr_dev_info.report = NULL;
		pr_err("Failed to enable cold memory discard: %d\n", ret);
	} else {
		pr_info("Cold memory discard hint enabled with order %d\n",
			page_reporting_order);
	}
}

static void disable_page_reporting(void)
{
	if (dm_device.pr_dev_info.report) {
		page_reporting_unregister(&dm_device.pr_dev_info);
		dm_device.pr_dev_info.report = NULL;
	}
}

static int ballooning_enabled(void)
{
	/*
	 * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE),
	 * since currently it's unclear to us whether an unballoon request can
	 * make sure all page ranges are guest page size aligned.
	 */
	if (PAGE_SIZE != HV_HYP_PAGE_SIZE) {
		pr_info("Ballooning disabled because page size is not 4096 bytes\n");
		return 0;
	}

	return 1;
}

static int hot_add_enabled(void)
{
	/*
	 * Disable hot add on ARM64, because we currently rely on
	 * memory_add_physaddr_to_nid() to get a node id of a hot add range,
	 * however ARM64's memory_add_physaddr_to_nid() always returns 0 and
	 * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for
	 * add_memory().
	 */
	if (IS_ENABLED(CONFIG_ARM64)) {
		pr_info("Memory hot add disabled on ARM64\n");
		return 0;
	}

	return 1;
}

static int balloon_connect_vsp(struct hv_device *dev)
{
	struct dm_version_request version_req;
	struct dm_capabilities cap_msg;
	unsigned long t;
	int ret;

	/*
	 * max_pkt_size should be large enough for one vmbus packet header plus
	 * our receive buffer size. Hyper-V sends messages up to
	 * HV_HYP_PAGE_SIZE bytes long on balloon channel.
	 */
	dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2;

	ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
			 balloon_onchannelcallback, dev);
	if (ret)
		return ret;

	/*
	 * Initiate the handshake with the host and negotiate
	 * a version that the host can support. We start with the
	 * highest version number and go down if the host cannot
	 * support it.
	 */
	memset(&version_req, 0, sizeof(struct dm_version_request));
	version_req.hdr.type = DM_VERSION_REQUEST;
	version_req.hdr.size = sizeof(struct dm_version_request);
	version_req.hdr.trans_id = atomic_inc_return(&trans_id);
	version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN10;
	version_req.is_last_attempt = 0;
	dm_device.version = version_req.version.version;

	ret = vmbus_sendpacket(dev->channel, &version_req,
			       sizeof(struct dm_version_request),
			       (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
	if (ret)
		goto out;

	t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto out;
	}

	/*
	 * If we could not negotiate a compatible version with the host
	 * fail the probe function.
	 */
	if (dm_device.state == DM_INIT_ERROR) {
		ret = -EPROTO;
		goto out;
	}

	pr_info("Using Dynamic Memory protocol version %u.%u\n",
		DYNMEM_MAJOR_VERSION(dm_device.version),
		DYNMEM_MINOR_VERSION(dm_device.version));

	/*
	 * Now submit our capabilities to the host.
	 */
	memset(&cap_msg, 0, sizeof(struct dm_capabilities));
	cap_msg.hdr.type = DM_CAPABILITIES_REPORT;
	cap_msg.hdr.size = sizeof(struct dm_capabilities);
	cap_msg.hdr.trans_id = atomic_inc_return(&trans_id);

	/*
	 * When hibernation (i.e. virtual ACPI S4 state) is enabled, the host
	 * currently still requires the bits to be set, so we have to add code
	 * to fail the host's hot-add and balloon up/down requests, if any.
	 */
	cap_msg.caps.cap_bits.balloon = ballooning_enabled();
	cap_msg.caps.cap_bits.hot_add = hot_add_enabled();

	/*
	 * Specify our alignment requirements as it relates to
	 * memory hot-add. Specify 128MB alignment.
	 */
	cap_msg.caps.cap_bits.hot_add_alignment = 7;

	/*
	 * Currently the host does not use these
	 * values and we set them to what is done in the
	 * Windows driver.
	 */
	cap_msg.min_page_cnt = 0;
	cap_msg.max_page_number = -1;

	ret = vmbus_sendpacket(dev->channel, &cap_msg,
			       sizeof(struct dm_capabilities),
			       (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
	if (ret)
		goto out;

	t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto out;
	}

	/*
	 * If the host does not like our capabilities,
	 * fail the probe function.
1866 */ 1867 if (dm_device.state == DM_INIT_ERROR) { 1868 ret = -EPROTO; 1869 goto out; 1870 } 1871 1872 return 0; 1873 out: 1874 vmbus_close(dev->channel); 1875 return ret; 1876 } 1877 1878 /* 1879 * DEBUGFS Interface 1880 */ 1881 #ifdef CONFIG_DEBUG_FS 1882 1883 /** 1884 * hv_balloon_debug_show - shows statistics of balloon operations. 1885 * @f: pointer to the &struct seq_file. 1886 * @offset: ignored. 1887 * 1888 * Provides the statistics that can be accessed in hv-balloon in the debugfs. 1889 * 1890 * Return: zero on success or an error code. 1891 */ 1892 static int hv_balloon_debug_show(struct seq_file *f, void *offset) 1893 { 1894 struct hv_dynmem_device *dm = f->private; 1895 char *sname; 1896 1897 seq_printf(f, "%-22s: %u.%u\n", "host_version", 1898 DYNMEM_MAJOR_VERSION(dm->version), 1899 DYNMEM_MINOR_VERSION(dm->version)); 1900 1901 seq_printf(f, "%-22s:", "capabilities"); 1902 if (ballooning_enabled()) 1903 seq_puts(f, " enabled"); 1904 1905 if (hot_add_enabled()) 1906 seq_puts(f, " hot_add"); 1907 1908 seq_puts(f, "\n"); 1909 1910 seq_printf(f, "%-22s: %u", "state", dm->state); 1911 switch (dm->state) { 1912 case DM_INITIALIZING: 1913 sname = "Initializing"; 1914 break; 1915 case DM_INITIALIZED: 1916 sname = "Initialized"; 1917 break; 1918 case DM_BALLOON_UP: 1919 sname = "Balloon Up"; 1920 break; 1921 case DM_BALLOON_DOWN: 1922 sname = "Balloon Down"; 1923 break; 1924 case DM_HOT_ADD: 1925 sname = "Hot Add"; 1926 break; 1927 case DM_INIT_ERROR: 1928 sname = "Error"; 1929 break; 1930 default: 1931 sname = "Unknown"; 1932 } 1933 seq_printf(f, " (%s)\n", sname); 1934 1935 /* HV Page Size */ 1936 seq_printf(f, "%-22s: %ld\n", "page_size", HV_HYP_PAGE_SIZE); 1937 1938 /* Pages added with hot_add */ 1939 seq_printf(f, "%-22s: %u\n", "pages_added", dm->num_pages_added); 1940 1941 /* pages that are "onlined"/used from pages_added */ 1942 seq_printf(f, "%-22s: %u\n", "pages_onlined", dm->num_pages_onlined); 1943 1944 /* pages we have given back to host */ 1945 seq_printf(f, "%-22s: %u\n", "pages_ballooned", dm->num_pages_ballooned); 1946 1947 seq_printf(f, "%-22s: %lu\n", "total_pages_committed", 1948 get_pages_committed(dm)); 1949 1950 seq_printf(f, "%-22s: %llu\n", "max_dynamic_page_count", 1951 dm->max_dynamic_page_count); 1952 1953 return 0; 1954 } 1955 1956 DEFINE_SHOW_ATTRIBUTE(hv_balloon_debug); 1957 1958 static void hv_balloon_debugfs_init(struct hv_dynmem_device *b) 1959 { 1960 debugfs_create_file("hv-balloon", 0444, NULL, b, 1961 &hv_balloon_debug_fops); 1962 } 1963 1964 static void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) 1965 { 1966 debugfs_lookup_and_remove("hv-balloon", NULL); 1967 } 1968 1969 #else 1970 1971 static inline void hv_balloon_debugfs_init(struct hv_dynmem_device *b) 1972 { 1973 } 1974 1975 static inline void hv_balloon_debugfs_exit(struct hv_dynmem_device *b) 1976 { 1977 } 1978 1979 #endif /* CONFIG_DEBUG_FS */ 1980 1981 static int balloon_probe(struct hv_device *dev, 1982 const struct hv_vmbus_device_id *dev_id) 1983 { 1984 int ret; 1985 1986 allow_hibernation = hv_is_hibernation_supported(); 1987 if (allow_hibernation) 1988 hot_add = false; 1989 1990 #ifdef CONFIG_MEMORY_HOTPLUG 1991 do_hot_add = hot_add; 1992 #else 1993 do_hot_add = false; 1994 #endif 1995 dm_device.dev = dev; 1996 dm_device.state = DM_INITIALIZING; 1997 dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; 1998 init_completion(&dm_device.host_event); 1999 init_completion(&dm_device.config_event); 2000 INIT_LIST_HEAD(&dm_device.ha_region_list); 2001 
	INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
	INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
	dm_device.host_specified_ha_region = false;

#ifdef CONFIG_MEMORY_HOTPLUG
	set_online_page_callback(&hv_online_page);
	init_completion(&dm_device.ol_waitevent);
	register_memory_notifier(&hv_memory_nb);
#endif

	hv_set_drvdata(dev, &dm_device);

	ret = balloon_connect_vsp(dev);
	if (ret != 0)
		goto connect_error;

	enable_page_reporting();
	dm_device.state = DM_INITIALIZED;

	dm_device.thread =
		 kthread_run(dm_thread_func, &dm_device, "hv_balloon");
	if (IS_ERR(dm_device.thread)) {
		ret = PTR_ERR(dm_device.thread);
		goto probe_error;
	}

	hv_balloon_debugfs_init(&dm_device);

	return 0;

probe_error:
	dm_device.state = DM_INIT_ERROR;
	dm_device.thread = NULL;
	disable_page_reporting();
	vmbus_close(dev->channel);
connect_error:
#ifdef CONFIG_MEMORY_HOTPLUG
	unregister_memory_notifier(&hv_memory_nb);
	restore_online_page_callback(&hv_online_page);
#endif
	return ret;
}

static void balloon_remove(struct hv_device *dev)
{
	struct hv_dynmem_device *dm = hv_get_drvdata(dev);
	struct hv_hotadd_state *has, *tmp;
	struct hv_hotadd_gap *gap, *tmp_gap;
	unsigned long flags;

	if (dm->num_pages_ballooned != 0)
		pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned);

	hv_balloon_debugfs_exit(dm);

	cancel_work_sync(&dm->balloon_wrk.wrk);
	cancel_work_sync(&dm->ha_wrk.wrk);

	kthread_stop(dm->thread);

	/*
	 * This is to handle the case when balloon_resume()
	 * call has failed and some cleanup has been done as
	 * a part of the error handling.
2066 */ 2067 if (dm_device.state != DM_INIT_ERROR) { 2068 disable_page_reporting(); 2069 vmbus_close(dev->channel); 2070 #ifdef CONFIG_MEMORY_HOTPLUG 2071 unregister_memory_notifier(&hv_memory_nb); 2072 restore_online_page_callback(&hv_online_page); 2073 #endif 2074 } 2075 2076 spin_lock_irqsave(&dm_device.ha_lock, flags); 2077 list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) { 2078 list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) { 2079 list_del(&gap->list); 2080 kfree(gap); 2081 } 2082 list_del(&has->list); 2083 kfree(has); 2084 } 2085 spin_unlock_irqrestore(&dm_device.ha_lock, flags); 2086 } 2087 2088 static int balloon_suspend(struct hv_device *hv_dev) 2089 { 2090 struct hv_dynmem_device *dm = hv_get_drvdata(hv_dev); 2091 2092 tasklet_disable(&hv_dev->channel->callback_event); 2093 2094 cancel_work_sync(&dm->balloon_wrk.wrk); 2095 cancel_work_sync(&dm->ha_wrk.wrk); 2096 2097 if (dm->thread) { 2098 kthread_stop(dm->thread); 2099 dm->thread = NULL; 2100 vmbus_close(hv_dev->channel); 2101 } 2102 2103 tasklet_enable(&hv_dev->channel->callback_event); 2104 2105 return 0; 2106 2107 } 2108 2109 static int balloon_resume(struct hv_device *dev) 2110 { 2111 int ret; 2112 2113 dm_device.state = DM_INITIALIZING; 2114 2115 ret = balloon_connect_vsp(dev); 2116 2117 if (ret != 0) 2118 goto out; 2119 2120 dm_device.thread = 2121 kthread_run(dm_thread_func, &dm_device, "hv_balloon"); 2122 if (IS_ERR(dm_device.thread)) { 2123 ret = PTR_ERR(dm_device.thread); 2124 dm_device.thread = NULL; 2125 goto close_channel; 2126 } 2127 2128 dm_device.state = DM_INITIALIZED; 2129 return 0; 2130 close_channel: 2131 vmbus_close(dev->channel); 2132 out: 2133 dm_device.state = DM_INIT_ERROR; 2134 disable_page_reporting(); 2135 #ifdef CONFIG_MEMORY_HOTPLUG 2136 unregister_memory_notifier(&hv_memory_nb); 2137 restore_online_page_callback(&hv_online_page); 2138 #endif 2139 return ret; 2140 } 2141 2142 static const struct hv_vmbus_device_id id_table[] = { 2143 /* Dynamic Memory Class ID */ 2144 /* 525074DC-8985-46e2-8057-A307DC18A502 */ 2145 { HV_DM_GUID, }, 2146 { }, 2147 }; 2148 2149 MODULE_DEVICE_TABLE(vmbus, id_table); 2150 2151 static struct hv_driver balloon_drv = { 2152 .name = "hv_balloon", 2153 .id_table = id_table, 2154 .probe = balloon_probe, 2155 .remove = balloon_remove, 2156 .suspend = balloon_suspend, 2157 .resume = balloon_resume, 2158 .driver = { 2159 .probe_type = PROBE_PREFER_ASYNCHRONOUS, 2160 }, 2161 }; 2162 2163 static int __init init_balloon_drv(void) 2164 { 2165 2166 return vmbus_driver_register(&balloon_drv); 2167 } 2168 2169 module_init(init_balloon_drv); 2170 2171 MODULE_DESCRIPTION("Hyper-V Balloon"); 2172 MODULE_LICENSE("GPL"); 2173