1 /* 2 * Copyright (c) 2012, Microsoft Corporation. 3 * 4 * Author: 5 * K. Y. Srinivasan <kys@microsoft.com> 6 * 7 * This program is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 as published 9 * by the Free Software Foundation. 10 * 11 * This program is distributed in the hope that it will be useful, but 12 * WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 14 * NON INFRINGEMENT. See the GNU General Public License for more 15 * details. 16 * 17 */ 18 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 20 21 #include <linux/kernel.h> 22 #include <linux/mman.h> 23 #include <linux/delay.h> 24 #include <linux/init.h> 25 #include <linux/module.h> 26 #include <linux/slab.h> 27 #include <linux/kthread.h> 28 #include <linux/completion.h> 29 #include <linux/memory_hotplug.h> 30 #include <linux/memory.h> 31 #include <linux/notifier.h> 32 #include <linux/mman.h> 33 #include <linux/percpu_counter.h> 34 35 #include <linux/hyperv.h> 36 37 /* 38 * We begin with definitions supporting the Dynamic Memory protocol 39 * with the host. 40 * 41 * Begin protocol definitions. 42 */ 43 44 45 46 /* 47 * Protocol versions. The low word is the minor version, the high word the major 48 * version. 
49 * 50 * History: 51 * Initial version 1.0 52 * Changed to 0.1 on 2009/03/25 53 * Changes to 0.2 on 2009/05/14 54 * Changes to 0.3 on 2009/12/03 55 * Changed to 1.0 on 2011/04/05 56 */ 57 58 #define DYNMEM_MAKE_VERSION(Major, Minor) ((__u32)(((Major) << 16) | (Minor))) 59 #define DYNMEM_MAJOR_VERSION(Version) ((__u32)(Version) >> 16) 60 #define DYNMEM_MINOR_VERSION(Version) ((__u32)(Version) & 0xff) 61 62 enum { 63 DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), 64 DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), 65 66 DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, 67 DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, 68 69 DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8 70 }; 71 72 73 74 /* 75 * Message Types 76 */ 77 78 enum dm_message_type { 79 /* 80 * Version 0.3 81 */ 82 DM_ERROR = 0, 83 DM_VERSION_REQUEST = 1, 84 DM_VERSION_RESPONSE = 2, 85 DM_CAPABILITIES_REPORT = 3, 86 DM_CAPABILITIES_RESPONSE = 4, 87 DM_STATUS_REPORT = 5, 88 DM_BALLOON_REQUEST = 6, 89 DM_BALLOON_RESPONSE = 7, 90 DM_UNBALLOON_REQUEST = 8, 91 DM_UNBALLOON_RESPONSE = 9, 92 DM_MEM_HOT_ADD_REQUEST = 10, 93 DM_MEM_HOT_ADD_RESPONSE = 11, 94 DM_VERSION_03_MAX = 11, 95 /* 96 * Version 1.0. 97 */ 98 DM_INFO_MESSAGE = 12, 99 DM_VERSION_1_MAX = 12 100 }; 101 102 103 /* 104 * Structures defining the dynamic memory management 105 * protocol. 106 */ 107 108 union dm_version { 109 struct { 110 __u16 minor_version; 111 __u16 major_version; 112 }; 113 __u32 version; 114 } __packed; 115 116 117 union dm_caps { 118 struct { 119 __u64 balloon:1; 120 __u64 hot_add:1; 121 __u64 reservedz:62; 122 } cap_bits; 123 __u64 caps; 124 } __packed; 125 126 union dm_mem_page_range { 127 struct { 128 /* 129 * The PFN number of the first page in the range. 130 * 40 bits is the architectural limit of a PFN 131 * number for AMD64. 132 */ 133 __u64 start_page:40; 134 /* 135 * The number of pages in the range. 
136 */ 137 __u64 page_cnt:24; 138 } finfo; 139 __u64 page_range; 140 } __packed; 141 142 143 144 /* 145 * The header for all dynamic memory messages: 146 * 147 * type: Type of the message. 148 * size: Size of the message in bytes; including the header. 149 * trans_id: The guest is responsible for manufacturing this ID. 150 */ 151 152 struct dm_header { 153 __u16 type; 154 __u16 size; 155 __u32 trans_id; 156 } __packed; 157 158 /* 159 * A generic message format for dynamic memory. 160 * Specific message formats are defined later in the file. 161 */ 162 163 struct dm_message { 164 struct dm_header hdr; 165 __u8 data[]; /* enclosed message */ 166 } __packed; 167 168 169 /* 170 * Specific message types supporting the dynamic memory protocol. 171 */ 172 173 /* 174 * Version negotiation message. Sent from the guest to the host. 175 * The guest is free to try different versions until the host 176 * accepts the version. 177 * 178 * dm_version: The protocol version requested. 179 * is_last_attempt: If TRUE, this is the last version guest will request. 180 * reservedz: Reserved field, set to zero. 181 */ 182 183 struct dm_version_request { 184 struct dm_header hdr; 185 union dm_version version; 186 __u32 is_last_attempt:1; 187 __u32 reservedz:31; 188 } __packed; 189 190 /* 191 * Version response message; Host to Guest and indicates 192 * if the host has accepted the version sent by the guest. 193 * 194 * is_accepted: If TRUE, host has accepted the version and the guest 195 * should proceed to the next stage of the protocol. FALSE indicates that 196 * guest should re-try with a different version. 197 * 198 * reservedz: Reserved field, set to zero. 199 */ 200 201 struct dm_version_response { 202 struct dm_header hdr; 203 __u64 is_accepted:1; 204 __u64 reservedz:63; 205 } __packed; 206 207 /* 208 * Message reporting capabilities. This is sent from the guest to the 209 * host. 
210 */ 211 212 struct dm_capabilities { 213 struct dm_header hdr; 214 union dm_caps caps; 215 __u64 min_page_cnt; 216 __u64 max_page_number; 217 } __packed; 218 219 /* 220 * Response to the capabilities message. This is sent from the host to the 221 * guest. This message notifies if the host has accepted the guest's 222 * capabilities. If the host has not accepted, the guest must shutdown 223 * the service. 224 * 225 * is_accepted: Indicates if the host has accepted guest's capabilities. 226 * reservedz: Must be 0. 227 */ 228 229 struct dm_capabilities_resp_msg { 230 struct dm_header hdr; 231 __u64 is_accepted:1; 232 __u64 reservedz:63; 233 } __packed; 234 235 /* 236 * This message is used to report memory pressure from the guest. 237 * This message is not part of any transaction and there is no 238 * response to this message. 239 * 240 * num_avail: Available memory in pages. 241 * num_committed: Committed memory in pages. 242 * page_file_size: The accumulated size of all page files 243 * in the system in pages. 244 * zero_free: The nunber of zero and free pages. 245 * page_file_writes: The writes to the page file in pages. 246 * io_diff: An indicator of file cache efficiency or page file activity, 247 * calculated as File Cache Page Fault Count - Page Read Count. 248 * This value is in pages. 249 * 250 * Some of these metrics are Windows specific and fortunately 251 * the algorithm on the host side that computes the guest memory 252 * pressure only uses num_committed value. 253 */ 254 255 struct dm_status { 256 struct dm_header hdr; 257 __u64 num_avail; 258 __u64 num_committed; 259 __u64 page_file_size; 260 __u64 zero_free; 261 __u32 page_file_writes; 262 __u32 io_diff; 263 } __packed; 264 265 266 /* 267 * Message to ask the guest to allocate memory - balloon up message. 268 * This message is sent from the host to the guest. The guest may not be 269 * able to allocate as much memory as requested. 270 * 271 * num_pages: number of pages to allocate. 
272 */ 273 274 struct dm_balloon { 275 struct dm_header hdr; 276 __u32 num_pages; 277 __u32 reservedz; 278 } __packed; 279 280 281 /* 282 * Balloon response message; this message is sent from the guest 283 * to the host in response to the balloon message. 284 * 285 * reservedz: Reserved; must be set to zero. 286 * more_pages: If FALSE, this is the last message of the transaction. 287 * if TRUE there will atleast one more message from the guest. 288 * 289 * range_count: The number of ranges in the range array. 290 * 291 * range_array: An array of page ranges returned to the host. 292 * 293 */ 294 295 struct dm_balloon_response { 296 struct dm_header hdr; 297 __u32 reservedz; 298 __u32 more_pages:1; 299 __u32 range_count:31; 300 union dm_mem_page_range range_array[]; 301 } __packed; 302 303 /* 304 * Un-balloon message; this message is sent from the host 305 * to the guest to give guest more memory. 306 * 307 * more_pages: If FALSE, this is the last message of the transaction. 308 * if TRUE there will atleast one more message from the guest. 309 * 310 * reservedz: Reserved; must be set to zero. 311 * 312 * range_count: The number of ranges in the range array. 313 * 314 * range_array: An array of page ranges returned to the host. 315 * 316 */ 317 318 struct dm_unballoon_request { 319 struct dm_header hdr; 320 __u32 more_pages:1; 321 __u32 reservedz:31; 322 __u32 range_count; 323 union dm_mem_page_range range_array[]; 324 } __packed; 325 326 /* 327 * Un-balloon response message; this message is sent from the guest 328 * to the host in response to an unballoon request. 329 * 330 */ 331 332 struct dm_unballoon_response { 333 struct dm_header hdr; 334 } __packed; 335 336 337 /* 338 * Hot add request message. Message sent from the host to the guest. 339 * 340 * mem_range: Memory range to hot add. 341 * 342 * On Linux we currently don't support this since we cannot hot add 343 * arbitrary granularity of memory. 
344 */ 345 346 struct dm_hot_add { 347 struct dm_header hdr; 348 union dm_mem_page_range range; 349 } __packed; 350 351 /* 352 * Hot add response message. 353 * This message is sent by the guest to report the status of a hot add request. 354 * If page_count is less than the requested page count, then the host should 355 * assume all further hot add requests will fail, since this indicates that 356 * the guest has hit an upper physical memory barrier. 357 * 358 * Hot adds may also fail due to low resources; in this case, the guest must 359 * not complete this message until the hot add can succeed, and the host must 360 * not send a new hot add request until the response is sent. 361 * If VSC fails to hot add memory DYNMEM_NUMBER_OF_UNSUCCESSFUL_HOTADD_ATTEMPTS 362 * times it fails the request. 363 * 364 * 365 * page_count: number of pages that were successfully hot added. 366 * 367 * result: result of the operation 1: success, 0: failure. 368 * 369 */ 370 371 struct dm_hot_add_response { 372 struct dm_header hdr; 373 __u32 page_count; 374 __u32 result; 375 } __packed; 376 377 /* 378 * Types of information sent from host to the guest. 379 */ 380 381 enum dm_info_type { 382 INFO_TYPE_MAX_PAGE_CNT = 0, 383 MAX_INFO_TYPE 384 }; 385 386 387 /* 388 * Header for the information message. 389 */ 390 391 struct dm_info_header { 392 enum dm_info_type type; 393 __u32 data_size; 394 } __packed; 395 396 /* 397 * This message is sent from the host to the guest to pass 398 * some relevant information (win8 addition). 399 * 400 * reserved: no used. 401 * info_size: size of the information blob. 402 * info: information blob. 403 */ 404 405 struct dm_info_msg { 406 struct dm_info_header header; 407 __u32 reserved; 408 __u32 info_size; 409 __u8 info[]; 410 }; 411 412 /* 413 * End protocol definitions. 
414 */ 415 416 static bool hot_add; 417 static bool do_hot_add; 418 419 module_param(hot_add, bool, (S_IRUGO | S_IWUSR)); 420 MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add"); 421 422 static atomic_t trans_id = ATOMIC_INIT(0); 423 424 static int dm_ring_size = (5 * PAGE_SIZE); 425 426 /* 427 * Driver specific state. 428 */ 429 430 enum hv_dm_state { 431 DM_INITIALIZING = 0, 432 DM_INITIALIZED, 433 DM_BALLOON_UP, 434 DM_BALLOON_DOWN, 435 DM_HOT_ADD, 436 DM_INIT_ERROR 437 }; 438 439 440 static __u8 recv_buffer[PAGE_SIZE]; 441 static __u8 *send_buffer; 442 #define PAGES_IN_2M 512 443 444 struct hv_dynmem_device { 445 struct hv_device *dev; 446 enum hv_dm_state state; 447 struct completion host_event; 448 struct completion config_event; 449 450 /* 451 * Number of pages we have currently ballooned out. 452 */ 453 unsigned int num_pages_ballooned; 454 455 /* 456 * This thread handles both balloon/hot-add 457 * requests from the host as well as notifying 458 * the host with regards to memory pressure in 459 * the guest. 460 */ 461 struct task_struct *thread; 462 463 /* 464 * We start with the highest version we can support 465 * and downgrade based on the host; we save here the 466 * next version to try. 467 */ 468 __u32 next_version; 469 }; 470 471 static struct hv_dynmem_device dm_device; 472 473 static void hot_add_req(struct hv_dynmem_device *dm, struct dm_hot_add *msg) 474 { 475 476 struct dm_hot_add_response resp; 477 478 if (do_hot_add) { 479 480 pr_info("Memory hot add not supported\n"); 481 482 /* 483 * Currently we do not support hot add. 484 * Just fail the request. 
485 */ 486 } 487 488 memset(&resp, 0, sizeof(struct dm_hot_add_response)); 489 resp.hdr.type = DM_MEM_HOT_ADD_RESPONSE; 490 resp.hdr.size = sizeof(struct dm_hot_add_response); 491 resp.hdr.trans_id = atomic_inc_return(&trans_id); 492 493 resp.page_count = 0; 494 resp.result = 0; 495 496 dm->state = DM_INITIALIZED; 497 vmbus_sendpacket(dm->dev->channel, &resp, 498 sizeof(struct dm_hot_add_response), 499 (unsigned long)NULL, 500 VM_PKT_DATA_INBAND, 0); 501 502 } 503 504 static void process_info(struct hv_dynmem_device *dm, struct dm_info_msg *msg) 505 { 506 switch (msg->header.type) { 507 case INFO_TYPE_MAX_PAGE_CNT: 508 pr_info("Received INFO_TYPE_MAX_PAGE_CNT\n"); 509 pr_info("Data Size is %d\n", msg->header.data_size); 510 break; 511 default: 512 pr_info("Received Unknown type: %d\n", msg->header.type); 513 } 514 } 515 516 /* 517 * Post our status as it relates memory pressure to the 518 * host. Host expects the guests to post this status 519 * periodically at 1 second intervals. 520 * 521 * The metrics specified in this protocol are very Windows 522 * specific and so we cook up numbers here to convey our memory 523 * pressure. 
 */

static void post_status(struct hv_dynmem_device *dm)
{
	struct dm_status status;


	memset(&status, 0, sizeof(struct dm_status));
	status.hdr.type = DM_STATUS_REPORT;
	status.hdr.size = sizeof(struct dm_status);
	status.hdr.trans_id = atomic_inc_return(&trans_id);

	/*
	 * Only num_committed is populated; per the protocol comments the
	 * host-side pressure algorithm ignores the other (Windows
	 * specific) fields, which stay zero from the memset above.
	 */
	status.num_committed = vm_memory_committed();

	vmbus_sendpacket(dm->dev->channel, &status,
				sizeof(struct dm_status),
				(unsigned long)NULL,
				VM_PKT_DATA_INBAND, 0);

}


/*
 * Return one previously ballooned PFN range to the kernel, page by page,
 * decrementing the balloon accounting as we go.
 *
 * NOTE(review): __free_page() on each page is only correct for order-0
 * allocations; balloon_up() currently always uses alloc_unit == 1, so
 * this holds today, but it must be revisited if alloc_unit ever grows.
 */
static void free_balloon_pages(struct hv_dynmem_device *dm,
			 union dm_mem_page_range *range_array)
{
	int num_pages = range_array->finfo.page_cnt;
	__u64 start_frame = range_array->finfo.start_page;
	struct page *pg;
	int i;

	for (i = 0; i < num_pages; i++) {
		pg = pfn_to_page(i + start_frame);
		__free_page(pg);
		dm->num_pages_ballooned--;
	}
}


/*
 * Allocate up to num_pages pages for the balloon, recording each
 * allocation as a PFN range in bl_resp->range_array and growing
 * bl_resp->hdr.size accordingly.
 *
 * Returns the number of pages accounted for in this pass; *alloc_error
 * is set when the page allocator fails.  Allocation also stops early
 * (without error) once another range entry would overflow the one-page
 * response buffer - the caller then sends this response and retries.
 */
static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
			 struct dm_balloon_response *bl_resp, int alloc_unit,
			 bool *alloc_error)
{
	int i = 0;
	struct page *pg;

	/* A request smaller than one allocation unit cannot be satisfied. */
	if (num_pages < alloc_unit)
		return 0;

	for (i = 0; (i * alloc_unit) < num_pages; i++) {
		/* Stop before the response outgrows its single page. */
		if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) >
			PAGE_SIZE)
			return i * alloc_unit;

		/*
		 * We execute this code in a thread context. Furthermore,
		 * we don't want the kernel to try too hard.
		 */
		pg = alloc_pages(GFP_HIGHUSER | __GFP_NORETRY |
				__GFP_NOMEMALLOC | __GFP_NOWARN,
				get_order(alloc_unit << PAGE_SHIFT));

		if (!pg) {
			*alloc_error = true;
			return i * alloc_unit;
		}


		dm->num_pages_ballooned += alloc_unit;

		bl_resp->range_count++;
		bl_resp->range_array[i].finfo.start_page =
			page_to_pfn(pg);
		bl_resp->range_array[i].finfo.page_cnt = alloc_unit;
		bl_resp->hdr.size += sizeof(union dm_mem_page_range);

	}

	return num_pages;
}


/*
 * Handle a DM_BALLOON_REQUEST: allocate req->num_pages pages and report
 * the resulting PFN ranges to the host, possibly across several response
 * messages (more_pages == 1 on all but the last).  Runs in the worker
 * thread context (dm_thread_func), not in the channel callback.
 */
static void balloon_up(struct hv_dynmem_device *dm, struct dm_balloon *req)
{
	int num_pages = req->num_pages;
	int num_ballooned = 0;
	struct dm_balloon_response *bl_resp;
	int alloc_unit;
	int ret;
	bool alloc_error = false;
	bool done = false;
	int i;


	/*
	 * Currently, we only support 4k allocations.
	 */
	alloc_unit = 1;

	while (!done) {
		/* Build each response in the shared one-page send buffer. */
		bl_resp = (struct dm_balloon_response *)send_buffer;
		memset(send_buffer, 0, PAGE_SIZE);
		bl_resp->hdr.type = DM_BALLOON_RESPONSE;
		bl_resp->hdr.trans_id = atomic_inc_return(&trans_id);
		bl_resp->hdr.size = sizeof(struct dm_balloon_response);
		bl_resp->more_pages = 1;


		num_pages -= num_ballooned;
		num_ballooned = alloc_balloon_pages(dm, num_pages,
						bl_resp, alloc_unit,
						&alloc_error);

		/* Last message when allocation failed or we are done. */
		if ((alloc_error) || (num_ballooned == num_pages)) {
			bl_resp->more_pages = 0;
			done = true;
			dm->state = DM_INITIALIZED;
		}

		/*
		 * We are pushing a lot of data through the channel;
		 * deal with transient failures caused because of the
		 * lack of space in the ring buffer.
		 */

		do {
			ret = vmbus_sendpacket(dm_device.dev->channel,
						bl_resp,
						bl_resp->hdr.size,
						(unsigned long)NULL,
						VM_PKT_DATA_INBAND, 0);

			if (ret == -EAGAIN)
				msleep(20);

		} while (ret == -EAGAIN);

		if (ret) {
			/*
			 * Free up the memory we allocated
			 * for this (failed) response message.
			 */
			pr_info("Balloon response failed\n");

			for (i = 0; i < bl_resp->range_count; i++)
				free_balloon_pages(dm,
						 &bl_resp->range_array[i]);

			done = true;
		}
	}

}

/*
 * Handle a DM_UNBALLOON_REQUEST: give the listed PFN ranges back to the
 * kernel.  The response is only sent after the final message of the
 * transaction (more_pages == 0) has been processed.  Called directly
 * from the channel callback.
 */
static void balloon_down(struct hv_dynmem_device *dm,
			struct dm_unballoon_request *req)
{
	union dm_mem_page_range *range_array = req->range_array;
	int range_count = req->range_count;
	struct dm_unballoon_response resp;
	int i;

	for (i = 0; i < range_count; i++)
		free_balloon_pages(dm, &range_array[i]);

	/* More request messages follow; respond only after the last one. */
	if (req->more_pages == 1)
		return;

	memset(&resp, 0, sizeof(struct dm_unballoon_response));
	resp.hdr.type = DM_UNBALLOON_RESPONSE;
	resp.hdr.trans_id = atomic_inc_return(&trans_id);
	resp.hdr.size = sizeof(struct dm_unballoon_response);

	vmbus_sendpacket(dm_device.dev->channel, &resp,
				sizeof(struct dm_unballoon_response),
				(unsigned long)NULL,
				VM_PKT_DATA_INBAND, 0);

	dm->state = DM_INITIALIZED;
}

static void balloon_onchannelcallback(void *context);

/*
 * Worker thread: posts the periodic (1 second) memory-pressure report
 * and carries out the balloon-up / hot-add work that the channel
 * callback queues via config_event and dm->state.
 */
static int dm_thread_func(void *dm_dev)
{
	struct hv_dynmem_device *dm = dm_dev;
	int t;
	unsigned long scan_start;

	while (!kthread_should_stop()) {
		t = wait_for_completion_timeout(&dm_device.config_event, 1*HZ);
		/*
		 * The host expects us to post information on the memory
		 * pressure every second.
		 */

		if (t == 0)
			post_status(dm);

		scan_start = jiffies;
		switch (dm->state) {
		case DM_BALLOON_UP:
			balloon_up(dm, (struct dm_balloon *)recv_buffer);
			break;

		case DM_HOT_ADD:
			hot_add_req(dm, (struct dm_hot_add *)recv_buffer);
			break;
		default:
			break;
		}

		/* If the work above took over a second, post status again. */
		if (!time_in_range(jiffies, scan_start, scan_start + HZ))
			post_status(dm);

	}

	return 0;
}


/*
 * Handle the host's reply to our version request.  On acceptance, wake
 * the waiter in balloon_probe().  On rejection, retry once with the
 * older WIN7 protocol; if that too was already tried (next_version == 0)
 * or the retry cannot be sent, flag DM_INIT_ERROR and wake the waiter.
 */
static void version_resp(struct hv_dynmem_device *dm,
			struct dm_version_response *vresp)
{
	struct dm_version_request version_req;
	int ret;

	if (vresp->is_accepted) {
		/*
		 * We are done; wakeup the
		 * context waiting for version
		 * negotiation.
		 */
		complete(&dm->host_event);
		return;
	}
	/*
	 * If there are more versions to try, continue
	 * with negotiations; if not
	 * shutdown the service since we are not able
	 * to negotiate a suitable version number
	 * with the host.
	 */
	if (dm->next_version == 0)
		goto version_error;

	dm->next_version = 0;
	memset(&version_req, 0, sizeof(struct dm_version_request));
	version_req.hdr.type = DM_VERSION_REQUEST;
	version_req.hdr.size = sizeof(struct dm_version_request);
	version_req.hdr.trans_id = atomic_inc_return(&trans_id);
	version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7;
	version_req.is_last_attempt = 1;

	ret = vmbus_sendpacket(dm->dev->channel, &version_req,
				sizeof(struct dm_version_request),
				(unsigned long)NULL,
				VM_PKT_DATA_INBAND, 0);

	if (ret)
		goto version_error;

	return;

version_error:
	dm->state = DM_INIT_ERROR;
	complete(&dm->host_event);
}

/*
 * Handle the host's reply to our capabilities report; wakes the waiter
 * in balloon_probe() either way, flagging DM_INIT_ERROR on rejection.
 */
static void cap_resp(struct hv_dynmem_device *dm,
			struct dm_capabilities_resp_msg *cap_resp)
{
	if (!cap_resp->is_accepted) {
		pr_info("Capabilities not accepted by host\n");
		dm->state = DM_INIT_ERROR;
	}
	complete(&dm->host_event);
}

static void balloon_onchannelcallback(void *context) 806 { 807 struct hv_device *dev = context; 808 u32 recvlen; 809 u64 requestid; 810 struct dm_message *dm_msg; 811 struct dm_header *dm_hdr; 812 struct hv_dynmem_device *dm = hv_get_drvdata(dev); 813 814 memset(recv_buffer, 0, sizeof(recv_buffer)); 815 vmbus_recvpacket(dev->channel, recv_buffer, 816 PAGE_SIZE, &recvlen, &requestid); 817 818 if (recvlen > 0) { 819 dm_msg = (struct dm_message *)recv_buffer; 820 dm_hdr = &dm_msg->hdr; 821 822 switch (dm_hdr->type) { 823 case DM_VERSION_RESPONSE: 824 version_resp(dm, 825 (struct dm_version_response *)dm_msg); 826 break; 827 828 case DM_CAPABILITIES_RESPONSE: 829 cap_resp(dm, 830 (struct dm_capabilities_resp_msg *)dm_msg); 831 break; 832 833 case DM_BALLOON_REQUEST: 834 dm->state = DM_BALLOON_UP; 835 complete(&dm->config_event); 836 break; 837 838 case DM_UNBALLOON_REQUEST: 839 dm->state = DM_BALLOON_DOWN; 840 balloon_down(dm, 841 (struct dm_unballoon_request *)recv_buffer); 842 break; 843 844 case DM_MEM_HOT_ADD_REQUEST: 845 dm->state = DM_HOT_ADD; 846 complete(&dm->config_event); 847 break; 848 849 case DM_INFO_MESSAGE: 850 process_info(dm, (struct dm_info_msg *)dm_msg); 851 break; 852 853 default: 854 pr_err("Unhandled message: type: %d\n", dm_hdr->type); 855 856 } 857 } 858 859 } 860 861 static int balloon_probe(struct hv_device *dev, 862 const struct hv_vmbus_device_id *dev_id) 863 { 864 int ret, t; 865 struct dm_version_request version_req; 866 struct dm_capabilities cap_msg; 867 868 do_hot_add = hot_add; 869 870 /* 871 * First allocate a send buffer. 
872 */ 873 874 send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); 875 if (!send_buffer) 876 return -ENOMEM; 877 878 ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, 879 balloon_onchannelcallback, dev); 880 881 if (ret) 882 return ret; 883 884 dm_device.dev = dev; 885 dm_device.state = DM_INITIALIZING; 886 dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; 887 init_completion(&dm_device.host_event); 888 init_completion(&dm_device.config_event); 889 890 dm_device.thread = 891 kthread_run(dm_thread_func, &dm_device, "hv_balloon"); 892 if (IS_ERR(dm_device.thread)) { 893 ret = PTR_ERR(dm_device.thread); 894 goto probe_error0; 895 } 896 897 hv_set_drvdata(dev, &dm_device); 898 /* 899 * Initiate the hand shake with the host and negotiate 900 * a version that the host can support. We start with the 901 * highest version number and go down if the host cannot 902 * support it. 903 */ 904 memset(&version_req, 0, sizeof(struct dm_version_request)); 905 version_req.hdr.type = DM_VERSION_REQUEST; 906 version_req.hdr.size = sizeof(struct dm_version_request); 907 version_req.hdr.trans_id = atomic_inc_return(&trans_id); 908 version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8; 909 version_req.is_last_attempt = 0; 910 911 ret = vmbus_sendpacket(dev->channel, &version_req, 912 sizeof(struct dm_version_request), 913 (unsigned long)NULL, 914 VM_PKT_DATA_INBAND, 915 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 916 if (ret) 917 goto probe_error1; 918 919 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); 920 if (t == 0) { 921 ret = -ETIMEDOUT; 922 goto probe_error1; 923 } 924 925 /* 926 * If we could not negotiate a compatible version with the host 927 * fail the probe function. 928 */ 929 if (dm_device.state == DM_INIT_ERROR) { 930 ret = -ETIMEDOUT; 931 goto probe_error1; 932 } 933 /* 934 * Now submit our capabilities to the host. 
935 */ 936 memset(&cap_msg, 0, sizeof(struct dm_capabilities)); 937 cap_msg.hdr.type = DM_CAPABILITIES_REPORT; 938 cap_msg.hdr.size = sizeof(struct dm_capabilities); 939 cap_msg.hdr.trans_id = atomic_inc_return(&trans_id); 940 941 cap_msg.caps.cap_bits.balloon = 1; 942 /* 943 * While we currently don't support hot-add, 944 * we still advertise this capability since the 945 * host requires that guests partcipating in the 946 * dynamic memory protocol support hot add. 947 */ 948 cap_msg.caps.cap_bits.hot_add = 1; 949 950 /* 951 * Currently the host does not use these 952 * values and we set them to what is done in the 953 * Windows driver. 954 */ 955 cap_msg.min_page_cnt = 0; 956 cap_msg.max_page_number = -1; 957 958 ret = vmbus_sendpacket(dev->channel, &cap_msg, 959 sizeof(struct dm_capabilities), 960 (unsigned long)NULL, 961 VM_PKT_DATA_INBAND, 962 VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); 963 if (ret) 964 goto probe_error1; 965 966 t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ); 967 if (t == 0) { 968 ret = -ETIMEDOUT; 969 goto probe_error1; 970 } 971 972 /* 973 * If the host does not like our capabilities, 974 * fail the probe function. 
975 */ 976 if (dm_device.state == DM_INIT_ERROR) { 977 ret = -ETIMEDOUT; 978 goto probe_error1; 979 } 980 981 dm_device.state = DM_INITIALIZED; 982 983 return 0; 984 985 probe_error1: 986 kthread_stop(dm_device.thread); 987 988 probe_error0: 989 vmbus_close(dev->channel); 990 return ret; 991 } 992 993 static int balloon_remove(struct hv_device *dev) 994 { 995 struct hv_dynmem_device *dm = hv_get_drvdata(dev); 996 997 if (dm->num_pages_ballooned != 0) 998 pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned); 999 1000 vmbus_close(dev->channel); 1001 kthread_stop(dm->thread); 1002 1003 return 0; 1004 } 1005 1006 static const struct hv_vmbus_device_id id_table[] = { 1007 /* Dynamic Memory Class ID */ 1008 /* 525074DC-8985-46e2-8057-A307DC18A502 */ 1009 { VMBUS_DEVICE(0xdc, 0x74, 0x50, 0X52, 0x85, 0x89, 0xe2, 0x46, 1010 0x80, 0x57, 0xa3, 0x07, 0xdc, 0x18, 0xa5, 0x02) 1011 }, 1012 { }, 1013 }; 1014 1015 MODULE_DEVICE_TABLE(vmbus, id_table); 1016 1017 static struct hv_driver balloon_drv = { 1018 .name = "hv_balloon", 1019 .id_table = id_table, 1020 .probe = balloon_probe, 1021 .remove = balloon_remove, 1022 }; 1023 1024 static int __init init_balloon_drv(void) 1025 { 1026 1027 return vmbus_driver_register(&balloon_drv); 1028 } 1029 1030 static void exit_balloon_drv(void) 1031 { 1032 1033 vmbus_driver_unregister(&balloon_drv); 1034 } 1035 1036 module_init(init_balloon_drv); 1037 module_exit(exit_balloon_drv); 1038 1039 MODULE_DESCRIPTION("Hyper-V Balloon"); 1040 MODULE_VERSION(HV_DRV_VERSION); 1041 MODULE_LICENSE("GPL"); 1042