1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * VMware Balloon driver. 4 * 5 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved. 6 * 7 * This is VMware physical memory management driver for Linux. The driver 8 * acts like a "balloon" that can be inflated to reclaim physical pages by 9 * reserving them in the guest and invalidating them in the monitor, 10 * freeing up the underlying machine pages so they can be allocated to 11 * other guests. The balloon can also be deflated to allow the guest to 12 * use more physical memory. Higher level policies can control the sizes 13 * of balloons in VMs in order to manage physical memory resources. 14 */ 15 16 //#define DEBUG 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/types.h> 20 #include <linux/kernel.h> 21 #include <linux/mm.h> 22 #include <linux/vmalloc.h> 23 #include <linux/sched.h> 24 #include <linux/module.h> 25 #include <linux/workqueue.h> 26 #include <linux/debugfs.h> 27 #include <linux/seq_file.h> 28 #include <linux/rwsem.h> 29 #include <linux/slab.h> 30 #include <linux/spinlock.h> 31 #include <linux/vmw_vmci_defs.h> 32 #include <linux/vmw_vmci_api.h> 33 #include <asm/hypervisor.h> 34 35 MODULE_AUTHOR("VMware, Inc."); 36 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); 37 MODULE_VERSION("1.5.0.0-k"); 38 MODULE_ALIAS("dmi:*:svnVMware*:*"); 39 MODULE_ALIAS("vmware_vmmemctl"); 40 MODULE_LICENSE("GPL"); 41 42 /* 43 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't allow wait 44 * (__GFP_RECLAIM) for huge page allocations. Use __GFP_NOWARN, to suppress page 45 * allocation failure warnings. Disallow access to emergency low-memory pools. 46 */ 47 #define VMW_HUGE_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \ 48 __GFP_NOMEMALLOC) 49 50 /* 51 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We allow lightweight 52 * reclamation (__GFP_NORETRY). Use __GFP_NOWARN, to suppress page allocation 53 * failure warnings. Disallow access to emergency low-memory pools. 54 */ 55 #define VMW_PAGE_ALLOC_FLAGS (__GFP_HIGHMEM|__GFP_NOWARN| \ 56 __GFP_NOMEMALLOC|__GFP_NORETRY) 57 58 /* Maximum number of refused pages we accumulate during inflation cycle */ 59 #define VMW_BALLOON_MAX_REFUSED 16 60 61 /* 62 * Hypervisor communication port definitions. 63 */ 64 #define VMW_BALLOON_HV_PORT 0x5670 65 #define VMW_BALLOON_HV_MAGIC 0x456c6d6f 66 #define VMW_BALLOON_GUEST_ID 1 /* Linux */ 67 68 enum vmwballoon_capabilities { 69 /* 70 * Bit 0 is reserved and not associated to any capability. 71 */ 72 VMW_BALLOON_BASIC_CMDS = (1 << 1), 73 VMW_BALLOON_BATCHED_CMDS = (1 << 2), 74 VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), 75 VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), 76 }; 77 78 #define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_BASIC_CMDS \ 79 | VMW_BALLOON_BATCHED_CMDS \ 80 | VMW_BALLOON_BATCHED_2M_CMDS \ 81 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) 82 83 #define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT) 84 85 enum vmballoon_page_size_type { 86 VMW_BALLOON_4K_PAGE, 87 VMW_BALLOON_2M_PAGE, 88 VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE 89 }; 90 91 #define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1) 92 93 static const char * const vmballoon_page_size_names[] = { 94 [VMW_BALLOON_4K_PAGE] = "4k", 95 [VMW_BALLOON_2M_PAGE] = "2M" 96 }; 97 98 enum vmballoon_op { 99 VMW_BALLOON_INFLATE, 100 VMW_BALLOON_DEFLATE 101 }; 102 103 enum vmballoon_op_stat_type { 104 VMW_BALLOON_OP_STAT, 105 VMW_BALLOON_OP_FAIL_STAT 106 }; 107 108 #define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1) 109 110 /** 111 * enum vmballoon_cmd_type - backdoor commands. 112 * 113 * Availability of the commands is as followed: 114 * 115 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and 116 * %VMW_BALLOON_CMD_GUEST_ID are always available. 117 * 118 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then 119 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available. 120 * 121 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then 122 * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands 123 * are available. 124 * 125 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then 126 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 127 * are supported. 128 * 129 * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then 130 * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported. 131 * 132 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor. 133 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size. 134 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page. 135 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about 136 * to be deflated from the balloon. 137 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that 138 * runs in the VM. 139 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of 140 * ballooned pages (up to 512). 141 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of 142 * pages that are about to be deflated from the 143 * balloon (up to 512). 144 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK 145 * for 2MB pages. 146 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to 147 * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB 148 * pages. 149 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification 150 * that would be invoked when the balloon 151 * size changes. 152 * @VMW_BALLOON_CMD_LAST: Value of the last command. 153 */ 154 enum vmballoon_cmd_type { 155 VMW_BALLOON_CMD_START, 156 VMW_BALLOON_CMD_GET_TARGET, 157 VMW_BALLOON_CMD_LOCK, 158 VMW_BALLOON_CMD_UNLOCK, 159 VMW_BALLOON_CMD_GUEST_ID, 160 /* No command 5 */ 161 VMW_BALLOON_CMD_BATCHED_LOCK = 6, 162 VMW_BALLOON_CMD_BATCHED_UNLOCK, 163 VMW_BALLOON_CMD_BATCHED_2M_LOCK, 164 VMW_BALLOON_CMD_BATCHED_2M_UNLOCK, 165 VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 166 VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 167 }; 168 169 #define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1) 170 171 enum vmballoon_error_codes { 172 VMW_BALLOON_SUCCESS, 173 VMW_BALLOON_ERROR_CMD_INVALID, 174 VMW_BALLOON_ERROR_PPN_INVALID, 175 VMW_BALLOON_ERROR_PPN_LOCKED, 176 VMW_BALLOON_ERROR_PPN_UNLOCKED, 177 VMW_BALLOON_ERROR_PPN_PINNED, 178 VMW_BALLOON_ERROR_PPN_NOTNEEDED, 179 VMW_BALLOON_ERROR_RESET, 180 VMW_BALLOON_ERROR_BUSY 181 }; 182 183 #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) 184 185 #define VMW_BALLOON_CMD_WITH_TARGET_MASK \ 186 ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \ 187 (1UL << VMW_BALLOON_CMD_LOCK) | \ 188 (1UL << VMW_BALLOON_CMD_UNLOCK) | \ 189 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \ 190 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \ 191 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \ 192 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK)) 193 194 static const char * const vmballoon_cmd_names[] = { 195 [VMW_BALLOON_CMD_START] = "start", 196 [VMW_BALLOON_CMD_GET_TARGET] = "target", 197 [VMW_BALLOON_CMD_LOCK] = "lock", 198 [VMW_BALLOON_CMD_UNLOCK] = "unlock", 199 [VMW_BALLOON_CMD_GUEST_ID] = "guestType", 200 [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock", 201 [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock", 202 [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock", 203 [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock", 204 [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet" 205 }; 206 207 enum vmballoon_stat_page { 208 VMW_BALLOON_PAGE_STAT_ALLOC, 209 VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, 210 VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, 211 VMW_BALLOON_PAGE_STAT_REFUSED_FREE, 212 VMW_BALLOON_PAGE_STAT_FREE, 213 VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE 214 }; 215 216 #define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1) 217 218 enum vmballoon_stat_general { 219 VMW_BALLOON_STAT_TIMER, 220 VMW_BALLOON_STAT_DOORBELL, 221 VMW_BALLOON_STAT_RESET, 222 VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_RESET 223 }; 224 225 #define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1) 226 227 228 static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching); 229 static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled); 230 231 struct vmballoon_ctl { 232 struct list_head pages; 233 struct list_head refused_pages; 234 unsigned int n_refused_pages; 235 unsigned int n_pages; 236 enum vmballoon_page_size_type page_size; 237 enum vmballoon_op op; 238 }; 239 240 struct vmballoon_page_size { 241 /* list of reserved physical pages */ 242 struct list_head pages; 243 }; 244 245 /** 246 * struct vmballoon_batch_entry - a batch entry for lock or unlock. 247 * 248 * @status: the status of the operation, which is written by the hypervisor. 249 * @reserved: reserved for future use. Must be set to zero. 250 * @pfn: the physical frame number of the page to be locked or unlocked. 251 */ 252 struct vmballoon_batch_entry { 253 u64 status : 5; 254 u64 reserved : PAGE_SHIFT - 5; 255 u64 pfn : 52; 256 } __packed; 257 258 struct vmballoon { 259 struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES]; 260 261 /** 262 * @max_page_size: maximum supported page size for ballooning. 263 * 264 * Protected by @conf_sem 265 */ 266 enum vmballoon_page_size_type max_page_size; 267 268 /** 269 * @size: balloon actual size in basic page size (frames). 270 * 271 * While we currently do not support size which is bigger than 32-bit, 272 * in preparation for future support, use 64-bits. 273 */ 274 atomic64_t size; 275 276 /** 277 * @target: balloon target size in basic page size (frames). 278 * 279 * We do not protect the target under the assumption that setting the 280 * value is always done through a single write. If this assumption ever 281 * breaks, we would have to use X_ONCE for accesses, and suffer the less 282 * optimized code. Although we may read stale target value if multiple 283 * accesses happen at once, the performance impact should be minor. 284 */ 285 unsigned long target; 286 287 /** 288 * @reset_required: reset flag 289 * 290 * Setting this flag may introduce races, but the code is expected to 291 * handle them gracefully. In the worst case, another operation will 292 * fail as reset did not take place. Clearing the flag is done while 293 * holding @conf_sem for write. 294 */ 295 bool reset_required; 296 297 /** 298 * @capabilities: hypervisor balloon capabilities. 299 * 300 * Protected by @conf_sem. 301 */ 302 unsigned long capabilities; 303 304 /** 305 * @batch_page: pointer to communication batch page. 306 * 307 * When batching is used, batch_page points to a page, which holds up to 308 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking. 309 */ 310 struct vmballoon_batch_entry *batch_page; 311 312 /** 313 * @batch_max_pages: maximum pages that can be locked/unlocked. 314 * 315 * Indicates the number of pages that the hypervisor can lock or unlock 316 * at once, according to whether batching is enabled. If batching is 317 * disabled, only a single page can be locked/unlock on each operation. 318 * 319 * Protected by @conf_sem. 320 */ 321 unsigned int batch_max_pages; 322 323 /** 324 * @page: page to be locked/unlocked by the hypervisor 325 * 326 * @page is only used when batching is disabled and a single page is 327 * reclaimed on each iteration. 328 * 329 * Protected by @comm_lock. 330 */ 331 struct page *page; 332 333 /* statistics */ 334 struct vmballoon_stats *stats; 335 336 #ifdef CONFIG_DEBUG_FS 337 /* debugfs file exporting statistics */ 338 struct dentry *dbg_entry; 339 #endif 340 341 struct delayed_work dwork; 342 343 /** 344 * @vmci_doorbell. 345 * 346 * Protected by @conf_sem. 347 */ 348 struct vmci_handle vmci_doorbell; 349 350 /** 351 * @conf_sem: semaphore to protect the configuration and the statistics. 352 */ 353 struct rw_semaphore conf_sem; 354 355 /** 356 * @comm_lock: lock to protect the communication with the host. 357 * 358 * Lock ordering: @conf_sem -> @comm_lock . 359 */ 360 spinlock_t comm_lock; 361 }; 362 363 static struct vmballoon balloon; 364 365 struct vmballoon_stats { 366 /* timer / doorbell operations */ 367 atomic64_t general_stat[VMW_BALLOON_STAT_NUM]; 368 369 /* allocation statistics for huge and small pages */ 370 atomic64_t 371 page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES]; 372 373 /* Monitor operations: total operations, and failures */ 374 atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES]; 375 }; 376 377 static inline bool is_vmballoon_stats_on(void) 378 { 379 return IS_ENABLED(CONFIG_DEBUG_FS) && 380 static_branch_unlikely(&balloon_stat_enabled); 381 } 382 383 static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op, 384 enum vmballoon_op_stat_type type) 385 { 386 if (is_vmballoon_stats_on()) 387 atomic64_inc(&b->stats->ops[op][type]); 388 } 389 390 static inline void vmballoon_stats_gen_inc(struct vmballoon *b, 391 enum vmballoon_stat_general stat) 392 { 393 if (is_vmballoon_stats_on()) 394 atomic64_inc(&b->stats->general_stat[stat]); 395 } 396 397 static inline void vmballoon_stats_gen_add(struct vmballoon *b, 398 enum vmballoon_stat_general stat, 399 unsigned int val) 400 { 401 if (is_vmballoon_stats_on()) 402 atomic64_add(val, &b->stats->general_stat[stat]); 403 } 404 405 static inline void vmballoon_stats_page_inc(struct vmballoon *b, 406 enum vmballoon_stat_page stat, 407 enum vmballoon_page_size_type size) 408 { 409 if (is_vmballoon_stats_on()) 410 atomic64_inc(&b->stats->page_stat[stat][size]); 411 } 412 413 static inline void vmballoon_stats_page_add(struct vmballoon *b, 414 enum vmballoon_stat_page stat, 415 enum vmballoon_page_size_type size, 416 unsigned int val) 417 { 418 if (is_vmballoon_stats_on()) 419 atomic64_add(val, &b->stats->page_stat[stat][size]); 420 } 421 422 static inline unsigned long 423 __vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, 424 unsigned long arg2, unsigned long *result) 425 { 426 unsigned long status, dummy1, dummy2, dummy3, local_result; 427 428 vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT); 429 430 asm volatile ("inl %%dx" : 431 "=a"(status), 432 "=c"(dummy1), 433 "=d"(dummy2), 434 "=b"(local_result), 435 "=S"(dummy3) : 436 "0"(VMW_BALLOON_HV_MAGIC), 437 "1"(cmd), 438 "2"(VMW_BALLOON_HV_PORT), 439 "3"(arg1), 440 "4"(arg2) : 441 "memory"); 442 443 /* update the result if needed */ 444 if (result) 445 *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 : 446 local_result; 447 448 /* update target when applicable */ 449 if (status == VMW_BALLOON_SUCCESS && 450 ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK)) 451 WRITE_ONCE(b->target, local_result); 452 453 if (status != VMW_BALLOON_SUCCESS && 454 status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) { 455 vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT); 456 pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n", 457 __func__, vmballoon_cmd_names[cmd], arg1, arg2, 458 status); 459 } 460 461 /* mark reset required accordingly */ 462 if (status == VMW_BALLOON_ERROR_RESET) 463 b->reset_required = true; 464 465 return status; 466 } 467 468 static __always_inline unsigned long 469 vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, 470 unsigned long arg2) 471 { 472 unsigned long dummy; 473 474 return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy); 475 } 476 477 /* 478 * Send "start" command to the host, communicating supported version 479 * of the protocol. 480 */ 481 static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) 482 { 483 unsigned long status, capabilities; 484 485 status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0, 486 &capabilities); 487 488 switch (status) { 489 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: 490 b->capabilities = capabilities; 491 break; 492 case VMW_BALLOON_SUCCESS: 493 b->capabilities = VMW_BALLOON_BASIC_CMDS; 494 break; 495 default: 496 return -EIO; 497 } 498 499 /* 500 * 2MB pages are only supported with batching. If batching is for some 501 * reason disabled, do not use 2MB pages, since otherwise the legacy 502 * mechanism is used with 2MB pages, causing a failure. 503 */ 504 b->max_page_size = VMW_BALLOON_4K_PAGE; 505 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && 506 (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) 507 b->max_page_size = VMW_BALLOON_2M_PAGE; 508 509 510 return 0; 511 } 512 513 /** 514 * vmballoon_send_guest_id - communicate guest type to the host. 515 * 516 * @b: pointer to the balloon. 517 * 518 * Communicate guest type to the host so that it can adjust ballooning 519 * algorithm to the one most appropriate for the guest. This command 520 * is normally issued after sending "start" command and is part of 521 * standard reset sequence. 522 * 523 * Return: zero on success or appropriate error code. 524 */ 525 static int vmballoon_send_guest_id(struct vmballoon *b) 526 { 527 unsigned long status; 528 529 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID, 530 VMW_BALLOON_GUEST_ID, 0); 531 532 return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 533 } 534 535 /** 536 * vmballoon_page_order() - return the order of the page 537 * @page_size: the size of the page. 538 * 539 * Return: the allocation order. 540 */ 541 static inline 542 unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size) 543 { 544 return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0; 545 } 546 547 /** 548 * vmballoon_page_in_frames() - returns the number of frames in a page. 549 * @page_size: the size of the page. 550 * 551 * Return: the number of 4k frames. 552 */ 553 static inline unsigned int 554 vmballoon_page_in_frames(enum vmballoon_page_size_type page_size) 555 { 556 return 1 << vmballoon_page_order(page_size); 557 } 558 559 /** 560 * vmballoon_mark_page_offline() - mark a page as offline 561 * @page: pointer for the page. 562 * @page_size: the size of the page. 563 */ 564 static void 565 vmballoon_mark_page_offline(struct page *page, 566 enum vmballoon_page_size_type page_size) 567 { 568 int i; 569 570 for (i = 0; i < vmballoon_page_in_frames(page_size); i++) 571 __SetPageOffline(page + i); 572 } 573 574 /** 575 * vmballoon_mark_page_online() - mark a page as online 576 * @page: pointer for the page. 577 * @page_size: the size of the page. 578 */ 579 static void 580 vmballoon_mark_page_online(struct page *page, 581 enum vmballoon_page_size_type page_size) 582 { 583 int i; 584 585 for (i = 0; i < vmballoon_page_in_frames(page_size); i++) 586 __ClearPageOffline(page + i); 587 } 588 589 /** 590 * vmballoon_send_get_target() - Retrieve desired balloon size from the host. 591 * 592 * @b: pointer to the balloon. 593 * 594 * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required 595 * by the host-guest protocol and EIO if an error occurred in communicating with 596 * the host. 597 */ 598 static int vmballoon_send_get_target(struct vmballoon *b) 599 { 600 unsigned long status; 601 unsigned long limit; 602 603 limit = totalram_pages(); 604 605 /* Ensure limit fits in 32-bits */ 606 if (limit != (u32)limit) 607 return -EINVAL; 608 609 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0); 610 611 return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 612 } 613 614 /** 615 * vmballoon_alloc_page_list - allocates a list of pages. 616 * 617 * @b: pointer to the balloon. 618 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 619 * @req_n_pages: the number of requested pages. 620 * 621 * Tries to allocate @req_n_pages. Add them to the list of balloon pages in 622 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages. 623 * 624 * Return: zero on success or error code otherwise. 625 */ 626 static int vmballoon_alloc_page_list(struct vmballoon *b, 627 struct vmballoon_ctl *ctl, 628 unsigned int req_n_pages) 629 { 630 struct page *page; 631 unsigned int i; 632 633 for (i = 0; i < req_n_pages; i++) { 634 if (ctl->page_size == VMW_BALLOON_2M_PAGE) 635 page = alloc_pages(VMW_HUGE_PAGE_ALLOC_FLAGS, 636 VMW_BALLOON_2M_ORDER); 637 else 638 page = alloc_page(VMW_PAGE_ALLOC_FLAGS); 639 640 /* Update statistics */ 641 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC, 642 ctl->page_size); 643 644 if (page) { 645 vmballoon_mark_page_offline(page, ctl->page_size); 646 /* Success. Add the page to the list and continue. */ 647 list_add(&page->lru, &ctl->pages); 648 continue; 649 } 650 651 /* Allocation failed. Update statistics and stop. */ 652 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, 653 ctl->page_size); 654 break; 655 } 656 657 ctl->n_pages = i; 658 659 return req_n_pages == ctl->n_pages ? 0 : -ENOMEM; 660 } 661 662 /** 663 * vmballoon_handle_one_result - Handle lock/unlock result for a single page. 664 * 665 * @b: pointer for %struct vmballoon. 666 * @page: pointer for the page whose result should be handled. 667 * @page_size: size of the page. 668 * @status: status of the operation as provided by the hypervisor. 669 */ 670 static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page, 671 enum vmballoon_page_size_type page_size, 672 unsigned long status) 673 { 674 /* On success do nothing. The page is already on the balloon list. */ 675 if (likely(status == VMW_BALLOON_SUCCESS)) 676 return 0; 677 678 pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__, 679 page_to_pfn(page), status, 680 vmballoon_page_size_names[page_size]); 681 682 /* Error occurred */ 683 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, 684 page_size); 685 686 return -EIO; 687 } 688 689 /** 690 * vmballoon_status_page - returns the status of (un)lock operation 691 * 692 * @b: pointer to the balloon. 693 * @idx: index for the page for which the operation is performed. 694 * @p: pointer to where the page struct is returned. 695 * 696 * Following a lock or unlock operation, returns the status of the operation for 697 * an individual page. Provides the page that the operation was performed on on 698 * the @page argument. 699 * 700 * Returns: The status of a lock or unlock operation for an individual page. 701 */ 702 static unsigned long vmballoon_status_page(struct vmballoon *b, int idx, 703 struct page **p) 704 { 705 if (static_branch_likely(&vmw_balloon_batching)) { 706 /* batching mode */ 707 *p = pfn_to_page(b->batch_page[idx].pfn); 708 return b->batch_page[idx].status; 709 } 710 711 /* non-batching mode */ 712 *p = b->page; 713 714 /* 715 * If a failure occurs, the indication will be provided in the status 716 * of the entire operation, which is considered before the individual 717 * page status. So for non-batching mode, the indication is always of 718 * success. 719 */ 720 return VMW_BALLOON_SUCCESS; 721 } 722 723 /** 724 * vmballoon_lock_op - notifies the host about inflated/deflated pages. 725 * @b: pointer to the balloon. 726 * @num_pages: number of inflated/deflated pages. 727 * @page_size: size of the page. 728 * @op: the type of operation (lock or unlock). 729 * 730 * Notify the host about page(s) that were ballooned (or removed from the 731 * balloon) so that host can use it without fear that guest will need it (or 732 * stop using them since the VM does). Host may reject some pages, we need to 733 * check the return value and maybe submit a different page. The pages that are 734 * inflated/deflated are pointed by @b->page. 735 * 736 * Return: result as provided by the hypervisor. 737 */ 738 static unsigned long vmballoon_lock_op(struct vmballoon *b, 739 unsigned int num_pages, 740 enum vmballoon_page_size_type page_size, 741 enum vmballoon_op op) 742 { 743 unsigned long cmd, pfn; 744 745 lockdep_assert_held(&b->comm_lock); 746 747 if (static_branch_likely(&vmw_balloon_batching)) { 748 if (op == VMW_BALLOON_INFLATE) 749 cmd = page_size == VMW_BALLOON_2M_PAGE ? 750 VMW_BALLOON_CMD_BATCHED_2M_LOCK : 751 VMW_BALLOON_CMD_BATCHED_LOCK; 752 else 753 cmd = page_size == VMW_BALLOON_2M_PAGE ? 754 VMW_BALLOON_CMD_BATCHED_2M_UNLOCK : 755 VMW_BALLOON_CMD_BATCHED_UNLOCK; 756 757 pfn = PHYS_PFN(virt_to_phys(b->batch_page)); 758 } else { 759 cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK : 760 VMW_BALLOON_CMD_UNLOCK; 761 pfn = page_to_pfn(b->page); 762 763 /* In non-batching mode, PFNs must fit in 32-bit */ 764 if (unlikely(pfn != (u32)pfn)) 765 return VMW_BALLOON_ERROR_PPN_INVALID; 766 } 767 768 return vmballoon_cmd(b, cmd, pfn, num_pages); 769 } 770 771 /** 772 * vmballoon_add_page - adds a page towards lock/unlock operation. 773 * 774 * @b: pointer to the balloon. 775 * @idx: index of the page to be ballooned in this batch. 776 * @p: pointer to the page that is about to be ballooned. 777 * 778 * Adds the page to be ballooned. Must be called while holding @comm_lock. 779 */ 780 static void vmballoon_add_page(struct vmballoon *b, unsigned int idx, 781 struct page *p) 782 { 783 lockdep_assert_held(&b->comm_lock); 784 785 if (static_branch_likely(&vmw_balloon_batching)) 786 b->batch_page[idx] = (struct vmballoon_batch_entry) 787 { .pfn = page_to_pfn(p) }; 788 else 789 b->page = p; 790 } 791 792 /** 793 * vmballoon_lock - lock or unlock a batch of pages. 794 * 795 * @b: pointer to the balloon. 796 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 797 * 798 * Notifies the host of about ballooned pages (after inflation or deflation, 799 * according to @ctl). If the host rejects the page put it on the 800 * @ctl refuse list. These refused page are then released when moving to the 801 * next size of pages. 802 * 803 * Note that we neither free any @page here nor put them back on the ballooned 804 * pages list. Instead we queue it for later processing. We do that for several 805 * reasons. First, we do not want to free the page under the lock. Second, it 806 * allows us to unify the handling of lock and unlock. In the inflate case, the 807 * caller will check if there are too many refused pages and release them. 808 * Although it is not identical to the past behavior, it should not affect 809 * performance. 810 */ 811 static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl) 812 { 813 unsigned long batch_status; 814 struct page *page; 815 unsigned int i, num_pages; 816 817 num_pages = ctl->n_pages; 818 if (num_pages == 0) 819 return 0; 820 821 /* communication with the host is done under the communication lock */ 822 spin_lock(&b->comm_lock); 823 824 i = 0; 825 list_for_each_entry(page, &ctl->pages, lru) 826 vmballoon_add_page(b, i++, page); 827 828 batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size, 829 ctl->op); 830 831 /* 832 * Iterate over the pages in the provided list. Since we are changing 833 * @ctl->n_pages we are saving the original value in @num_pages and 834 * use this value to bound the loop. 835 */ 836 for (i = 0; i < num_pages; i++) { 837 unsigned long status; 838 839 status = vmballoon_status_page(b, i, &page); 840 841 /* 842 * Failure of the whole batch overrides a single operation 843 * results. 844 */ 845 if (batch_status != VMW_BALLOON_SUCCESS) 846 status = batch_status; 847 848 /* Continue if no error happened */ 849 if (!vmballoon_handle_one_result(b, page, ctl->page_size, 850 status)) 851 continue; 852 853 /* 854 * Error happened. Move the pages to the refused list and update 855 * the pages number. 856 */ 857 list_move(&page->lru, &ctl->refused_pages); 858 ctl->n_pages--; 859 ctl->n_refused_pages++; 860 } 861 862 spin_unlock(&b->comm_lock); 863 864 return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 865 } 866 867 /** 868 * vmballoon_release_page_list() - Releases a page list 869 * 870 * @page_list: list of pages to release. 871 * @n_pages: pointer to the number of pages. 872 * @page_size: whether the pages in the list are 2MB (or else 4KB). 873 * 874 * Releases the list of pages and zeros the number of pages. 875 */ 876 static void vmballoon_release_page_list(struct list_head *page_list, 877 int *n_pages, 878 enum vmballoon_page_size_type page_size) 879 { 880 struct page *page, *tmp; 881 882 list_for_each_entry_safe(page, tmp, page_list, lru) { 883 list_del(&page->lru); 884 vmballoon_mark_page_online(page, page_size); 885 __free_pages(page, vmballoon_page_order(page_size)); 886 } 887 888 *n_pages = 0; 889 } 890 891 892 /* 893 * Release pages that were allocated while attempting to inflate the 894 * balloon but were refused by the host for one reason or another. 895 */ 896 static void vmballoon_release_refused_pages(struct vmballoon *b, 897 struct vmballoon_ctl *ctl) 898 { 899 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE, 900 ctl->page_size); 901 902 vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages, 903 ctl->page_size); 904 } 905 906 /** 907 * vmballoon_change - retrieve the required balloon change 908 * 909 * @b: pointer for the balloon. 910 * 911 * Return: the required change for the balloon size. A positive number 912 * indicates inflation, a negative number indicates a deflation. 913 */ 914 static int64_t vmballoon_change(struct vmballoon *b) 915 { 916 int64_t size, target; 917 918 size = atomic64_read(&b->size); 919 target = READ_ONCE(b->target); 920 921 /* 922 * We must cast first because of int sizes 923 * Otherwise we might get huge positives instead of negatives 924 */ 925 926 if (b->reset_required) 927 return 0; 928 929 /* consider a 2MB slack on deflate, unless the balloon is emptied */ 930 if (target < size && target != 0 && 931 size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)) 932 return 0; 933 934 return target - size; 935 } 936 937 /** 938 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation. 939 * 940 * @b: pointer to balloon. 941 * @pages: list of pages to enqueue. 942 * @n_pages: pointer to number of pages in list. The value is zeroed. 943 * @page_size: whether the pages are 2MB or 4KB pages. 944 * 945 * Enqueues the provides list of pages in the ballooned page list, clears the 946 * list and zeroes the number of pages that was provided. 947 */ 948 static void vmballoon_enqueue_page_list(struct vmballoon *b, 949 struct list_head *pages, 950 unsigned int *n_pages, 951 enum vmballoon_page_size_type page_size) 952 { 953 struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size]; 954 955 list_splice_init(pages, &page_size_info->pages); 956 *n_pages = 0; 957 } 958 959 /** 960 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation. 961 * 962 * @b: pointer to balloon. 963 * @pages: list of pages to enqueue. 964 * @n_pages: pointer to number of pages in list. The value is zeroed. 965 * @page_size: whether the pages are 2MB or 4KB pages. 966 * @n_req_pages: the number of requested pages. 967 * 968 * Dequeues the number of requested pages from the balloon for deflation. The 969 * number of dequeued pages may be lower, if not enough pages in the requested 970 * size are available. 971 */ 972 static void vmballoon_dequeue_page_list(struct vmballoon *b, 973 struct list_head *pages, 974 unsigned int *n_pages, 975 enum vmballoon_page_size_type page_size, 976 unsigned int n_req_pages) 977 { 978 struct vmballoon_page_size *page_size_info = &b->page_sizes[page_size]; 979 struct page *page, *tmp; 980 unsigned int i = 0; 981 982 list_for_each_entry_safe(page, tmp, &page_size_info->pages, lru) { 983 list_move(&page->lru, pages); 984 if (++i == n_req_pages) 985 break; 986 } 987 *n_pages = i; 988 } 989 990 /** 991 * vmballoon_inflate() - Inflate the balloon towards its target size. 992 * 993 * @b: pointer to the balloon. 994 */ 995 static void vmballoon_inflate(struct vmballoon *b) 996 { 997 int64_t to_inflate_frames; 998 struct vmballoon_ctl ctl = { 999 .pages = LIST_HEAD_INIT(ctl.pages), 1000 .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), 1001 .page_size = b->max_page_size, 1002 .op = VMW_BALLOON_INFLATE 1003 }; 1004 1005 while ((to_inflate_frames = vmballoon_change(b)) > 0) { 1006 unsigned int to_inflate_pages, page_in_frames; 1007 int alloc_error, lock_error = 0; 1008 1009 VM_BUG_ON(!list_empty(&ctl.pages)); 1010 VM_BUG_ON(ctl.n_pages != 0); 1011 1012 page_in_frames = vmballoon_page_in_frames(ctl.page_size); 1013 1014 to_inflate_pages = min_t(unsigned long, b->batch_max_pages, 1015 DIV_ROUND_UP_ULL(to_inflate_frames, 1016 page_in_frames)); 1017 1018 /* Start by allocating */ 1019 alloc_error = vmballoon_alloc_page_list(b, &ctl, 1020 to_inflate_pages); 1021 1022 /* Actually lock the pages by telling the hypervisor */ 1023 lock_error = vmballoon_lock(b, &ctl); 1024 1025 /* 1026 * If an error indicates that something serious went wrong, 1027 * stop the inflation. 1028 */ 1029 if (lock_error) 1030 break; 1031 1032 /* Update the balloon size */ 1033 atomic64_add(ctl.n_pages * page_in_frames, &b->size); 1034 1035 vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages, 1036 ctl.page_size); 1037 1038 /* 1039 * If allocation failed or the number of refused pages exceeds 1040 * the maximum allowed, move to the next page size. 1041 */ 1042 if (alloc_error || 1043 ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) { 1044 if (ctl.page_size == VMW_BALLOON_4K_PAGE) 1045 break; 1046 1047 /* 1048 * Ignore errors from locking as we now switch to 4k 1049 * pages and we might get different errors. 1050 */ 1051 vmballoon_release_refused_pages(b, &ctl); 1052 ctl.page_size--; 1053 } 1054 1055 cond_resched(); 1056 } 1057 1058 /* 1059 * Release pages that were allocated while attempting to inflate the 1060 * balloon but were refused by the host for one reason or another, 1061 * and update the statistics. 1062 */ 1063 if (ctl.n_refused_pages != 0) 1064 vmballoon_release_refused_pages(b, &ctl); 1065 } 1066 1067 /** 1068 * vmballoon_deflate() - Decrease the size of the balloon. 1069 * 1070 * @b: pointer to the balloon 1071 * @n_frames: the number of frames to deflate. If zero, automatically 1072 * calculated according to the target size. 1073 * @coordinated: whether to coordinate with the host 1074 * 1075 * Decrease the size of the balloon allowing guest to use more memory. 1076 * 1077 * Return: The number of deflated frames (i.e., basic page size units) 1078 */ 1079 static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames, 1080 bool coordinated) 1081 { 1082 unsigned long deflated_frames = 0; 1083 unsigned long tried_frames = 0; 1084 struct vmballoon_ctl ctl = { 1085 .pages = LIST_HEAD_INIT(ctl.pages), 1086 .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), 1087 .page_size = VMW_BALLOON_4K_PAGE, 1088 .op = VMW_BALLOON_DEFLATE 1089 }; 1090 1091 /* free pages to reach target */ 1092 while (true) { 1093 unsigned int to_deflate_pages, n_unlocked_frames; 1094 unsigned int page_in_frames; 1095 int64_t to_deflate_frames; 1096 bool deflated_all; 1097 1098 page_in_frames = vmballoon_page_in_frames(ctl.page_size); 1099 1100 VM_BUG_ON(!list_empty(&ctl.pages)); 1101 VM_BUG_ON(ctl.n_pages); 1102 VM_BUG_ON(!list_empty(&ctl.refused_pages)); 1103 VM_BUG_ON(ctl.n_refused_pages); 1104 1105 /* 1106 * If we were requested a specific number of frames, we try to 1107 * deflate this number of frames. Otherwise, deflation is 1108 * performed according to the target and balloon size. 1109 */ 1110 to_deflate_frames = n_frames ? n_frames - tried_frames : 1111 -vmballoon_change(b); 1112 1113 /* break if no work to do */ 1114 if (to_deflate_frames <= 0) 1115 break; 1116 1117 /* 1118 * Calculate the number of frames based on current page size, 1119 * but limit the deflated frames to a single chunk 1120 */ 1121 to_deflate_pages = min_t(unsigned long, b->batch_max_pages, 1122 DIV_ROUND_UP_ULL(to_deflate_frames, 1123 page_in_frames)); 1124 1125 /* First take the pages from the balloon pages. */ 1126 vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages, 1127 ctl.page_size, to_deflate_pages); 1128 1129 /* 1130 * Before pages are moving to the refused list, count their 1131 * frames as frames that we tried to deflate. 1132 */ 1133 tried_frames += ctl.n_pages * page_in_frames; 1134 1135 /* 1136 * Unlock the pages by communicating with the hypervisor if the 1137 * communication is coordinated (i.e., not pop). We ignore the 1138 * return code. Instead we check if all the pages we manage to 1139 * unlock all the pages. If we failed, we will move to the next 1140 * page size, and would eventually try again later. 1141 */ 1142 if (coordinated) 1143 vmballoon_lock(b, &ctl); 1144 1145 /* 1146 * Check if we deflated enough. We will move to the next page 1147 * size if we did not manage to do so. This calculation takes 1148 * place now, as once the pages are released, the number of 1149 * pages is zeroed. 1150 */ 1151 deflated_all = (ctl.n_pages == to_deflate_pages); 1152 1153 /* Update local and global counters */ 1154 n_unlocked_frames = ctl.n_pages * page_in_frames; 1155 atomic64_sub(n_unlocked_frames, &b->size); 1156 deflated_frames += n_unlocked_frames; 1157 1158 vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE, 1159 ctl.page_size, ctl.n_pages); 1160 1161 /* free the ballooned pages */ 1162 vmballoon_release_page_list(&ctl.pages, &ctl.n_pages, 1163 ctl.page_size); 1164 1165 /* Return the refused pages to the ballooned list. */ 1166 vmballoon_enqueue_page_list(b, &ctl.refused_pages, 1167 &ctl.n_refused_pages, 1168 ctl.page_size); 1169 1170 /* If we failed to unlock all the pages, move to next size. */ 1171 if (!deflated_all) { 1172 if (ctl.page_size == b->max_page_size) 1173 break; 1174 ctl.page_size++; 1175 } 1176 1177 cond_resched(); 1178 } 1179 1180 return deflated_frames; 1181 } 1182 1183 /** 1184 * vmballoon_deinit_batching - disables batching mode. 1185 * 1186 * @b: pointer to &struct vmballoon. 1187 * 1188 * Disables batching, by deallocating the page for communication with the 1189 * hypervisor and disabling the static key to indicate that batching is off. 1190 */ 1191 static void vmballoon_deinit_batching(struct vmballoon *b) 1192 { 1193 free_page((unsigned long)b->batch_page); 1194 b->batch_page = NULL; 1195 static_branch_disable(&vmw_balloon_batching); 1196 b->batch_max_pages = 1; 1197 } 1198 1199 /** 1200 * vmballoon_init_batching - enable batching mode. 1201 * 1202 * @b: pointer to &struct vmballoon. 1203 * 1204 * Enables batching, by allocating a page for communication with the hypervisor 1205 * and enabling the static_key to use batching. 1206 * 1207 * Return: zero on success or an appropriate error-code. 1208 */ 1209 static int vmballoon_init_batching(struct vmballoon *b) 1210 { 1211 struct page *page; 1212 1213 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 1214 if (!page) 1215 return -ENOMEM; 1216 1217 b->batch_page = page_address(page); 1218 b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry); 1219 1220 static_branch_enable(&vmw_balloon_batching); 1221 1222 return 0; 1223 } 1224 1225 /* 1226 * Receive notification and resize balloon 1227 */ 1228 static void vmballoon_doorbell(void *client_data) 1229 { 1230 struct vmballoon *b = client_data; 1231 1232 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL); 1233 1234 mod_delayed_work(system_freezable_wq, &b->dwork, 0); 1235 } 1236 1237 /* 1238 * Clean up vmci doorbell 1239 */ 1240 static void vmballoon_vmci_cleanup(struct vmballoon *b) 1241 { 1242 vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 1243 VMCI_INVALID_ID, VMCI_INVALID_ID); 1244 1245 if (!vmci_handle_is_invalid(b->vmci_doorbell)) { 1246 vmci_doorbell_destroy(b->vmci_doorbell); 1247 b->vmci_doorbell = VMCI_INVALID_HANDLE; 1248 } 1249 } 1250 1251 /** 1252 * vmballoon_vmci_init - Initialize vmci doorbell. 1253 * 1254 * @b: pointer to the balloon. 1255 * 1256 * Return: zero on success or when wakeup command not supported. Error-code 1257 * otherwise. 1258 * 1259 * Initialize vmci doorbell, to get notified as soon as balloon changes. 1260 */ 1261 static int vmballoon_vmci_init(struct vmballoon *b) 1262 { 1263 unsigned long error; 1264 1265 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) 1266 return 0; 1267 1268 error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, 1269 VMCI_PRIVILEGE_FLAG_RESTRICTED, 1270 vmballoon_doorbell, b); 1271 1272 if (error != VMCI_SUCCESS) 1273 goto fail; 1274 1275 error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 1276 b->vmci_doorbell.context, 1277 b->vmci_doorbell.resource, NULL); 1278 1279 if (error != VMW_BALLOON_SUCCESS) 1280 goto fail; 1281 1282 return 0; 1283 fail: 1284 vmballoon_vmci_cleanup(b); 1285 return -EIO; 1286 } 1287 1288 /** 1289 * vmballoon_pop - Quickly release all pages allocate for the balloon. 1290 * 1291 * @b: pointer to the balloon. 1292 * 1293 * This function is called when host decides to "reset" balloon for one reason 1294 * or another. Unlike normal "deflate" we do not (shall not) notify host of the 1295 * pages being released. 1296 */ 1297 static void vmballoon_pop(struct vmballoon *b) 1298 { 1299 unsigned long size; 1300 1301 while ((size = atomic64_read(&b->size))) 1302 vmballoon_deflate(b, size, false); 1303 } 1304 1305 /* 1306 * Perform standard reset sequence by popping the balloon (in case it 1307 * is not empty) and then restarting protocol. This operation normally 1308 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. 1309 */ 1310 static void vmballoon_reset(struct vmballoon *b) 1311 { 1312 int error; 1313 1314 down_write(&b->conf_sem); 1315 1316 vmballoon_vmci_cleanup(b); 1317 1318 /* free all pages, skipping monitor unlock */ 1319 vmballoon_pop(b); 1320 1321 if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) 1322 return; 1323 1324 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { 1325 if (vmballoon_init_batching(b)) { 1326 /* 1327 * We failed to initialize batching, inform the monitor 1328 * about it by sending a null capability. 1329 * 1330 * The guest will retry in one second. 1331 */ 1332 vmballoon_send_start(b, 0); 1333 return; 1334 } 1335 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { 1336 vmballoon_deinit_batching(b); 1337 } 1338 1339 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET); 1340 b->reset_required = false; 1341 1342 error = vmballoon_vmci_init(b); 1343 if (error) 1344 pr_err("failed to initialize vmci doorbell\n"); 1345 1346 if (vmballoon_send_guest_id(b)) 1347 pr_err("failed to send guest ID to the host\n"); 1348 1349 up_write(&b->conf_sem); 1350 } 1351 1352 /** 1353 * vmballoon_work - periodic balloon worker for reset, inflation and deflation. 1354 * 1355 * @work: pointer to the &work_struct which is provided by the workqueue. 1356 * 1357 * Resets the protocol if needed, gets the new size and adjusts balloon as 1358 * needed. Repeat in 1 sec. 1359 */ 1360 static void vmballoon_work(struct work_struct *work) 1361 { 1362 struct delayed_work *dwork = to_delayed_work(work); 1363 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); 1364 int64_t change = 0; 1365 1366 if (b->reset_required) 1367 vmballoon_reset(b); 1368 1369 down_read(&b->conf_sem); 1370 1371 /* 1372 * Update the stats while holding the semaphore to ensure that 1373 * @stats_enabled is consistent with whether the stats are actually 1374 * enabled 1375 */ 1376 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER); 1377 1378 if (!vmballoon_send_get_target(b)) 1379 change = vmballoon_change(b); 1380 1381 if (change != 0) { 1382 pr_debug("%s - size: %llu, target %lu\n", __func__, 1383 atomic64_read(&b->size), READ_ONCE(b->target)); 1384 1385 if (change > 0) 1386 vmballoon_inflate(b); 1387 else /* (change < 0) */ 1388 vmballoon_deflate(b, 0, true); 1389 } 1390 1391 up_read(&b->conf_sem); 1392 1393 /* 1394 * We are using a freezable workqueue so that balloon operations are 1395 * stopped while the system transitions to/from sleep/hibernation. 1396 */ 1397 queue_delayed_work(system_freezable_wq, 1398 dwork, round_jiffies_relative(HZ)); 1399 1400 } 1401 1402 /* 1403 * DEBUGFS Interface 1404 */ 1405 #ifdef CONFIG_DEBUG_FS 1406 1407 static const char * const vmballoon_stat_page_names[] = { 1408 [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc", 1409 [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail", 1410 [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc", 1411 [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree", 1412 [VMW_BALLOON_PAGE_STAT_FREE] = "free" 1413 }; 1414 1415 static const char * const vmballoon_stat_names[] = { 1416 [VMW_BALLOON_STAT_TIMER] = "timer", 1417 [VMW_BALLOON_STAT_DOORBELL] = "doorbell", 1418 [VMW_BALLOON_STAT_RESET] = "reset", 1419 }; 1420 1421 static int vmballoon_enable_stats(struct vmballoon *b) 1422 { 1423 int r = 0; 1424 1425 down_write(&b->conf_sem); 1426 1427 /* did we somehow race with another reader which enabled stats? */ 1428 if (b->stats) 1429 goto out; 1430 1431 b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL); 1432 1433 if (!b->stats) { 1434 /* allocation failed */ 1435 r = -ENOMEM; 1436 goto out; 1437 } 1438 static_key_enable(&balloon_stat_enabled.key); 1439 out: 1440 up_write(&b->conf_sem); 1441 return r; 1442 } 1443 1444 /** 1445 * vmballoon_debug_show - shows statistics of balloon operations. 1446 * @f: pointer to the &struct seq_file. 1447 * @offset: ignored. 1448 * 1449 * Provides the statistics that can be accessed in vmmemctl in the debugfs. 1450 * To avoid the overhead - mainly that of memory - of collecting the statistics, 1451 * we only collect statistics after the first time the counters are read. 1452 * 1453 * Return: zero on success or an error code. 1454 */ 1455 static int vmballoon_debug_show(struct seq_file *f, void *offset) 1456 { 1457 struct vmballoon *b = f->private; 1458 int i, j; 1459 1460 /* enables stats if they are disabled */ 1461 if (!b->stats) { 1462 int r = vmballoon_enable_stats(b); 1463 1464 if (r) 1465 return r; 1466 } 1467 1468 /* format capabilities info */ 1469 seq_printf(f, "%-22s: %#16x\n", "balloon capabilities", 1470 VMW_BALLOON_CAPABILITIES); 1471 seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities); 1472 seq_printf(f, "%-22s: %16s\n", "is resetting", 1473 b->reset_required ? "y" : "n"); 1474 1475 /* format size info */ 1476 seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target)); 1477 seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size)); 1478 1479 for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) { 1480 if (vmballoon_cmd_names[i] == NULL) 1481 continue; 1482 1483 seq_printf(f, "%-22s: %16llu (%llu failed)\n", 1484 vmballoon_cmd_names[i], 1485 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]), 1486 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT])); 1487 } 1488 1489 for (i = 0; i < VMW_BALLOON_STAT_NUM; i++) 1490 seq_printf(f, "%-22s: %16llu\n", 1491 vmballoon_stat_names[i], 1492 atomic64_read(&b->stats->general_stat[i])); 1493 1494 for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) { 1495 for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++) 1496 seq_printf(f, "%-18s(%s): %16llu\n", 1497 vmballoon_stat_page_names[i], 1498 vmballoon_page_size_names[j], 1499 atomic64_read(&b->stats->page_stat[i][j])); 1500 } 1501 1502 return 0; 1503 } 1504 1505 DEFINE_SHOW_ATTRIBUTE(vmballoon_debug); 1506 1507 static int __init vmballoon_debugfs_init(struct vmballoon *b) 1508 { 1509 int error; 1510 1511 b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, 1512 &vmballoon_debug_fops); 1513 if (IS_ERR(b->dbg_entry)) { 1514 error = PTR_ERR(b->dbg_entry); 1515 pr_err("failed to create debugfs entry, error: %d\n", error); 1516 return error; 1517 } 1518 1519 return 0; 1520 } 1521 1522 static void __exit vmballoon_debugfs_exit(struct vmballoon *b) 1523 { 1524 static_key_disable(&balloon_stat_enabled.key); 1525 debugfs_remove(b->dbg_entry); 1526 kfree(b->stats); 1527 b->stats = NULL; 1528 } 1529 1530 #else 1531 1532 static inline int vmballoon_debugfs_init(struct vmballoon *b) 1533 { 1534 return 0; 1535 } 1536 1537 static inline void vmballoon_debugfs_exit(struct vmballoon *b) 1538 { 1539 } 1540 1541 #endif /* CONFIG_DEBUG_FS */ 1542 1543 static int __init vmballoon_init(void) 1544 { 1545 enum vmballoon_page_size_type page_size; 1546 int error; 1547 1548 /* 1549 * Check if we are running on VMware's hypervisor and bail out 1550 * if we are not. 1551 */ 1552 if (x86_hyper_type != X86_HYPER_VMWARE) 1553 return -ENODEV; 1554 1555 for (page_size = VMW_BALLOON_4K_PAGE; 1556 page_size <= VMW_BALLOON_LAST_SIZE; page_size++) 1557 INIT_LIST_HEAD(&balloon.page_sizes[page_size].pages); 1558 1559 1560 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); 1561 1562 error = vmballoon_debugfs_init(&balloon); 1563 if (error) 1564 return error; 1565 1566 spin_lock_init(&balloon.comm_lock); 1567 init_rwsem(&balloon.conf_sem); 1568 balloon.vmci_doorbell = VMCI_INVALID_HANDLE; 1569 balloon.batch_page = NULL; 1570 balloon.page = NULL; 1571 balloon.reset_required = true; 1572 1573 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); 1574 1575 return 0; 1576 } 1577 1578 /* 1579 * Using late_initcall() instead of module_init() allows the balloon to use the 1580 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the 1581 * VMCI is probed only after the balloon is initialized. If the balloon is used 1582 * as a module, late_initcall() is equivalent to module_init(). 1583 */ 1584 late_initcall(vmballoon_init); 1585 1586 static void __exit vmballoon_exit(void) 1587 { 1588 vmballoon_vmci_cleanup(&balloon); 1589 cancel_delayed_work_sync(&balloon.dwork); 1590 1591 vmballoon_debugfs_exit(&balloon); 1592 1593 /* 1594 * Deallocate all reserved memory, and reset connection with monitor. 1595 * Reset connection before deallocating memory to avoid potential for 1596 * additional spurious resets from guest touching deallocated pages. 1597 */ 1598 vmballoon_send_start(&balloon, 0); 1599 vmballoon_pop(&balloon); 1600 } 1601 module_exit(vmballoon_exit); 1602