1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * VMware Balloon driver. 4 * 5 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved. 6 * 7 * This is VMware physical memory management driver for Linux. The driver 8 * acts like a "balloon" that can be inflated to reclaim physical pages by 9 * reserving them in the guest and invalidating them in the monitor, 10 * freeing up the underlying machine pages so they can be allocated to 11 * other guests. The balloon can also be deflated to allow the guest to 12 * use more physical memory. Higher level policies can control the sizes 13 * of balloons in VMs in order to manage physical memory resources. 14 */ 15 16 //#define DEBUG 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 19 #include <linux/types.h> 20 #include <linux/io.h> 21 #include <linux/kernel.h> 22 #include <linux/mm.h> 23 #include <linux/vmalloc.h> 24 #include <linux/sched.h> 25 #include <linux/module.h> 26 #include <linux/workqueue.h> 27 #include <linux/debugfs.h> 28 #include <linux/seq_file.h> 29 #include <linux/rwsem.h> 30 #include <linux/slab.h> 31 #include <linux/spinlock.h> 32 #include <linux/balloon.h> 33 #include <linux/vmw_vmci_defs.h> 34 #include <linux/vmw_vmci_api.h> 35 #include <asm/hypervisor.h> 36 37 MODULE_AUTHOR("VMware, Inc."); 38 MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver"); 39 MODULE_ALIAS("dmi:*:svnVMware*:*"); 40 MODULE_ALIAS("vmware_vmmemctl"); 41 MODULE_LICENSE("GPL"); 42 43 static bool __read_mostly vmwballoon_shrinker_enable; 44 module_param(vmwballoon_shrinker_enable, bool, 0444); 45 MODULE_PARM_DESC(vmwballoon_shrinker_enable, 46 "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance."); 47 48 /* Delay in seconds after shrink before inflation. */ 49 #define VMBALLOON_SHRINK_DELAY (5) 50 51 /* Maximum number of refused pages we accumulate during inflation cycle */ 52 #define VMW_BALLOON_MAX_REFUSED 16 53 54 /* Magic number for the balloon mount-point */ 55 #define BALLOON_VMW_MAGIC 0x0ba11007 56 57 /* 58 * Hypervisor communication port definitions. 59 */ 60 #define VMW_BALLOON_HV_PORT 0x5670 61 #define VMW_BALLOON_HV_MAGIC 0x456c6d6f 62 #define VMW_BALLOON_GUEST_ID 1 /* Linux */ 63 64 enum vmwballoon_capabilities { 65 /* 66 * Bit 0 is reserved and not associated to any capability. 67 */ 68 VMW_BALLOON_BASIC_CMDS = (1 << 1), 69 VMW_BALLOON_BATCHED_CMDS = (1 << 2), 70 VMW_BALLOON_BATCHED_2M_CMDS = (1 << 3), 71 VMW_BALLOON_SIGNALLED_WAKEUP_CMD = (1 << 4), 72 VMW_BALLOON_64_BIT_TARGET = (1 << 5) 73 }; 74 75 #define VMW_BALLOON_CAPABILITIES_COMMON (VMW_BALLOON_BASIC_CMDS \ 76 | VMW_BALLOON_BATCHED_CMDS \ 77 | VMW_BALLOON_BATCHED_2M_CMDS \ 78 | VMW_BALLOON_SIGNALLED_WAKEUP_CMD) 79 80 #define VMW_BALLOON_2M_ORDER (PMD_SHIFT - PAGE_SHIFT) 81 82 /* 83 * 64-bit targets are only supported in 64-bit 84 */ 85 #ifdef CONFIG_64BIT 86 #define VMW_BALLOON_CAPABILITIES (VMW_BALLOON_CAPABILITIES_COMMON \ 87 | VMW_BALLOON_64_BIT_TARGET) 88 #else 89 #define VMW_BALLOON_CAPABILITIES VMW_BALLOON_CAPABILITIES_COMMON 90 #endif 91 92 enum vmballoon_page_size_type { 93 VMW_BALLOON_4K_PAGE, 94 VMW_BALLOON_2M_PAGE, 95 VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE 96 }; 97 98 #define VMW_BALLOON_NUM_PAGE_SIZES (VMW_BALLOON_LAST_SIZE + 1) 99 100 static const char * const vmballoon_page_size_names[] = { 101 [VMW_BALLOON_4K_PAGE] = "4k", 102 [VMW_BALLOON_2M_PAGE] = "2M" 103 }; 104 105 enum vmballoon_op { 106 VMW_BALLOON_INFLATE, 107 VMW_BALLOON_DEFLATE 108 }; 109 110 enum vmballoon_op_stat_type { 111 VMW_BALLOON_OP_STAT, 112 VMW_BALLOON_OP_FAIL_STAT 113 }; 114 115 #define VMW_BALLOON_OP_STAT_TYPES (VMW_BALLOON_OP_FAIL_STAT + 1) 116 117 /** 118 * enum vmballoon_cmd_type - backdoor commands. 119 * 120 * Availability of the commands is as followed: 121 * 122 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and 123 * %VMW_BALLOON_CMD_GUEST_ID are always available. 124 * 125 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then 126 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available. 127 * 128 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then 129 * %VMW_BALLOON_CMD_BATCHED_LOCK and VMW_BALLOON_CMD_BATCHED_UNLOCK commands 130 * are available. 131 * 132 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then 133 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK 134 * are supported. 135 * 136 * If the host reports VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then 137 * VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported. 138 * 139 * @VMW_BALLOON_CMD_START: Communicating supported version with the hypervisor. 140 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size. 141 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page. 142 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about 143 * to be deflated from the balloon. 144 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that 145 * runs in the VM. 146 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of 147 * ballooned pages (up to 512). 148 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of 149 * pages that are about to be deflated from the 150 * balloon (up to 512). 151 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK 152 * for 2MB pages. 153 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to 154 * @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB 155 * pages. 156 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification 157 * that would be invoked when the balloon 158 * size changes. 159 * @VMW_BALLOON_CMD_LAST: Value of the last command. 160 */ 161 enum vmballoon_cmd_type { 162 VMW_BALLOON_CMD_START, 163 VMW_BALLOON_CMD_GET_TARGET, 164 VMW_BALLOON_CMD_LOCK, 165 VMW_BALLOON_CMD_UNLOCK, 166 VMW_BALLOON_CMD_GUEST_ID, 167 /* No command 5 */ 168 VMW_BALLOON_CMD_BATCHED_LOCK = 6, 169 VMW_BALLOON_CMD_BATCHED_UNLOCK, 170 VMW_BALLOON_CMD_BATCHED_2M_LOCK, 171 VMW_BALLOON_CMD_BATCHED_2M_UNLOCK, 172 VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 173 VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 174 }; 175 176 #define VMW_BALLOON_CMD_NUM (VMW_BALLOON_CMD_LAST + 1) 177 178 enum vmballoon_error_codes { 179 VMW_BALLOON_SUCCESS, 180 VMW_BALLOON_ERROR_CMD_INVALID, 181 VMW_BALLOON_ERROR_PPN_INVALID, 182 VMW_BALLOON_ERROR_PPN_LOCKED, 183 VMW_BALLOON_ERROR_PPN_UNLOCKED, 184 VMW_BALLOON_ERROR_PPN_PINNED, 185 VMW_BALLOON_ERROR_PPN_NOTNEEDED, 186 VMW_BALLOON_ERROR_RESET, 187 VMW_BALLOON_ERROR_BUSY 188 }; 189 190 #define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES (0x03000000) 191 192 #define VMW_BALLOON_CMD_WITH_TARGET_MASK \ 193 ((1UL << VMW_BALLOON_CMD_GET_TARGET) | \ 194 (1UL << VMW_BALLOON_CMD_LOCK) | \ 195 (1UL << VMW_BALLOON_CMD_UNLOCK) | \ 196 (1UL << VMW_BALLOON_CMD_BATCHED_LOCK) | \ 197 (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK) | \ 198 (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK) | \ 199 (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK)) 200 201 static const char * const vmballoon_cmd_names[] = { 202 [VMW_BALLOON_CMD_START] = "start", 203 [VMW_BALLOON_CMD_GET_TARGET] = "target", 204 [VMW_BALLOON_CMD_LOCK] = "lock", 205 [VMW_BALLOON_CMD_UNLOCK] = "unlock", 206 [VMW_BALLOON_CMD_GUEST_ID] = "guestType", 207 [VMW_BALLOON_CMD_BATCHED_LOCK] = "batchLock", 208 [VMW_BALLOON_CMD_BATCHED_UNLOCK] = "batchUnlock", 209 [VMW_BALLOON_CMD_BATCHED_2M_LOCK] = "2m-lock", 210 [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK] = "2m-unlock", 211 [VMW_BALLOON_CMD_VMCI_DOORBELL_SET] = "doorbellSet" 212 }; 213 214 enum vmballoon_stat_page { 215 VMW_BALLOON_PAGE_STAT_ALLOC, 216 VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, 217 VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, 218 VMW_BALLOON_PAGE_STAT_REFUSED_FREE, 219 VMW_BALLOON_PAGE_STAT_FREE, 220 VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE 221 }; 222 223 #define VMW_BALLOON_PAGE_STAT_NUM (VMW_BALLOON_PAGE_STAT_LAST + 1) 224 225 enum vmballoon_stat_general { 226 VMW_BALLOON_STAT_TIMER, 227 VMW_BALLOON_STAT_DOORBELL, 228 VMW_BALLOON_STAT_RESET, 229 VMW_BALLOON_STAT_SHRINK, 230 VMW_BALLOON_STAT_SHRINK_FREE, 231 VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE 232 }; 233 234 #define VMW_BALLOON_STAT_NUM (VMW_BALLOON_STAT_LAST + 1) 235 236 static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching); 237 static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled); 238 239 struct vmballoon_ctl { 240 struct list_head pages; 241 struct list_head refused_pages; 242 struct list_head prealloc_pages; 243 unsigned int n_refused_pages; 244 unsigned int n_pages; 245 enum vmballoon_page_size_type page_size; 246 enum vmballoon_op op; 247 }; 248 249 /** 250 * struct vmballoon_batch_entry - a batch entry for lock or unlock. 251 * 252 * @status: the status of the operation, which is written by the hypervisor. 253 * @reserved: reserved for future use. Must be set to zero. 254 * @pfn: the physical frame number of the page to be locked or unlocked. 255 */ 256 struct vmballoon_batch_entry { 257 u64 status : 5; 258 u64 reserved : PAGE_SHIFT - 5; 259 u64 pfn : 52; 260 } __packed; 261 262 struct vmballoon { 263 /** 264 * @max_page_size: maximum supported page size for ballooning. 265 * 266 * Protected by @conf_sem 267 */ 268 enum vmballoon_page_size_type max_page_size; 269 270 /** 271 * @size: balloon actual size in basic page size (frames). 272 * 273 * While we currently do not support size which is bigger than 32-bit, 274 * in preparation for future support, use 64-bits. 275 */ 276 atomic64_t size; 277 278 /** 279 * @target: balloon target size in basic page size (frames). 280 * 281 * We do not protect the target under the assumption that setting the 282 * value is always done through a single write. If this assumption ever 283 * breaks, we would have to use X_ONCE for accesses, and suffer the less 284 * optimized code. Although we may read stale target value if multiple 285 * accesses happen at once, the performance impact should be minor. 286 */ 287 unsigned long target; 288 289 /** 290 * @reset_required: reset flag 291 * 292 * Setting this flag may introduce races, but the code is expected to 293 * handle them gracefully. In the worst case, another operation will 294 * fail as reset did not take place. Clearing the flag is done while 295 * holding @conf_sem for write. 296 */ 297 bool reset_required; 298 299 /** 300 * @capabilities: hypervisor balloon capabilities. 301 * 302 * Protected by @conf_sem. 303 */ 304 unsigned long capabilities; 305 306 /** 307 * @batch_page: pointer to communication batch page. 308 * 309 * When batching is used, batch_page points to a page, which holds up to 310 * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking. 311 */ 312 struct vmballoon_batch_entry *batch_page; 313 314 /** 315 * @batch_max_pages: maximum pages that can be locked/unlocked. 316 * 317 * Indicates the number of pages that the hypervisor can lock or unlock 318 * at once, according to whether batching is enabled. If batching is 319 * disabled, only a single page can be locked/unlock on each operation. 320 * 321 * Protected by @conf_sem. 322 */ 323 unsigned int batch_max_pages; 324 325 /** 326 * @page: page to be locked/unlocked by the hypervisor 327 * 328 * @page is only used when batching is disabled and a single page is 329 * reclaimed on each iteration. 330 * 331 * Protected by @comm_lock. 332 */ 333 struct page *page; 334 335 /** 336 * @shrink_timeout: timeout until the next inflation. 337 * 338 * After an shrink event, indicates the time in jiffies after which 339 * inflation is allowed again. Can be written concurrently with reads, 340 * so must use READ_ONCE/WRITE_ONCE when accessing. 341 */ 342 unsigned long shrink_timeout; 343 344 /* statistics */ 345 struct vmballoon_stats *stats; 346 347 /** 348 * @b_dev_info: balloon device information descriptor. 349 */ 350 struct balloon_dev_info b_dev_info; 351 352 struct delayed_work dwork; 353 354 /** 355 * @huge_pages - list of the inflated 2MB pages. 356 * 357 * Protected by @huge_pages_lock. 358 */ 359 struct list_head huge_pages; 360 361 /** 362 * @huge_pages_lock: lock for the list of inflated 2MB pages. 363 */ 364 spinlock_t huge_pages_lock; 365 366 /** 367 * @vmci_doorbell. 368 * 369 * Protected by @conf_sem. 370 */ 371 struct vmci_handle vmci_doorbell; 372 373 /** 374 * @conf_sem: semaphore to protect the configuration and the statistics. 375 */ 376 struct rw_semaphore conf_sem; 377 378 /** 379 * @comm_lock: lock to protect the communication with the host. 380 * 381 * Lock ordering: @conf_sem -> @comm_lock . 382 */ 383 spinlock_t comm_lock; 384 385 /** 386 * @shrinker: shrinker interface that is used to avoid over-inflation. 387 */ 388 struct shrinker *shrinker; 389 }; 390 391 static struct vmballoon balloon; 392 393 struct vmballoon_stats { 394 /* timer / doorbell operations */ 395 atomic64_t general_stat[VMW_BALLOON_STAT_NUM]; 396 397 /* allocation statistics for huge and small pages */ 398 atomic64_t 399 page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES]; 400 401 /* Monitor operations: total operations, and failures */ 402 atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES]; 403 }; 404 405 static inline bool is_vmballoon_stats_on(void) 406 { 407 return IS_ENABLED(CONFIG_DEBUG_FS) && 408 static_branch_unlikely(&balloon_stat_enabled); 409 } 410 411 static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op, 412 enum vmballoon_op_stat_type type) 413 { 414 if (is_vmballoon_stats_on()) 415 atomic64_inc(&b->stats->ops[op][type]); 416 } 417 418 static inline void vmballoon_stats_gen_inc(struct vmballoon *b, 419 enum vmballoon_stat_general stat) 420 { 421 if (is_vmballoon_stats_on()) 422 atomic64_inc(&b->stats->general_stat[stat]); 423 } 424 425 static inline void vmballoon_stats_gen_add(struct vmballoon *b, 426 enum vmballoon_stat_general stat, 427 unsigned int val) 428 { 429 if (is_vmballoon_stats_on()) 430 atomic64_add(val, &b->stats->general_stat[stat]); 431 } 432 433 static inline void vmballoon_stats_page_inc(struct vmballoon *b, 434 enum vmballoon_stat_page stat, 435 enum vmballoon_page_size_type size) 436 { 437 if (is_vmballoon_stats_on()) 438 atomic64_inc(&b->stats->page_stat[stat][size]); 439 } 440 441 static inline void vmballoon_stats_page_add(struct vmballoon *b, 442 enum vmballoon_stat_page stat, 443 enum vmballoon_page_size_type size, 444 unsigned int val) 445 { 446 if (is_vmballoon_stats_on()) 447 atomic64_add(val, &b->stats->page_stat[stat][size]); 448 } 449 450 static inline unsigned long 451 __vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, 452 unsigned long arg2, unsigned long *result) 453 { 454 unsigned long status, dummy1, dummy2, dummy3, local_result; 455 456 vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT); 457 458 asm volatile ("inl %%dx" : 459 "=a"(status), 460 "=c"(dummy1), 461 "=d"(dummy2), 462 "=b"(local_result), 463 "=S"(dummy3) : 464 "0"(VMW_BALLOON_HV_MAGIC), 465 "1"(cmd), 466 "2"(VMW_BALLOON_HV_PORT), 467 "3"(arg1), 468 "4"(arg2) : 469 "memory"); 470 471 /* update the result if needed */ 472 if (result) 473 *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 : 474 local_result; 475 476 /* update target when applicable */ 477 if (status == VMW_BALLOON_SUCCESS && 478 ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK)) 479 WRITE_ONCE(b->target, local_result); 480 481 if (status != VMW_BALLOON_SUCCESS && 482 status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) { 483 vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT); 484 pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n", 485 __func__, vmballoon_cmd_names[cmd], arg1, arg2, 486 status); 487 } 488 489 /* mark reset required accordingly */ 490 if (status == VMW_BALLOON_ERROR_RESET) 491 b->reset_required = true; 492 493 return status; 494 } 495 496 static __always_inline unsigned long 497 vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1, 498 unsigned long arg2) 499 { 500 unsigned long dummy; 501 502 return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy); 503 } 504 505 /* 506 * Send "start" command to the host, communicating supported version 507 * of the protocol. 508 */ 509 static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps) 510 { 511 unsigned long status, capabilities; 512 513 status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0, 514 &capabilities); 515 516 switch (status) { 517 case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES: 518 b->capabilities = capabilities; 519 break; 520 case VMW_BALLOON_SUCCESS: 521 b->capabilities = VMW_BALLOON_BASIC_CMDS; 522 break; 523 default: 524 return -EIO; 525 } 526 527 /* 528 * 2MB pages are only supported with batching. If batching is for some 529 * reason disabled, do not use 2MB pages, since otherwise the legacy 530 * mechanism is used with 2MB pages, causing a failure. 531 */ 532 b->max_page_size = VMW_BALLOON_4K_PAGE; 533 if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) && 534 (b->capabilities & VMW_BALLOON_BATCHED_CMDS)) 535 b->max_page_size = VMW_BALLOON_2M_PAGE; 536 537 538 return 0; 539 } 540 541 /** 542 * vmballoon_send_guest_id - communicate guest type to the host. 543 * 544 * @b: pointer to the balloon. 545 * 546 * Communicate guest type to the host so that it can adjust ballooning 547 * algorithm to the one most appropriate for the guest. This command 548 * is normally issued after sending "start" command and is part of 549 * standard reset sequence. 550 * 551 * Return: zero on success or appropriate error code. 552 */ 553 static int vmballoon_send_guest_id(struct vmballoon *b) 554 { 555 unsigned long status; 556 557 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID, 558 VMW_BALLOON_GUEST_ID, 0); 559 560 return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 561 } 562 563 /** 564 * vmballoon_page_order() - return the order of the page 565 * @page_size: the size of the page. 566 * 567 * Return: the allocation order. 568 */ 569 static inline 570 unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size) 571 { 572 return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0; 573 } 574 575 /** 576 * vmballoon_page_in_frames() - returns the number of frames in a page. 577 * @page_size: the size of the page. 578 * 579 * Return: the number of 4k frames. 580 */ 581 static inline unsigned int 582 vmballoon_page_in_frames(enum vmballoon_page_size_type page_size) 583 { 584 return 1 << vmballoon_page_order(page_size); 585 } 586 587 /** 588 * vmballoon_mark_page_offline() - mark a page as offline 589 * @page: pointer for the page. 590 * @page_size: the size of the page. 591 */ 592 static void 593 vmballoon_mark_page_offline(struct page *page, 594 enum vmballoon_page_size_type page_size) 595 { 596 int i; 597 598 for (i = 0; i < vmballoon_page_in_frames(page_size); i++) 599 __SetPageOffline(page + i); 600 } 601 602 /** 603 * vmballoon_mark_page_online() - mark a page as online 604 * @page: pointer for the page. 605 * @page_size: the size of the page. 606 */ 607 static void 608 vmballoon_mark_page_online(struct page *page, 609 enum vmballoon_page_size_type page_size) 610 { 611 int i; 612 613 for (i = 0; i < vmballoon_page_in_frames(page_size); i++) 614 __ClearPageOffline(page + i); 615 } 616 617 /** 618 * vmballoon_send_get_target() - Retrieve desired balloon size from the host. 619 * 620 * @b: pointer to the balloon. 621 * 622 * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required 623 * by the host-guest protocol and EIO if an error occurred in communicating with 624 * the host. 625 */ 626 static int vmballoon_send_get_target(struct vmballoon *b) 627 { 628 unsigned long status; 629 unsigned long limit; 630 631 limit = totalram_pages(); 632 633 /* Ensure limit fits in 32-bits if 64-bit targets are not supported */ 634 if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) && 635 limit != (u32)limit) 636 return -EINVAL; 637 638 status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0); 639 640 return status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 641 } 642 643 /** 644 * vmballoon_alloc_page_list - allocates a list of pages. 645 * 646 * @b: pointer to the balloon. 647 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 648 * @req_n_pages: the number of requested pages. 649 * 650 * Tries to allocate @req_n_pages. Add them to the list of balloon pages in 651 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages. 652 * 653 * Return: zero on success or error code otherwise. 654 */ 655 static int vmballoon_alloc_page_list(struct vmballoon *b, 656 struct vmballoon_ctl *ctl, 657 unsigned int req_n_pages) 658 { 659 struct page *page; 660 unsigned int i; 661 662 for (i = 0; i < req_n_pages; i++) { 663 /* 664 * First check if we happen to have pages that were allocated 665 * before. This happens when 2MB page rejected during inflation 666 * by the hypervisor, and then split into 4KB pages. 667 */ 668 if (!list_empty(&ctl->prealloc_pages)) { 669 page = list_first_entry(&ctl->prealloc_pages, 670 struct page, lru); 671 list_del(&page->lru); 672 } else { 673 if (ctl->page_size == VMW_BALLOON_2M_PAGE) 674 page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN| 675 __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER); 676 else 677 page = balloon_page_alloc(); 678 679 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC, 680 ctl->page_size); 681 } 682 683 if (page) { 684 /* Success. Add the page to the list and continue. */ 685 list_add(&page->lru, &ctl->pages); 686 continue; 687 } 688 689 /* Allocation failed. Update statistics and stop. */ 690 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL, 691 ctl->page_size); 692 break; 693 } 694 695 ctl->n_pages = i; 696 697 return req_n_pages == ctl->n_pages ? 0 : -ENOMEM; 698 } 699 700 /** 701 * vmballoon_handle_one_result - Handle lock/unlock result for a single page. 702 * 703 * @b: pointer for %struct vmballoon. 704 * @page: pointer for the page whose result should be handled. 705 * @page_size: size of the page. 706 * @status: status of the operation as provided by the hypervisor. 707 */ 708 static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page, 709 enum vmballoon_page_size_type page_size, 710 unsigned long status) 711 { 712 /* On success do nothing. The page is already on the balloon list. */ 713 if (likely(status == VMW_BALLOON_SUCCESS)) 714 return 0; 715 716 pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__, 717 page_to_pfn(page), status, 718 vmballoon_page_size_names[page_size]); 719 720 /* Error occurred */ 721 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC, 722 page_size); 723 724 return -EIO; 725 } 726 727 /** 728 * vmballoon_status_page - returns the status of (un)lock operation 729 * 730 * @b: pointer to the balloon. 731 * @idx: index for the page for which the operation is performed. 732 * @p: pointer to where the page struct is returned. 733 * 734 * Following a lock or unlock operation, returns the status of the operation for 735 * an individual page. Provides the page that the operation was performed on on 736 * the @page argument. 737 * 738 * Returns: The status of a lock or unlock operation for an individual page. 739 */ 740 static unsigned long vmballoon_status_page(struct vmballoon *b, int idx, 741 struct page **p) 742 { 743 if (static_branch_likely(&vmw_balloon_batching)) { 744 /* batching mode */ 745 *p = pfn_to_page(b->batch_page[idx].pfn); 746 return b->batch_page[idx].status; 747 } 748 749 /* non-batching mode */ 750 *p = b->page; 751 752 /* 753 * If a failure occurs, the indication will be provided in the status 754 * of the entire operation, which is considered before the individual 755 * page status. So for non-batching mode, the indication is always of 756 * success. 757 */ 758 return VMW_BALLOON_SUCCESS; 759 } 760 761 /** 762 * vmballoon_lock_op - notifies the host about inflated/deflated pages. 763 * @b: pointer to the balloon. 764 * @num_pages: number of inflated/deflated pages. 765 * @page_size: size of the page. 766 * @op: the type of operation (lock or unlock). 767 * 768 * Notify the host about page(s) that were ballooned (or removed from the 769 * balloon) so that host can use it without fear that guest will need it (or 770 * stop using them since the VM does). Host may reject some pages, we need to 771 * check the return value and maybe submit a different page. The pages that are 772 * inflated/deflated are pointed by @b->page. 773 * 774 * Return: result as provided by the hypervisor. 775 */ 776 static unsigned long vmballoon_lock_op(struct vmballoon *b, 777 unsigned int num_pages, 778 enum vmballoon_page_size_type page_size, 779 enum vmballoon_op op) 780 { 781 unsigned long cmd, pfn; 782 783 lockdep_assert_held(&b->comm_lock); 784 785 if (static_branch_likely(&vmw_balloon_batching)) { 786 if (op == VMW_BALLOON_INFLATE) 787 cmd = page_size == VMW_BALLOON_2M_PAGE ? 788 VMW_BALLOON_CMD_BATCHED_2M_LOCK : 789 VMW_BALLOON_CMD_BATCHED_LOCK; 790 else 791 cmd = page_size == VMW_BALLOON_2M_PAGE ? 792 VMW_BALLOON_CMD_BATCHED_2M_UNLOCK : 793 VMW_BALLOON_CMD_BATCHED_UNLOCK; 794 795 pfn = PHYS_PFN(virt_to_phys(b->batch_page)); 796 } else { 797 cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK : 798 VMW_BALLOON_CMD_UNLOCK; 799 pfn = page_to_pfn(b->page); 800 801 /* In non-batching mode, PFNs must fit in 32-bit */ 802 if (unlikely(pfn != (u32)pfn)) 803 return VMW_BALLOON_ERROR_PPN_INVALID; 804 } 805 806 return vmballoon_cmd(b, cmd, pfn, num_pages); 807 } 808 809 /** 810 * vmballoon_add_page - adds a page towards lock/unlock operation. 811 * 812 * @b: pointer to the balloon. 813 * @idx: index of the page to be ballooned in this batch. 814 * @p: pointer to the page that is about to be ballooned. 815 * 816 * Adds the page to be ballooned. Must be called while holding @comm_lock. 817 */ 818 static void vmballoon_add_page(struct vmballoon *b, unsigned int idx, 819 struct page *p) 820 { 821 lockdep_assert_held(&b->comm_lock); 822 823 if (static_branch_likely(&vmw_balloon_batching)) 824 b->batch_page[idx] = (struct vmballoon_batch_entry) 825 { .pfn = page_to_pfn(p) }; 826 else 827 b->page = p; 828 } 829 830 /** 831 * vmballoon_lock - lock or unlock a batch of pages. 832 * 833 * @b: pointer to the balloon. 834 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 835 * 836 * Notifies the host of about ballooned pages (after inflation or deflation, 837 * according to @ctl). If the host rejects the page put it on the 838 * @ctl refuse list. These refused page are then released when moving to the 839 * next size of pages. 840 * 841 * Note that we neither free any @page here nor put them back on the ballooned 842 * pages list. Instead we queue it for later processing. We do that for several 843 * reasons. First, we do not want to free the page under the lock. Second, it 844 * allows us to unify the handling of lock and unlock. In the inflate case, the 845 * caller will check if there are too many refused pages and release them. 846 * Although it is not identical to the past behavior, it should not affect 847 * performance. 848 */ 849 static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl) 850 { 851 unsigned long batch_status; 852 struct page *page; 853 unsigned int i, num_pages; 854 855 num_pages = ctl->n_pages; 856 if (num_pages == 0) 857 return 0; 858 859 /* communication with the host is done under the communication lock */ 860 spin_lock(&b->comm_lock); 861 862 i = 0; 863 list_for_each_entry(page, &ctl->pages, lru) 864 vmballoon_add_page(b, i++, page); 865 866 batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size, 867 ctl->op); 868 869 /* 870 * Iterate over the pages in the provided list. Since we are changing 871 * @ctl->n_pages we are saving the original value in @num_pages and 872 * use this value to bound the loop. 873 */ 874 for (i = 0; i < num_pages; i++) { 875 unsigned long status; 876 877 status = vmballoon_status_page(b, i, &page); 878 879 /* 880 * Failure of the whole batch overrides a single operation 881 * results. 882 */ 883 if (batch_status != VMW_BALLOON_SUCCESS) 884 status = batch_status; 885 886 /* Continue if no error happened */ 887 if (!vmballoon_handle_one_result(b, page, ctl->page_size, 888 status)) 889 continue; 890 891 /* 892 * Error happened. Move the pages to the refused list and update 893 * the pages number. 894 */ 895 list_move(&page->lru, &ctl->refused_pages); 896 ctl->n_pages--; 897 ctl->n_refused_pages++; 898 } 899 900 spin_unlock(&b->comm_lock); 901 902 return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO; 903 } 904 905 /** 906 * vmballoon_release_page_list() - Releases a page list 907 * 908 * @page_list: list of pages to release. 909 * @n_pages: pointer to the number of pages. 910 * @page_size: whether the pages in the list are 2MB (or else 4KB). 911 * 912 * Releases the list of pages and zeros the number of pages. 913 */ 914 static void vmballoon_release_page_list(struct list_head *page_list, 915 int *n_pages, 916 enum vmballoon_page_size_type page_size) 917 { 918 struct page *page, *tmp; 919 920 list_for_each_entry_safe(page, tmp, page_list, lru) { 921 list_del(&page->lru); 922 __free_pages(page, vmballoon_page_order(page_size)); 923 } 924 925 if (n_pages) 926 *n_pages = 0; 927 } 928 929 930 /* 931 * Release pages that were allocated while attempting to inflate the 932 * balloon but were refused by the host for one reason or another. 933 */ 934 static void vmballoon_release_refused_pages(struct vmballoon *b, 935 struct vmballoon_ctl *ctl) 936 { 937 vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE, 938 ctl->page_size); 939 940 vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages, 941 ctl->page_size); 942 } 943 944 /** 945 * vmballoon_change - retrieve the required balloon change 946 * 947 * @b: pointer for the balloon. 948 * 949 * Return: the required change for the balloon size. A positive number 950 * indicates inflation, a negative number indicates a deflation. 951 */ 952 static int64_t vmballoon_change(struct vmballoon *b) 953 { 954 int64_t size, target; 955 956 size = atomic64_read(&b->size); 957 target = READ_ONCE(b->target); 958 959 /* 960 * We must cast first because of int sizes 961 * Otherwise we might get huge positives instead of negatives 962 */ 963 964 if (b->reset_required) 965 return 0; 966 967 /* consider a 2MB slack on deflate, unless the balloon is emptied */ 968 if (target < size && target != 0 && 969 size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)) 970 return 0; 971 972 /* If an out-of-memory recently occurred, inflation is disallowed. */ 973 if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout))) 974 return 0; 975 976 return target - size; 977 } 978 979 /** 980 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation. 981 * 982 * @b: pointer to balloon. 983 * @pages: list of pages to enqueue. 984 * @n_pages: pointer to number of pages in list. The value is zeroed. 985 * @page_size: whether the pages are 2MB or 4KB pages. 986 * 987 * Enqueues the provides list of pages in the ballooned page list, clears the 988 * list and zeroes the number of pages that was provided. 989 */ 990 static void vmballoon_enqueue_page_list(struct vmballoon *b, 991 struct list_head *pages, 992 unsigned int *n_pages, 993 enum vmballoon_page_size_type page_size) 994 { 995 struct page *page; 996 997 if (page_size == VMW_BALLOON_4K_PAGE) { 998 balloon_page_list_enqueue(&b->b_dev_info, pages); 999 } else { 1000 /* 1001 * Keep the huge pages in a local list which is not available 1002 * for the balloon page migration. 1003 */ 1004 spin_lock(&b->huge_pages_lock); 1005 1006 list_for_each_entry(page, pages, lru) { 1007 vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE); 1008 } 1009 1010 list_splice_init(pages, &b->huge_pages); 1011 __count_vm_events(BALLOON_INFLATE, *n_pages * 1012 vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)); 1013 spin_unlock(&b->huge_pages_lock); 1014 } 1015 1016 *n_pages = 0; 1017 } 1018 1019 /** 1020 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation. 1021 * 1022 * @b: pointer to balloon. 1023 * @pages: list of pages to enqueue. 1024 * @n_pages: pointer to number of pages in list. The value is zeroed. 1025 * @page_size: whether the pages are 2MB or 4KB pages. 1026 * @n_req_pages: the number of requested pages. 1027 * 1028 * Dequeues the number of requested pages from the balloon for deflation. The 1029 * number of dequeued pages may be lower, if not enough pages in the requested 1030 * size are available. 1031 */ 1032 static void vmballoon_dequeue_page_list(struct vmballoon *b, 1033 struct list_head *pages, 1034 unsigned int *n_pages, 1035 enum vmballoon_page_size_type page_size, 1036 unsigned int n_req_pages) 1037 { 1038 struct page *page, *tmp; 1039 unsigned int i = 0; 1040 1041 /* In the case of 4k pages, use the compaction infrastructure */ 1042 if (page_size == VMW_BALLOON_4K_PAGE) { 1043 *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages, 1044 n_req_pages); 1045 return; 1046 } 1047 1048 /* 2MB pages */ 1049 spin_lock(&b->huge_pages_lock); 1050 list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) { 1051 vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE); 1052 1053 list_move(&page->lru, pages); 1054 if (++i == n_req_pages) 1055 break; 1056 } 1057 1058 __count_vm_events(BALLOON_DEFLATE, 1059 i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE)); 1060 spin_unlock(&b->huge_pages_lock); 1061 *n_pages = i; 1062 } 1063 1064 /** 1065 * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k. 1066 * 1067 * If inflation of 2MB pages was denied by the hypervisor, it is likely to be 1068 * due to one or few 4KB pages. These 2MB pages may keep being allocated and 1069 * then being refused. To prevent this case, this function splits the refused 1070 * pages into 4KB pages and adds them into @prealloc_pages list. 1071 * 1072 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation. 1073 */ 1074 static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl) 1075 { 1076 struct page *page, *tmp; 1077 unsigned int i, order; 1078 1079 order = vmballoon_page_order(ctl->page_size); 1080 1081 list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) { 1082 list_del(&page->lru); 1083 split_page(page, order); 1084 for (i = 0; i < (1 << order); i++) 1085 list_add(&page[i].lru, &ctl->prealloc_pages); 1086 } 1087 ctl->n_refused_pages = 0; 1088 } 1089 1090 /** 1091 * vmballoon_inflate() - Inflate the balloon towards its target size. 1092 * 1093 * @b: pointer to the balloon. 1094 */ 1095 static void vmballoon_inflate(struct vmballoon *b) 1096 { 1097 int64_t to_inflate_frames; 1098 struct vmballoon_ctl ctl = { 1099 .pages = LIST_HEAD_INIT(ctl.pages), 1100 .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), 1101 .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages), 1102 .page_size = b->max_page_size, 1103 .op = VMW_BALLOON_INFLATE 1104 }; 1105 1106 while ((to_inflate_frames = vmballoon_change(b)) > 0) { 1107 unsigned int to_inflate_pages, page_in_frames; 1108 int alloc_error, lock_error = 0; 1109 1110 VM_BUG_ON(!list_empty(&ctl.pages)); 1111 VM_BUG_ON(ctl.n_pages != 0); 1112 1113 page_in_frames = vmballoon_page_in_frames(ctl.page_size); 1114 1115 to_inflate_pages = min_t(unsigned long, b->batch_max_pages, 1116 DIV_ROUND_UP_ULL(to_inflate_frames, 1117 page_in_frames)); 1118 1119 /* Start by allocating */ 1120 alloc_error = vmballoon_alloc_page_list(b, &ctl, 1121 to_inflate_pages); 1122 1123 /* Actually lock the pages by telling the hypervisor */ 1124 lock_error = vmballoon_lock(b, &ctl); 1125 1126 /* 1127 * If an error indicates that something serious went wrong, 1128 * stop the inflation. 1129 */ 1130 if (lock_error) 1131 break; 1132 1133 /* Update the balloon size */ 1134 atomic64_add(ctl.n_pages * page_in_frames, &b->size); 1135 1136 vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages, 1137 ctl.page_size); 1138 1139 /* 1140 * If allocation failed or the number of refused pages exceeds 1141 * the maximum allowed, move to the next page size. 1142 */ 1143 if (alloc_error || 1144 ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) { 1145 if (ctl.page_size == VMW_BALLOON_4K_PAGE) 1146 break; 1147 1148 /* 1149 * Split the refused pages to 4k. This will also empty 1150 * the refused pages list. 1151 */ 1152 vmballoon_split_refused_pages(&ctl); 1153 ctl.page_size--; 1154 } 1155 1156 cond_resched(); 1157 } 1158 1159 /* 1160 * Release pages that were allocated while attempting to inflate the 1161 * balloon but were refused by the host for one reason or another, 1162 * and update the statistics. 1163 */ 1164 if (ctl.n_refused_pages != 0) 1165 vmballoon_release_refused_pages(b, &ctl); 1166 1167 vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size); 1168 } 1169 1170 /** 1171 * vmballoon_deflate() - Decrease the size of the balloon. 1172 * 1173 * @b: pointer to the balloon 1174 * @n_frames: the number of frames to deflate. If zero, automatically 1175 * calculated according to the target size. 1176 * @coordinated: whether to coordinate with the host 1177 * 1178 * Decrease the size of the balloon allowing guest to use more memory. 1179 * 1180 * Return: The number of deflated frames (i.e., basic page size units) 1181 */ 1182 static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames, 1183 bool coordinated) 1184 { 1185 unsigned long deflated_frames = 0; 1186 unsigned long tried_frames = 0; 1187 struct vmballoon_ctl ctl = { 1188 .pages = LIST_HEAD_INIT(ctl.pages), 1189 .refused_pages = LIST_HEAD_INIT(ctl.refused_pages), 1190 .page_size = VMW_BALLOON_4K_PAGE, 1191 .op = VMW_BALLOON_DEFLATE 1192 }; 1193 1194 /* free pages to reach target */ 1195 while (true) { 1196 unsigned int to_deflate_pages, n_unlocked_frames; 1197 unsigned int page_in_frames; 1198 int64_t to_deflate_frames; 1199 bool deflated_all; 1200 1201 page_in_frames = vmballoon_page_in_frames(ctl.page_size); 1202 1203 VM_BUG_ON(!list_empty(&ctl.pages)); 1204 VM_BUG_ON(ctl.n_pages); 1205 VM_BUG_ON(!list_empty(&ctl.refused_pages)); 1206 VM_BUG_ON(ctl.n_refused_pages); 1207 1208 /* 1209 * If we were requested a specific number of frames, we try to 1210 * deflate this number of frames. Otherwise, deflation is 1211 * performed according to the target and balloon size. 1212 */ 1213 to_deflate_frames = n_frames ? n_frames - tried_frames : 1214 -vmballoon_change(b); 1215 1216 /* break if no work to do */ 1217 if (to_deflate_frames <= 0) 1218 break; 1219 1220 /* 1221 * Calculate the number of frames based on current page size, 1222 * but limit the deflated frames to a single chunk 1223 */ 1224 to_deflate_pages = min_t(unsigned long, b->batch_max_pages, 1225 DIV_ROUND_UP_ULL(to_deflate_frames, 1226 page_in_frames)); 1227 1228 /* First take the pages from the balloon pages. */ 1229 vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages, 1230 ctl.page_size, to_deflate_pages); 1231 1232 /* 1233 * Before pages are moving to the refused list, count their 1234 * frames as frames that we tried to deflate. 1235 */ 1236 tried_frames += ctl.n_pages * page_in_frames; 1237 1238 /* 1239 * Unlock the pages by communicating with the hypervisor if the 1240 * communication is coordinated (i.e., not pop). We ignore the 1241 * return code. Instead we check if all the pages we manage to 1242 * unlock all the pages. If we failed, we will move to the next 1243 * page size, and would eventually try again later. 1244 */ 1245 if (coordinated) 1246 vmballoon_lock(b, &ctl); 1247 1248 /* 1249 * Check if we deflated enough. We will move to the next page 1250 * size if we did not manage to do so. This calculation takes 1251 * place now, as once the pages are released, the number of 1252 * pages is zeroed. 1253 */ 1254 deflated_all = (ctl.n_pages == to_deflate_pages); 1255 1256 /* Update local and global counters */ 1257 n_unlocked_frames = ctl.n_pages * page_in_frames; 1258 atomic64_sub(n_unlocked_frames, &b->size); 1259 deflated_frames += n_unlocked_frames; 1260 1261 vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE, 1262 ctl.page_size, ctl.n_pages); 1263 1264 /* free the ballooned pages */ 1265 vmballoon_release_page_list(&ctl.pages, &ctl.n_pages, 1266 ctl.page_size); 1267 1268 /* Return the refused pages to the ballooned list. */ 1269 vmballoon_enqueue_page_list(b, &ctl.refused_pages, 1270 &ctl.n_refused_pages, 1271 ctl.page_size); 1272 1273 /* If we failed to unlock all the pages, move to next size. */ 1274 if (!deflated_all) { 1275 if (ctl.page_size == b->max_page_size) 1276 break; 1277 ctl.page_size++; 1278 } 1279 1280 cond_resched(); 1281 } 1282 1283 return deflated_frames; 1284 } 1285 1286 /** 1287 * vmballoon_deinit_batching - disables batching mode. 1288 * 1289 * @b: pointer to &struct vmballoon. 1290 * 1291 * Disables batching, by deallocating the page for communication with the 1292 * hypervisor and disabling the static key to indicate that batching is off. 1293 */ 1294 static void vmballoon_deinit_batching(struct vmballoon *b) 1295 { 1296 free_page((unsigned long)b->batch_page); 1297 b->batch_page = NULL; 1298 static_branch_disable(&vmw_balloon_batching); 1299 b->batch_max_pages = 1; 1300 } 1301 1302 /** 1303 * vmballoon_init_batching - enable batching mode. 1304 * 1305 * @b: pointer to &struct vmballoon. 1306 * 1307 * Enables batching, by allocating a page for communication with the hypervisor 1308 * and enabling the static_key to use batching. 1309 * 1310 * Return: zero on success or an appropriate error-code. 1311 */ 1312 static int vmballoon_init_batching(struct vmballoon *b) 1313 { 1314 struct page *page; 1315 1316 page = alloc_page(GFP_KERNEL | __GFP_ZERO); 1317 if (!page) 1318 return -ENOMEM; 1319 1320 b->batch_page = page_address(page); 1321 b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry); 1322 1323 static_branch_enable(&vmw_balloon_batching); 1324 1325 return 0; 1326 } 1327 1328 /* 1329 * Receive notification and resize balloon 1330 */ 1331 static void vmballoon_doorbell(void *client_data) 1332 { 1333 struct vmballoon *b = client_data; 1334 1335 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL); 1336 1337 mod_delayed_work(system_freezable_wq, &b->dwork, 0); 1338 } 1339 1340 /* 1341 * Clean up vmci doorbell 1342 */ 1343 static void vmballoon_vmci_cleanup(struct vmballoon *b) 1344 { 1345 vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 1346 VMCI_INVALID_ID, VMCI_INVALID_ID); 1347 1348 if (!vmci_handle_is_invalid(b->vmci_doorbell)) { 1349 vmci_doorbell_destroy(b->vmci_doorbell); 1350 b->vmci_doorbell = VMCI_INVALID_HANDLE; 1351 } 1352 } 1353 1354 /** 1355 * vmballoon_vmci_init - Initialize vmci doorbell. 1356 * 1357 * @b: pointer to the balloon. 1358 * 1359 * Return: zero on success or when wakeup command not supported. Error-code 1360 * otherwise. 1361 * 1362 * Initialize vmci doorbell, to get notified as soon as balloon changes. 1363 */ 1364 static int vmballoon_vmci_init(struct vmballoon *b) 1365 { 1366 unsigned long error; 1367 1368 if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0) 1369 return 0; 1370 1371 error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB, 1372 VMCI_PRIVILEGE_FLAG_RESTRICTED, 1373 vmballoon_doorbell, b); 1374 1375 if (error != VMCI_SUCCESS) 1376 goto fail; 1377 1378 error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET, 1379 b->vmci_doorbell.context, 1380 b->vmci_doorbell.resource, NULL); 1381 1382 if (error != VMW_BALLOON_SUCCESS) 1383 goto fail; 1384 1385 return 0; 1386 fail: 1387 vmballoon_vmci_cleanup(b); 1388 return -EIO; 1389 } 1390 1391 /** 1392 * vmballoon_pop - Quickly release all pages allocate for the balloon. 1393 * 1394 * @b: pointer to the balloon. 1395 * 1396 * This function is called when host decides to "reset" balloon for one reason 1397 * or another. Unlike normal "deflate" we do not (shall not) notify host of the 1398 * pages being released. 1399 */ 1400 static void vmballoon_pop(struct vmballoon *b) 1401 { 1402 unsigned long size; 1403 1404 while ((size = atomic64_read(&b->size))) 1405 vmballoon_deflate(b, size, false); 1406 } 1407 1408 /* 1409 * Perform standard reset sequence by popping the balloon (in case it 1410 * is not empty) and then restarting protocol. This operation normally 1411 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command. 1412 */ 1413 static void vmballoon_reset(struct vmballoon *b) 1414 { 1415 int error; 1416 1417 down_write(&b->conf_sem); 1418 1419 vmballoon_vmci_cleanup(b); 1420 1421 /* free all pages, skipping monitor unlock */ 1422 vmballoon_pop(b); 1423 1424 if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES)) 1425 goto unlock; 1426 1427 if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { 1428 if (vmballoon_init_batching(b)) { 1429 /* 1430 * We failed to initialize batching, inform the monitor 1431 * about it by sending a null capability. 1432 * 1433 * The guest will retry in one second. 1434 */ 1435 vmballoon_send_start(b, 0); 1436 goto unlock; 1437 } 1438 } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { 1439 vmballoon_deinit_batching(b); 1440 } 1441 1442 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET); 1443 b->reset_required = false; 1444 1445 error = vmballoon_vmci_init(b); 1446 if (error) 1447 pr_err_once("failed to initialize vmci doorbell\n"); 1448 1449 if (vmballoon_send_guest_id(b)) 1450 pr_err_once("failed to send guest ID to the host\n"); 1451 1452 unlock: 1453 up_write(&b->conf_sem); 1454 } 1455 1456 /** 1457 * vmballoon_work - periodic balloon worker for reset, inflation and deflation. 1458 * 1459 * @work: pointer to the &work_struct which is provided by the workqueue. 1460 * 1461 * Resets the protocol if needed, gets the new size and adjusts balloon as 1462 * needed. Repeat in 1 sec. 1463 */ 1464 static void vmballoon_work(struct work_struct *work) 1465 { 1466 struct delayed_work *dwork = to_delayed_work(work); 1467 struct vmballoon *b = container_of(dwork, struct vmballoon, dwork); 1468 int64_t change = 0; 1469 1470 if (b->reset_required) 1471 vmballoon_reset(b); 1472 1473 down_read(&b->conf_sem); 1474 1475 /* 1476 * Update the stats while holding the semaphore to ensure that 1477 * @stats_enabled is consistent with whether the stats are actually 1478 * enabled 1479 */ 1480 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER); 1481 1482 if (!vmballoon_send_get_target(b)) 1483 change = vmballoon_change(b); 1484 1485 if (change != 0) { 1486 pr_debug("%s - size: %llu, target %lu\n", __func__, 1487 atomic64_read(&b->size), READ_ONCE(b->target)); 1488 1489 if (change > 0) 1490 vmballoon_inflate(b); 1491 else /* (change < 0) */ 1492 vmballoon_deflate(b, 0, true); 1493 } 1494 1495 up_read(&b->conf_sem); 1496 1497 /* 1498 * We are using a freezable workqueue so that balloon operations are 1499 * stopped while the system transitions to/from sleep/hibernation. 1500 */ 1501 queue_delayed_work(system_freezable_wq, 1502 dwork, round_jiffies_relative(HZ)); 1503 1504 } 1505 1506 /** 1507 * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure. 1508 * @shrinker: pointer to the balloon shrinker. 1509 * @sc: page reclaim information. 1510 * 1511 * Returns: number of pages that were freed during deflation. 1512 */ 1513 static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker, 1514 struct shrink_control *sc) 1515 { 1516 struct vmballoon *b = &balloon; 1517 unsigned long deflated_frames; 1518 1519 pr_debug("%s - size: %llu", __func__, atomic64_read(&b->size)); 1520 1521 vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK); 1522 1523 /* 1524 * If the lock is also contended for read, we cannot easily reclaim and 1525 * we bail out. 1526 */ 1527 if (!down_read_trylock(&b->conf_sem)) 1528 return 0; 1529 1530 deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true); 1531 1532 vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE, 1533 deflated_frames); 1534 1535 /* 1536 * Delay future inflation for some time to mitigate the situations in 1537 * which balloon continuously grows and shrinks. Use WRITE_ONCE() since 1538 * the access is asynchronous. 1539 */ 1540 WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY); 1541 1542 up_read(&b->conf_sem); 1543 1544 return deflated_frames; 1545 } 1546 1547 /** 1548 * vmballoon_shrinker_count() - return the number of ballooned pages. 1549 * @shrinker: pointer to the balloon shrinker. 1550 * @sc: page reclaim information. 1551 * 1552 * Returns: number of 4k pages that are allocated for the balloon and can 1553 * therefore be reclaimed under pressure. 1554 */ 1555 static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker, 1556 struct shrink_control *sc) 1557 { 1558 struct vmballoon *b = &balloon; 1559 1560 return atomic64_read(&b->size); 1561 } 1562 1563 static void vmballoon_unregister_shrinker(struct vmballoon *b) 1564 { 1565 shrinker_free(b->shrinker); 1566 b->shrinker = NULL; 1567 } 1568 1569 static int vmballoon_register_shrinker(struct vmballoon *b) 1570 { 1571 /* Do nothing if the shrinker is not enabled */ 1572 if (!vmwballoon_shrinker_enable) 1573 return 0; 1574 1575 b->shrinker = shrinker_alloc(0, "vmw-balloon"); 1576 if (!b->shrinker) 1577 return -ENOMEM; 1578 1579 b->shrinker->scan_objects = vmballoon_shrinker_scan; 1580 b->shrinker->count_objects = vmballoon_shrinker_count; 1581 b->shrinker->private_data = b; 1582 1583 shrinker_register(b->shrinker); 1584 1585 return 0; 1586 } 1587 1588 /* 1589 * DEBUGFS Interface 1590 */ 1591 #ifdef CONFIG_DEBUG_FS 1592 1593 static const char * const vmballoon_stat_page_names[] = { 1594 [VMW_BALLOON_PAGE_STAT_ALLOC] = "alloc", 1595 [VMW_BALLOON_PAGE_STAT_ALLOC_FAIL] = "allocFail", 1596 [VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC] = "errAlloc", 1597 [VMW_BALLOON_PAGE_STAT_REFUSED_FREE] = "errFree", 1598 [VMW_BALLOON_PAGE_STAT_FREE] = "free" 1599 }; 1600 1601 static const char * const vmballoon_stat_names[] = { 1602 [VMW_BALLOON_STAT_TIMER] = "timer", 1603 [VMW_BALLOON_STAT_DOORBELL] = "doorbell", 1604 [VMW_BALLOON_STAT_RESET] = "reset", 1605 [VMW_BALLOON_STAT_SHRINK] = "shrink", 1606 [VMW_BALLOON_STAT_SHRINK_FREE] = "shrinkFree" 1607 }; 1608 1609 static int vmballoon_enable_stats(struct vmballoon *b) 1610 { 1611 int r = 0; 1612 1613 down_write(&b->conf_sem); 1614 1615 /* did we somehow race with another reader which enabled stats? */ 1616 if (b->stats) 1617 goto out; 1618 1619 b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL); 1620 1621 if (!b->stats) { 1622 /* allocation failed */ 1623 r = -ENOMEM; 1624 goto out; 1625 } 1626 static_key_enable(&balloon_stat_enabled.key); 1627 out: 1628 up_write(&b->conf_sem); 1629 return r; 1630 } 1631 1632 /** 1633 * vmballoon_debug_show - shows statistics of balloon operations. 1634 * @f: pointer to the &struct seq_file. 1635 * @offset: ignored. 1636 * 1637 * Provides the statistics that can be accessed in vmmemctl in the debugfs. 1638 * To avoid the overhead - mainly that of memory - of collecting the statistics, 1639 * we only collect statistics after the first time the counters are read. 1640 * 1641 * Return: zero on success or an error code. 1642 */ 1643 static int vmballoon_debug_show(struct seq_file *f, void *offset) 1644 { 1645 struct vmballoon *b = f->private; 1646 int i, j; 1647 1648 /* enables stats if they are disabled */ 1649 if (!b->stats) { 1650 int r = vmballoon_enable_stats(b); 1651 1652 if (r) 1653 return r; 1654 } 1655 1656 /* format capabilities info */ 1657 seq_printf(f, "%-22s: %#16x\n", "balloon capabilities", 1658 VMW_BALLOON_CAPABILITIES); 1659 seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities); 1660 seq_printf(f, "%-22s: %16s\n", "is resetting", 1661 b->reset_required ? "y" : "n"); 1662 1663 /* format size info */ 1664 seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target)); 1665 seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size)); 1666 1667 for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) { 1668 if (vmballoon_cmd_names[i] == NULL) 1669 continue; 1670 1671 seq_printf(f, "%-22s: %16llu (%llu failed)\n", 1672 vmballoon_cmd_names[i], 1673 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]), 1674 atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT])); 1675 } 1676 1677 for (i = 0; i < VMW_BALLOON_STAT_NUM; i++) 1678 seq_printf(f, "%-22s: %16llu\n", 1679 vmballoon_stat_names[i], 1680 atomic64_read(&b->stats->general_stat[i])); 1681 1682 for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) { 1683 for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++) 1684 seq_printf(f, "%-18s(%s): %16llu\n", 1685 vmballoon_stat_page_names[i], 1686 vmballoon_page_size_names[j], 1687 atomic64_read(&b->stats->page_stat[i][j])); 1688 } 1689 1690 return 0; 1691 } 1692 1693 DEFINE_SHOW_ATTRIBUTE(vmballoon_debug); 1694 1695 static void __init vmballoon_debugfs_init(struct vmballoon *b) 1696 { 1697 debugfs_create_file("vmmemctl", S_IRUGO, NULL, b, 1698 &vmballoon_debug_fops); 1699 } 1700 1701 static void __exit vmballoon_debugfs_exit(struct vmballoon *b) 1702 { 1703 static_key_disable(&balloon_stat_enabled.key); 1704 debugfs_lookup_and_remove("vmmemctl", NULL); 1705 kfree(b->stats); 1706 b->stats = NULL; 1707 } 1708 1709 #else 1710 1711 static inline void vmballoon_debugfs_init(struct vmballoon *b) 1712 { 1713 } 1714 1715 static inline void vmballoon_debugfs_exit(struct vmballoon *b) 1716 { 1717 } 1718 1719 #endif /* CONFIG_DEBUG_FS */ 1720 1721 1722 #ifdef CONFIG_BALLOON_MIGRATION 1723 /** 1724 * vmballoon_migratepage() - migrates a balloon page. 1725 * @b_dev_info: balloon device information descriptor. 1726 * @newpage: the page to which @page should be migrated. 1727 * @page: a ballooned page that should be migrated. 1728 * @mode: migration mode, ignored. 1729 * 1730 * Return: zero on success, -EAGAIN when migration cannot be performed 1731 * momentarily, -EBUSY if migration failed and should be retried 1732 * with that specific page, and -ENOENT when deflating @page 1733 * succeeded but inflating @newpage failed, effectively deflating 1734 * the balloon. 1735 */ 1736 static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, 1737 struct page *newpage, struct page *page, 1738 enum migrate_mode mode) 1739 { 1740 unsigned long status; 1741 struct vmballoon *b; 1742 int ret = 0; 1743 1744 b = container_of(b_dev_info, struct vmballoon, b_dev_info); 1745 1746 /* 1747 * If the semaphore is taken, there is ongoing configuration change 1748 * (i.e., balloon reset), so try again. 1749 */ 1750 if (!down_read_trylock(&b->conf_sem)) 1751 return -EAGAIN; 1752 1753 spin_lock(&b->comm_lock); 1754 /* 1755 * We must start by deflating and not inflating, as otherwise the 1756 * hypervisor may tell us that it has enough memory and the new page is 1757 * not needed. Since the old page is isolated, we cannot use the list 1758 * interface to unlock it, as the LRU field is used for isolation. 1759 * Instead, we use the native interface directly. 1760 */ 1761 vmballoon_add_page(b, 0, page); 1762 status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE, 1763 VMW_BALLOON_DEFLATE); 1764 1765 if (status == VMW_BALLOON_SUCCESS) 1766 status = vmballoon_status_page(b, 0, &page); 1767 1768 /* 1769 * If a failure happened, let the migration mechanism know that it 1770 * should not retry. 1771 */ 1772 if (status != VMW_BALLOON_SUCCESS) { 1773 spin_unlock(&b->comm_lock); 1774 ret = -EBUSY; 1775 goto out_unlock; 1776 } 1777 1778 /* Inflate */ 1779 vmballoon_add_page(b, 0, newpage); 1780 status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE, 1781 VMW_BALLOON_INFLATE); 1782 1783 if (status == VMW_BALLOON_SUCCESS) 1784 status = vmballoon_status_page(b, 0, &newpage); 1785 1786 spin_unlock(&b->comm_lock); 1787 1788 if (status != VMW_BALLOON_SUCCESS) { 1789 /* 1790 * A failure happened. While we can deflate the page we just 1791 * inflated, this deflation can also encounter an error. Instead 1792 * we will decrease the size of the balloon to reflect the 1793 * change. 1794 */ 1795 atomic64_dec(&b->size); 1796 /* 1797 * Tell the core that we're deflating the old page and don't 1798 * need the new page. 1799 */ 1800 ret = -ENOENT; 1801 } 1802 out_unlock: 1803 up_read(&b->conf_sem); 1804 return ret; 1805 } 1806 #else /* CONFIG_BALLOON_MIGRATION */ 1807 int vmballoon_migratepage(struct balloon_dev_info *b_dev_info, 1808 struct page *newpage, struct page *page, 1809 enum migrate_mode mode); 1810 #endif /* CONFIG_BALLOON_MIGRATION */ 1811 1812 static int __init vmballoon_init(void) 1813 { 1814 int error; 1815 1816 /* 1817 * Check if we are running on VMware's hypervisor and bail out 1818 * if we are not. 1819 */ 1820 if (x86_hyper_type != X86_HYPER_VMWARE) 1821 return -ENODEV; 1822 1823 INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work); 1824 1825 error = vmballoon_register_shrinker(&balloon); 1826 if (error) 1827 return error; 1828 1829 balloon_devinfo_init(&balloon.b_dev_info); 1830 if (IS_ENABLED(CONFIG_BALLOON_MIGRATION)) 1831 balloon.b_dev_info.migratepage = vmballoon_migratepage; 1832 1833 INIT_LIST_HEAD(&balloon.huge_pages); 1834 spin_lock_init(&balloon.huge_pages_lock); 1835 spin_lock_init(&balloon.comm_lock); 1836 init_rwsem(&balloon.conf_sem); 1837 balloon.vmci_doorbell = VMCI_INVALID_HANDLE; 1838 balloon.batch_page = NULL; 1839 balloon.page = NULL; 1840 balloon.reset_required = true; 1841 1842 queue_delayed_work(system_freezable_wq, &balloon.dwork, 0); 1843 1844 vmballoon_debugfs_init(&balloon); 1845 1846 return 0; 1847 } 1848 1849 /* 1850 * Using late_initcall() instead of module_init() allows the balloon to use the 1851 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the 1852 * VMCI is probed only after the balloon is initialized. If the balloon is used 1853 * as a module, late_initcall() is equivalent to module_init(). 1854 */ 1855 late_initcall(vmballoon_init); 1856 1857 static void __exit vmballoon_exit(void) 1858 { 1859 vmballoon_unregister_shrinker(&balloon); 1860 vmballoon_vmci_cleanup(&balloon); 1861 cancel_delayed_work_sync(&balloon.dwork); 1862 1863 vmballoon_debugfs_exit(&balloon); 1864 1865 /* 1866 * Deallocate all reserved memory, and reset connection with monitor. 1867 * Reset connection before deallocating memory to avoid potential for 1868 * additional spurious resets from guest touching deallocated pages. 1869 */ 1870 vmballoon_send_start(&balloon, 0); 1871 vmballoon_pop(&balloon); 1872 } 1873 module_exit(vmballoon_exit); 1874