1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Virtio-mem device driver. 4 * 5 * Copyright Red Hat, Inc. 2020 6 * 7 * Author(s): David Hildenbrand <david@redhat.com> 8 */ 9 10 #include <linux/virtio.h> 11 #include <linux/virtio_mem.h> 12 #include <linux/workqueue.h> 13 #include <linux/slab.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/memory_hotplug.h> 17 #include <linux/memory.h> 18 #include <linux/hrtimer.h> 19 #include <linux/crash_dump.h> 20 #include <linux/mutex.h> 21 #include <linux/bitmap.h> 22 #include <linux/lockdep.h> 23 #include <linux/log2.h> 24 #include <linux/vmalloc.h> 25 #include <linux/suspend.h> 26 27 #include <acpi/acpi_numa.h> 28 29 static bool unplug_online = true; 30 module_param(unplug_online, bool, 0644); 31 MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); 32 33 static bool force_bbm; 34 module_param(force_bbm, bool, 0444); 35 MODULE_PARM_DESC(force_bbm, 36 "Force Big Block Mode. Default is 0 (auto-selection)"); 37 38 static unsigned long bbm_block_size; 39 module_param(bbm_block_size, ulong, 0444); 40 MODULE_PARM_DESC(bbm_block_size, 41 "Big Block size in bytes. Default is 0 (auto-detection)."); 42 43 /* 44 * virtio-mem currently supports the following modes of operation: 45 * 46 * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The 47 * size of a Sub Block (SB) is determined based on the device block size, the 48 * pageblock size, and the maximum allocation granularity of the buddy. 49 * Subblocks within a Linux memory block might either be plugged or unplugged. 50 * Memory is added/removed to Linux MM in Linux memory block granularity. 51 * 52 * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks. 53 * Memory is added/removed to Linux MM in Big Block granularity. 54 * 55 * The mode is determined automatically based on the Linux memory block size 56 * and the device block size. 
57 * 58 * User space / core MM (auto onlining) is responsible for onlining added 59 * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are 60 * always onlined separately, and all memory within a Linux memory block is 61 * onlined to the same zone - virtio-mem relies on this behavior. 62 */ 63 64 /* 65 * State of a Linux memory block in SBM. 66 */ 67 enum virtio_mem_sbm_mb_state { 68 /* Unplugged, not added to Linux. Can be reused later. */ 69 VIRTIO_MEM_SBM_MB_UNUSED = 0, 70 /* (Partially) plugged, not added to Linux. Error on add_memory(). */ 71 VIRTIO_MEM_SBM_MB_PLUGGED, 72 /* Fully plugged, fully added to Linux, offline. */ 73 VIRTIO_MEM_SBM_MB_OFFLINE, 74 /* Partially plugged, fully added to Linux, offline. */ 75 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, 76 /* Fully plugged, fully added to Linux, onlined to a kernel zone. */ 77 VIRTIO_MEM_SBM_MB_KERNEL, 78 /* Partially plugged, fully added to Linux, online to a kernel zone */ 79 VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, 80 /* Fully plugged, fully added to Linux, onlined to ZONE_MOVABLE. */ 81 VIRTIO_MEM_SBM_MB_MOVABLE, 82 /* Partially plugged, fully added to Linux, onlined to ZONE_MOVABLE. */ 83 VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, 84 VIRTIO_MEM_SBM_MB_COUNT 85 }; 86 87 /* 88 * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks. 89 */ 90 enum virtio_mem_bbm_bb_state { 91 /* Unplugged, not added to Linux. Can be reused later. */ 92 VIRTIO_MEM_BBM_BB_UNUSED = 0, 93 /* Plugged, not added to Linux. Error on add_memory(). */ 94 VIRTIO_MEM_BBM_BB_PLUGGED, 95 /* Plugged and added to Linux. */ 96 VIRTIO_MEM_BBM_BB_ADDED, 97 /* All online parts are fake-offline, ready to remove. */ 98 VIRTIO_MEM_BBM_BB_FAKE_OFFLINE, 99 VIRTIO_MEM_BBM_BB_COUNT 100 }; 101 102 struct virtio_mem { 103 struct virtio_device *vdev; 104 105 /* We might first have to unplug all memory when starting up. */ 106 bool unplug_all_required; 107 108 /* Workqueue that processes the plug/unplug requests. 
*/ 109 struct work_struct wq; 110 atomic_t wq_active; 111 atomic_t config_changed; 112 113 /* Virtqueue for guest->host requests. */ 114 struct virtqueue *vq; 115 116 /* Wait for a host response to a guest request. */ 117 wait_queue_head_t host_resp; 118 119 /* Space for one guest request and the host response. */ 120 struct virtio_mem_req req; 121 struct virtio_mem_resp resp; 122 123 /* The current size of the device. */ 124 uint64_t plugged_size; 125 /* The requested size of the device. */ 126 uint64_t requested_size; 127 128 /* The device block size (for communicating with the device). */ 129 uint64_t device_block_size; 130 /* The determined node id for all memory of the device. */ 131 int nid; 132 /* Physical start address of the memory region. */ 133 uint64_t addr; 134 /* Maximum region size in bytes. */ 135 uint64_t region_size; 136 /* Usable region size in bytes. */ 137 uint64_t usable_region_size; 138 139 /* The parent resource for all memory added via this device. */ 140 struct resource *parent_resource; 141 /* 142 * Copy of "System RAM (virtio_mem)" to be used for 143 * add_memory_driver_managed(). 144 */ 145 const char *resource_name; 146 /* Memory group identification. */ 147 int mgid; 148 149 /* 150 * We don't want to add too much memory if it's not getting onlined, 151 * to avoid running OOM. Besides this threshold, we allow to have at 152 * least two offline blocks at a time (whatever is bigger). 153 */ 154 #define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024) 155 atomic64_t offline_size; 156 uint64_t offline_threshold; 157 158 /* If set, the driver is in SBM, otherwise in BBM. */ 159 bool in_sbm; 160 161 union { 162 struct { 163 /* Id of the first memory block of this device. */ 164 unsigned long first_mb_id; 165 /* Id of the last usable memory block of this device. */ 166 unsigned long last_usable_mb_id; 167 /* Id of the next memory bock to prepare when needed. */ 168 unsigned long next_mb_id; 169 170 /* The subblock size. 
*/ 171 uint64_t sb_size; 172 /* The number of subblocks per Linux memory block. */ 173 uint32_t sbs_per_mb; 174 175 /* 176 * Some of the Linux memory blocks tracked as "partially 177 * plugged" are completely unplugged and can be offlined 178 * and removed -- which previously failed. 179 */ 180 bool have_unplugged_mb; 181 182 /* Summary of all memory block states. */ 183 unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; 184 185 /* 186 * One byte state per memory block. Allocated via 187 * vmalloc(). Resized (alloc+copy+free) on demand. 188 * 189 * With 128 MiB memory blocks, we have states for 512 190 * GiB of memory in one 4 KiB page. 191 */ 192 uint8_t *mb_states; 193 194 /* 195 * Bitmap: one bit per subblock. Allocated similar to 196 * sbm.mb_states. 197 * 198 * A set bit means the corresponding subblock is 199 * plugged, otherwise it's unblocked. 200 * 201 * With 4 MiB subblocks, we manage 128 GiB of memory 202 * in one 4 KiB page. 203 */ 204 unsigned long *sb_states; 205 } sbm; 206 207 struct { 208 /* Id of the first big block of this device. */ 209 unsigned long first_bb_id; 210 /* Id of the last usable big block of this device. */ 211 unsigned long last_usable_bb_id; 212 /* Id of the next device bock to prepare when needed. */ 213 unsigned long next_bb_id; 214 215 /* Summary of all big block states. */ 216 unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT]; 217 218 /* One byte state per big block. See sbm.mb_states. */ 219 uint8_t *bb_states; 220 221 /* The block size used for plugging/adding/removing. */ 222 uint64_t bb_size; 223 } bbm; 224 }; 225 226 /* 227 * Mutex that protects the sbm.mb_count, sbm.mb_states, 228 * sbm.sb_states, bbm.bb_count, and bbm.bb_states 229 * 230 * When this lock is held the pointers can't change, ONLINE and 231 * OFFLINE blocks can't change the state and no subblocks will get 232 * plugged/unplugged. 233 * 234 * In kdump mode, used to serialize requests, last_block_addr and 235 * last_block_plugged. 
236 */ 237 struct mutex hotplug_mutex; 238 bool hotplug_active; 239 240 /* An error occurred we cannot handle - stop processing requests. */ 241 bool broken; 242 243 /* Cached valued of is_kdump_kernel() when the device was probed. */ 244 bool in_kdump; 245 246 /* The driver is being removed. */ 247 spinlock_t removal_lock; 248 bool removing; 249 250 /* Timer for retrying to plug/unplug memory. */ 251 struct hrtimer retry_timer; 252 unsigned int retry_timer_ms; 253 #define VIRTIO_MEM_RETRY_TIMER_MIN_MS 50000 254 #define VIRTIO_MEM_RETRY_TIMER_MAX_MS 300000 255 256 /* Memory notifier (online/offline events). */ 257 struct notifier_block memory_notifier; 258 259 /* Notifier to block hibernation image storing/reloading. */ 260 struct notifier_block pm_notifier; 261 262 #ifdef CONFIG_PROC_VMCORE 263 /* vmcore callback for /proc/vmcore handling in kdump mode */ 264 struct vmcore_cb vmcore_cb; 265 uint64_t last_block_addr; 266 bool last_block_plugged; 267 #endif /* CONFIG_PROC_VMCORE */ 268 269 /* Next device in the list of virtio-mem devices. */ 270 struct list_head next; 271 }; 272 273 /* 274 * We have to share a single online_page callback among all virtio-mem 275 * devices. We use RCU to iterate the list in the callback. 276 */ 277 static DEFINE_MUTEX(virtio_mem_mutex); 278 static LIST_HEAD(virtio_mem_devices); 279 280 static void virtio_mem_online_page_cb(struct page *page, unsigned int order); 281 static void virtio_mem_fake_offline_going_offline(unsigned long pfn, 282 unsigned long nr_pages); 283 static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, 284 unsigned long nr_pages); 285 static void virtio_mem_retry(struct virtio_mem *vm); 286 static int virtio_mem_create_resource(struct virtio_mem *vm); 287 static void virtio_mem_delete_resource(struct virtio_mem *vm); 288 289 /* 290 * Register a virtio-mem device so it will be considered for the online_page 291 * callback. 
292 */ 293 static int register_virtio_mem_device(struct virtio_mem *vm) 294 { 295 int rc = 0; 296 297 /* First device registers the callback. */ 298 mutex_lock(&virtio_mem_mutex); 299 if (list_empty(&virtio_mem_devices)) 300 rc = set_online_page_callback(&virtio_mem_online_page_cb); 301 if (!rc) 302 list_add_rcu(&vm->next, &virtio_mem_devices); 303 mutex_unlock(&virtio_mem_mutex); 304 305 return rc; 306 } 307 308 /* 309 * Unregister a virtio-mem device so it will no longer be considered for the 310 * online_page callback. 311 */ 312 static void unregister_virtio_mem_device(struct virtio_mem *vm) 313 { 314 /* Last device unregisters the callback. */ 315 mutex_lock(&virtio_mem_mutex); 316 list_del_rcu(&vm->next); 317 if (list_empty(&virtio_mem_devices)) 318 restore_online_page_callback(&virtio_mem_online_page_cb); 319 mutex_unlock(&virtio_mem_mutex); 320 321 synchronize_rcu(); 322 } 323 324 /* 325 * Calculate the memory block id of a given address. 326 */ 327 static unsigned long virtio_mem_phys_to_mb_id(unsigned long addr) 328 { 329 return addr / memory_block_size_bytes(); 330 } 331 332 /* 333 * Calculate the physical start address of a given memory block id. 334 */ 335 static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id) 336 { 337 return mb_id * memory_block_size_bytes(); 338 } 339 340 /* 341 * Calculate the big block id of a given address. 342 */ 343 static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm, 344 uint64_t addr) 345 { 346 return addr / vm->bbm.bb_size; 347 } 348 349 /* 350 * Calculate the physical start address of a given big block id. 351 */ 352 static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm, 353 unsigned long bb_id) 354 { 355 return bb_id * vm->bbm.bb_size; 356 } 357 358 /* 359 * Calculate the subblock id of a given address. 
360 */ 361 static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, 362 unsigned long addr) 363 { 364 const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); 365 const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id); 366 367 return (addr - mb_addr) / vm->sbm.sb_size; 368 } 369 370 /* 371 * Set the state of a big block, taking care of the state counter. 372 */ 373 static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm, 374 unsigned long bb_id, 375 enum virtio_mem_bbm_bb_state state) 376 { 377 const unsigned long idx = bb_id - vm->bbm.first_bb_id; 378 enum virtio_mem_bbm_bb_state old_state; 379 380 old_state = vm->bbm.bb_states[idx]; 381 vm->bbm.bb_states[idx] = state; 382 383 BUG_ON(vm->bbm.bb_count[old_state] == 0); 384 vm->bbm.bb_count[old_state]--; 385 vm->bbm.bb_count[state]++; 386 } 387 388 /* 389 * Get the state of a big block. 390 */ 391 static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm, 392 unsigned long bb_id) 393 { 394 return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id]; 395 } 396 397 /* 398 * Prepare the big block state array for the next big block. 
399 */ 400 static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) 401 { 402 unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id; 403 unsigned long new_bytes = old_bytes + 1; 404 int old_pages = PFN_UP(old_bytes); 405 int new_pages = PFN_UP(new_bytes); 406 uint8_t *new_array; 407 408 if (vm->bbm.bb_states && old_pages == new_pages) 409 return 0; 410 411 new_array = vzalloc(new_pages * PAGE_SIZE); 412 if (!new_array) 413 return -ENOMEM; 414 415 mutex_lock(&vm->hotplug_mutex); 416 if (vm->bbm.bb_states) 417 memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE); 418 vfree(vm->bbm.bb_states); 419 vm->bbm.bb_states = new_array; 420 mutex_unlock(&vm->hotplug_mutex); 421 422 return 0; 423 } 424 425 #define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \ 426 for (_bb_id = vm->bbm.first_bb_id; \ 427 _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \ 428 _bb_id++) \ 429 if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) 430 431 #define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \ 432 for (_bb_id = vm->bbm.next_bb_id - 1; \ 433 _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \ 434 _bb_id--) \ 435 if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) 436 437 /* 438 * Set the state of a memory block, taking care of the state counter. 439 */ 440 static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, 441 unsigned long mb_id, uint8_t state) 442 { 443 const unsigned long idx = mb_id - vm->sbm.first_mb_id; 444 uint8_t old_state; 445 446 old_state = vm->sbm.mb_states[idx]; 447 vm->sbm.mb_states[idx] = state; 448 449 BUG_ON(vm->sbm.mb_count[old_state] == 0); 450 vm->sbm.mb_count[old_state]--; 451 vm->sbm.mb_count[state]++; 452 } 453 454 /* 455 * Get the state of a memory block. 
456 */ 457 static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, 458 unsigned long mb_id) 459 { 460 const unsigned long idx = mb_id - vm->sbm.first_mb_id; 461 462 return vm->sbm.mb_states[idx]; 463 } 464 465 /* 466 * Prepare the state array for the next memory block. 467 */ 468 static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) 469 { 470 int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id); 471 int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1); 472 uint8_t *new_array; 473 474 if (vm->sbm.mb_states && old_pages == new_pages) 475 return 0; 476 477 new_array = vzalloc(new_pages * PAGE_SIZE); 478 if (!new_array) 479 return -ENOMEM; 480 481 mutex_lock(&vm->hotplug_mutex); 482 if (vm->sbm.mb_states) 483 memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE); 484 vfree(vm->sbm.mb_states); 485 vm->sbm.mb_states = new_array; 486 mutex_unlock(&vm->hotplug_mutex); 487 488 return 0; 489 } 490 491 #define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ 492 for (_mb_id = _vm->sbm.first_mb_id; \ 493 _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \ 494 _mb_id++) \ 495 if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) 496 497 #define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ 498 for (_mb_id = _vm->sbm.next_mb_id - 1; \ 499 _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \ 500 _mb_id--) \ 501 if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) 502 503 /* 504 * Calculate the bit number in the subblock bitmap for the given subblock 505 * inside the given memory block. 506 */ 507 static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, 508 unsigned long mb_id, int sb_id) 509 { 510 return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id; 511 } 512 513 /* 514 * Mark all selected subblocks plugged. 515 * 516 * Will not modify the state of the memory block. 
517 */ 518 static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm, 519 unsigned long mb_id, int sb_id, 520 int count) 521 { 522 const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 523 524 __bitmap_set(vm->sbm.sb_states, bit, count); 525 } 526 527 /* 528 * Mark all selected subblocks unplugged. 529 * 530 * Will not modify the state of the memory block. 531 */ 532 static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm, 533 unsigned long mb_id, int sb_id, 534 int count) 535 { 536 const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 537 538 __bitmap_clear(vm->sbm.sb_states, bit, count); 539 } 540 541 /* 542 * Test if all selected subblocks are plugged. 543 */ 544 static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm, 545 unsigned long mb_id, int sb_id, 546 int count) 547 { 548 const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 549 550 if (count == 1) 551 return test_bit(bit, vm->sbm.sb_states); 552 553 /* TODO: Helper similar to bitmap_set() */ 554 return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >= 555 bit + count; 556 } 557 558 /* 559 * Test if all selected subblocks are unplugged. 560 */ 561 static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, 562 unsigned long mb_id, int sb_id, 563 int count) 564 { 565 const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); 566 567 /* TODO: Helper similar to bitmap_set() */ 568 return find_next_bit(vm->sbm.sb_states, bit + count, bit) >= 569 bit + count; 570 } 571 572 /* 573 * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is 574 * none. 575 */ 576 static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, 577 unsigned long mb_id) 578 { 579 const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); 580 581 return find_next_zero_bit(vm->sbm.sb_states, 582 bit + vm->sbm.sbs_per_mb, bit) - bit; 583 } 584 585 /* 586 * Prepare the subblock bitmap for the next memory block. 
587 */ 588 static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) 589 { 590 const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id; 591 const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; 592 const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; 593 int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); 594 int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); 595 unsigned long *new_bitmap, *old_bitmap; 596 597 if (vm->sbm.sb_states && old_pages == new_pages) 598 return 0; 599 600 new_bitmap = vzalloc(new_pages * PAGE_SIZE); 601 if (!new_bitmap) 602 return -ENOMEM; 603 604 mutex_lock(&vm->hotplug_mutex); 605 if (vm->sbm.sb_states) 606 memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); 607 608 old_bitmap = vm->sbm.sb_states; 609 vm->sbm.sb_states = new_bitmap; 610 mutex_unlock(&vm->hotplug_mutex); 611 612 vfree(old_bitmap); 613 return 0; 614 } 615 616 /* 617 * Test if we could add memory without creating too much offline memory - 618 * to avoid running OOM if memory is getting onlined deferred. 619 */ 620 static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) 621 { 622 if (WARN_ON_ONCE(size > vm->offline_threshold)) 623 return false; 624 625 return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold; 626 } 627 628 /* 629 * Try adding memory to Linux. Will usually only fail if out of memory. 630 * 631 * Must not be called with the vm->hotplug_mutex held (possible deadlock with 632 * onlining code). 633 * 634 * Will not modify the state of memory blocks in virtio-mem. 635 */ 636 static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, 637 uint64_t size) 638 { 639 int rc; 640 641 /* 642 * When force-unloading the driver and we still have memory added to 643 * Linux, the resource name has to stay. 
644 */ 645 if (!vm->resource_name) { 646 vm->resource_name = kstrdup_const("System RAM (virtio_mem)", 647 GFP_KERNEL); 648 if (!vm->resource_name) 649 return -ENOMEM; 650 } 651 652 dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr, 653 addr + size - 1); 654 /* Memory might get onlined immediately. */ 655 atomic64_add(size, &vm->offline_size); 656 rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name, 657 MHP_MERGE_RESOURCE | MHP_NID_IS_MGID); 658 if (rc) { 659 atomic64_sub(size, &vm->offline_size); 660 dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); 661 /* 662 * TODO: Linux MM does not properly clean up yet in all cases 663 * where adding of memory failed - especially on -ENOMEM. 664 */ 665 } 666 return rc; 667 } 668 669 /* 670 * See virtio_mem_add_memory(): Try adding a single Linux memory block. 671 */ 672 static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) 673 { 674 const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); 675 const uint64_t size = memory_block_size_bytes(); 676 677 return virtio_mem_add_memory(vm, addr, size); 678 } 679 680 /* 681 * See virtio_mem_add_memory(): Try adding a big block. 682 */ 683 static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id) 684 { 685 const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 686 const uint64_t size = vm->bbm.bb_size; 687 688 return virtio_mem_add_memory(vm, addr, size); 689 } 690 691 /* 692 * Try removing memory from Linux. Will only fail if memory blocks aren't 693 * offline. 694 * 695 * Must not be called with the vm->hotplug_mutex held (possible deadlock with 696 * onlining code). 697 * 698 * Will not modify the state of memory blocks in virtio-mem. 
699 */ 700 static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, 701 uint64_t size) 702 { 703 int rc; 704 705 dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, 706 addr + size - 1); 707 rc = remove_memory(addr, size); 708 if (!rc) { 709 atomic64_sub(size, &vm->offline_size); 710 /* 711 * We might have freed up memory we can now unplug, retry 712 * immediately instead of waiting. 713 */ 714 virtio_mem_retry(vm); 715 } else { 716 dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc); 717 } 718 return rc; 719 } 720 721 /* 722 * See virtio_mem_remove_memory(): Try removing a single Linux memory block. 723 */ 724 static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) 725 { 726 const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); 727 const uint64_t size = memory_block_size_bytes(); 728 729 return virtio_mem_remove_memory(vm, addr, size); 730 } 731 732 /* 733 * Try offlining and removing memory from Linux. 734 * 735 * Must not be called with the vm->hotplug_mutex held (possible deadlock with 736 * onlining code). 737 * 738 * Will not modify the state of memory blocks in virtio-mem. 739 */ 740 static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, 741 uint64_t addr, 742 uint64_t size) 743 { 744 int rc; 745 746 dev_dbg(&vm->vdev->dev, 747 "offlining and removing memory: 0x%llx - 0x%llx\n", addr, 748 addr + size - 1); 749 750 rc = offline_and_remove_memory(addr, size); 751 if (!rc) { 752 atomic64_sub(size, &vm->offline_size); 753 /* 754 * We might have freed up memory we can now unplug, retry 755 * immediately instead of waiting. 756 */ 757 virtio_mem_retry(vm); 758 return 0; 759 } 760 dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc); 761 /* 762 * We don't really expect this to fail, because we fake-offlined all 763 * memory already. But it could fail in corner cases. 764 */ 765 WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY); 766 return rc == -ENOMEM ? 
-ENOMEM : -EBUSY; 767 } 768 769 /* 770 * See virtio_mem_offline_and_remove_memory(): Try offlining and removing 771 * a single Linux memory block. 772 */ 773 static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, 774 unsigned long mb_id) 775 { 776 const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); 777 const uint64_t size = memory_block_size_bytes(); 778 779 return virtio_mem_offline_and_remove_memory(vm, addr, size); 780 } 781 782 /* 783 * Try (offlining and) removing memory from Linux in case all subblocks are 784 * unplugged. Can be called on online and offline memory blocks. 785 * 786 * May modify the state of memory blocks in virtio-mem. 787 */ 788 static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm, 789 unsigned long mb_id) 790 { 791 int rc; 792 793 /* 794 * Once all subblocks of a memory block were unplugged, offline and 795 * remove it. 796 */ 797 if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 798 return 0; 799 800 /* offline_and_remove_memory() works for online and offline memory. */ 801 mutex_unlock(&vm->hotplug_mutex); 802 rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); 803 mutex_lock(&vm->hotplug_mutex); 804 if (!rc) 805 virtio_mem_sbm_set_mb_state(vm, mb_id, 806 VIRTIO_MEM_SBM_MB_UNUSED); 807 return rc; 808 } 809 810 /* 811 * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a 812 * all Linux memory blocks covered by the big block. 813 */ 814 static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm, 815 unsigned long bb_id) 816 { 817 const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 818 const uint64_t size = vm->bbm.bb_size; 819 820 return virtio_mem_offline_and_remove_memory(vm, addr, size); 821 } 822 823 /* 824 * Trigger the workqueue so the device can perform its magic. 
825 */ 826 static void virtio_mem_retry(struct virtio_mem *vm) 827 { 828 unsigned long flags; 829 830 spin_lock_irqsave(&vm->removal_lock, flags); 831 if (!vm->removing) 832 queue_work(system_freezable_wq, &vm->wq); 833 spin_unlock_irqrestore(&vm->removal_lock, flags); 834 } 835 836 static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id) 837 { 838 int node = NUMA_NO_NODE; 839 840 #if defined(CONFIG_ACPI_NUMA) 841 if (virtio_has_feature(vm->vdev, VIRTIO_MEM_F_ACPI_PXM)) 842 node = pxm_to_node(node_id); 843 #endif 844 return node; 845 } 846 847 /* 848 * Test if a virtio-mem device overlaps with the given range. Can be called 849 * from (notifier) callbacks lockless. 850 */ 851 static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start, 852 uint64_t size) 853 { 854 return start < vm->addr + vm->region_size && vm->addr < start + size; 855 } 856 857 /* 858 * Test if a virtio-mem device contains a given range. Can be called from 859 * (notifier) callbacks lockless. 
860 */ 861 static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, 862 uint64_t size) 863 { 864 return start >= vm->addr && start + size <= vm->addr + vm->region_size; 865 } 866 867 static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm, 868 unsigned long mb_id) 869 { 870 switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { 871 case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: 872 case VIRTIO_MEM_SBM_MB_OFFLINE: 873 return NOTIFY_OK; 874 default: 875 break; 876 } 877 dev_warn_ratelimited(&vm->vdev->dev, 878 "memory block onlining denied\n"); 879 return NOTIFY_BAD; 880 } 881 882 static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm, 883 unsigned long mb_id) 884 { 885 switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { 886 case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL: 887 case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL: 888 virtio_mem_sbm_set_mb_state(vm, mb_id, 889 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); 890 break; 891 case VIRTIO_MEM_SBM_MB_KERNEL: 892 case VIRTIO_MEM_SBM_MB_MOVABLE: 893 virtio_mem_sbm_set_mb_state(vm, mb_id, 894 VIRTIO_MEM_SBM_MB_OFFLINE); 895 break; 896 default: 897 BUG(); 898 break; 899 } 900 } 901 902 static void virtio_mem_sbm_notify_online(struct virtio_mem *vm, 903 unsigned long mb_id, 904 unsigned long start_pfn) 905 { 906 const bool is_movable = is_zone_movable_page(pfn_to_page(start_pfn)); 907 int new_state; 908 909 switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { 910 case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: 911 new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL; 912 if (is_movable) 913 new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL; 914 break; 915 case VIRTIO_MEM_SBM_MB_OFFLINE: 916 new_state = VIRTIO_MEM_SBM_MB_KERNEL; 917 if (is_movable) 918 new_state = VIRTIO_MEM_SBM_MB_MOVABLE; 919 break; 920 default: 921 BUG(); 922 break; 923 } 924 virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); 925 } 926 927 static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, 928 unsigned long mb_id) 929 { 930 const unsigned long nr_pages = 
PFN_DOWN(vm->sbm.sb_size); 931 unsigned long pfn; 932 int sb_id; 933 934 for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { 935 if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) 936 continue; 937 pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 938 sb_id * vm->sbm.sb_size); 939 virtio_mem_fake_offline_going_offline(pfn, nr_pages); 940 } 941 } 942 943 static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, 944 unsigned long mb_id) 945 { 946 const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); 947 unsigned long pfn; 948 int sb_id; 949 950 for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { 951 if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) 952 continue; 953 pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 954 sb_id * vm->sbm.sb_size); 955 virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); 956 } 957 } 958 959 static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm, 960 unsigned long bb_id, 961 unsigned long pfn, 962 unsigned long nr_pages) 963 { 964 /* 965 * When marked as "fake-offline", all online memory of this device block 966 * is allocated by us. Otherwise, we don't have any memory allocated. 967 */ 968 if (virtio_mem_bbm_get_bb_state(vm, bb_id) != 969 VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) 970 return; 971 virtio_mem_fake_offline_going_offline(pfn, nr_pages); 972 } 973 974 static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm, 975 unsigned long bb_id, 976 unsigned long pfn, 977 unsigned long nr_pages) 978 { 979 if (virtio_mem_bbm_get_bb_state(vm, bb_id) != 980 VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) 981 return; 982 virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); 983 } 984 985 /* 986 * This callback will either be called synchronously from add_memory() or 987 * asynchronously (e.g., triggered via user space). We have to be careful 988 * with locking when calling add_memory(). 
989 */ 990 static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, 991 unsigned long action, void *arg) 992 { 993 struct virtio_mem *vm = container_of(nb, struct virtio_mem, 994 memory_notifier); 995 struct memory_notify *mhp = arg; 996 const unsigned long start = PFN_PHYS(mhp->start_pfn); 997 const unsigned long size = PFN_PHYS(mhp->nr_pages); 998 int rc = NOTIFY_OK; 999 unsigned long id; 1000 1001 if (!virtio_mem_overlaps_range(vm, start, size)) 1002 return NOTIFY_DONE; 1003 1004 if (vm->in_sbm) { 1005 id = virtio_mem_phys_to_mb_id(start); 1006 /* 1007 * In SBM, we add memory in separate memory blocks - we expect 1008 * it to be onlined/offlined in the same granularity. Bail out 1009 * if this ever changes. 1010 */ 1011 if (WARN_ON_ONCE(size != memory_block_size_bytes() || 1012 !IS_ALIGNED(start, memory_block_size_bytes()))) 1013 return NOTIFY_BAD; 1014 } else { 1015 id = virtio_mem_phys_to_bb_id(vm, start); 1016 /* 1017 * In BBM, we only care about onlining/offlining happening 1018 * within a single big block, we don't care about the 1019 * actual granularity as we don't track individual Linux 1020 * memory blocks. 1021 */ 1022 if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1))) 1023 return NOTIFY_BAD; 1024 } 1025 1026 /* 1027 * Avoid circular locking lockdep warnings. We lock the mutex 1028 * e.g., in MEM_GOING_ONLINE and unlock it in MEM_ONLINE. The 1029 * blocking_notifier_call_chain() has it's own lock, which gets unlocked 1030 * between both notifier calls and will bail out. False positive. 
1031 */ 1032 lockdep_off(); 1033 1034 switch (action) { 1035 case MEM_GOING_OFFLINE: 1036 mutex_lock(&vm->hotplug_mutex); 1037 if (vm->removing) { 1038 rc = notifier_from_errno(-EBUSY); 1039 mutex_unlock(&vm->hotplug_mutex); 1040 break; 1041 } 1042 vm->hotplug_active = true; 1043 if (vm->in_sbm) 1044 virtio_mem_sbm_notify_going_offline(vm, id); 1045 else 1046 virtio_mem_bbm_notify_going_offline(vm, id, 1047 mhp->start_pfn, 1048 mhp->nr_pages); 1049 break; 1050 case MEM_GOING_ONLINE: 1051 mutex_lock(&vm->hotplug_mutex); 1052 if (vm->removing) { 1053 rc = notifier_from_errno(-EBUSY); 1054 mutex_unlock(&vm->hotplug_mutex); 1055 break; 1056 } 1057 vm->hotplug_active = true; 1058 if (vm->in_sbm) 1059 rc = virtio_mem_sbm_notify_going_online(vm, id); 1060 break; 1061 case MEM_OFFLINE: 1062 if (vm->in_sbm) 1063 virtio_mem_sbm_notify_offline(vm, id); 1064 1065 atomic64_add(size, &vm->offline_size); 1066 /* 1067 * Trigger the workqueue. Now that we have some offline memory, 1068 * maybe we can handle pending unplug requests. 1069 */ 1070 if (!unplug_online) 1071 virtio_mem_retry(vm); 1072 1073 vm->hotplug_active = false; 1074 mutex_unlock(&vm->hotplug_mutex); 1075 break; 1076 case MEM_ONLINE: 1077 if (vm->in_sbm) 1078 virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn); 1079 1080 atomic64_sub(size, &vm->offline_size); 1081 /* 1082 * Start adding more memory once we onlined half of our 1083 * threshold. Don't trigger if it's possibly due to our actipn 1084 * (e.g., us adding memory which gets onlined immediately from 1085 * the core). 
1086 */ 1087 if (!atomic_read(&vm->wq_active) && 1088 virtio_mem_could_add_memory(vm, vm->offline_threshold / 2)) 1089 virtio_mem_retry(vm); 1090 1091 vm->hotplug_active = false; 1092 mutex_unlock(&vm->hotplug_mutex); 1093 break; 1094 case MEM_CANCEL_OFFLINE: 1095 if (!vm->hotplug_active) 1096 break; 1097 if (vm->in_sbm) 1098 virtio_mem_sbm_notify_cancel_offline(vm, id); 1099 else 1100 virtio_mem_bbm_notify_cancel_offline(vm, id, 1101 mhp->start_pfn, 1102 mhp->nr_pages); 1103 vm->hotplug_active = false; 1104 mutex_unlock(&vm->hotplug_mutex); 1105 break; 1106 case MEM_CANCEL_ONLINE: 1107 if (!vm->hotplug_active) 1108 break; 1109 vm->hotplug_active = false; 1110 mutex_unlock(&vm->hotplug_mutex); 1111 break; 1112 default: 1113 break; 1114 } 1115 1116 lockdep_on(); 1117 1118 return rc; 1119 } 1120 1121 static int virtio_mem_pm_notifier_cb(struct notifier_block *nb, 1122 unsigned long action, void *arg) 1123 { 1124 struct virtio_mem *vm = container_of(nb, struct virtio_mem, 1125 pm_notifier); 1126 switch (action) { 1127 case PM_HIBERNATION_PREPARE: 1128 case PM_RESTORE_PREPARE: 1129 /* 1130 * When restarting the VM, all memory is unplugged. Don't 1131 * allow to hibernate and restore from an image. 1132 */ 1133 dev_err(&vm->vdev->dev, "hibernation is not supported.\n"); 1134 return NOTIFY_BAD; 1135 default: 1136 return NOTIFY_OK; 1137 } 1138 } 1139 1140 /* 1141 * Set a range of pages PG_offline. Remember pages that were never onlined 1142 * (via generic_online_page()) using PageDirty(). 1143 */ 1144 static void virtio_mem_set_fake_offline(unsigned long pfn, 1145 unsigned long nr_pages, bool onlined) 1146 { 1147 page_offline_begin(); 1148 for (; nr_pages--; pfn++) { 1149 struct page *page = pfn_to_page(pfn); 1150 1151 if (!onlined) 1152 /* 1153 * Pages that have not been onlined yet were initialized 1154 * to PageOffline(). Remember that we have to route them 1155 * through generic_online_page(). 
1156 */ 1157 SetPageDirty(page); 1158 else 1159 __SetPageOffline(page); 1160 VM_WARN_ON_ONCE(!PageOffline(page)); 1161 } 1162 page_offline_end(); 1163 } 1164 1165 /* 1166 * Clear PG_offline from a range of pages. If the pages were never onlined, 1167 * (via generic_online_page()), clear PageDirty(). 1168 */ 1169 static void virtio_mem_clear_fake_offline(unsigned long pfn, 1170 unsigned long nr_pages, bool onlined) 1171 { 1172 for (; nr_pages--; pfn++) { 1173 struct page *page = pfn_to_page(pfn); 1174 1175 if (!onlined) 1176 /* generic_online_page() will clear PageOffline(). */ 1177 ClearPageDirty(page); 1178 else 1179 __ClearPageOffline(page); 1180 } 1181 } 1182 1183 /* 1184 * Release a range of fake-offline pages to the buddy, effectively 1185 * fake-onlining them. 1186 */ 1187 static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) 1188 { 1189 unsigned long order = MAX_PAGE_ORDER; 1190 unsigned long i; 1191 1192 /* 1193 * We might get called for ranges that don't cover properly aligned 1194 * MAX_PAGE_ORDER pages; however, we can only online properly aligned 1195 * pages with an order of MAX_PAGE_ORDER at maximum. 1196 */ 1197 while (!IS_ALIGNED(pfn | nr_pages, 1 << order)) 1198 order--; 1199 1200 for (i = 0; i < nr_pages; i += 1 << order) { 1201 struct page *page = pfn_to_page(pfn + i); 1202 1203 /* 1204 * If the page is PageDirty(), it was kept fake-offline when 1205 * onlining the memory block. Otherwise, it was allocated 1206 * using alloc_contig_range(). All pages in a subblock are 1207 * alike. 1208 */ 1209 if (PageDirty(page)) { 1210 virtio_mem_clear_fake_offline(pfn + i, 1 << order, false); 1211 generic_online_page(page, order); 1212 } else { 1213 virtio_mem_clear_fake_offline(pfn + i, 1 << order, true); 1214 free_contig_range(pfn + i, 1 << order); 1215 adjust_managed_page_count(page, 1 << order); 1216 } 1217 } 1218 } 1219 1220 /* 1221 * Try to allocate a range, marking pages fake-offline, effectively 1222 * fake-offlining them. 
1223 */ 1224 static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn, 1225 unsigned long nr_pages) 1226 { 1227 const bool is_movable = is_zone_movable_page(pfn_to_page(pfn)); 1228 int rc, retry_count; 1229 1230 /* 1231 * TODO: We want an alloc_contig_range() mode that tries to allocate 1232 * harder (e.g., dealing with temporarily pinned pages, PCP), especially 1233 * with ZONE_MOVABLE. So for now, retry a couple of times with 1234 * ZONE_MOVABLE before giving up - because that zone is supposed to give 1235 * some guarantees. 1236 */ 1237 for (retry_count = 0; retry_count < 5; retry_count++) { 1238 /* 1239 * If the config changed, stop immediately and go back to the 1240 * main loop: avoid trying to keep unplugging if the device 1241 * might have decided to not remove any more memory. 1242 */ 1243 if (atomic_read(&vm->config_changed)) 1244 return -EAGAIN; 1245 1246 rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, 1247 GFP_KERNEL); 1248 if (rc == -ENOMEM) 1249 /* whoops, out of memory */ 1250 return rc; 1251 else if (rc && !is_movable) 1252 break; 1253 else if (rc) 1254 continue; 1255 1256 virtio_mem_set_fake_offline(pfn, nr_pages, true); 1257 adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); 1258 return 0; 1259 } 1260 1261 return -EBUSY; 1262 } 1263 1264 /* 1265 * Handle fake-offline pages when memory is going offline - such that the 1266 * pages can be skipped by mm-core when offlining. 1267 */ 1268 static void virtio_mem_fake_offline_going_offline(unsigned long pfn, 1269 unsigned long nr_pages) 1270 { 1271 struct page *page; 1272 unsigned long i; 1273 1274 /* Drop our reference to the pages so the memory can get offlined. 
*/ 1275 for (i = 0; i < nr_pages; i++) { 1276 page = pfn_to_page(pfn + i); 1277 if (WARN_ON(!page_ref_dec_and_test(page))) 1278 dump_page(page, "fake-offline page referenced"); 1279 } 1280 } 1281 1282 /* 1283 * Handle fake-offline pages when memory offlining is canceled - to undo 1284 * what we did in virtio_mem_fake_offline_going_offline(). 1285 */ 1286 static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, 1287 unsigned long nr_pages) 1288 { 1289 unsigned long i; 1290 1291 /* 1292 * Get the reference again that we dropped via page_ref_dec_and_test() 1293 * when going offline. 1294 */ 1295 for (i = 0; i < nr_pages; i++) 1296 page_ref_inc(pfn_to_page(pfn + i)); 1297 } 1298 1299 static void virtio_mem_online_page(struct virtio_mem *vm, 1300 struct page *page, unsigned int order) 1301 { 1302 const unsigned long start = page_to_phys(page); 1303 const unsigned long end = start + PFN_PHYS(1 << order); 1304 unsigned long addr, next, id, sb_id, count; 1305 bool do_online; 1306 1307 /* 1308 * We can get called with any order up to MAX_PAGE_ORDER. If our subblock 1309 * size is smaller than that and we have a mixture of plugged and 1310 * unplugged subblocks within such a page, we have to process in 1311 * smaller granularity. In that case we'll adjust the order exactly once 1312 * within the loop. 1313 */ 1314 for (addr = start; addr < end; ) { 1315 next = addr + PFN_PHYS(1 << order); 1316 1317 if (vm->in_sbm) { 1318 id = virtio_mem_phys_to_mb_id(addr); 1319 sb_id = virtio_mem_phys_to_sb_id(vm, addr); 1320 count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1; 1321 1322 if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) { 1323 /* Fully plugged. */ 1324 do_online = true; 1325 } else if (count == 1 || 1326 virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) { 1327 /* Fully unplugged. */ 1328 do_online = false; 1329 } else { 1330 /* 1331 * Mixture, process sub-blocks instead. This 1332 * will be at least the size of a pageblock. 
1333 * We'll run into this case exactly once. 1334 */ 1335 order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT; 1336 do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1); 1337 continue; 1338 } 1339 } else { 1340 /* 1341 * If the whole block is marked fake offline, keep 1342 * everything that way. 1343 */ 1344 id = virtio_mem_phys_to_bb_id(vm, addr); 1345 do_online = virtio_mem_bbm_get_bb_state(vm, id) != 1346 VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; 1347 } 1348 1349 if (do_online) 1350 generic_online_page(pfn_to_page(PFN_DOWN(addr)), order); 1351 else 1352 virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, 1353 false); 1354 addr = next; 1355 } 1356 } 1357 1358 static void virtio_mem_online_page_cb(struct page *page, unsigned int order) 1359 { 1360 const unsigned long addr = page_to_phys(page); 1361 struct virtio_mem *vm; 1362 1363 rcu_read_lock(); 1364 list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { 1365 /* 1366 * Pages we're onlining will never cross memory blocks and, 1367 * therefore, not virtio-mem devices. 1368 */ 1369 if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) 1370 continue; 1371 1372 /* 1373 * virtio_mem_set_fake_offline() might sleep. We can safely 1374 * drop the RCU lock at this point because the device 1375 * cannot go away. See virtio_mem_remove() how races 1376 * between memory onlining and device removal are handled. 1377 */ 1378 rcu_read_unlock(); 1379 1380 virtio_mem_online_page(vm, page, order); 1381 return; 1382 } 1383 rcu_read_unlock(); 1384 1385 /* not virtio-mem memory, but e.g., a DIMM. 
online it */ 1386 generic_online_page(page, order); 1387 } 1388 1389 static uint64_t virtio_mem_send_request(struct virtio_mem *vm, 1390 const struct virtio_mem_req *req) 1391 { 1392 struct scatterlist *sgs[2], sg_req, sg_resp; 1393 unsigned int len; 1394 int rc; 1395 1396 /* don't use the request residing on the stack (vaddr) */ 1397 vm->req = *req; 1398 1399 /* out: buffer for request */ 1400 sg_init_one(&sg_req, &vm->req, sizeof(vm->req)); 1401 sgs[0] = &sg_req; 1402 1403 /* in: buffer for response */ 1404 sg_init_one(&sg_resp, &vm->resp, sizeof(vm->resp)); 1405 sgs[1] = &sg_resp; 1406 1407 rc = virtqueue_add_sgs(vm->vq, sgs, 1, 1, vm, GFP_KERNEL); 1408 if (rc < 0) 1409 return rc; 1410 1411 virtqueue_kick(vm->vq); 1412 1413 /* wait for a response */ 1414 wait_event(vm->host_resp, virtqueue_get_buf(vm->vq, &len)); 1415 1416 return virtio16_to_cpu(vm->vdev, vm->resp.type); 1417 } 1418 1419 static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr, 1420 uint64_t size) 1421 { 1422 const uint64_t nb_vm_blocks = size / vm->device_block_size; 1423 const struct virtio_mem_req req = { 1424 .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_PLUG), 1425 .u.plug.addr = cpu_to_virtio64(vm->vdev, addr), 1426 .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), 1427 }; 1428 int rc = -ENOMEM; 1429 1430 if (atomic_read(&vm->config_changed)) 1431 return -EAGAIN; 1432 1433 dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr, 1434 addr + size - 1); 1435 1436 switch (virtio_mem_send_request(vm, &req)) { 1437 case VIRTIO_MEM_RESP_ACK: 1438 vm->plugged_size += size; 1439 return 0; 1440 case VIRTIO_MEM_RESP_NACK: 1441 rc = -EAGAIN; 1442 break; 1443 case VIRTIO_MEM_RESP_BUSY: 1444 rc = -ETXTBSY; 1445 break; 1446 case VIRTIO_MEM_RESP_ERROR: 1447 rc = -EINVAL; 1448 break; 1449 default: 1450 break; 1451 } 1452 1453 dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc); 1454 return rc; 1455 } 1456 1457 static int 
virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, 1458 uint64_t size) 1459 { 1460 const uint64_t nb_vm_blocks = size / vm->device_block_size; 1461 const struct virtio_mem_req req = { 1462 .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG), 1463 .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr), 1464 .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), 1465 }; 1466 int rc = -ENOMEM; 1467 1468 if (atomic_read(&vm->config_changed)) 1469 return -EAGAIN; 1470 1471 dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr, 1472 addr + size - 1); 1473 1474 switch (virtio_mem_send_request(vm, &req)) { 1475 case VIRTIO_MEM_RESP_ACK: 1476 vm->plugged_size -= size; 1477 return 0; 1478 case VIRTIO_MEM_RESP_BUSY: 1479 rc = -ETXTBSY; 1480 break; 1481 case VIRTIO_MEM_RESP_ERROR: 1482 rc = -EINVAL; 1483 break; 1484 default: 1485 break; 1486 } 1487 1488 dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc); 1489 return rc; 1490 } 1491 1492 static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) 1493 { 1494 const struct virtio_mem_req req = { 1495 .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL), 1496 }; 1497 int rc = -ENOMEM; 1498 1499 dev_dbg(&vm->vdev->dev, "unplugging all memory"); 1500 1501 switch (virtio_mem_send_request(vm, &req)) { 1502 case VIRTIO_MEM_RESP_ACK: 1503 vm->unplug_all_required = false; 1504 vm->plugged_size = 0; 1505 /* usable region might have shrunk */ 1506 atomic_set(&vm->config_changed, 1); 1507 return 0; 1508 case VIRTIO_MEM_RESP_BUSY: 1509 rc = -ETXTBSY; 1510 break; 1511 default: 1512 break; 1513 } 1514 1515 dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc); 1516 return rc; 1517 } 1518 1519 /* 1520 * Plug selected subblocks. Updates the plugged state, but not the state 1521 * of the memory block. 
1522 */ 1523 static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id, 1524 int sb_id, int count) 1525 { 1526 const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + 1527 sb_id * vm->sbm.sb_size; 1528 const uint64_t size = count * vm->sbm.sb_size; 1529 int rc; 1530 1531 rc = virtio_mem_send_plug_request(vm, addr, size); 1532 if (!rc) 1533 virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count); 1534 return rc; 1535 } 1536 1537 /* 1538 * Unplug selected subblocks. Updates the plugged state, but not the state 1539 * of the memory block. 1540 */ 1541 static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, 1542 int sb_id, int count) 1543 { 1544 const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + 1545 sb_id * vm->sbm.sb_size; 1546 const uint64_t size = count * vm->sbm.sb_size; 1547 int rc; 1548 1549 rc = virtio_mem_send_unplug_request(vm, addr, size); 1550 if (!rc) 1551 virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count); 1552 return rc; 1553 } 1554 1555 /* 1556 * Request to unplug a big block. 1557 * 1558 * Will not modify the state of the big block. 1559 */ 1560 static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) 1561 { 1562 const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 1563 const uint64_t size = vm->bbm.bb_size; 1564 1565 return virtio_mem_send_unplug_request(vm, addr, size); 1566 } 1567 1568 /* 1569 * Request to plug a big block. 1570 * 1571 * Will not modify the state of the big block. 1572 */ 1573 static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id) 1574 { 1575 const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); 1576 const uint64_t size = vm->bbm.bb_size; 1577 1578 return virtio_mem_send_plug_request(vm, addr, size); 1579 } 1580 1581 /* 1582 * Unplug the desired number of plugged subblocks of a offline or not-added 1583 * memory block. Will fail if any subblock cannot get unplugged (instead of 1584 * skipping it). 
1585 * 1586 * Will not modify the state of the memory block. 1587 * 1588 * Note: can fail after some subblocks were unplugged. 1589 */ 1590 static int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm, 1591 unsigned long mb_id, uint64_t *nb_sb) 1592 { 1593 int sb_id, count; 1594 int rc; 1595 1596 sb_id = vm->sbm.sbs_per_mb - 1; 1597 while (*nb_sb) { 1598 /* Find the next candidate subblock */ 1599 while (sb_id >= 0 && 1600 virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1)) 1601 sb_id--; 1602 if (sb_id < 0) 1603 break; 1604 /* Try to unplug multiple subblocks at a time */ 1605 count = 1; 1606 while (count < *nb_sb && sb_id > 0 && 1607 virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { 1608 count++; 1609 sb_id--; 1610 } 1611 1612 rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); 1613 if (rc) 1614 return rc; 1615 *nb_sb -= count; 1616 sb_id--; 1617 } 1618 1619 return 0; 1620 } 1621 1622 /* 1623 * Unplug all plugged subblocks of an offline or not-added memory block. 1624 * 1625 * Will not modify the state of the memory block. 1626 * 1627 * Note: can fail after some subblocks were unplugged. 1628 */ 1629 static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id) 1630 { 1631 uint64_t nb_sb = vm->sbm.sbs_per_mb; 1632 1633 return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb); 1634 } 1635 1636 /* 1637 * Prepare tracking data for the next memory block. 1638 */ 1639 static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm, 1640 unsigned long *mb_id) 1641 { 1642 int rc; 1643 1644 if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id) 1645 return -ENOSPC; 1646 1647 /* Resize the state array if required. */ 1648 rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm); 1649 if (rc) 1650 return rc; 1651 1652 /* Resize the subblock bitmap if required. 
*/ 1653 rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm); 1654 if (rc) 1655 return rc; 1656 1657 vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; 1658 *mb_id = vm->sbm.next_mb_id++; 1659 return 0; 1660 } 1661 1662 /* 1663 * Try to plug the desired number of subblocks and add the memory block 1664 * to Linux. 1665 * 1666 * Will modify the state of the memory block. 1667 */ 1668 static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, 1669 unsigned long mb_id, uint64_t *nb_sb) 1670 { 1671 const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); 1672 int rc; 1673 1674 if (WARN_ON_ONCE(!count)) 1675 return -EINVAL; 1676 1677 /* 1678 * Plug the requested number of subblocks before adding it to linux, 1679 * so that onlining will directly online all plugged subblocks. 1680 */ 1681 rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count); 1682 if (rc) 1683 return rc; 1684 1685 /* 1686 * Mark the block properly offline before adding it to Linux, 1687 * so the memory notifiers will find the block in the right state. 1688 */ 1689 if (count == vm->sbm.sbs_per_mb) 1690 virtio_mem_sbm_set_mb_state(vm, mb_id, 1691 VIRTIO_MEM_SBM_MB_OFFLINE); 1692 else 1693 virtio_mem_sbm_set_mb_state(vm, mb_id, 1694 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); 1695 1696 /* Add the memory block to linux - if that fails, try to unplug. */ 1697 rc = virtio_mem_sbm_add_mb(vm, mb_id); 1698 if (rc) { 1699 int new_state = VIRTIO_MEM_SBM_MB_UNUSED; 1700 1701 if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) 1702 new_state = VIRTIO_MEM_SBM_MB_PLUGGED; 1703 virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); 1704 return rc; 1705 } 1706 1707 *nb_sb -= count; 1708 return 0; 1709 } 1710 1711 /* 1712 * Try to plug the desired number of subblocks of a memory block that 1713 * is already added to Linux. 1714 * 1715 * Will modify the state of the memory block. 1716 * 1717 * Note: Can fail after some subblocks were successfully plugged. 
1718 */ 1719 static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, 1720 unsigned long mb_id, uint64_t *nb_sb) 1721 { 1722 const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); 1723 unsigned long pfn, nr_pages; 1724 int sb_id, count; 1725 int rc; 1726 1727 if (WARN_ON_ONCE(!*nb_sb)) 1728 return -EINVAL; 1729 1730 while (*nb_sb) { 1731 sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); 1732 if (sb_id >= vm->sbm.sbs_per_mb) 1733 break; 1734 count = 1; 1735 while (count < *nb_sb && 1736 sb_id + count < vm->sbm.sbs_per_mb && 1737 !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) 1738 count++; 1739 1740 rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count); 1741 if (rc) 1742 return rc; 1743 *nb_sb -= count; 1744 if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) 1745 continue; 1746 1747 /* fake-online the pages if the memory block is online */ 1748 pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 1749 sb_id * vm->sbm.sb_size); 1750 nr_pages = PFN_DOWN(count * vm->sbm.sb_size); 1751 virtio_mem_fake_online(pfn, nr_pages); 1752 } 1753 1754 if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 1755 virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1); 1756 1757 return 0; 1758 } 1759 1760 static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff) 1761 { 1762 const int mb_states[] = { 1763 VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, 1764 VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, 1765 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, 1766 }; 1767 uint64_t nb_sb = diff / vm->sbm.sb_size; 1768 unsigned long mb_id; 1769 int rc, i; 1770 1771 if (!nb_sb) 1772 return 0; 1773 1774 /* Don't race with onlining/offlining */ 1775 mutex_lock(&vm->hotplug_mutex); 1776 1777 for (i = 0; i < ARRAY_SIZE(mb_states); i++) { 1778 virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) { 1779 rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb); 1780 if (rc || !nb_sb) 1781 goto out_unlock; 1782 cond_resched(); 1783 } 1784 } 1785 1786 /* 1787 * We won't be working on 
online/offline memory blocks from this point, 1788 * so we can't race with memory onlining/offlining. Drop the mutex. 1789 */ 1790 mutex_unlock(&vm->hotplug_mutex); 1791 1792 /* Try to plug and add unused blocks */ 1793 virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) { 1794 if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) 1795 return -ENOSPC; 1796 1797 rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); 1798 if (rc || !nb_sb) 1799 return rc; 1800 cond_resched(); 1801 } 1802 1803 /* Try to prepare, plug and add new blocks */ 1804 while (nb_sb) { 1805 if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) 1806 return -ENOSPC; 1807 1808 rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id); 1809 if (rc) 1810 return rc; 1811 rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); 1812 if (rc) 1813 return rc; 1814 cond_resched(); 1815 } 1816 1817 return 0; 1818 out_unlock: 1819 mutex_unlock(&vm->hotplug_mutex); 1820 return rc; 1821 } 1822 1823 /* 1824 * Plug a big block and add it to Linux. 1825 * 1826 * Will modify the state of the big block. 1827 */ 1828 static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm, 1829 unsigned long bb_id) 1830 { 1831 int rc; 1832 1833 if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != 1834 VIRTIO_MEM_BBM_BB_UNUSED)) 1835 return -EINVAL; 1836 1837 rc = virtio_mem_bbm_plug_bb(vm, bb_id); 1838 if (rc) 1839 return rc; 1840 virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); 1841 1842 rc = virtio_mem_bbm_add_bb(vm, bb_id); 1843 if (rc) { 1844 if (!virtio_mem_bbm_unplug_bb(vm, bb_id)) 1845 virtio_mem_bbm_set_bb_state(vm, bb_id, 1846 VIRTIO_MEM_BBM_BB_UNUSED); 1847 else 1848 /* Retry from the main loop. */ 1849 virtio_mem_bbm_set_bb_state(vm, bb_id, 1850 VIRTIO_MEM_BBM_BB_PLUGGED); 1851 return rc; 1852 } 1853 return 0; 1854 } 1855 1856 /* 1857 * Prepare tracking data for the next big block. 
1858 */ 1859 static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm, 1860 unsigned long *bb_id) 1861 { 1862 int rc; 1863 1864 if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id) 1865 return -ENOSPC; 1866 1867 /* Resize the big block state array if required. */ 1868 rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm); 1869 if (rc) 1870 return rc; 1871 1872 vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++; 1873 *bb_id = vm->bbm.next_bb_id; 1874 vm->bbm.next_bb_id++; 1875 return 0; 1876 } 1877 1878 static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff) 1879 { 1880 uint64_t nb_bb = diff / vm->bbm.bb_size; 1881 unsigned long bb_id; 1882 int rc; 1883 1884 if (!nb_bb) 1885 return 0; 1886 1887 /* Try to plug and add unused big blocks */ 1888 virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) { 1889 if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) 1890 return -ENOSPC; 1891 1892 rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); 1893 if (!rc) 1894 nb_bb--; 1895 if (rc || !nb_bb) 1896 return rc; 1897 cond_resched(); 1898 } 1899 1900 /* Try to prepare, plug and add new big blocks */ 1901 while (nb_bb) { 1902 if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) 1903 return -ENOSPC; 1904 1905 rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id); 1906 if (rc) 1907 return rc; 1908 rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); 1909 if (!rc) 1910 nb_bb--; 1911 if (rc) 1912 return rc; 1913 cond_resched(); 1914 } 1915 1916 return 0; 1917 } 1918 1919 /* 1920 * Try to plug the requested amount of memory. 1921 */ 1922 static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) 1923 { 1924 if (vm->in_sbm) 1925 return virtio_mem_sbm_plug_request(vm, diff); 1926 return virtio_mem_bbm_plug_request(vm, diff); 1927 } 1928 1929 /* 1930 * Unplug the desired number of plugged subblocks of an offline memory block. 1931 * Will fail if any subblock cannot get unplugged (instead of skipping it). 
1932 * 1933 * Will modify the state of the memory block. Might temporarily drop the 1934 * hotplug_mutex. 1935 * 1936 * Note: Can fail after some subblocks were successfully unplugged. 1937 */ 1938 static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, 1939 unsigned long mb_id, 1940 uint64_t *nb_sb) 1941 { 1942 int rc; 1943 1944 rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb); 1945 1946 /* some subblocks might have been unplugged even on failure */ 1947 if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) 1948 virtio_mem_sbm_set_mb_state(vm, mb_id, 1949 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); 1950 if (rc) 1951 return rc; 1952 1953 if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { 1954 /* 1955 * Remove the block from Linux - this should never fail. 1956 * Hinder the block from getting onlined by marking it 1957 * unplugged. Temporarily drop the mutex, so 1958 * any pending GOING_ONLINE requests can be serviced/rejected. 1959 */ 1960 virtio_mem_sbm_set_mb_state(vm, mb_id, 1961 VIRTIO_MEM_SBM_MB_UNUSED); 1962 1963 mutex_unlock(&vm->hotplug_mutex); 1964 rc = virtio_mem_sbm_remove_mb(vm, mb_id); 1965 BUG_ON(rc); 1966 mutex_lock(&vm->hotplug_mutex); 1967 } 1968 return 0; 1969 } 1970 1971 /* 1972 * Unplug the given plugged subblocks of an online memory block. 1973 * 1974 * Will modify the state of the memory block. 
1975 */ 1976 static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, 1977 unsigned long mb_id, int sb_id, 1978 int count) 1979 { 1980 const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; 1981 const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); 1982 unsigned long start_pfn; 1983 int rc; 1984 1985 start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + 1986 sb_id * vm->sbm.sb_size); 1987 1988 rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages); 1989 if (rc) 1990 return rc; 1991 1992 /* Try to unplug the allocated memory */ 1993 rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); 1994 if (rc) { 1995 /* Return the memory to the buddy. */ 1996 virtio_mem_fake_online(start_pfn, nr_pages); 1997 return rc; 1998 } 1999 2000 switch (old_state) { 2001 case VIRTIO_MEM_SBM_MB_KERNEL: 2002 virtio_mem_sbm_set_mb_state(vm, mb_id, 2003 VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL); 2004 break; 2005 case VIRTIO_MEM_SBM_MB_MOVABLE: 2006 virtio_mem_sbm_set_mb_state(vm, mb_id, 2007 VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL); 2008 break; 2009 } 2010 2011 return 0; 2012 } 2013 2014 /* 2015 * Unplug the desired number of plugged subblocks of an online memory block. 2016 * Will skip subblock that are busy. 2017 * 2018 * Will modify the state of the memory block. Might temporarily drop the 2019 * hotplug_mutex. 2020 * 2021 * Note: Can fail after some subblocks were successfully unplugged. Can 2022 * return 0 even if subblocks were busy and could not get unplugged. 2023 */ 2024 static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, 2025 unsigned long mb_id, 2026 uint64_t *nb_sb) 2027 { 2028 int rc, sb_id; 2029 2030 /* If possible, try to unplug the complete block in one shot. 
*/ 2031 if (*nb_sb >= vm->sbm.sbs_per_mb && 2032 virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { 2033 rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, 2034 vm->sbm.sbs_per_mb); 2035 if (!rc) { 2036 *nb_sb -= vm->sbm.sbs_per_mb; 2037 goto unplugged; 2038 } else if (rc != -EBUSY) 2039 return rc; 2040 } 2041 2042 /* Fallback to single subblocks. */ 2043 for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { 2044 /* Find the next candidate subblock */ 2045 while (sb_id >= 0 && 2046 !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) 2047 sb_id--; 2048 if (sb_id < 0) 2049 break; 2050 2051 rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); 2052 if (rc == -EBUSY) 2053 continue; 2054 else if (rc) 2055 return rc; 2056 *nb_sb -= 1; 2057 } 2058 2059 unplugged: 2060 rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id); 2061 if (rc) 2062 vm->sbm.have_unplugged_mb = 1; 2063 /* Ignore errors, this is not critical. We'll retry later. */ 2064 return 0; 2065 } 2066 2067 /* 2068 * Unplug the desired number of plugged subblocks of a memory block that is 2069 * already added to Linux. Will skip subblock of online memory blocks that are 2070 * busy (by the OS). Will fail if any subblock that's not busy cannot get 2071 * unplugged. 2072 * 2073 * Will modify the state of the memory block. Might temporarily drop the 2074 * hotplug_mutex. 2075 * 2076 * Note: Can fail after some subblocks were successfully unplugged. Can 2077 * return 0 even if subblocks were busy and could not get unplugged. 
2078 */ 2079 static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm, 2080 unsigned long mb_id, 2081 uint64_t *nb_sb) 2082 { 2083 const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id); 2084 2085 switch (old_state) { 2086 case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL: 2087 case VIRTIO_MEM_SBM_MB_KERNEL: 2088 case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL: 2089 case VIRTIO_MEM_SBM_MB_MOVABLE: 2090 return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb); 2091 case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: 2092 case VIRTIO_MEM_SBM_MB_OFFLINE: 2093 return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb); 2094 } 2095 return -EINVAL; 2096 } 2097 2098 static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff) 2099 { 2100 const int mb_states[] = { 2101 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, 2102 VIRTIO_MEM_SBM_MB_OFFLINE, 2103 VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL, 2104 VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL, 2105 VIRTIO_MEM_SBM_MB_MOVABLE, 2106 VIRTIO_MEM_SBM_MB_KERNEL, 2107 }; 2108 uint64_t nb_sb = diff / vm->sbm.sb_size; 2109 unsigned long mb_id; 2110 int rc, i; 2111 2112 if (!nb_sb) 2113 return 0; 2114 2115 /* 2116 * We'll drop the mutex a couple of times when it is safe to do so. 2117 * This might result in some blocks switching the state (online/offline) 2118 * and we could miss them in this run - we will retry again later. 2119 */ 2120 mutex_lock(&vm->hotplug_mutex); 2121 2122 /* 2123 * We try unplug from partially plugged blocks first, to try removing 2124 * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE 2125 * as it's more reliable to unplug memory and remove whole memory 2126 * blocks, and we don't want to trigger a zone imbalances by 2127 * accidentially removing too much kernel memory. 
2128 */ 2129 for (i = 0; i < ARRAY_SIZE(mb_states); i++) { 2130 virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) { 2131 rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb); 2132 if (rc || !nb_sb) 2133 goto out_unlock; 2134 mutex_unlock(&vm->hotplug_mutex); 2135 cond_resched(); 2136 mutex_lock(&vm->hotplug_mutex); 2137 } 2138 if (!unplug_online && i == 1) { 2139 mutex_unlock(&vm->hotplug_mutex); 2140 return 0; 2141 } 2142 } 2143 2144 mutex_unlock(&vm->hotplug_mutex); 2145 return nb_sb ? -EBUSY : 0; 2146 out_unlock: 2147 mutex_unlock(&vm->hotplug_mutex); 2148 return rc; 2149 } 2150 2151 /* 2152 * Try to offline and remove a big block from Linux and unplug it. Will fail 2153 * with -EBUSY if some memory is busy and cannot get unplugged. 2154 * 2155 * Will modify the state of the memory block. Might temporarily drop the 2156 * hotplug_mutex. 2157 */ 2158 static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, 2159 unsigned long bb_id) 2160 { 2161 const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); 2162 const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); 2163 unsigned long end_pfn = start_pfn + nr_pages; 2164 unsigned long pfn; 2165 struct page *page; 2166 int rc; 2167 2168 if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != 2169 VIRTIO_MEM_BBM_BB_ADDED)) 2170 return -EINVAL; 2171 2172 /* 2173 * Start by fake-offlining all memory. Once we marked the device 2174 * block as fake-offline, all newly onlined memory will 2175 * automatically be kept fake-offline. Protect from concurrent 2176 * onlining/offlining until we have a consistent state. 
2177 */ 2178 mutex_lock(&vm->hotplug_mutex); 2179 virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); 2180 2181 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 2182 page = pfn_to_online_page(pfn); 2183 if (!page) 2184 continue; 2185 2186 rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION); 2187 if (rc) { 2188 end_pfn = pfn; 2189 goto rollback; 2190 } 2191 } 2192 mutex_unlock(&vm->hotplug_mutex); 2193 2194 rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); 2195 if (rc) { 2196 mutex_lock(&vm->hotplug_mutex); 2197 goto rollback; 2198 } 2199 2200 rc = virtio_mem_bbm_unplug_bb(vm, bb_id); 2201 if (rc) 2202 virtio_mem_bbm_set_bb_state(vm, bb_id, 2203 VIRTIO_MEM_BBM_BB_PLUGGED); 2204 else 2205 virtio_mem_bbm_set_bb_state(vm, bb_id, 2206 VIRTIO_MEM_BBM_BB_UNUSED); 2207 return rc; 2208 2209 rollback: 2210 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { 2211 page = pfn_to_online_page(pfn); 2212 if (!page) 2213 continue; 2214 virtio_mem_fake_online(pfn, PAGES_PER_SECTION); 2215 } 2216 virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); 2217 mutex_unlock(&vm->hotplug_mutex); 2218 return rc; 2219 } 2220 2221 /* 2222 * Test if a big block is completely offline. 2223 */ 2224 static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm, 2225 unsigned long bb_id) 2226 { 2227 const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); 2228 const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); 2229 unsigned long pfn; 2230 2231 for (pfn = start_pfn; pfn < start_pfn + nr_pages; 2232 pfn += PAGES_PER_SECTION) { 2233 if (pfn_to_online_page(pfn)) 2234 return false; 2235 } 2236 2237 return true; 2238 } 2239 2240 /* 2241 * Test if a big block is completely onlined to ZONE_MOVABLE (or offline). 
2242 */ 2243 static bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm, 2244 unsigned long bb_id) 2245 { 2246 const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); 2247 const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); 2248 struct page *page; 2249 unsigned long pfn; 2250 2251 for (pfn = start_pfn; pfn < start_pfn + nr_pages; 2252 pfn += PAGES_PER_SECTION) { 2253 page = pfn_to_online_page(pfn); 2254 if (!page) 2255 continue; 2256 if (page_zonenum(page) != ZONE_MOVABLE) 2257 return false; 2258 } 2259 2260 return true; 2261 } 2262 2263 static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff) 2264 { 2265 uint64_t nb_bb = diff / vm->bbm.bb_size; 2266 uint64_t bb_id; 2267 int rc, i; 2268 2269 if (!nb_bb) 2270 return 0; 2271 2272 /* 2273 * Try to unplug big blocks. Similar to SBM, start with offline 2274 * big blocks. 2275 */ 2276 for (i = 0; i < 3; i++) { 2277 virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { 2278 cond_resched(); 2279 2280 /* 2281 * As we're holding no locks, these checks are racy, 2282 * but we don't care. 2283 */ 2284 if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id)) 2285 continue; 2286 if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id)) 2287 continue; 2288 rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id); 2289 if (rc == -EBUSY) 2290 continue; 2291 if (!rc) 2292 nb_bb--; 2293 if (rc || !nb_bb) 2294 return rc; 2295 } 2296 if (i == 0 && !unplug_online) 2297 return 0; 2298 } 2299 2300 return nb_bb ? -EBUSY : 0; 2301 } 2302 2303 /* 2304 * Try to unplug the requested amount of memory. 2305 */ 2306 static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) 2307 { 2308 if (vm->in_sbm) 2309 return virtio_mem_sbm_unplug_request(vm, diff); 2310 return virtio_mem_bbm_unplug_request(vm, diff); 2311 } 2312 2313 /* 2314 * Try to unplug all blocks that couldn't be unplugged before, for example, 2315 * because the hypervisor was busy. 
Further, offline and remove any memory blocks where we previously failed.
 */
static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
{
	/* Partially plugged states whose blocks we retry to remove. */
	const int retry_states[] = {
		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
	};
	unsigned long blk_id;
	int rc = 0;
	int i;

	if (!vm->in_sbm) {
		/* BBM: unplug big blocks that are plugged but not added. */
		virtio_mem_bbm_for_each_bb(vm, blk_id,
					   VIRTIO_MEM_BBM_BB_PLUGGED) {
			rc = virtio_mem_bbm_unplug_bb(vm, blk_id);
			if (rc)
				return rc;
			virtio_mem_bbm_set_bb_state(vm, blk_id,
						    VIRTIO_MEM_BBM_BB_UNUSED);
		}
		return 0;
	}

	/* SBM: unplug memory blocks that are plugged but not added. */
	virtio_mem_sbm_for_each_mb(vm, blk_id, VIRTIO_MEM_SBM_MB_PLUGGED) {
		rc = virtio_mem_sbm_unplug_mb(vm, blk_id);
		if (rc)
			return rc;
		virtio_mem_sbm_set_mb_state(vm, blk_id,
					    VIRTIO_MEM_SBM_MB_UNUSED);
	}

	if (!vm->sbm.have_unplugged_mb)
		return 0;

	/*
	 * Retry (offlining and) removing completely unplugged Linux memory
	 * blocks. The flag is cleared up front and set again below if any
	 * attempt reports an error.
	 */
	vm->sbm.have_unplugged_mb = false;

	mutex_lock(&vm->hotplug_mutex);
	for (i = 0; i < ARRAY_SIZE(retry_states); i++) {
		virtio_mem_sbm_for_each_mb(vm, blk_id, retry_states[i]) {
			rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm,
								     blk_id);
		}
	}
	mutex_unlock(&vm->hotplug_mutex);

	if (rc)
		vm->sbm.have_unplugged_mb = true;

	/* Errors are not critical here; they are ignored and we retry later. */
	return 0;
}

/*
 * Update all parts of the config that could have changed.
2369 */ 2370 static void virtio_mem_refresh_config(struct virtio_mem *vm) 2371 { 2372 const struct range pluggable_range = mhp_get_pluggable_range(true); 2373 uint64_t new_plugged_size, end_addr; 2374 2375 /* the plugged_size is just a reflection of what _we_ did previously */ 2376 virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, 2377 &new_plugged_size); 2378 if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size)) 2379 vm->plugged_size = new_plugged_size; 2380 2381 /* calculate the last usable memory block id */ 2382 virtio_cread_le(vm->vdev, struct virtio_mem_config, 2383 usable_region_size, &vm->usable_region_size); 2384 end_addr = min(vm->addr + vm->usable_region_size - 1, 2385 pluggable_range.end); 2386 2387 if (vm->in_sbm) { 2388 vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr); 2389 if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes())) 2390 vm->sbm.last_usable_mb_id--; 2391 } else { 2392 vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm, 2393 end_addr); 2394 if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size)) 2395 vm->bbm.last_usable_bb_id--; 2396 } 2397 /* 2398 * If we cannot plug any of our device memory (e.g., nothing in the 2399 * usable region is addressable), the last usable memory block id will 2400 * be smaller than the first usable memory block id. We'll stop 2401 * attempting to add memory with -ENOSPC from our main loop. 2402 */ 2403 2404 /* see if there is a request to change the size */ 2405 virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, 2406 &vm->requested_size); 2407 2408 dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size); 2409 dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size); 2410 } 2411 2412 /* 2413 * Workqueue function for handling plug/unplug requests and config updates. 
2414 */ 2415 static void virtio_mem_run_wq(struct work_struct *work) 2416 { 2417 struct virtio_mem *vm = container_of(work, struct virtio_mem, wq); 2418 uint64_t diff; 2419 int rc; 2420 2421 if (unlikely(vm->in_kdump)) { 2422 dev_warn_once(&vm->vdev->dev, 2423 "unexpected workqueue run in kdump kernel\n"); 2424 return; 2425 } 2426 2427 hrtimer_cancel(&vm->retry_timer); 2428 2429 if (vm->broken) 2430 return; 2431 2432 atomic_set(&vm->wq_active, 1); 2433 retry: 2434 rc = 0; 2435 2436 /* Make sure we start with a clean state if there are leftovers. */ 2437 if (unlikely(vm->unplug_all_required)) 2438 rc = virtio_mem_send_unplug_all_request(vm); 2439 2440 if (atomic_read(&vm->config_changed)) { 2441 atomic_set(&vm->config_changed, 0); 2442 virtio_mem_refresh_config(vm); 2443 } 2444 2445 /* Cleanup any leftovers from previous runs */ 2446 if (!rc) 2447 rc = virtio_mem_cleanup_pending_mb(vm); 2448 2449 if (!rc && vm->requested_size != vm->plugged_size) { 2450 if (vm->requested_size > vm->plugged_size) { 2451 diff = vm->requested_size - vm->plugged_size; 2452 rc = virtio_mem_plug_request(vm, diff); 2453 } else { 2454 diff = vm->plugged_size - vm->requested_size; 2455 rc = virtio_mem_unplug_request(vm, diff); 2456 } 2457 } 2458 2459 /* 2460 * Keep retrying to offline and remove completely unplugged Linux 2461 * memory blocks. 2462 */ 2463 if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb) 2464 rc = -EBUSY; 2465 2466 switch (rc) { 2467 case 0: 2468 vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; 2469 break; 2470 case -ENOSPC: 2471 /* 2472 * We cannot add any more memory (alignment, physical limit) 2473 * or we have too many offline memory blocks. 2474 */ 2475 break; 2476 case -ETXTBSY: 2477 /* 2478 * The hypervisor cannot process our request right now 2479 * (e.g., out of memory, migrating); 2480 */ 2481 case -EBUSY: 2482 /* 2483 * We cannot free up any memory to unplug it (all plugged memory 2484 * is busy). 
2485 */ 2486 case -ENOMEM: 2487 /* Out of memory, try again later. */ 2488 hrtimer_start(&vm->retry_timer, ms_to_ktime(vm->retry_timer_ms), 2489 HRTIMER_MODE_REL); 2490 break; 2491 case -EAGAIN: 2492 /* Retry immediately (e.g., the config changed). */ 2493 goto retry; 2494 default: 2495 /* Unknown error, mark as broken */ 2496 dev_err(&vm->vdev->dev, 2497 "unknown error, marking device broken: %d\n", rc); 2498 vm->broken = true; 2499 } 2500 2501 atomic_set(&vm->wq_active, 0); 2502 } 2503 2504 static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer) 2505 { 2506 struct virtio_mem *vm = container_of(timer, struct virtio_mem, 2507 retry_timer); 2508 2509 virtio_mem_retry(vm); 2510 vm->retry_timer_ms = min_t(unsigned int, vm->retry_timer_ms * 2, 2511 VIRTIO_MEM_RETRY_TIMER_MAX_MS); 2512 return HRTIMER_NORESTART; 2513 } 2514 2515 static void virtio_mem_handle_response(struct virtqueue *vq) 2516 { 2517 struct virtio_mem *vm = vq->vdev->priv; 2518 2519 wake_up(&vm->host_resp); 2520 } 2521 2522 static int virtio_mem_init_vq(struct virtio_mem *vm) 2523 { 2524 struct virtqueue *vq; 2525 2526 vq = virtio_find_single_vq(vm->vdev, virtio_mem_handle_response, 2527 "guest-request"); 2528 if (IS_ERR(vq)) 2529 return PTR_ERR(vq); 2530 vm->vq = vq; 2531 2532 return 0; 2533 } 2534 2535 static int virtio_mem_init_hotplug(struct virtio_mem *vm) 2536 { 2537 const struct range pluggable_range = mhp_get_pluggable_range(true); 2538 uint64_t unit_pages, sb_size, addr; 2539 int rc; 2540 2541 /* bad device setup - warn only */ 2542 if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) 2543 dev_warn(&vm->vdev->dev, 2544 "The alignment of the physical start address can make some memory unusable.\n"); 2545 if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes())) 2546 dev_warn(&vm->vdev->dev, 2547 "The alignment of the physical end address can make some memory unusable.\n"); 2548 if (vm->addr < pluggable_range.start || 2549 vm->addr + vm->region_size - 1 > 
pluggable_range.end) 2550 dev_warn(&vm->vdev->dev, 2551 "Some device memory is not addressable/pluggable. This can make some memory unusable.\n"); 2552 2553 /* Prepare the offline threshold - make sure we can add two blocks. */ 2554 vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), 2555 VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); 2556 2557 /* 2558 * alloc_contig_range() works reliably with pageblock 2559 * granularity on ZONE_NORMAL, use pageblock_nr_pages. 2560 */ 2561 sb_size = PAGE_SIZE * pageblock_nr_pages; 2562 sb_size = max_t(uint64_t, vm->device_block_size, sb_size); 2563 2564 if (sb_size < memory_block_size_bytes() && !force_bbm) { 2565 /* SBM: At least two subblocks per Linux memory block. */ 2566 vm->in_sbm = true; 2567 vm->sbm.sb_size = sb_size; 2568 vm->sbm.sbs_per_mb = memory_block_size_bytes() / 2569 vm->sbm.sb_size; 2570 2571 /* Round up to the next full memory block */ 2572 addr = max_t(uint64_t, vm->addr, pluggable_range.start) + 2573 memory_block_size_bytes() - 1; 2574 vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr); 2575 vm->sbm.next_mb_id = vm->sbm.first_mb_id; 2576 } else { 2577 /* BBM: At least one Linux memory block. */ 2578 vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size, 2579 memory_block_size_bytes()); 2580 2581 if (bbm_block_size) { 2582 if (!is_power_of_2(bbm_block_size)) { 2583 dev_warn(&vm->vdev->dev, 2584 "bbm_block_size is not a power of 2"); 2585 } else if (bbm_block_size < vm->bbm.bb_size) { 2586 dev_warn(&vm->vdev->dev, 2587 "bbm_block_size is too small"); 2588 } else { 2589 vm->bbm.bb_size = bbm_block_size; 2590 } 2591 } 2592 2593 /* Round up to the next aligned big block */ 2594 addr = max_t(uint64_t, vm->addr, pluggable_range.start) + 2595 vm->bbm.bb_size - 1; 2596 vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr); 2597 vm->bbm.next_bb_id = vm->bbm.first_bb_id; 2598 2599 /* Make sure we can add two big blocks. 
*/ 2600 vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size, 2601 vm->offline_threshold); 2602 } 2603 2604 dev_info(&vm->vdev->dev, "memory block size: 0x%lx", 2605 memory_block_size_bytes()); 2606 if (vm->in_sbm) 2607 dev_info(&vm->vdev->dev, "subblock size: 0x%llx", 2608 (unsigned long long)vm->sbm.sb_size); 2609 else 2610 dev_info(&vm->vdev->dev, "big block size: 0x%llx", 2611 (unsigned long long)vm->bbm.bb_size); 2612 2613 /* create the parent resource for all memory */ 2614 rc = virtio_mem_create_resource(vm); 2615 if (rc) 2616 return rc; 2617 2618 /* use a single dynamic memory group to cover the whole memory device */ 2619 if (vm->in_sbm) 2620 unit_pages = PHYS_PFN(memory_block_size_bytes()); 2621 else 2622 unit_pages = PHYS_PFN(vm->bbm.bb_size); 2623 rc = memory_group_register_dynamic(vm->nid, unit_pages); 2624 if (rc < 0) 2625 goto out_del_resource; 2626 vm->mgid = rc; 2627 2628 /* 2629 * If we still have memory plugged, we have to unplug all memory first. 2630 * Registering our parent resource makes sure that this memory isn't 2631 * actually in use (e.g., trying to reload the driver). 2632 */ 2633 if (vm->plugged_size) { 2634 vm->unplug_all_required = true; 2635 dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); 2636 } 2637 2638 /* register callbacks */ 2639 vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; 2640 rc = register_memory_notifier(&vm->memory_notifier); 2641 if (rc) 2642 goto out_unreg_group; 2643 /* Block hibernation as early as possible. 
*/ 2644 vm->pm_notifier.priority = INT_MAX; 2645 vm->pm_notifier.notifier_call = virtio_mem_pm_notifier_cb; 2646 rc = register_pm_notifier(&vm->pm_notifier); 2647 if (rc) 2648 goto out_unreg_mem; 2649 rc = register_virtio_mem_device(vm); 2650 if (rc) 2651 goto out_unreg_pm; 2652 2653 virtio_device_ready(vm->vdev); 2654 return 0; 2655 out_unreg_pm: 2656 unregister_pm_notifier(&vm->pm_notifier); 2657 out_unreg_mem: 2658 unregister_memory_notifier(&vm->memory_notifier); 2659 out_unreg_group: 2660 memory_group_unregister(vm->mgid); 2661 out_del_resource: 2662 virtio_mem_delete_resource(vm); 2663 return rc; 2664 } 2665 2666 #ifdef CONFIG_PROC_VMCORE 2667 static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr, 2668 uint64_t size) 2669 { 2670 const uint64_t nb_vm_blocks = size / vm->device_block_size; 2671 const struct virtio_mem_req req = { 2672 .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE), 2673 .u.state.addr = cpu_to_virtio64(vm->vdev, addr), 2674 .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), 2675 }; 2676 int rc = -ENOMEM; 2677 2678 dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr, 2679 addr + size - 1); 2680 2681 switch (virtio_mem_send_request(vm, &req)) { 2682 case VIRTIO_MEM_RESP_ACK: 2683 return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state); 2684 case VIRTIO_MEM_RESP_ERROR: 2685 rc = -EINVAL; 2686 break; 2687 default: 2688 break; 2689 } 2690 2691 dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc); 2692 return rc; 2693 } 2694 2695 static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb, 2696 unsigned long pfn) 2697 { 2698 struct virtio_mem *vm = container_of(cb, struct virtio_mem, 2699 vmcore_cb); 2700 uint64_t addr = PFN_PHYS(pfn); 2701 bool is_ram; 2702 int rc; 2703 2704 if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE)) 2705 return true; 2706 if (!vm->plugged_size) 2707 return false; 2708 2709 /* 2710 * We have to serialize device requests and access to the information 2711 
* about the block queried last. 2712 */ 2713 mutex_lock(&vm->hotplug_mutex); 2714 2715 addr = ALIGN_DOWN(addr, vm->device_block_size); 2716 if (addr != vm->last_block_addr) { 2717 rc = virtio_mem_send_state_request(vm, addr, 2718 vm->device_block_size); 2719 /* On any kind of error, we're going to signal !ram. */ 2720 if (rc == VIRTIO_MEM_STATE_PLUGGED) 2721 vm->last_block_plugged = true; 2722 else 2723 vm->last_block_plugged = false; 2724 vm->last_block_addr = addr; 2725 } 2726 2727 is_ram = vm->last_block_plugged; 2728 mutex_unlock(&vm->hotplug_mutex); 2729 return is_ram; 2730 } 2731 2732 #ifdef CONFIG_PROC_VMCORE_DEVICE_RAM 2733 static int virtio_mem_vmcore_add_device_ram(struct virtio_mem *vm, 2734 struct list_head *list, uint64_t start, uint64_t end) 2735 { 2736 int rc; 2737 2738 rc = vmcore_alloc_add_range(list, start, end - start); 2739 if (rc) 2740 dev_err(&vm->vdev->dev, 2741 "Error adding device RAM range: %d\n", rc); 2742 return rc; 2743 } 2744 2745 static int virtio_mem_vmcore_get_device_ram(struct vmcore_cb *cb, 2746 struct list_head *list) 2747 { 2748 struct virtio_mem *vm = container_of(cb, struct virtio_mem, 2749 vmcore_cb); 2750 const uint64_t device_start = vm->addr; 2751 const uint64_t device_end = vm->addr + vm->usable_region_size; 2752 uint64_t chunk_size, cur_start, cur_end, plugged_range_start = 0; 2753 LIST_HEAD(tmp_list); 2754 int rc; 2755 2756 if (!vm->plugged_size) 2757 return 0; 2758 2759 /* Process memory sections, unless the device block size is bigger. */ 2760 chunk_size = max_t(uint64_t, PFN_PHYS(PAGES_PER_SECTION), 2761 vm->device_block_size); 2762 2763 mutex_lock(&vm->hotplug_mutex); 2764 2765 /* 2766 * We process larger chunks and indicate the complete chunk if any 2767 * block in there is plugged. This reduces the number of pfn_is_ram() 2768 * callbacks and mimic what is effectively being done when the old 2769 * kernel would add complete memory sections/blocks to the elfcore hdr. 
2770 */ 2771 cur_start = device_start; 2772 for (cur_start = device_start; cur_start < device_end; cur_start = cur_end) { 2773 cur_end = ALIGN_DOWN(cur_start + chunk_size, chunk_size); 2774 cur_end = min_t(uint64_t, cur_end, device_end); 2775 2776 rc = virtio_mem_send_state_request(vm, cur_start, 2777 cur_end - cur_start); 2778 2779 if (rc < 0) { 2780 dev_err(&vm->vdev->dev, 2781 "Error querying block states: %d\n", rc); 2782 goto out; 2783 } else if (rc != VIRTIO_MEM_STATE_UNPLUGGED) { 2784 /* Merge ranges with plugged memory. */ 2785 if (!plugged_range_start) 2786 plugged_range_start = cur_start; 2787 continue; 2788 } 2789 2790 /* Flush any plugged range. */ 2791 if (plugged_range_start) { 2792 rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list, 2793 plugged_range_start, 2794 cur_start); 2795 if (rc) 2796 goto out; 2797 plugged_range_start = 0; 2798 } 2799 } 2800 2801 /* Flush any plugged range. */ 2802 if (plugged_range_start) 2803 rc = virtio_mem_vmcore_add_device_ram(vm, &tmp_list, 2804 plugged_range_start, 2805 cur_start); 2806 out: 2807 mutex_unlock(&vm->hotplug_mutex); 2808 if (rc < 0) { 2809 vmcore_free_ranges(&tmp_list); 2810 return rc; 2811 } 2812 list_splice_tail(&tmp_list, list); 2813 return 0; 2814 } 2815 #endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */ 2816 #endif /* CONFIG_PROC_VMCORE */ 2817 2818 static int virtio_mem_init_kdump(struct virtio_mem *vm) 2819 { 2820 /* We must be prepared to receive a callback immediately. 
*/ 2821 virtio_device_ready(vm->vdev); 2822 #ifdef CONFIG_PROC_VMCORE 2823 dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); 2824 vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; 2825 #ifdef CONFIG_PROC_VMCORE_DEVICE_RAM 2826 vm->vmcore_cb.get_device_ram = virtio_mem_vmcore_get_device_ram; 2827 #endif /* CONFIG_PROC_VMCORE_DEVICE_RAM */ 2828 register_vmcore_cb(&vm->vmcore_cb); 2829 return 0; 2830 #else /* CONFIG_PROC_VMCORE */ 2831 dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n"); 2832 return -EBUSY; 2833 #endif /* CONFIG_PROC_VMCORE */ 2834 } 2835 2836 static int virtio_mem_init(struct virtio_mem *vm) 2837 { 2838 uint16_t node_id; 2839 2840 if (!vm->vdev->config->get) { 2841 dev_err(&vm->vdev->dev, "config access disabled\n"); 2842 return -EINVAL; 2843 } 2844 2845 /* Fetch all properties that can't change. */ 2846 virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, 2847 &vm->plugged_size); 2848 virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, 2849 &vm->device_block_size); 2850 virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, 2851 &node_id); 2852 vm->nid = virtio_mem_translate_node_id(vm, node_id); 2853 virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); 2854 virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, 2855 &vm->region_size); 2856 virtio_cread_le(vm->vdev, struct virtio_mem_config, usable_region_size, 2857 &vm->usable_region_size); 2858 2859 /* Determine the nid for the device based on the lowest address. 
*/ 2860 if (vm->nid == NUMA_NO_NODE) 2861 vm->nid = memory_add_physaddr_to_nid(vm->addr); 2862 2863 dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); 2864 dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); 2865 dev_info(&vm->vdev->dev, "device block size: 0x%llx", 2866 (unsigned long long)vm->device_block_size); 2867 if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) 2868 dev_info(&vm->vdev->dev, "nid: %d", vm->nid); 2869 2870 /* 2871 * We don't want to (un)plug or reuse any memory when in kdump. The 2872 * memory is still accessible (but not exposed to Linux). 2873 */ 2874 if (vm->in_kdump) 2875 return virtio_mem_init_kdump(vm); 2876 return virtio_mem_init_hotplug(vm); 2877 } 2878 2879 static int virtio_mem_create_resource(struct virtio_mem *vm) 2880 { 2881 /* 2882 * When force-unloading the driver and removing the device, we 2883 * could have a garbage pointer. Duplicate the string. 2884 */ 2885 const char *name = kstrdup(dev_name(&vm->vdev->dev), GFP_KERNEL); 2886 2887 if (!name) 2888 return -ENOMEM; 2889 2890 /* Disallow mapping device memory via /dev/mem completely. */ 2891 vm->parent_resource = __request_mem_region(vm->addr, vm->region_size, 2892 name, IORESOURCE_SYSTEM_RAM | 2893 IORESOURCE_EXCLUSIVE); 2894 if (!vm->parent_resource) { 2895 kfree(name); 2896 dev_warn(&vm->vdev->dev, "could not reserve device region\n"); 2897 dev_info(&vm->vdev->dev, 2898 "reloading the driver is not supported\n"); 2899 return -EBUSY; 2900 } 2901 2902 /* The memory is not actually busy - make add_memory() work. 
*/ 2903 vm->parent_resource->flags &= ~IORESOURCE_BUSY; 2904 return 0; 2905 } 2906 2907 static void virtio_mem_delete_resource(struct virtio_mem *vm) 2908 { 2909 const char *name; 2910 2911 if (!vm->parent_resource) 2912 return; 2913 2914 name = vm->parent_resource->name; 2915 release_resource(vm->parent_resource); 2916 kfree(vm->parent_resource); 2917 kfree(name); 2918 vm->parent_resource = NULL; 2919 } 2920 2921 static int virtio_mem_range_has_system_ram(struct resource *res, void *arg) 2922 { 2923 return 1; 2924 } 2925 2926 static bool virtio_mem_has_memory_added(struct virtio_mem *vm) 2927 { 2928 const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; 2929 2930 return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr, 2931 vm->addr + vm->region_size, NULL, 2932 virtio_mem_range_has_system_ram) == 1; 2933 } 2934 2935 static int virtio_mem_probe(struct virtio_device *vdev) 2936 { 2937 struct virtio_mem *vm; 2938 int rc; 2939 2940 BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); 2941 BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10); 2942 2943 vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL); 2944 if (!vm) 2945 return -ENOMEM; 2946 2947 init_waitqueue_head(&vm->host_resp); 2948 vm->vdev = vdev; 2949 INIT_WORK(&vm->wq, virtio_mem_run_wq); 2950 mutex_init(&vm->hotplug_mutex); 2951 INIT_LIST_HEAD(&vm->next); 2952 spin_lock_init(&vm->removal_lock); 2953 hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2954 vm->retry_timer.function = virtio_mem_timer_expired; 2955 vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; 2956 vm->in_kdump = is_kdump_kernel(); 2957 2958 /* register the virtqueue */ 2959 rc = virtio_mem_init_vq(vm); 2960 if (rc) 2961 goto out_free_vm; 2962 2963 /* initialize the device by querying the config */ 2964 rc = virtio_mem_init(vm); 2965 if (rc) 2966 goto out_del_vq; 2967 2968 /* trigger a config update to start processing the requested_size */ 2969 if (!vm->in_kdump) { 2970 atomic_set(&vm->config_changed, 1); 
2971 queue_work(system_freezable_wq, &vm->wq); 2972 } 2973 2974 return 0; 2975 out_del_vq: 2976 vdev->config->del_vqs(vdev); 2977 out_free_vm: 2978 kfree(vm); 2979 vdev->priv = NULL; 2980 2981 return rc; 2982 } 2983 2984 static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) 2985 { 2986 unsigned long mb_id; 2987 int rc; 2988 2989 /* 2990 * Make sure the workqueue won't be triggered anymore and no memory 2991 * blocks can be onlined/offlined until we're finished here. 2992 */ 2993 mutex_lock(&vm->hotplug_mutex); 2994 spin_lock_irq(&vm->removal_lock); 2995 vm->removing = true; 2996 spin_unlock_irq(&vm->removal_lock); 2997 mutex_unlock(&vm->hotplug_mutex); 2998 2999 /* wait until the workqueue stopped */ 3000 cancel_work_sync(&vm->wq); 3001 hrtimer_cancel(&vm->retry_timer); 3002 3003 if (vm->in_sbm) { 3004 /* 3005 * After we unregistered our callbacks, user space can online 3006 * partially plugged offline blocks. Make sure to remove them. 3007 */ 3008 virtio_mem_sbm_for_each_mb(vm, mb_id, 3009 VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { 3010 rc = virtio_mem_sbm_remove_mb(vm, mb_id); 3011 BUG_ON(rc); 3012 virtio_mem_sbm_set_mb_state(vm, mb_id, 3013 VIRTIO_MEM_SBM_MB_UNUSED); 3014 } 3015 /* 3016 * After we unregistered our callbacks, user space can no longer 3017 * offline partially plugged online memory blocks. No need to 3018 * worry about them. 3019 */ 3020 } 3021 3022 /* unregister callbacks */ 3023 unregister_virtio_mem_device(vm); 3024 unregister_pm_notifier(&vm->pm_notifier); 3025 unregister_memory_notifier(&vm->memory_notifier); 3026 3027 /* 3028 * There is no way we could reliably remove all memory we have added to 3029 * the system. And there is no way to stop the driver/device from going 3030 * away. Warn at least. 
3031 */ 3032 if (virtio_mem_has_memory_added(vm)) { 3033 dev_warn(&vm->vdev->dev, 3034 "device still has system memory added\n"); 3035 } else { 3036 virtio_mem_delete_resource(vm); 3037 kfree_const(vm->resource_name); 3038 memory_group_unregister(vm->mgid); 3039 } 3040 3041 /* remove all tracking data - no locking needed */ 3042 if (vm->in_sbm) { 3043 vfree(vm->sbm.mb_states); 3044 vfree(vm->sbm.sb_states); 3045 } else { 3046 vfree(vm->bbm.bb_states); 3047 } 3048 } 3049 3050 static void virtio_mem_deinit_kdump(struct virtio_mem *vm) 3051 { 3052 #ifdef CONFIG_PROC_VMCORE 3053 unregister_vmcore_cb(&vm->vmcore_cb); 3054 #endif /* CONFIG_PROC_VMCORE */ 3055 } 3056 3057 static void virtio_mem_remove(struct virtio_device *vdev) 3058 { 3059 struct virtio_mem *vm = vdev->priv; 3060 3061 if (vm->in_kdump) 3062 virtio_mem_deinit_kdump(vm); 3063 else 3064 virtio_mem_deinit_hotplug(vm); 3065 3066 /* reset the device and cleanup the queues */ 3067 virtio_reset_device(vdev); 3068 vdev->config->del_vqs(vdev); 3069 3070 kfree(vm); 3071 vdev->priv = NULL; 3072 } 3073 3074 static void virtio_mem_config_changed(struct virtio_device *vdev) 3075 { 3076 struct virtio_mem *vm = vdev->priv; 3077 3078 if (unlikely(vm->in_kdump)) 3079 return; 3080 3081 atomic_set(&vm->config_changed, 1); 3082 virtio_mem_retry(vm); 3083 } 3084 3085 #ifdef CONFIG_PM_SLEEP 3086 static int virtio_mem_freeze(struct virtio_device *vdev) 3087 { 3088 struct virtio_mem *vm = vdev->priv; 3089 3090 /* 3091 * We block hibernation using the PM notifier completely. The workqueue 3092 * is already frozen by the PM core at this point, so we simply 3093 * reset the device and cleanup the queues. 
3094 */ 3095 if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE && 3096 vm->plugged_size && 3097 !virtio_has_feature(vm->vdev, VIRTIO_MEM_F_PERSISTENT_SUSPEND)) { 3098 dev_err(&vm->vdev->dev, 3099 "suspending with plugged memory is not supported\n"); 3100 return -EPERM; 3101 } 3102 3103 virtio_reset_device(vdev); 3104 vdev->config->del_vqs(vdev); 3105 vm->vq = NULL; 3106 return 0; 3107 } 3108 3109 static int virtio_mem_restore(struct virtio_device *vdev) 3110 { 3111 struct virtio_mem *vm = vdev->priv; 3112 int ret; 3113 3114 ret = virtio_mem_init_vq(vm); 3115 if (ret) 3116 return ret; 3117 virtio_device_ready(vdev); 3118 3119 /* Let's check if anything changed. */ 3120 virtio_mem_config_changed(vdev); 3121 return 0; 3122 } 3123 #endif 3124 3125 static unsigned int virtio_mem_features[] = { 3126 #if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA) 3127 VIRTIO_MEM_F_ACPI_PXM, 3128 #endif 3129 VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, 3130 VIRTIO_MEM_F_PERSISTENT_SUSPEND, 3131 }; 3132 3133 static const struct virtio_device_id virtio_mem_id_table[] = { 3134 { VIRTIO_ID_MEM, VIRTIO_DEV_ANY_ID }, 3135 { 0 }, 3136 }; 3137 3138 static struct virtio_driver virtio_mem_driver = { 3139 .feature_table = virtio_mem_features, 3140 .feature_table_size = ARRAY_SIZE(virtio_mem_features), 3141 .driver.name = KBUILD_MODNAME, 3142 .id_table = virtio_mem_id_table, 3143 .probe = virtio_mem_probe, 3144 .remove = virtio_mem_remove, 3145 .config_changed = virtio_mem_config_changed, 3146 #ifdef CONFIG_PM_SLEEP 3147 .freeze = virtio_mem_freeze, 3148 .restore = virtio_mem_restore, 3149 #endif 3150 }; 3151 3152 module_virtio_driver(virtio_mem_driver); 3153 MODULE_DEVICE_TABLE(virtio, virtio_mem_id_table); 3154 MODULE_AUTHOR("David Hildenbrand <david@redhat.com>"); 3155 MODULE_DESCRIPTION("Virtio-mem driver"); 3156 MODULE_LICENSE("GPL"); 3157