1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Collaborative memory management interface. 4 * 5 * Copyright (C) 2008 IBM Corporation 6 * Author(s): Brian King (brking@linux.vnet.ibm.com), 7 */ 8 9 #include <linux/ctype.h> 10 #include <linux/delay.h> 11 #include <linux/errno.h> 12 #include <linux/fs.h> 13 #include <linux/gfp.h> 14 #include <linux/kthread.h> 15 #include <linux/module.h> 16 #include <linux/oom.h> 17 #include <linux/reboot.h> 18 #include <linux/sched.h> 19 #include <linux/stringify.h> 20 #include <linux/swap.h> 21 #include <linux/device.h> 22 #include <linux/balloon_compaction.h> 23 #include <asm/firmware.h> 24 #include <asm/hvcall.h> 25 #include <asm/mmu.h> 26 #include <linux/uaccess.h> 27 #include <linux/memory.h> 28 #include <asm/plpar_wrappers.h> 29 30 #include "pseries.h" 31 32 #define CMM_DRIVER_VERSION "1.0.0" 33 #define CMM_DEFAULT_DELAY 1 34 #define CMM_HOTPLUG_DELAY 5 35 #define CMM_DEBUG 0 36 #define CMM_DISABLE 0 37 #define CMM_OOM_KB 1024 38 #define CMM_MIN_MEM_MB 256 39 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) 40 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 41 42 #define CMM_MEM_HOTPLUG_PRI 1 43 44 static unsigned int delay = CMM_DEFAULT_DELAY; 45 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; 46 static unsigned int oom_kb = CMM_OOM_KB; 47 static unsigned int cmm_debug = CMM_DEBUG; 48 static unsigned int cmm_disabled = CMM_DISABLE; 49 static unsigned long min_mem_mb = CMM_MIN_MEM_MB; 50 static bool __read_mostly simulate; 51 static unsigned long simulate_loan_target_kb; 52 static struct device cmm_dev; 53 54 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>"); 55 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); 56 MODULE_LICENSE("GPL"); 57 MODULE_VERSION(CMM_DRIVER_VERSION); 58 59 module_param_named(delay, delay, uint, 0644); 60 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " 61 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); 62 module_param_named(hotplug_delay, hotplug_delay, uint, 0644); 63 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove " 64 "before loaning resumes. " 65 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); 66 module_param_named(oom_kb, oom_kb, uint, 0644); 67 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " 68 "[Default=" __stringify(CMM_OOM_KB) "]"); 69 module_param_named(min_mem_mb, min_mem_mb, ulong, 0644); 70 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " 71 "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); 72 module_param_named(debug, cmm_debug, uint, 0644); 73 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " 74 "[Default=" __stringify(CMM_DEBUG) "]"); 75 module_param_named(simulate, simulate, bool, 0444); 76 MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw)."); 77 78 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } 79 80 static atomic_long_t loaned_pages; 81 static unsigned long loaned_pages_target; 82 static unsigned long oom_freed_pages; 83 84 static DEFINE_MUTEX(hotplug_mutex); 85 static int hotplug_occurred; /* protected by the hotplug mutex */ 86 87 static struct task_struct *cmm_thread_ptr; 88 static struct balloon_dev_info b_dev_info; 89 90 static long plpar_page_set_loaned(struct page *page) 91 { 92 const unsigned long vpa = page_to_phys(page); 93 unsigned long cmo_page_sz = cmo_get_page_size(); 94 long rc = 0; 95 int i; 96 97 if (unlikely(simulate)) 98 return 0; 99 100 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) 101 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); 102 103 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) 104 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, 105 vpa + i - cmo_page_sz, 0); 106 107 return rc; 108 } 109 110 static long plpar_page_set_active(struct page *page) 111 { 112 const unsigned long vpa = page_to_phys(page); 113 unsigned long cmo_page_sz = cmo_get_page_size(); 114 long rc = 0; 115 int i; 116 117 if (unlikely(simulate)) 118 return 0; 119 120 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) 121 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); 122 123 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) 124 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, 125 vpa + i - cmo_page_sz, 0); 126 127 return rc; 128 } 129 130 /** 131 * cmm_alloc_pages - Allocate pages and mark them as loaned 132 * @nr: number of pages to allocate 133 * 134 * Return value: 135 * number of pages requested to be allocated which were not 136 **/ 137 static long cmm_alloc_pages(long nr) 138 { 139 struct page *page; 140 long rc; 141 142 cmm_dbg("Begin request for %ld pages\n", nr); 143 144 while (nr) { 145 /* Exit if a hotplug operation is in progress or occurred */ 146 if (mutex_trylock(&hotplug_mutex)) { 147 if (hotplug_occurred) { 148 mutex_unlock(&hotplug_mutex); 149 break; 150 } 151 mutex_unlock(&hotplug_mutex); 152 } else { 153 break; 154 } 155 156 page = balloon_page_alloc(); 157 if (!page) 158 break; 159 rc = plpar_page_set_loaned(page); 160 if (rc) { 161 pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc); 162 __free_page(page); 163 break; 164 } 165 166 balloon_page_enqueue(&b_dev_info, page); 167 atomic_long_inc(&loaned_pages); 168 adjust_managed_page_count(page, -1); 169 nr--; 170 } 171 172 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 173 return nr; 174 } 175 176 /** 177 * cmm_free_pages - Free pages and mark them as active 178 * @nr: number of pages to free 179 * 180 * Return value: 181 * number of pages requested to be freed which were not 182 **/ 183 static long cmm_free_pages(long nr) 184 { 185 struct page *page; 186 187 cmm_dbg("Begin free of %ld pages.\n", nr); 188 while (nr) { 189 page = balloon_page_dequeue(&b_dev_info); 190 if (!page) 191 break; 192 plpar_page_set_active(page); 193 adjust_managed_page_count(page, 1); 194 __free_page(page); 195 atomic_long_dec(&loaned_pages); 196 nr--; 197 } 198 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 199 return nr; 200 } 201 202 /** 203 * cmm_oom_notify - OOM notifier 204 * @self: notifier block struct 205 * @dummy: not used 206 * @parm: returned - number of pages freed 207 * 208 * Return value: 209 * NOTIFY_OK 210 **/ 211 static int cmm_oom_notify(struct notifier_block *self, 212 unsigned long dummy, void *parm) 213 { 214 unsigned long *freed = parm; 215 long nr = KB2PAGES(oom_kb); 216 217 cmm_dbg("OOM processing started\n"); 218 nr = cmm_free_pages(nr); 219 loaned_pages_target = atomic_long_read(&loaned_pages); 220 *freed += KB2PAGES(oom_kb) - nr; 221 oom_freed_pages += KB2PAGES(oom_kb) - nr; 222 cmm_dbg("OOM processing complete\n"); 223 return NOTIFY_OK; 224 } 225 226 /** 227 * cmm_get_mpp - Read memory performance parameters 228 * 229 * Makes hcall to query the current page loan request from the hypervisor. 230 * 231 * Return value: 232 * nothing 233 **/ 234 static void cmm_get_mpp(void) 235 { 236 const long __loaned_pages = atomic_long_read(&loaned_pages); 237 const long total_pages = totalram_pages() + __loaned_pages; 238 int rc; 239 struct hvcall_mpp_data mpp_data; 240 signed long active_pages_target, page_loan_request, target; 241 signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; 242 243 if (likely(!simulate)) { 244 rc = h_get_mpp(&mpp_data); 245 if (rc != H_SUCCESS) 246 return; 247 page_loan_request = div_s64((s64)mpp_data.loan_request, 248 PAGE_SIZE); 249 target = page_loan_request + __loaned_pages; 250 } else { 251 target = KB2PAGES(simulate_loan_target_kb); 252 page_loan_request = target - __loaned_pages; 253 } 254 255 if (target < 0 || total_pages < min_mem_pages) 256 target = 0; 257 258 if (target > oom_freed_pages) 259 target -= oom_freed_pages; 260 else 261 target = 0; 262 263 active_pages_target = total_pages - target; 264 265 if (min_mem_pages > active_pages_target) 266 target = total_pages - min_mem_pages; 267 268 if (target < 0) 269 target = 0; 270 271 loaned_pages_target = target; 272 273 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", 274 page_loan_request, __loaned_pages, loaned_pages_target, 275 oom_freed_pages, totalram_pages()); 276 } 277 278 static struct notifier_block cmm_oom_nb = { 279 .notifier_call = cmm_oom_notify 280 }; 281 282 /** 283 * cmm_thread - CMM task thread 284 * @dummy: not used 285 * 286 * Return value: 287 * 0 288 **/ 289 static int cmm_thread(void *dummy) 290 { 291 unsigned long timeleft; 292 long __loaned_pages; 293 294 while (1) { 295 timeleft = msleep_interruptible(delay * 1000); 296 297 if (kthread_should_stop() || timeleft) 298 break; 299 300 if (mutex_trylock(&hotplug_mutex)) { 301 if (hotplug_occurred) { 302 hotplug_occurred = 0; 303 mutex_unlock(&hotplug_mutex); 304 cmm_dbg("Hotplug operation has occurred, " 305 "loaning activity suspended " 306 "for %d seconds.\n", 307 hotplug_delay); 308 timeleft = msleep_interruptible(hotplug_delay * 309 1000); 310 if (kthread_should_stop() || timeleft) 311 break; 312 continue; 313 } 314 mutex_unlock(&hotplug_mutex); 315 } else { 316 cmm_dbg("Hotplug operation in progress, activity " 317 "suspended\n"); 318 continue; 319 } 320 321 cmm_get_mpp(); 322 323 __loaned_pages = atomic_long_read(&loaned_pages); 324 if (loaned_pages_target > __loaned_pages) { 325 if (cmm_alloc_pages(loaned_pages_target - __loaned_pages)) 326 loaned_pages_target = __loaned_pages; 327 } else if (loaned_pages_target < __loaned_pages) 328 cmm_free_pages(__loaned_pages - loaned_pages_target); 329 } 330 return 0; 331 } 332 333 #define CMM_SHOW(name, format, args...) \ 334 static ssize_t show_##name(struct device *dev, \ 335 struct device_attribute *attr, \ 336 char *buf) \ 337 { \ 338 return sprintf(buf, format, ##args); \ 339 } \ 340 static DEVICE_ATTR(name, 0444, show_##name, NULL) 341 342 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages))); 343 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); 344 345 static ssize_t show_oom_pages(struct device *dev, 346 struct device_attribute *attr, char *buf) 347 { 348 return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages)); 349 } 350 351 static ssize_t store_oom_pages(struct device *dev, 352 struct device_attribute *attr, 353 const char *buf, size_t count) 354 { 355 unsigned long val = simple_strtoul (buf, NULL, 10); 356 357 if (!capable(CAP_SYS_ADMIN)) 358 return -EPERM; 359 if (val != 0) 360 return -EBADMSG; 361 362 oom_freed_pages = 0; 363 return count; 364 } 365 366 static DEVICE_ATTR(oom_freed_kb, 0644, 367 show_oom_pages, store_oom_pages); 368 369 static struct device_attribute *cmm_attrs[] = { 370 &dev_attr_loaned_kb, 371 &dev_attr_loaned_target_kb, 372 &dev_attr_oom_freed_kb, 373 }; 374 375 static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644, 376 simulate_loan_target_kb); 377 378 static struct bus_type cmm_subsys = { 379 .name = "cmm", 380 .dev_name = "cmm", 381 }; 382 383 static void cmm_release_device(struct device *dev) 384 { 385 } 386 387 /** 388 * cmm_sysfs_register - Register with sysfs 389 * 390 * Return value: 391 * 0 on success / other on failure 392 **/ 393 static int cmm_sysfs_register(struct device *dev) 394 { 395 int i, rc; 396 397 if ((rc = subsys_system_register(&cmm_subsys, NULL))) 398 return rc; 399 400 dev->id = 0; 401 dev->bus = &cmm_subsys; 402 dev->release = cmm_release_device; 403 404 if ((rc = device_register(dev))) 405 goto subsys_unregister; 406 407 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { 408 if ((rc = device_create_file(dev, cmm_attrs[i]))) 409 goto fail; 410 } 411 412 if (!simulate) 413 return 0; 414 rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr); 415 if (rc) 416 goto fail; 417 return 0; 418 419 fail: 420 while (--i >= 0) 421 device_remove_file(dev, cmm_attrs[i]); 422 device_unregister(dev); 423 subsys_unregister: 424 bus_unregister(&cmm_subsys); 425 return rc; 426 } 427 428 /** 429 * cmm_unregister_sysfs - Unregister from sysfs 430 * 431 **/ 432 static void cmm_unregister_sysfs(struct device *dev) 433 { 434 int i; 435 436 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) 437 device_remove_file(dev, cmm_attrs[i]); 438 device_unregister(dev); 439 bus_unregister(&cmm_subsys); 440 } 441 442 /** 443 * cmm_reboot_notifier - Make sure pages are not still marked as "loaned" 444 * 445 **/ 446 static int cmm_reboot_notifier(struct notifier_block *nb, 447 unsigned long action, void *unused) 448 { 449 if (action == SYS_RESTART) { 450 if (cmm_thread_ptr) 451 kthread_stop(cmm_thread_ptr); 452 cmm_thread_ptr = NULL; 453 cmm_free_pages(atomic_long_read(&loaned_pages)); 454 } 455 return NOTIFY_DONE; 456 } 457 458 static struct notifier_block cmm_reboot_nb = { 459 .notifier_call = cmm_reboot_notifier, 460 }; 461 462 /** 463 * cmm_memory_cb - Handle memory hotplug notifier calls 464 * @self: notifier block struct 465 * @action: action to take 466 * @arg: struct memory_notify data for handler 467 * 468 * Return value: 469 * NOTIFY_OK or notifier error based on subfunction return value 470 * 471 **/ 472 static int cmm_memory_cb(struct notifier_block *self, 473 unsigned long action, void *arg) 474 { 475 switch (action) { 476 case MEM_GOING_OFFLINE: 477 mutex_lock(&hotplug_mutex); 478 hotplug_occurred = 1; 479 break; 480 case MEM_OFFLINE: 481 case MEM_CANCEL_OFFLINE: 482 mutex_unlock(&hotplug_mutex); 483 cmm_dbg("Memory offline operation complete.\n"); 484 break; 485 case MEM_GOING_ONLINE: 486 case MEM_ONLINE: 487 case MEM_CANCEL_ONLINE: 488 break; 489 } 490 491 return NOTIFY_OK; 492 } 493 494 static struct notifier_block cmm_mem_nb = { 495 .notifier_call = cmm_memory_cb, 496 .priority = CMM_MEM_HOTPLUG_PRI 497 }; 498 499 #ifdef CONFIG_BALLOON_COMPACTION 500 static int cmm_migratepage(struct balloon_dev_info *b_dev_info, 501 struct page *newpage, struct page *page, 502 enum migrate_mode mode) 503 { 504 unsigned long flags; 505 506 /* 507 * loan/"inflate" the newpage first. 508 * 509 * We might race against the cmm_thread who might discover after our 510 * loan request that another page is to be unloaned. However, once 511 * the cmm_thread runs again later, this error will automatically 512 * be corrected. 513 */ 514 if (plpar_page_set_loaned(newpage)) { 515 /* Unlikely, but possible. Tell the caller not to retry now. */ 516 pr_err_ratelimited("%s: Cannot set page to loaned.", __func__); 517 return -EBUSY; 518 } 519 520 /* balloon page list reference */ 521 get_page(newpage); 522 523 /* 524 * When we migrate a page to a different zone, we have to fixup the 525 * count of both involved zones as we adjusted the managed page count 526 * when inflating. 527 */ 528 if (page_zone(page) != page_zone(newpage)) { 529 adjust_managed_page_count(page, 1); 530 adjust_managed_page_count(newpage, -1); 531 } 532 533 spin_lock_irqsave(&b_dev_info->pages_lock, flags); 534 balloon_page_insert(b_dev_info, newpage); 535 balloon_page_delete(page); 536 b_dev_info->isolated_pages--; 537 spin_unlock_irqrestore(&b_dev_info->pages_lock, flags); 538 539 /* 540 * activate/"deflate" the old page. We ignore any errors just like the 541 * other callers. 542 */ 543 plpar_page_set_active(page); 544 545 /* balloon page list reference */ 546 put_page(page); 547 548 return MIGRATEPAGE_SUCCESS; 549 } 550 551 static void cmm_balloon_compaction_init(void) 552 { 553 balloon_devinfo_init(&b_dev_info); 554 b_dev_info.migratepage = cmm_migratepage; 555 } 556 #else /* CONFIG_BALLOON_COMPACTION */ 557 static void cmm_balloon_compaction_init(void) 558 { 559 } 560 #endif /* CONFIG_BALLOON_COMPACTION */ 561 562 /** 563 * cmm_init - Module initialization 564 * 565 * Return value: 566 * 0 on success / other on failure 567 **/ 568 static int cmm_init(void) 569 { 570 int rc; 571 572 if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) 573 return -EOPNOTSUPP; 574 575 cmm_balloon_compaction_init(); 576 577 rc = register_oom_notifier(&cmm_oom_nb); 578 if (rc < 0) 579 goto out_balloon_compaction; 580 581 if ((rc = register_reboot_notifier(&cmm_reboot_nb))) 582 goto out_oom_notifier; 583 584 if ((rc = cmm_sysfs_register(&cmm_dev))) 585 goto out_reboot_notifier; 586 587 rc = register_memory_notifier(&cmm_mem_nb); 588 if (rc) 589 goto out_unregister_notifier; 590 591 if (cmm_disabled) 592 return 0; 593 594 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 595 if (IS_ERR(cmm_thread_ptr)) { 596 rc = PTR_ERR(cmm_thread_ptr); 597 goto out_unregister_notifier; 598 } 599 600 return 0; 601 out_unregister_notifier: 602 unregister_memory_notifier(&cmm_mem_nb); 603 cmm_unregister_sysfs(&cmm_dev); 604 out_reboot_notifier: 605 unregister_reboot_notifier(&cmm_reboot_nb); 606 out_oom_notifier: 607 unregister_oom_notifier(&cmm_oom_nb); 608 out_balloon_compaction: 609 return rc; 610 } 611 612 /** 613 * cmm_exit - Module exit 614 * 615 * Return value: 616 * nothing 617 **/ 618 static void cmm_exit(void) 619 { 620 if (cmm_thread_ptr) 621 kthread_stop(cmm_thread_ptr); 622 unregister_oom_notifier(&cmm_oom_nb); 623 unregister_reboot_notifier(&cmm_reboot_nb); 624 unregister_memory_notifier(&cmm_mem_nb); 625 cmm_free_pages(atomic_long_read(&loaned_pages)); 626 cmm_unregister_sysfs(&cmm_dev); 627 } 628 629 /** 630 * cmm_set_disable - Disable/Enable CMM 631 * 632 * Return value: 633 * 0 on success / other on failure 634 **/ 635 static int cmm_set_disable(const char *val, const struct kernel_param *kp) 636 { 637 int disable = simple_strtoul(val, NULL, 10); 638 639 if (disable != 0 && disable != 1) 640 return -EINVAL; 641 642 if (disable && !cmm_disabled) { 643 if (cmm_thread_ptr) 644 kthread_stop(cmm_thread_ptr); 645 cmm_thread_ptr = NULL; 646 cmm_free_pages(atomic_long_read(&loaned_pages)); 647 } else if (!disable && cmm_disabled) { 648 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 649 if (IS_ERR(cmm_thread_ptr)) 650 return PTR_ERR(cmm_thread_ptr); 651 } 652 653 cmm_disabled = disable; 654 return 0; 655 } 656 657 module_param_call(disable, cmm_set_disable, param_get_uint, 658 &cmm_disabled, 0644); 659 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " 660 "[Default=" __stringify(CMM_DISABLE) "]"); 661 662 module_init(cmm_init); 663 module_exit(cmm_exit); 664