1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Collaborative memory management interface. 4 * 5 * Copyright (C) 2008 IBM Corporation 6 * Author(s): Brian King (brking@linux.vnet.ibm.com), 7 */ 8 9 #include <linux/ctype.h> 10 #include <linux/delay.h> 11 #include <linux/errno.h> 12 #include <linux/fs.h> 13 #include <linux/gfp.h> 14 #include <linux/kthread.h> 15 #include <linux/module.h> 16 #include <linux/oom.h> 17 #include <linux/reboot.h> 18 #include <linux/sched.h> 19 #include <linux/stringify.h> 20 #include <linux/swap.h> 21 #include <linux/sysfs.h> 22 #include <linux/device.h> 23 #include <linux/balloon.h> 24 #include <asm/firmware.h> 25 #include <asm/hvcall.h> 26 #include <asm/mmu.h> 27 #include <linux/uaccess.h> 28 #include <linux/memory.h> 29 #include <asm/plpar_wrappers.h> 30 31 #include "pseries.h" 32 33 #define CMM_DRIVER_VERSION "1.0.0" 34 #define CMM_DEFAULT_DELAY 1 35 #define CMM_HOTPLUG_DELAY 5 36 #define CMM_DEBUG 0 37 #define CMM_DISABLE 0 38 #define CMM_OOM_KB 1024 39 #define CMM_MIN_MEM_MB 256 40 #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) 41 #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 42 43 #define CMM_MEM_HOTPLUG_PRI 1 44 45 static unsigned int delay = CMM_DEFAULT_DELAY; 46 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; 47 static unsigned int oom_kb = CMM_OOM_KB; 48 static unsigned int cmm_debug = CMM_DEBUG; 49 static unsigned int cmm_disabled = CMM_DISABLE; 50 static unsigned long min_mem_mb = CMM_MIN_MEM_MB; 51 static bool __read_mostly simulate; 52 static unsigned long simulate_loan_target_kb; 53 static struct device cmm_dev; 54 55 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>"); 56 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager"); 57 MODULE_LICENSE("GPL"); 58 MODULE_VERSION(CMM_DRIVER_VERSION); 59 60 module_param_named(delay, delay, uint, 0644); 61 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " 62 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); 63 module_param_named(hotplug_delay, hotplug_delay, uint, 0644); 64 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove " 65 "before loaning resumes. " 66 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); 67 module_param_named(oom_kb, oom_kb, uint, 0644); 68 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " 69 "[Default=" __stringify(CMM_OOM_KB) "]"); 70 module_param_named(min_mem_mb, min_mem_mb, ulong, 0644); 71 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. " 72 "[Default=" __stringify(CMM_MIN_MEM_MB) "]"); 73 module_param_named(debug, cmm_debug, uint, 0644); 74 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. " 75 "[Default=" __stringify(CMM_DEBUG) "]"); 76 module_param_named(simulate, simulate, bool, 0444); 77 MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw)."); 78 79 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); } 80 81 static atomic_long_t loaned_pages; 82 static unsigned long loaned_pages_target; 83 static unsigned long oom_freed_pages; 84 85 static DEFINE_MUTEX(hotplug_mutex); 86 static int hotplug_occurred; /* protected by the hotplug mutex */ 87 88 static struct task_struct *cmm_thread_ptr; 89 static struct balloon_dev_info b_dev_info; 90 91 static long plpar_page_set_loaned(struct page *page) 92 { 93 const unsigned long vpa = page_to_phys(page); 94 unsigned long cmo_page_sz = cmo_get_page_size(); 95 long rc = 0; 96 int i; 97 98 if (unlikely(simulate)) 99 return 0; 100 101 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) 102 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0); 103 104 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) 105 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, 106 vpa + i - cmo_page_sz, 0); 107 108 return rc; 109 } 110 111 static long plpar_page_set_active(struct page *page) 112 { 113 const unsigned long vpa = page_to_phys(page); 114 unsigned long cmo_page_sz = cmo_get_page_size(); 115 long rc = 0; 116 int i; 117 118 if (unlikely(simulate)) 119 return 0; 120 121 for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz) 122 rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0); 123 124 for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz) 125 plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, 126 vpa + i - cmo_page_sz, 0); 127 128 return rc; 129 } 130 131 /** 132 * cmm_alloc_pages - Allocate pages and mark them as loaned 133 * @nr: number of pages to allocate 134 * 135 * Return value: 136 * number of pages requested to be allocated which were not 137 **/ 138 static long cmm_alloc_pages(long nr) 139 { 140 struct page *page; 141 long rc; 142 143 cmm_dbg("Begin request for %ld pages\n", nr); 144 145 while (nr) { 146 /* Exit if a hotplug operation is in progress or occurred */ 147 if (mutex_trylock(&hotplug_mutex)) { 148 if (hotplug_occurred) { 149 mutex_unlock(&hotplug_mutex); 150 break; 151 } 152 mutex_unlock(&hotplug_mutex); 153 } else { 154 break; 155 } 156 157 page = balloon_page_alloc(); 158 if (!page) 159 break; 160 rc = plpar_page_set_loaned(page); 161 if (rc) { 162 pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc); 163 __free_page(page); 164 break; 165 } 166 167 balloon_page_enqueue(&b_dev_info, page); 168 atomic_long_inc(&loaned_pages); 169 nr--; 170 } 171 172 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 173 return nr; 174 } 175 176 /** 177 * cmm_free_pages - Free pages and mark them as active 178 * @nr: number of pages to free 179 * 180 * Return value: 181 * number of pages requested to be freed which were not 182 **/ 183 static long cmm_free_pages(long nr) 184 { 185 struct page *page; 186 187 cmm_dbg("Begin free of %ld pages.\n", nr); 188 while (nr) { 189 page = balloon_page_dequeue(&b_dev_info); 190 if (!page) 191 break; 192 plpar_page_set_active(page); 193 __free_page(page); 194 atomic_long_dec(&loaned_pages); 195 nr--; 196 } 197 cmm_dbg("End request with %ld pages unfulfilled\n", nr); 198 return nr; 199 } 200 201 /** 202 * cmm_oom_notify - OOM notifier 203 * @self: notifier block struct 204 * @dummy: not used 205 * @parm: returned - number of pages freed 206 * 207 * Return value: 208 * NOTIFY_OK 209 **/ 210 static int cmm_oom_notify(struct notifier_block *self, 211 unsigned long dummy, void *parm) 212 { 213 unsigned long *freed = parm; 214 long nr = KB2PAGES(oom_kb); 215 216 cmm_dbg("OOM processing started\n"); 217 nr = cmm_free_pages(nr); 218 loaned_pages_target = atomic_long_read(&loaned_pages); 219 *freed += KB2PAGES(oom_kb) - nr; 220 oom_freed_pages += KB2PAGES(oom_kb) - nr; 221 cmm_dbg("OOM processing complete\n"); 222 return NOTIFY_OK; 223 } 224 225 /** 226 * cmm_get_mpp - Read memory performance parameters 227 * 228 * Makes hcall to query the current page loan request from the hypervisor. 229 * 230 * Return value: 231 * nothing 232 **/ 233 static void cmm_get_mpp(void) 234 { 235 const long __loaned_pages = atomic_long_read(&loaned_pages); 236 const long total_pages = totalram_pages() + __loaned_pages; 237 int rc; 238 struct hvcall_mpp_data mpp_data; 239 signed long active_pages_target, page_loan_request, target; 240 signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE; 241 242 if (likely(!simulate)) { 243 rc = h_get_mpp(&mpp_data); 244 if (rc != H_SUCCESS) 245 return; 246 page_loan_request = div_s64((s64)mpp_data.loan_request, 247 PAGE_SIZE); 248 target = page_loan_request + __loaned_pages; 249 } else { 250 target = KB2PAGES(simulate_loan_target_kb); 251 page_loan_request = target - __loaned_pages; 252 } 253 254 if (target < 0 || total_pages < min_mem_pages) 255 target = 0; 256 257 if (target > oom_freed_pages) 258 target -= oom_freed_pages; 259 else 260 target = 0; 261 262 active_pages_target = total_pages - target; 263 264 if (min_mem_pages > active_pages_target) 265 target = total_pages - min_mem_pages; 266 267 if (target < 0) 268 target = 0; 269 270 loaned_pages_target = target; 271 272 cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n", 273 page_loan_request, __loaned_pages, loaned_pages_target, 274 oom_freed_pages, totalram_pages()); 275 } 276 277 static struct notifier_block cmm_oom_nb = { 278 .notifier_call = cmm_oom_notify 279 }; 280 281 /** 282 * cmm_thread - CMM task thread 283 * @dummy: not used 284 * 285 * Return value: 286 * 0 287 **/ 288 static int cmm_thread(void *dummy) 289 { 290 unsigned long timeleft; 291 long __loaned_pages; 292 293 while (1) { 294 timeleft = msleep_interruptible(delay * 1000); 295 296 if (kthread_should_stop() || timeleft) 297 break; 298 299 if (mutex_trylock(&hotplug_mutex)) { 300 if (hotplug_occurred) { 301 hotplug_occurred = 0; 302 mutex_unlock(&hotplug_mutex); 303 cmm_dbg("Hotplug operation has occurred, " 304 "loaning activity suspended " 305 "for %d seconds.\n", 306 hotplug_delay); 307 timeleft = msleep_interruptible(hotplug_delay * 308 1000); 309 if (kthread_should_stop() || timeleft) 310 break; 311 continue; 312 } 313 mutex_unlock(&hotplug_mutex); 314 } else { 315 cmm_dbg("Hotplug operation in progress, activity " 316 "suspended\n"); 317 continue; 318 } 319 320 cmm_get_mpp(); 321 322 __loaned_pages = atomic_long_read(&loaned_pages); 323 if (loaned_pages_target > __loaned_pages) { 324 if (cmm_alloc_pages(loaned_pages_target - __loaned_pages)) 325 loaned_pages_target = __loaned_pages; 326 } else if (loaned_pages_target < __loaned_pages) 327 cmm_free_pages(__loaned_pages - loaned_pages_target); 328 } 329 return 0; 330 } 331 332 #define CMM_SHOW(name, format, args...) \ 333 static ssize_t show_##name(struct device *dev, \ 334 struct device_attribute *attr, \ 335 char *buf) \ 336 { \ 337 return sysfs_emit(buf, format, ##args); \ 338 } \ 339 static DEVICE_ATTR(name, 0444, show_##name, NULL) 340 341 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages))); 342 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target)); 343 344 static ssize_t show_oom_pages(struct device *dev, 345 struct device_attribute *attr, char *buf) 346 { 347 return sysfs_emit(buf, "%lu\n", PAGES2KB(oom_freed_pages)); 348 } 349 350 static ssize_t store_oom_pages(struct device *dev, 351 struct device_attribute *attr, 352 const char *buf, size_t count) 353 { 354 unsigned long val = simple_strtoul (buf, NULL, 10); 355 356 if (!capable(CAP_SYS_ADMIN)) 357 return -EPERM; 358 if (val != 0) 359 return -EBADMSG; 360 361 oom_freed_pages = 0; 362 return count; 363 } 364 365 static DEVICE_ATTR(oom_freed_kb, 0644, 366 show_oom_pages, store_oom_pages); 367 368 static struct device_attribute *cmm_attrs[] = { 369 &dev_attr_loaned_kb, 370 &dev_attr_loaned_target_kb, 371 &dev_attr_oom_freed_kb, 372 }; 373 374 static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644, 375 simulate_loan_target_kb); 376 377 static const struct bus_type cmm_subsys = { 378 .name = "cmm", 379 .dev_name = "cmm", 380 }; 381 382 static void cmm_release_device(struct device *dev) 383 { 384 } 385 386 /** 387 * cmm_sysfs_register - Register with sysfs 388 * 389 * Return value: 390 * 0 on success / other on failure 391 **/ 392 static int cmm_sysfs_register(struct device *dev) 393 { 394 int i, rc; 395 396 if ((rc = subsys_system_register(&cmm_subsys, NULL))) 397 return rc; 398 399 dev->id = 0; 400 dev->bus = &cmm_subsys; 401 dev->release = cmm_release_device; 402 403 if ((rc = device_register(dev))) 404 goto subsys_unregister; 405 406 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) { 407 if ((rc = device_create_file(dev, cmm_attrs[i]))) 408 goto fail; 409 } 410 411 if (!simulate) 412 return 0; 413 rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr); 414 if (rc) 415 goto fail; 416 return 0; 417 418 fail: 419 while (--i >= 0) 420 device_remove_file(dev, cmm_attrs[i]); 421 device_unregister(dev); 422 subsys_unregister: 423 bus_unregister(&cmm_subsys); 424 return rc; 425 } 426 427 /** 428 * cmm_unregister_sysfs - Unregister from sysfs 429 * 430 **/ 431 static void cmm_unregister_sysfs(struct device *dev) 432 { 433 int i; 434 435 for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) 436 device_remove_file(dev, cmm_attrs[i]); 437 device_unregister(dev); 438 bus_unregister(&cmm_subsys); 439 } 440 441 /** 442 * cmm_reboot_notifier - Make sure pages are not still marked as "loaned" 443 * 444 **/ 445 static int cmm_reboot_notifier(struct notifier_block *nb, 446 unsigned long action, void *unused) 447 { 448 if (action == SYS_RESTART) { 449 if (cmm_thread_ptr) 450 kthread_stop(cmm_thread_ptr); 451 cmm_thread_ptr = NULL; 452 cmm_free_pages(atomic_long_read(&loaned_pages)); 453 } 454 return NOTIFY_DONE; 455 } 456 457 static struct notifier_block cmm_reboot_nb = { 458 .notifier_call = cmm_reboot_notifier, 459 }; 460 461 /** 462 * cmm_memory_cb - Handle memory hotplug notifier calls 463 * @self: notifier block struct 464 * @action: action to take 465 * @arg: struct memory_notify data for handler 466 * 467 * Return value: 468 * NOTIFY_OK or notifier error based on subfunction return value 469 * 470 **/ 471 static int cmm_memory_cb(struct notifier_block *self, 472 unsigned long action, void *arg) 473 { 474 switch (action) { 475 case MEM_GOING_OFFLINE: 476 mutex_lock(&hotplug_mutex); 477 hotplug_occurred = 1; 478 break; 479 case MEM_OFFLINE: 480 case MEM_CANCEL_OFFLINE: 481 mutex_unlock(&hotplug_mutex); 482 cmm_dbg("Memory offline operation complete.\n"); 483 break; 484 case MEM_GOING_ONLINE: 485 case MEM_ONLINE: 486 case MEM_CANCEL_ONLINE: 487 break; 488 } 489 490 return NOTIFY_OK; 491 } 492 493 static struct notifier_block cmm_mem_nb = { 494 .notifier_call = cmm_memory_cb, 495 .priority = CMM_MEM_HOTPLUG_PRI 496 }; 497 498 #ifdef CONFIG_BALLOON_MIGRATION 499 static int cmm_migratepage(struct balloon_dev_info *b_dev_info, 500 struct page *newpage, struct page *page, 501 enum migrate_mode mode) 502 { 503 /* 504 * loan/"inflate" the newpage first. 505 * 506 * We might race against the cmm_thread who might discover after our 507 * loan request that another page is to be unloaned. However, once 508 * the cmm_thread runs again later, this error will automatically 509 * be corrected. 510 */ 511 if (plpar_page_set_loaned(newpage)) { 512 /* Unlikely, but possible. Tell the caller not to retry now. */ 513 pr_err_ratelimited("%s: Cannot set page to loaned.", __func__); 514 return -EBUSY; 515 } 516 517 /* 518 * activate/"deflate" the old page. We ignore any errors just like the 519 * other callers. 520 */ 521 plpar_page_set_active(page); 522 return 0; 523 } 524 #else /* CONFIG_BALLOON_MIGRATION */ 525 int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage, 526 struct page *page, enum migrate_mode mode); 527 #endif /* CONFIG_BALLOON_MIGRATION */ 528 529 /** 530 * cmm_init - Module initialization 531 * 532 * Return value: 533 * 0 on success / other on failure 534 **/ 535 static int cmm_init(void) 536 { 537 int rc; 538 539 if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate) 540 return -EOPNOTSUPP; 541 542 balloon_devinfo_init(&b_dev_info); 543 b_dev_info.adjust_managed_page_count = true; 544 if (IS_ENABLED(CONFIG_BALLOON_MIGRATION)) 545 b_dev_info.migratepage = cmm_migratepage; 546 547 rc = register_oom_notifier(&cmm_oom_nb); 548 if (rc < 0) 549 return rc; 550 551 if ((rc = register_reboot_notifier(&cmm_reboot_nb))) 552 goto out_oom_notifier; 553 554 if ((rc = cmm_sysfs_register(&cmm_dev))) 555 goto out_reboot_notifier; 556 557 rc = register_memory_notifier(&cmm_mem_nb); 558 if (rc) 559 goto out_unregister_notifier; 560 561 if (cmm_disabled) 562 return 0; 563 564 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 565 if (IS_ERR(cmm_thread_ptr)) { 566 rc = PTR_ERR(cmm_thread_ptr); 567 goto out_unregister_notifier; 568 } 569 570 return 0; 571 out_unregister_notifier: 572 unregister_memory_notifier(&cmm_mem_nb); 573 cmm_unregister_sysfs(&cmm_dev); 574 out_reboot_notifier: 575 unregister_reboot_notifier(&cmm_reboot_nb); 576 out_oom_notifier: 577 unregister_oom_notifier(&cmm_oom_nb); 578 return rc; 579 } 580 581 /** 582 * cmm_exit - Module exit 583 * 584 * Return value: 585 * nothing 586 **/ 587 static void cmm_exit(void) 588 { 589 if (cmm_thread_ptr) 590 kthread_stop(cmm_thread_ptr); 591 unregister_oom_notifier(&cmm_oom_nb); 592 unregister_reboot_notifier(&cmm_reboot_nb); 593 unregister_memory_notifier(&cmm_mem_nb); 594 cmm_free_pages(atomic_long_read(&loaned_pages)); 595 cmm_unregister_sysfs(&cmm_dev); 596 } 597 598 /** 599 * cmm_set_disable - Disable/Enable CMM 600 * 601 * Return value: 602 * 0 on success / other on failure 603 **/ 604 static int cmm_set_disable(const char *val, const struct kernel_param *kp) 605 { 606 int disable = simple_strtoul(val, NULL, 10); 607 608 if (disable != 0 && disable != 1) 609 return -EINVAL; 610 611 if (disable && !cmm_disabled) { 612 if (cmm_thread_ptr) 613 kthread_stop(cmm_thread_ptr); 614 cmm_thread_ptr = NULL; 615 cmm_free_pages(atomic_long_read(&loaned_pages)); 616 } else if (!disable && cmm_disabled) { 617 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 618 if (IS_ERR(cmm_thread_ptr)) 619 return PTR_ERR(cmm_thread_ptr); 620 } 621 622 cmm_disabled = disable; 623 return 0; 624 } 625 626 module_param_call(disable, cmm_set_disable, param_get_uint, 627 &cmm_disabled, 0644); 628 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. " 629 "[Default=" __stringify(CMM_DISABLE) "]"); 630 631 module_init(cmm_init); 632 module_exit(cmm_exit); 633