xref: /linux/arch/powerpc/platforms/pseries/cmm.c (revision bba2c3615bd6cfee7456d1130f2e6b01b3f4e9ba)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Collaborative memory management interface.
4  *
5  * Copyright (C) 2008 IBM Corporation
6  * Author(s): Brian King (brking@linux.vnet.ibm.com),
7  */
8 
9 #include <linux/ctype.h>
10 #include <linux/delay.h>
11 #include <linux/errno.h>
12 #include <linux/fs.h>
13 #include <linux/gfp.h>
14 #include <linux/kthread.h>
15 #include <linux/module.h>
16 #include <linux/oom.h>
17 #include <linux/reboot.h>
18 #include <linux/sched.h>
19 #include <linux/stringify.h>
20 #include <linux/swap.h>
21 #include <linux/sysfs.h>
22 #include <linux/device.h>
23 #include <linux/balloon.h>
24 #include <asm/firmware.h>
25 #include <asm/hvcall.h>
26 #include <asm/mmu.h>
27 #include <linux/uaccess.h>
28 #include <linux/memory.h>
29 #include <asm/plpar_wrappers.h>
30 
31 #include "pseries.h"
32 
/* Version string handed to MODULE_VERSION(). */
#define CMM_DRIVER_VERSION	"1.0.0"

/* Compile-time defaults for the module parameters. */
#define CMM_DEFAULT_DELAY	1
#define CMM_HOTPLUG_DELAY	5
#define CMM_DEBUG		0
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024
#define CMM_MIN_MEM_MB		256

/* Conversions between a size in KiB and a count of PAGE_SHIFT-sized pages. */
#define KB2PAGES(_kb)		((_kb) >> (PAGE_SHIFT - 10))
#define PAGES2KB(_pages)	((_pages) << (PAGE_SHIFT - 10))

/* Priority used for the memory hotplug notifier block. */
#define CMM_MEM_HOTPLUG_PRI	1
44 
45 static unsigned int delay = CMM_DEFAULT_DELAY;
46 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
47 static unsigned int oom_kb = CMM_OOM_KB;
48 static unsigned int cmm_debug = CMM_DEBUG;
49 static unsigned int cmm_disabled = CMM_DISABLE;
50 static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
51 static bool __read_mostly simulate;
52 static unsigned long simulate_loan_target_kb;
53 static struct device cmm_dev;
54 
55 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
56 MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
57 MODULE_LICENSE("GPL");
58 MODULE_VERSION(CMM_DRIVER_VERSION);
59 
60 module_param_named(delay, delay, uint, 0644);
61 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
62 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
63 module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
64 MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
65 		 "before loaning resumes. "
66 		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
67 module_param_named(oom_kb, oom_kb, uint, 0644);
68 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
69 		 "[Default=" __stringify(CMM_OOM_KB) "]");
70 module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
71 MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
72 		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
73 module_param_named(debug, cmm_debug, uint, 0644);
74 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
75 		 "[Default=" __stringify(CMM_DEBUG) "]");
76 module_param_named(simulate, simulate, bool, 0444);
77 MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
78 
/*
 * Emit a KERN_INFO message prefixed with "cmm: " when cmm_debug is non-zero.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and can be used safely in an unbraced if/else.
 */
#define cmm_dbg(...)						\
	do {							\
		if (cmm_debug)					\
			printk(KERN_INFO "cmm: " __VA_ARGS__);	\
	} while (0)
80 
81 static atomic_long_t loaned_pages;
82 static unsigned long loaned_pages_target;
83 static unsigned long oom_freed_pages;
84 
85 static DEFINE_MUTEX(hotplug_mutex);
86 static int hotplug_occurred; /* protected by the hotplug mutex */
87 
88 static struct task_struct *cmm_thread_ptr;
89 static struct balloon_dev_info b_dev_info;
90 
91 static long plpar_page_set_loaned(struct page *page)
92 {
93 	const unsigned long vpa = page_to_phys(page);
94 	unsigned long cmo_page_sz = cmo_get_page_size();
95 	long rc = 0;
96 	int i;
97 
98 	if (unlikely(simulate))
99 		return 0;
100 
101 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
102 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
103 
104 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
105 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
106 				   vpa + i - cmo_page_sz, 0);
107 
108 	return rc;
109 }
110 
111 static long plpar_page_set_active(struct page *page)
112 {
113 	const unsigned long vpa = page_to_phys(page);
114 	unsigned long cmo_page_sz = cmo_get_page_size();
115 	long rc = 0;
116 	int i;
117 
118 	if (unlikely(simulate))
119 		return 0;
120 
121 	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
122 		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
123 
124 	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
125 		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
126 				   vpa + i - cmo_page_sz, 0);
127 
128 	return rc;
129 }
130 
131 /**
132  * cmm_alloc_pages - Allocate pages and mark them as loaned
133  * @nr:	number of pages to allocate
134  *
135  * Return value:
136  * 	number of pages requested to be allocated which were not
137  **/
138 static long cmm_alloc_pages(long nr)
139 {
140 	struct page *page;
141 	long rc;
142 
143 	cmm_dbg("Begin request for %ld pages\n", nr);
144 
145 	while (nr) {
146 		/* Exit if a hotplug operation is in progress or occurred */
147 		if (mutex_trylock(&hotplug_mutex)) {
148 			if (hotplug_occurred) {
149 				mutex_unlock(&hotplug_mutex);
150 				break;
151 			}
152 			mutex_unlock(&hotplug_mutex);
153 		} else {
154 			break;
155 		}
156 
157 		page = balloon_page_alloc();
158 		if (!page)
159 			break;
160 		rc = plpar_page_set_loaned(page);
161 		if (rc) {
162 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
163 			__free_page(page);
164 			break;
165 		}
166 
167 		balloon_page_enqueue(&b_dev_info, page);
168 		atomic_long_inc(&loaned_pages);
169 		nr--;
170 	}
171 
172 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
173 	return nr;
174 }
175 
176 /**
177  * cmm_free_pages - Free pages and mark them as active
178  * @nr:	number of pages to free
179  *
180  * Return value:
181  * 	number of pages requested to be freed which were not
182  **/
183 static long cmm_free_pages(long nr)
184 {
185 	struct page *page;
186 
187 	cmm_dbg("Begin free of %ld pages.\n", nr);
188 	while (nr) {
189 		page = balloon_page_dequeue(&b_dev_info);
190 		if (!page)
191 			break;
192 		plpar_page_set_active(page);
193 		__free_page(page);
194 		atomic_long_dec(&loaned_pages);
195 		nr--;
196 	}
197 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
198 	return nr;
199 }
200 
201 /**
202  * cmm_oom_notify - OOM notifier
203  * @self:	notifier block struct
204  * @dummy:	not used
205  * @parm:	returned - number of pages freed
206  *
207  * Return value:
208  * 	NOTIFY_OK
209  **/
210 static int cmm_oom_notify(struct notifier_block *self,
211 			  unsigned long dummy, void *parm)
212 {
213 	unsigned long *freed = parm;
214 	long nr = KB2PAGES(oom_kb);
215 
216 	cmm_dbg("OOM processing started\n");
217 	nr = cmm_free_pages(nr);
218 	loaned_pages_target = atomic_long_read(&loaned_pages);
219 	*freed += KB2PAGES(oom_kb) - nr;
220 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
221 	cmm_dbg("OOM processing complete\n");
222 	return NOTIFY_OK;
223 }
224 
225 /**
226  * cmm_get_mpp - Read memory performance parameters
227  *
228  * Makes hcall to query the current page loan request from the hypervisor.
229  *
230  * Return value:
231  * 	nothing
232  **/
233 static void cmm_get_mpp(void)
234 {
235 	const long __loaned_pages = atomic_long_read(&loaned_pages);
236 	const long total_pages = totalram_pages() + __loaned_pages;
237 	int rc;
238 	struct hvcall_mpp_data mpp_data;
239 	signed long active_pages_target, page_loan_request, target;
240 	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
241 
242 	if (likely(!simulate)) {
243 		rc = h_get_mpp(&mpp_data);
244 		if (rc != H_SUCCESS)
245 			return;
246 		page_loan_request = div_s64((s64)mpp_data.loan_request,
247 					    PAGE_SIZE);
248 		target = page_loan_request + __loaned_pages;
249 	} else {
250 		target = KB2PAGES(simulate_loan_target_kb);
251 		page_loan_request = target - __loaned_pages;
252 	}
253 
254 	if (target < 0 || total_pages < min_mem_pages)
255 		target = 0;
256 
257 	if (target > oom_freed_pages)
258 		target -= oom_freed_pages;
259 	else
260 		target = 0;
261 
262 	active_pages_target = total_pages - target;
263 
264 	if (min_mem_pages > active_pages_target)
265 		target = total_pages - min_mem_pages;
266 
267 	if (target < 0)
268 		target = 0;
269 
270 	loaned_pages_target = target;
271 
272 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
273 		page_loan_request, __loaned_pages, loaned_pages_target,
274 		oom_freed_pages, totalram_pages());
275 }
276 
277 static struct notifier_block cmm_oom_nb = {
278 	.notifier_call = cmm_oom_notify
279 };
280 
281 /**
282  * cmm_thread - CMM task thread
283  * @dummy:	not used
284  *
285  * Return value:
286  * 	0
287  **/
288 static int cmm_thread(void *dummy)
289 {
290 	unsigned long timeleft;
291 	long __loaned_pages;
292 
293 	while (1) {
294 		timeleft = msleep_interruptible(delay * 1000);
295 
296 		if (kthread_should_stop() || timeleft)
297 			break;
298 
299 		if (mutex_trylock(&hotplug_mutex)) {
300 			if (hotplug_occurred) {
301 				hotplug_occurred = 0;
302 				mutex_unlock(&hotplug_mutex);
303 				cmm_dbg("Hotplug operation has occurred, "
304 						"loaning activity suspended "
305 						"for %d seconds.\n",
306 						hotplug_delay);
307 				timeleft = msleep_interruptible(hotplug_delay *
308 						1000);
309 				if (kthread_should_stop() || timeleft)
310 					break;
311 				continue;
312 			}
313 			mutex_unlock(&hotplug_mutex);
314 		} else {
315 			cmm_dbg("Hotplug operation in progress, activity "
316 					"suspended\n");
317 			continue;
318 		}
319 
320 		cmm_get_mpp();
321 
322 		__loaned_pages = atomic_long_read(&loaned_pages);
323 		if (loaned_pages_target > __loaned_pages) {
324 			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
325 				loaned_pages_target = __loaned_pages;
326 		} else if (loaned_pages_target < __loaned_pages)
327 			cmm_free_pages(__loaned_pages - loaned_pages_target);
328 	}
329 	return 0;
330 }
331 
332 #define CMM_SHOW(name, format, args...)			\
333 	static ssize_t show_##name(struct device *dev,	\
334 				   struct device_attribute *attr,	\
335 				   char *buf)			\
336 	{							\
337 		return sysfs_emit(buf, format, ##args);		\
338 	}							\
339 	static DEVICE_ATTR(name, 0444, show_##name, NULL)
340 
341 CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
342 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
343 
344 static ssize_t show_oom_pages(struct device *dev,
345 			      struct device_attribute *attr, char *buf)
346 {
347 	return sysfs_emit(buf, "%lu\n", PAGES2KB(oom_freed_pages));
348 }
349 
350 static ssize_t store_oom_pages(struct device *dev,
351 			       struct device_attribute *attr,
352 			       const char *buf, size_t count)
353 {
354 	unsigned long val = simple_strtoul (buf, NULL, 10);
355 
356 	if (!capable(CAP_SYS_ADMIN))
357 		return -EPERM;
358 	if (val != 0)
359 		return -EBADMSG;
360 
361 	oom_freed_pages = 0;
362 	return count;
363 }
364 
365 static DEVICE_ATTR(oom_freed_kb, 0644,
366 		   show_oom_pages, store_oom_pages);
367 
368 static struct device_attribute *cmm_attrs[] = {
369 	&dev_attr_loaned_kb,
370 	&dev_attr_loaned_target_kb,
371 	&dev_attr_oom_freed_kb,
372 };
373 
374 static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
375 			 simulate_loan_target_kb);
376 
377 static const struct bus_type cmm_subsys = {
378 	.name = "cmm",
379 	.dev_name = "cmm",
380 };
381 
/*
 * Release callback installed on the cmm device. The device object is a
 * static variable, so this callback has nothing to free.
 */
static void cmm_release_device(struct device *dev)
{
	/* Intentionally empty. */
}
385 
386 /**
387  * cmm_sysfs_register - Register with sysfs
388  *
389  * Return value:
390  * 	0 on success / other on failure
391  **/
392 static int cmm_sysfs_register(struct device *dev)
393 {
394 	int i, rc;
395 
396 	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
397 		return rc;
398 
399 	dev->id = 0;
400 	dev->bus = &cmm_subsys;
401 	dev->release = cmm_release_device;
402 
403 	if ((rc = device_register(dev)))
404 		goto subsys_unregister;
405 
406 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
407 		if ((rc = device_create_file(dev, cmm_attrs[i])))
408 			goto fail;
409 	}
410 
411 	if (!simulate)
412 		return 0;
413 	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
414 	if (rc)
415 		goto fail;
416 	return 0;
417 
418 fail:
419 	while (--i >= 0)
420 		device_remove_file(dev, cmm_attrs[i]);
421 	device_unregister(dev);
422 subsys_unregister:
423 	bus_unregister(&cmm_subsys);
424 	return rc;
425 }
426 
427 /**
428  * cmm_unregister_sysfs - Unregister from sysfs
429  *
430  **/
431 static void cmm_unregister_sysfs(struct device *dev)
432 {
433 	int i;
434 
435 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
436 		device_remove_file(dev, cmm_attrs[i]);
437 	device_unregister(dev);
438 	bus_unregister(&cmm_subsys);
439 }
440 
441 /**
442  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
443  *
444  **/
445 static int cmm_reboot_notifier(struct notifier_block *nb,
446 			       unsigned long action, void *unused)
447 {
448 	if (action == SYS_RESTART) {
449 		if (cmm_thread_ptr)
450 			kthread_stop(cmm_thread_ptr);
451 		cmm_thread_ptr = NULL;
452 		cmm_free_pages(atomic_long_read(&loaned_pages));
453 	}
454 	return NOTIFY_DONE;
455 }
456 
457 static struct notifier_block cmm_reboot_nb = {
458 	.notifier_call = cmm_reboot_notifier,
459 };
460 
461 /**
462  * cmm_memory_cb - Handle memory hotplug notifier calls
463  * @self:	notifier block struct
464  * @action:	action to take
465  * @arg:	struct memory_notify data for handler
466  *
467  * Return value:
468  *	NOTIFY_OK or notifier error based on subfunction return value
469  *
470  **/
471 static int cmm_memory_cb(struct notifier_block *self,
472 			unsigned long action, void *arg)
473 {
474 	switch (action) {
475 	case MEM_GOING_OFFLINE:
476 		mutex_lock(&hotplug_mutex);
477 		hotplug_occurred = 1;
478 		break;
479 	case MEM_OFFLINE:
480 	case MEM_CANCEL_OFFLINE:
481 		mutex_unlock(&hotplug_mutex);
482 		cmm_dbg("Memory offline operation complete.\n");
483 		break;
484 	case MEM_GOING_ONLINE:
485 	case MEM_ONLINE:
486 	case MEM_CANCEL_ONLINE:
487 		break;
488 	}
489 
490 	return NOTIFY_OK;
491 }
492 
493 static struct notifier_block cmm_mem_nb = {
494 	.notifier_call = cmm_memory_cb,
495 	.priority = CMM_MEM_HOTPLUG_PRI
496 };
497 
#ifdef CONFIG_BALLOON_MIGRATION
/*
 * Balloon migration callback: move the loan from @page to @newpage.
 *
 * Returns 0 on success, or -EBUSY when @newpage cannot be set to loaned
 * (in which case @page is left untouched).
 */
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	/*
	 * loan/"inflate" the newpage first.
	 *
	 * We might race against the cmm_thread who might discover after our
	 * loan request that another page is to be unloaned. However, once
	 * the cmm_thread runs again later, this error will automatically
	 * be corrected.
	 */
	if (plpar_page_set_loaned(newpage)) {
		/* Unlikely, but possible. Tell the caller not to retry now. */
		pr_err_ratelimited("%s: Cannot set page to loaned.\n", __func__);
		return -EBUSY;
	}

	/*
	 * activate/"deflate" the old page. We ignore any errors just like the
	 * other callers.
	 */
	plpar_page_set_active(page);
	return 0;
}
#else /* CONFIG_BALLOON_MIGRATION */
/*
 * Declaration only: lets the IS_ENABLED(CONFIG_BALLOON_MIGRATION) reference
 * in cmm_init() compile when migration support is not built.
 */
int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage,
		    struct page *page, enum migrate_mode mode);
#endif /* CONFIG_BALLOON_MIGRATION */
528 
529 /**
530  * cmm_init - Module initialization
531  *
532  * Return value:
533  * 	0 on success / other on failure
534  **/
535 static int cmm_init(void)
536 {
537 	int rc;
538 
539 	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
540 		return -EOPNOTSUPP;
541 
542 	balloon_devinfo_init(&b_dev_info);
543 	b_dev_info.adjust_managed_page_count = true;
544 	if (IS_ENABLED(CONFIG_BALLOON_MIGRATION))
545 		b_dev_info.migratepage = cmm_migratepage;
546 
547 	rc = register_oom_notifier(&cmm_oom_nb);
548 	if (rc < 0)
549 		return rc;
550 
551 	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
552 		goto out_oom_notifier;
553 
554 	if ((rc = cmm_sysfs_register(&cmm_dev)))
555 		goto out_reboot_notifier;
556 
557 	rc = register_memory_notifier(&cmm_mem_nb);
558 	if (rc)
559 		goto out_unregister_notifier;
560 
561 	if (cmm_disabled)
562 		return 0;
563 
564 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
565 	if (IS_ERR(cmm_thread_ptr)) {
566 		rc = PTR_ERR(cmm_thread_ptr);
567 		goto out_unregister_notifier;
568 	}
569 
570 	return 0;
571 out_unregister_notifier:
572 	unregister_memory_notifier(&cmm_mem_nb);
573 	cmm_unregister_sysfs(&cmm_dev);
574 out_reboot_notifier:
575 	unregister_reboot_notifier(&cmm_reboot_nb);
576 out_oom_notifier:
577 	unregister_oom_notifier(&cmm_oom_nb);
578 	return rc;
579 }
580 
581 /**
582  * cmm_exit - Module exit
583  *
584  * Return value:
585  * 	nothing
586  **/
587 static void cmm_exit(void)
588 {
589 	if (cmm_thread_ptr)
590 		kthread_stop(cmm_thread_ptr);
591 	unregister_oom_notifier(&cmm_oom_nb);
592 	unregister_reboot_notifier(&cmm_reboot_nb);
593 	unregister_memory_notifier(&cmm_mem_nb);
594 	cmm_free_pages(atomic_long_read(&loaned_pages));
595 	cmm_unregister_sysfs(&cmm_dev);
596 }
597 
598 /**
599  * cmm_set_disable - Disable/Enable CMM
600  *
601  * Return value:
602  * 	0 on success / other on failure
603  **/
604 static int cmm_set_disable(const char *val, const struct kernel_param *kp)
605 {
606 	int disable = simple_strtoul(val, NULL, 10);
607 
608 	if (disable != 0 && disable != 1)
609 		return -EINVAL;
610 
611 	if (disable && !cmm_disabled) {
612 		if (cmm_thread_ptr)
613 			kthread_stop(cmm_thread_ptr);
614 		cmm_thread_ptr = NULL;
615 		cmm_free_pages(atomic_long_read(&loaned_pages));
616 	} else if (!disable && cmm_disabled) {
617 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
618 		if (IS_ERR(cmm_thread_ptr))
619 			return PTR_ERR(cmm_thread_ptr);
620 	}
621 
622 	cmm_disabled = disable;
623 	return 0;
624 }
625 
626 module_param_call(disable, cmm_set_disable, param_get_uint,
627 		  &cmm_disabled, 0644);
628 MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
629 		 "[Default=" __stringify(CMM_DISABLE) "]");
630 
631 module_init(cmm_init);
632 module_exit(cmm_exit);
633