xref: /linux/arch/powerpc/platforms/pseries/cmm.c (revision cd8e95d80bc29b3c72288bd31e845b11755ef6a5)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Collaborative memory management interface.
4  *
5  * Copyright (C) 2008 IBM Corporation
6  * Author(s): Brian King (brking@linux.vnet.ibm.com),
7  */
8 
#include <linux/balloon.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/oom.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/swap.h>
#include <linux/uaccess.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/mmu.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"
31 
#define CMM_DRIVER_VERSION	"1.0.0"
/* Seconds between polls of the hypervisor for new loan requests. */
#define CMM_DEFAULT_DELAY	1
/* Seconds loaning stays suspended after a memory hotplug operation. */
#define CMM_HOTPLUG_DELAY	5
#define CMM_DEBUG			0
#define CMM_DISABLE		0
/* Amount of memory (in KB) handed back to the kernel on an OOM event. */
#define CMM_OOM_KB		1024
/* Never balloon the system below this amount of memory (in MB). */
#define CMM_MIN_MEM_MB		256
/* Convert between KB counts and page counts (assumes PAGE_SHIFT >= 10). */
#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))

/* Priority of cmm_mem_nb on the memory hotplug notifier chain. */
#define CMM_MEM_HOTPLUG_PRI	1
43 
/* Tunables; exposed as module parameters below. */
static unsigned int delay = CMM_DEFAULT_DELAY;
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
static unsigned int oom_kb = CMM_OOM_KB;
static unsigned int cmm_debug = CMM_DEBUG;
static unsigned int cmm_disabled = CMM_DISABLE;
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
/* When set, no hcalls are made; the loan target comes from sysfs instead. */
static bool __read_mostly simulate;
static unsigned long simulate_loan_target_kb;
static struct device cmm_dev;

MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);

module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
		 "before loaning resumes. "
		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
		 "[Default=" __stringify(CMM_OOM_KB) "]");
module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
		 "[Default=" __stringify(CMM_DEBUG) "]");
/* simulate is 0444: it can only be chosen at module load time. */
module_param_named(simulate, simulate, bool, 0444);
MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
77 
78 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
79 
/* Number of pages currently handed to the hypervisor ("loaned"). */
static atomic_long_t loaned_pages;
/* Number of pages the hypervisor wants loaned, as of the last MPP poll. */
static unsigned long loaned_pages_target;
/* Pages returned by the OOM notifier; credited against the target in cmm_get_mpp(). */
static unsigned long oom_freed_pages;

static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

static struct task_struct *cmm_thread_ptr;
static struct balloon_dev_info b_dev_info;
89 
/**
 * plpar_page_set_loaned - Mark one kernel page as loaned to the hypervisor
 * @page:	page to loan
 *
 * A kernel page may span several CMO pages; each CMO-sized chunk gets its
 * own H_PAGE_INIT hcall.  If any chunk fails, the chunks already loaned
 * are rolled back to active so the page is never left half-loaned.
 *
 * Return value:
 *	0 on success, hcall return code on failure
 **/
static long plpar_page_set_loaned(struct page *page)
{
	const unsigned long vpa = page_to_phys(page);
	unsigned long cmo_page_sz = cmo_get_page_size();
	long rc = 0;
	int i;

	/* Simulation mode never talks to the hypervisor. */
	if (unlikely(simulate))
		return 0;

	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);

	/* On failure, undo the chunks that succeeded.  The first loop left i
	 * one step past the failing chunk, so step back before each undo. */
	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
				   vpa + i - cmo_page_sz, 0);

	return rc;
}
109 
/**
 * plpar_page_set_active - Return one loaned page to active (kernel) use
 * @page:	page to activate
 *
 * Mirror of plpar_page_set_loaned(): each CMO-sized chunk is activated
 * individually, and on failure the chunks already activated are rolled
 * back to loaned so the page state stays consistent.
 *
 * Return value:
 *	0 on success, hcall return code on failure
 **/
static long plpar_page_set_active(struct page *page)
{
	const unsigned long vpa = page_to_phys(page);
	unsigned long cmo_page_sz = cmo_get_page_size();
	long rc = 0;
	int i;

	/* Simulation mode never talks to the hypervisor. */
	if (unlikely(simulate))
		return 0;

	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);

	/* On failure, undo the chunks that succeeded (see set_loaned). */
	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
				   vpa + i - cmo_page_sz, 0);

	return rc;
}
129 
130 /**
131  * cmm_alloc_pages - Allocate pages and mark them as loaned
132  * @nr:	number of pages to allocate
133  *
134  * Return value:
135  * 	number of pages requested to be allocated which were not
136  **/
137 static long cmm_alloc_pages(long nr)
138 {
139 	struct page *page;
140 	long rc;
141 
142 	cmm_dbg("Begin request for %ld pages\n", nr);
143 
144 	while (nr) {
145 		/* Exit if a hotplug operation is in progress or occurred */
146 		if (mutex_trylock(&hotplug_mutex)) {
147 			if (hotplug_occurred) {
148 				mutex_unlock(&hotplug_mutex);
149 				break;
150 			}
151 			mutex_unlock(&hotplug_mutex);
152 		} else {
153 			break;
154 		}
155 
156 		page = balloon_page_alloc();
157 		if (!page)
158 			break;
159 		rc = plpar_page_set_loaned(page);
160 		if (rc) {
161 			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
162 			__free_page(page);
163 			break;
164 		}
165 
166 		balloon_page_enqueue(&b_dev_info, page);
167 		atomic_long_inc(&loaned_pages);
168 		nr--;
169 	}
170 
171 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
172 	return nr;
173 }
174 
175 /**
176  * cmm_free_pages - Free pages and mark them as active
177  * @nr:	number of pages to free
178  *
179  * Return value:
180  * 	number of pages requested to be freed which were not
181  **/
182 static long cmm_free_pages(long nr)
183 {
184 	struct page *page;
185 
186 	cmm_dbg("Begin free of %ld pages.\n", nr);
187 	while (nr) {
188 		page = balloon_page_dequeue(&b_dev_info);
189 		if (!page)
190 			break;
191 		plpar_page_set_active(page);
192 		__free_page(page);
193 		atomic_long_dec(&loaned_pages);
194 		nr--;
195 	}
196 	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
197 	return nr;
198 }
199 
200 /**
201  * cmm_oom_notify - OOM notifier
202  * @self:	notifier block struct
203  * @dummy:	not used
204  * @parm:	returned - number of pages freed
205  *
206  * Return value:
207  * 	NOTIFY_OK
208  **/
209 static int cmm_oom_notify(struct notifier_block *self,
210 			  unsigned long dummy, void *parm)
211 {
212 	unsigned long *freed = parm;
213 	long nr = KB2PAGES(oom_kb);
214 
215 	cmm_dbg("OOM processing started\n");
216 	nr = cmm_free_pages(nr);
217 	loaned_pages_target = atomic_long_read(&loaned_pages);
218 	*freed += KB2PAGES(oom_kb) - nr;
219 	oom_freed_pages += KB2PAGES(oom_kb) - nr;
220 	cmm_dbg("OOM processing complete\n");
221 	return NOTIFY_OK;
222 }
223 
/**
 * cmm_get_mpp - Read memory performance parameters
 *
 * Makes hcall to query the current page loan request from the hypervisor.
 *
 * Return value:
 * 	nothing
 **/
static void cmm_get_mpp(void)
{
	const long __loaned_pages = atomic_long_read(&loaned_pages);
	const long total_pages = totalram_pages() + __loaned_pages;
	int rc;
	struct hvcall_mpp_data mpp_data;
	signed long active_pages_target, page_loan_request, target;
	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;

	if (likely(!simulate)) {
		rc = h_get_mpp(&mpp_data);
		if (rc != H_SUCCESS)
			return;
		/* loan_request is converted from bytes to pages; it may be
		 * negative, meaning the hypervisor wants pages back. */
		page_loan_request = div_s64((s64)mpp_data.loan_request,
					    PAGE_SIZE);
		target = page_loan_request + __loaned_pages;
	} else {
		/* Simulation mode: target comes straight from sysfs. */
		target = KB2PAGES(simulate_loan_target_kb);
		page_loan_request = target - __loaned_pages;
	}

	if (target < 0 || total_pages < min_mem_pages)
		target = 0;

	/* Credit pages already given back by the OOM notifier. */
	if (target > oom_freed_pages)
		target -= oom_freed_pages;
	else
		target = 0;

	active_pages_target = total_pages - target;

	/* Never balloon the system below min_mem_mb worth of pages. */
	if (min_mem_pages > active_pages_target)
		target = total_pages - min_mem_pages;

	if (target < 0)
		target = 0;

	loaned_pages_target = target;

	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
		page_loan_request, __loaned_pages, loaned_pages_target,
		oom_freed_pages, totalram_pages());
}
275 
/* Registered with the OOM notifier chain in cmm_init(). */
static struct notifier_block cmm_oom_nb = {
	.notifier_call = cmm_oom_notify
};
279 
/**
 * cmm_thread - CMM task thread
 * @dummy:	not used
 *
 * Main balloon loop: sleep "delay" seconds, query the hypervisor's loan
 * target, then allocate or free pages to move toward it.  A non-zero
 * return from msleep_interruptible() (interrupted sleep) or
 * kthread_should_stop() ends the thread.
 *
 * Return value:
 * 	0
 **/
static int cmm_thread(void *dummy)
{
	unsigned long timeleft;
	long __loaned_pages;

	while (1) {
		timeleft = msleep_interruptible(delay * 1000);

		if (kthread_should_stop() || timeleft)
			break;

		/* Skip this cycle while a hotplug operation is in flight; if
		 * one completed since the last cycle, back off for
		 * hotplug_delay seconds before resuming loaning. */
		if (mutex_trylock(&hotplug_mutex)) {
			if (hotplug_occurred) {
				hotplug_occurred = 0;
				mutex_unlock(&hotplug_mutex);
				cmm_dbg("Hotplug operation has occurred, "
						"loaning activity suspended "
						"for %d seconds.\n",
						hotplug_delay);
				timeleft = msleep_interruptible(hotplug_delay *
						1000);
				if (kthread_should_stop() || timeleft)
					break;
				continue;
			}
			mutex_unlock(&hotplug_mutex);
		} else {
			cmm_dbg("Hotplug operation in progress, activity "
					"suspended\n");
			continue;
		}

		cmm_get_mpp();

		__loaned_pages = atomic_long_read(&loaned_pages);
		if (loaned_pages_target > __loaned_pages) {
			/* Could not reach the target; stop asking for it. */
			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
				loaned_pages_target = __loaned_pages;
		} else if (loaned_pages_target < __loaned_pages)
			cmm_free_pages(__loaned_pages - loaned_pages_target);
	}
	return 0;
}
330 
/*
 * CMM_SHOW - generate a read-only sysfs "show" handler plus its
 * DEVICE_ATTR, printing the given format/args.
 */
#define CMM_SHOW(name, format, args...)			\
	static ssize_t show_##name(struct device *dev,	\
				   struct device_attribute *attr,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);		\
	}							\
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* Current and target loan size, reported in KB. */
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
342 
343 static ssize_t show_oom_pages(struct device *dev,
344 			      struct device_attribute *attr, char *buf)
345 {
346 	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
347 }
348 
349 static ssize_t store_oom_pages(struct device *dev,
350 			       struct device_attribute *attr,
351 			       const char *buf, size_t count)
352 {
353 	unsigned long val = simple_strtoul (buf, NULL, 10);
354 
355 	if (!capable(CAP_SYS_ADMIN))
356 		return -EPERM;
357 	if (val != 0)
358 		return -EBADMSG;
359 
360 	oom_freed_pages = 0;
361 	return count;
362 }
363 
static DEVICE_ATTR(oom_freed_kb, 0644,
		   show_oom_pages, store_oom_pages);

/* Attributes registered unconditionally in cmm_sysfs_register(). */
static struct device_attribute *cmm_attrs[] = {
	&dev_attr_loaned_kb,
	&dev_attr_loaned_target_kb,
	&dev_attr_oom_freed_kb,
};

/* Writable loan target; registered only in simulation mode. */
static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
			 simulate_loan_target_kb);
375 
static const struct bus_type cmm_subsys = {
	.name = "cmm",
	.dev_name = "cmm",
};

/* cmm_dev is a static object; nothing to free on final put. */
static void cmm_release_device(struct device *dev)
{
}
384 
385 /**
386  * cmm_sysfs_register - Register with sysfs
387  *
388  * Return value:
389  * 	0 on success / other on failure
390  **/
391 static int cmm_sysfs_register(struct device *dev)
392 {
393 	int i, rc;
394 
395 	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
396 		return rc;
397 
398 	dev->id = 0;
399 	dev->bus = &cmm_subsys;
400 	dev->release = cmm_release_device;
401 
402 	if ((rc = device_register(dev)))
403 		goto subsys_unregister;
404 
405 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
406 		if ((rc = device_create_file(dev, cmm_attrs[i])))
407 			goto fail;
408 	}
409 
410 	if (!simulate)
411 		return 0;
412 	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
413 	if (rc)
414 		goto fail;
415 	return 0;
416 
417 fail:
418 	while (--i >= 0)
419 		device_remove_file(dev, cmm_attrs[i]);
420 	device_unregister(dev);
421 subsys_unregister:
422 	bus_unregister(&cmm_subsys);
423 	return rc;
424 }
425 
426 /**
427  * cmm_unregister_sysfs - Unregister from sysfs
428  *
429  **/
430 static void cmm_unregister_sysfs(struct device *dev)
431 {
432 	int i;
433 
434 	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
435 		device_remove_file(dev, cmm_attrs[i]);
436 	device_unregister(dev);
437 	bus_unregister(&cmm_subsys);
438 }
439 
440 /**
441  * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
442  *
443  **/
444 static int cmm_reboot_notifier(struct notifier_block *nb,
445 			       unsigned long action, void *unused)
446 {
447 	if (action == SYS_RESTART) {
448 		if (cmm_thread_ptr)
449 			kthread_stop(cmm_thread_ptr);
450 		cmm_thread_ptr = NULL;
451 		cmm_free_pages(atomic_long_read(&loaned_pages));
452 	}
453 	return NOTIFY_DONE;
454 }
455 
456 static struct notifier_block cmm_reboot_nb = {
457 	.notifier_call = cmm_reboot_notifier,
458 };
459 
/**
 * cmm_memory_cb - Handle memory hotplug notifier calls
 * @self:	notifier block struct
 * @action:	action to take
 * @arg:	struct memory_notify data for handler
 *
 * Return value:
 *	NOTIFY_OK or notifier error based on subfunction return value
 *
 **/
static int cmm_memory_cb(struct notifier_block *self,
			unsigned long action, void *arg)
{
	switch (action) {
	case MEM_GOING_OFFLINE:
		/* Hold hotplug_mutex until MEM_OFFLINE/MEM_CANCEL_OFFLINE so
		 * cmm_thread and cmm_alloc_pages suspend loaning meanwhile. */
		mutex_lock(&hotplug_mutex);
		hotplug_occurred = 1;
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		/* Releases the mutex taken at MEM_GOING_OFFLINE. */
		mutex_unlock(&hotplug_mutex);
		cmm_dbg("Memory offline operation complete.\n");
		break;
	case MEM_GOING_ONLINE:
	case MEM_ONLINE:
	case MEM_CANCEL_ONLINE:
		/* Onlining memory needs no loaning pause. */
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block cmm_mem_nb = {
	.notifier_call = cmm_memory_cb,
	.priority = CMM_MEM_HOTPLUG_PRI
};
496 
#ifdef CONFIG_BALLOON_MIGRATION
/*
 * cmm_migratepage - balloon_dev_info migratepage callback: loan the new
 * physical page and activate the old one so the old page can be migrated
 * away.  Returns 0 on success or -EBUSY if the new page cannot be loaned.
 */
static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	/*
	 * loan/"inflate" the newpage first.
	 *
	 * We might race against the cmm_thread who might discover after our
	 * loan request that another page is to be unloaned. However, once
	 * the cmm_thread runs again later, this error will automatically
	 * be corrected.
	 */
	if (plpar_page_set_loaned(newpage)) {
		/* Unlikely, but possible. Tell the caller not to retry now. */
		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
		return -EBUSY;
	}

	/*
	 * activate/"deflate" the old page. We ignore any errors just like the
	 * other callers.
	 */
	plpar_page_set_active(page);
	return 0;
}
#else /* CONFIG_BALLOON_MIGRATION */
/* Declaration only: never called, but lets cmm_init() compile either way
 * (the IS_ENABLED() branch assigning it is dead code in this config). */
int cmm_migratepage(struct balloon_dev_info *b_dev_info, struct page *newpage,
		    struct page *page, enum migrate_mode mode);
#endif /* CONFIG_BALLOON_MIGRATION */
527 
/**
 * cmm_init - Module initialization
 *
 * Registers the OOM, reboot and memory-hotplug notifiers plus the sysfs
 * interface, then starts the balloon thread (unless "disable" was set).
 *
 * Return value:
 * 	0 on success / other on failure
 **/
static int cmm_init(void)
{
	int rc;

	/* Without CMO firmware support only simulation mode makes sense. */
	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
		return -EOPNOTSUPP;

	balloon_devinfo_init(&b_dev_info);
	b_dev_info.adjust_managed_page_count = true;
	if (IS_ENABLED(CONFIG_BALLOON_MIGRATION))
		b_dev_info.migratepage = cmm_migratepage;

	rc = register_oom_notifier(&cmm_oom_nb);
	if (rc < 0)
		return rc;

	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
		goto out_oom_notifier;

	if ((rc = cmm_sysfs_register(&cmm_dev)))
		goto out_reboot_notifier;

	rc = register_memory_notifier(&cmm_mem_nb);
	if (rc)
		goto out_unregister_notifier;

	/* With disable=1 everything stays registered but no thread runs;
	 * cmm_set_disable() can start it later. */
	if (cmm_disabled)
		return 0;

	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
	if (IS_ERR(cmm_thread_ptr)) {
		rc = PTR_ERR(cmm_thread_ptr);
		goto out_unregister_notifier;
	}

	return 0;
out_unregister_notifier:
	/* NOTE(review): reached both when register_memory_notifier() failed
	 * and when kthread_run() failed; in the former case the extra
	 * unregister_memory_notifier() is presumed harmless — confirm. */
	unregister_memory_notifier(&cmm_mem_nb);
	cmm_unregister_sysfs(&cmm_dev);
out_reboot_notifier:
	unregister_reboot_notifier(&cmm_reboot_nb);
out_oom_notifier:
	unregister_oom_notifier(&cmm_oom_nb);
	return rc;
}
579 
/**
 * cmm_exit - Module exit
 *
 * Tears down in reverse of cmm_init(): stop the balloon thread, drop the
 * notifiers, return every loaned page, then remove the sysfs interface.
 *
 * Return value:
 * 	nothing
 **/
static void cmm_exit(void)
{
	if (cmm_thread_ptr)
		kthread_stop(cmm_thread_ptr);
	unregister_oom_notifier(&cmm_oom_nb);
	unregister_reboot_notifier(&cmm_reboot_nb);
	unregister_memory_notifier(&cmm_mem_nb);
	/* Hand all pages back before the device (and its stats) go away. */
	cmm_free_pages(atomic_long_read(&loaned_pages));
	cmm_unregister_sysfs(&cmm_dev);
}
596 
597 /**
598  * cmm_set_disable - Disable/Enable CMM
599  *
600  * Return value:
601  * 	0 on success / other on failure
602  **/
603 static int cmm_set_disable(const char *val, const struct kernel_param *kp)
604 {
605 	int disable = simple_strtoul(val, NULL, 10);
606 
607 	if (disable != 0 && disable != 1)
608 		return -EINVAL;
609 
610 	if (disable && !cmm_disabled) {
611 		if (cmm_thread_ptr)
612 			kthread_stop(cmm_thread_ptr);
613 		cmm_thread_ptr = NULL;
614 		cmm_free_pages(atomic_long_read(&loaned_pages));
615 	} else if (!disable && cmm_disabled) {
616 		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
617 		if (IS_ERR(cmm_thread_ptr))
618 			return PTR_ERR(cmm_thread_ptr);
619 	}
620 
621 	cmm_disabled = disable;
622 	return 0;
623 }
624 
/* "disable" goes through cmm_set_disable() so the balloon thread can be
 * stopped/started when the value changes at runtime. */
module_param_call(disable, cmm_set_disable, param_get_uint,
		  &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
		 "[Default=" __stringify(CMM_DISABLE) "]");

module_init(cmm_init);
module_exit(cmm_exit);
632