xref: /freebsd/sys/dev/xen/balloon/balloon.c (revision 89e0f4d24c4a430a0893930e4400ff6a63e63864)
1 /******************************************************************************
2  * balloon.c
3  *
4  * Xen balloon driver - enables returning/claiming memory to/from Xen.
5  *
6  * Copyright (c) 2003, B Dragovic
7  * Copyright (c) 2003-2004, M Williamson, K Fraser
8  * Copyright (c) 2005 Dan M. Smith, IBM Corporation
9  *
10  * This file may be distributed separately from the Linux kernel, or
11  * incorporated into other software packages, subject to the following license:
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this source file (the "Software"), to deal in the Software without
15  * restriction, including without limitation the rights to use, copy, modify,
16  * merge, publish, distribute, sublicense, and/or sell copies of the Software,
17  * and to permit persons to whom the Software is furnished to do so, subject to
18  * the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29  * IN THE SOFTWARE.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/lock.h>
37 #include <sys/mutex.h>
38 
39 #include <machine/hypervisor-ifs.h>
40 #include <machine/xen-os.h>
41 #include <machine/xenbus.h>
42 
/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
struct mtx balloon_lock;
#ifdef notyet

/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;	/* pages this domain currently owns */
static unsigned long target_pages;	/* pages we are asked to own */

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/* We may hit the hard limit in Xen. If we do then we remember it. */
static unsigned long hard_limit;

/*
 * Drivers may alter the memory reservation independently, but they must
 * inform the balloon driver so that we can avoid hitting the hard limit.
 */
static unsigned long driver_pages;

/* One list node per ballooned page, recording the vm_page_t given to Xen. */
struct balloon_entry {
	vm_page_t page;
	STAILQ_ENTRY(balloon_entry) list;
};

/* List of ballooned pages, threaded through the mem_map array. */
static STAILQ_HEAD(,balloon_entry) ballooned_pages;

/*
 * Counts of ballooned pages; only balloon_low is updated by the
 * append/retrieve helpers in this file, but both feed current_target().
 */
static unsigned long balloon_low, balloon_high;


/* Main work function, always executed in process context. */
static void balloon_process(void *unused);

/* Info/warning message wrappers carrying the driver prefix. */
#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)
85 
86 /* balloon_append: add the given page to the balloon. */
87 static void
88 balloon_append(vm_page_t page)
89 {
90 	struct balloon_entry *entry;
91 
92 	entry = malloc(sizeof(struct balloon_entry), M_WAITOK);
93 
94 	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
95 	balloon_low++;
96 }
97 
98 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
99 static vm_page_t
100 balloon_retrieve(void)
101 {
102 	vm_page_t page;
103 	struct balloon_entry *entry;
104 
105 	if (STAILQ_EMPTY(&ballooned_pages))
106 		return NULL;
107 
108 	entry = STAILQ_FIRST(&ballooned_pages);
109 	STAILQ_REMOVE_HEAD(&ballooned_pages, list);
110 
111 	page = entry->page;
112 	free(entry, M_DEVBUF);
113 
114 	balloon_low--;
115 
116 	return page;
117 }
118 
/*
 * Timer callout handler: kick the balloon worker.  The wakeup channel is
 * the address of balloon_process, matching the msleep() in that function.
 */
static void
balloon_alarm(unsigned long unused)
{
	wakeup(balloon_process);
}
124 
125 static unsigned long
126 current_target(void)
127 {
128 	unsigned long target = min(target_pages, hard_limit);
129 	if (target > (current_pages + balloon_low + balloon_high))
130 		target = current_pages + balloon_low + balloon_high;
131 	return target;
132 }
133 
134 static int
135 increase_reservation(unsigned long nr_pages)
136 {
137 	unsigned long *mfn_list, pfn, i, flags;
138 	struct page   *page;
139 	long           rc;
140 	struct xen_memory_reservation reservation = {
141 		.address_bits = 0,
142 		.extent_order = 0,
143 		.domid        = DOMID_SELF
144 	};
145 
146 	if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
147 		nr_pages = PAGE_SIZE / sizeof(unsigned long);
148 
149 	mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
150 	if (mfn_list == NULL)
151 		return ENOMEM;
152 
153 
154 	reservation.extent_start = mfn_list;
155 	reservation.nr_extents   = nr_pages;
156 	rc = HYPERVISOR_memory_op(
157 		XENMEM_increase_reservation, &reservation);
158 	if (rc < nr_pages) {
159 		int ret;
160 		/* We hit the Xen hard limit: reprobe. */
161 		reservation.extent_start = mfn_list;
162 		reservation.nr_extents   = rc;
163 		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
164 				&reservation);
165 		PANIC_IF(ret != rc);
166 		hard_limit = current_pages + rc - driver_pages;
167 		goto out;
168 	}
169 
170 	for (i = 0; i < nr_pages; i++) {
171 		page = balloon_retrieve();
172 		PANIC_IF(page == NULL);
173 
174 		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
175 		PANIC_IF(phys_to_machine_mapping_valid(pfn));
176 
177 		/* Update P->M and M->P tables. */
178 		PFNTOMFN(pfn) = mfn_list[i];
179 		xen_machphys_update(mfn_list[i], pfn);
180 
181 		/* Relinquish the page back to the allocator. */
182 		ClearPageReserved(page);
183 		set_page_count(page, 1);
184 		vm_page_free(page);
185 	}
186 
187 	current_pages += nr_pages;
188 	totalram_pages = current_pages;
189 
190  out:
191 	balloon_unlock(flags);
192 
193 	free((mfn_list);
194 
195 	return 0;
196 }
197 
198 static int
199 decrease_reservation(unsigned long nr_pages)
200 {
201 	unsigned long *mfn_list, pfn, i, flags;
202 	struct page   *page;
203 	void          *v;
204 	int            need_sleep = 0;
205 	int ret;
206 	struct xen_memory_reservation reservation = {
207 		.address_bits = 0,
208 		.extent_order = 0,
209 		.domid        = DOMID_SELF
210 	};
211 
212 	if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
213 		nr_pages = PAGE_SIZE / sizeof(unsigned long);
214 
215 	mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
216 	if (mfn_list == NULL)
217 		return ENOMEM;
218 
219 	for (i = 0; i < nr_pages; i++) {
220 		int color = 0;
221 		if ((page = vm_page_alloc(NULL, color++,
222 					  VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
223 					  VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
224 			nr_pages = i;
225 			need_sleep = 1;
226 			break;
227 		}
228 		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
229 		mfn_list[i] = PFNTOMFN(pfn);
230 	}
231 
232 	balloon_lock(flags);
233 
234 	/* No more mappings: invalidate P2M and add to balloon. */
235 	for (i = 0; i < nr_pages; i++) {
236 		pfn = MFNTOPFN(mfn_list[i]);
237 		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
238 		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
239 	}
240 
241 	reservation.extent_start = mfn_list;
242 	reservation.nr_extents   = nr_pages;
243 	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
244 	PANIC_IF(ret != nr_pages);
245 
246 	current_pages -= nr_pages;
247 	totalram_pages = current_pages;
248 
249 	balloon_unlock(flags);
250 
251 	free(mfn_list, M_DEVBUF);
252 
253 	return need_sleep;
254 }
255 
256 /*
257  * We avoid multiple worker processes conflicting via the balloon mutex.
258  * We may of course race updates of the target counts (which are protected
259  * by the balloon lock), or with changes to the Xen hard limit, but we will
260  * recover from these in time.
261  */
262 static void
263 balloon_process(void *unused)
264 {
265 	int need_sleep = 0;
266 	long credit;
267 
268 	for (;;) {
269 		do {
270 			credit = current_target() - current_pages;
271 			if (credit > 0)
272 				need_sleep = (increase_reservation(credit) != 0);
273 			if (credit < 0)
274 				need_sleep = (decrease_reservation(-credit) != 0);
275 
276 #ifndef CONFIG_PREEMPT
277 			if (need_resched())
278 				schedule();
279 #endif
280 		} while ((credit != 0) && !need_sleep);
281 
282 		/* Schedule more work if there is some still to be done. */
283 		if (current_target() != current_pages)
284 			timeout(balloon_alarm, NULL, ticks + HZ);
285 
286 			msleep(balloon_process, balloon_lock, 0, "balloon", -1);
287 	}
288 
289 }
290 
/* Resets the Xen limit, sets new target, and kicks off processing. */
static void
set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	hard_limit   = ~0UL;	/* forget any previously probed hard limit */
	target_pages = target;
	wakeup(balloon_process);	/* kick the worker; see balloon_process() */
}
300 
/*
 * xenstore watch on the memory/target key; the callback pointer is
 * filled in by balloon_init() and fires watch_target() on changes.
 */
static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};
305 
306 /* React to a change in the target key */
307 static void
308 watch_target(struct xenbus_watch *watch,
309 	     const char **vec, unsigned int len)
310 {
311 	unsigned long long new_target;
312 	int err;
313 
314 	err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
315 	if (err != 1) {
316 		/* This is ok (for domain0 at least) - so just return */
317 		return;
318 	}
319 
320 	/* The given memory/target value is in KiB, so it needs converting to
321 	   pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
322 	*/
323 	set_new_target(new_target >> (PAGE_SHIFT - 10));
324 
325 }
326 
327 static void
328 balloon_init_watcher(void *)
329 {
330 	int err;
331 
332 	err = register_xenbus_watch(&target_watch);
333 	if (err)
334 		printf("Failed to set balloon watcher\n");
335 
336 }
337 
338 static void
339 balloon_init(void *)
340 {
341 	unsigned long pfn;
342 	struct page *page;
343 
344 	IPRINTK("Initialising balloon driver.\n");
345 
346 	if (xen_init() < 0)
347 		return -1;
348 
349 	current_pages = min(xen_start_info->nr_pages, max_pfn);
350 	target_pages  = current_pages;
351 	balloon_low   = 0;
352 	balloon_high  = 0;
353 	driver_pages  = 0UL;
354 	hard_limit    = ~0UL;
355 
356 	init_timer(&balloon_timer);
357 	balloon_timer.data = 0;
358 	balloon_timer.function = balloon_alarm;
359 
360 	/* Initialise the balloon with excess memory space. */
361 	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
362 		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
363 		balloon_append(page);
364 	}
365 
366 	target_watch.callback = watch_target;
367 
368 	return 0;
369 }
370 
/*
 * Called by other drivers that alter the domain's memory reservation on
 * their own, so the balloon can account for those pages when probing the
 * Xen hard limit (see increase_reservation()).  delta may be negative.
 */
void
balloon_update_driver_allowance(long delta)
{
	unsigned long flags;

	balloon_lock(flags);
	driver_pages += delta;
	balloon_unlock(flags);
}
380 
381 #if 0
/*
 * Per-PTE callback (disabled under #if 0): clears the PTE, invalidates
 * the corresponding P2M entry, and returns the single backing machine
 * frame to Xen via XENMEM_decrease_reservation.
 * NOTE(review): uses Linux MM primitives (set_pte_at, init_mm) — this is
 * dead Linux-heritage code, not callable from the FreeBSD side as-is.
 */
static int dealloc_pte_fn(
	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.extent_start = &mfn,
		.nr_extents   = 1,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	PANIC_IF(ret != 1);
	return 0;
}
399 
400 #endif
401 vm_page_t
402 balloon_alloc_empty_page_range(unsigned long nr_pages)
403 {
404 	unsigned long flags;
405 	vm_page_t pages;
406 	int i;
407 	unsigned long *mfn_list;
408 	struct xen_memory_reservation reservation = {
409 		.address_bits = 0,
410 		.extent_order = 0,
411 		.domid        = DOMID_SELF
412 	};
413 
414 	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
415 	if (pages == NULL)
416 		return NULL;
417 
418 	mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
419 
420 	for (i = 0; i < nr_pages; i++) {
421 		mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
422 		PFNTOMFN(i) = INVALID_P2M_ENTRY;
423 		reservation.extent_start = mfn_list;
424 		reservation.nr_extents = nr_pages;
425 		PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation) != nr_pages);
426 	}
427 
428 	current_pages -= nr_pages;
429 
430 	wakeup(balloon_process);
431 
432 	return pages;
433 }
434 
435 void
436 balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
437 {
438 	unsigned long i, flags;
439 
440 	for (i = 0; i < nr_pages; i++)
441 		balloon_append(page + i);
442 
443 	wakeup(balloon_process);
444 }
445 
446 #endif
447