189e0f4d2SKip Macy /****************************************************************************** 289e0f4d2SKip Macy * balloon.c 389e0f4d2SKip Macy * 489e0f4d2SKip Macy * Xen balloon driver - enables returning/claiming memory to/from Xen. 589e0f4d2SKip Macy * 689e0f4d2SKip Macy * Copyright (c) 2003, B Dragovic 789e0f4d2SKip Macy * Copyright (c) 2003-2004, M Williamson, K Fraser 889e0f4d2SKip Macy * Copyright (c) 2005 Dan M. Smith, IBM Corporation 989e0f4d2SKip Macy * 1089e0f4d2SKip Macy * This file may be distributed separately from the Linux kernel, or 1189e0f4d2SKip Macy * incorporated into other software packages, subject to the following license: 1289e0f4d2SKip Macy * 1389e0f4d2SKip Macy * Permission is hereby granted, free of charge, to any person obtaining a copy 1489e0f4d2SKip Macy * of this source file (the "Software"), to deal in the Software without 1589e0f4d2SKip Macy * restriction, including without limitation the rights to use, copy, modify, 1689e0f4d2SKip Macy * merge, publish, distribute, sublicense, and/or sell copies of the Software, 1789e0f4d2SKip Macy * and to permit persons to whom the Software is furnished to do so, subject to 1889e0f4d2SKip Macy * the following conditions: 1989e0f4d2SKip Macy * 2089e0f4d2SKip Macy * The above copyright notice and this permission notice shall be included in 2189e0f4d2SKip Macy * all copies or substantial portions of the Software. 2289e0f4d2SKip Macy * 2389e0f4d2SKip Macy * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 2489e0f4d2SKip Macy * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 2589e0f4d2SKip Macy * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 2689e0f4d2SKip Macy * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2789e0f4d2SKip Macy * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2889e0f4d2SKip Macy * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2989e0f4d2SKip Macy * IN THE SOFTWARE. 3089e0f4d2SKip Macy */ 3189e0f4d2SKip Macy 3289e0f4d2SKip Macy #include <sys/cdefs.h> 3389e0f4d2SKip Macy __FBSDID("$FreeBSD$"); 3489e0f4d2SKip Macy 3589e0f4d2SKip Macy #include <sys/param.h> 3689e0f4d2SKip Macy #include <sys/lock.h> 3712678024SDoug Rabson #include <sys/kernel.h> 3812678024SDoug Rabson #include <sys/kthread.h> 3912678024SDoug Rabson #include <sys/malloc.h> 4089e0f4d2SKip Macy #include <sys/mutex.h> 4112678024SDoug Rabson #include <sys/sysctl.h> 4289e0f4d2SKip Macy 4312678024SDoug Rabson #include <machine/xen/xen-os.h> 4412678024SDoug Rabson #include <machine/xen/xenvar.h> 452913e88cSRobert Watson #include <machine/xen/xenfunc.h> 4612678024SDoug Rabson #include <xen/hypervisor.h> 47ff662b5cSJustin T. Gibbs #include <xen/xenstore/xenstorevar.h> 4812678024SDoug Rabson 4912678024SDoug Rabson #include <vm/vm.h> 5012678024SDoug Rabson #include <vm/vm_page.h> 5112678024SDoug Rabson 5212678024SDoug Rabson MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); 5312678024SDoug Rabson 5412678024SDoug Rabson struct mtx balloon_mutex; 5589e0f4d2SKip Macy 5689e0f4d2SKip Macy /* 5789e0f4d2SKip Macy * Protects atomic reservation decrease/increase against concurrent increases. 5889e0f4d2SKip Macy * Also protects non-atomic updates of current_pages and driver_pages, and 5989e0f4d2SKip Macy * balloon lists. 6089e0f4d2SKip Macy */ 6189e0f4d2SKip Macy struct mtx balloon_lock; 6289e0f4d2SKip Macy 6312678024SDoug Rabson /* We increase/decrease in batches which fit in a page */ 6412678024SDoug Rabson static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 6512678024SDoug Rabson #define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0])) 6612678024SDoug Rabson 6712678024SDoug Rabson struct balloon_stats { 6889e0f4d2SKip Macy /* We aim for 'current allocation' == 'target allocation'. */ 6912678024SDoug Rabson unsigned long current_pages; 7012678024SDoug Rabson unsigned long target_pages; 7189e0f4d2SKip Macy /* We may hit the hard limit in Xen. If we do then we remember it. */ 7212678024SDoug Rabson unsigned long hard_limit; 7389e0f4d2SKip Macy /* 7412678024SDoug Rabson * Drivers may alter the memory reservation independently, but they 7512678024SDoug Rabson * must inform the balloon driver so we avoid hitting the hard limit. 7689e0f4d2SKip Macy */ 7712678024SDoug Rabson unsigned long driver_pages; 7812678024SDoug Rabson /* Number of pages in high- and low-memory balloons. */ 7912678024SDoug Rabson unsigned long balloon_low; 8012678024SDoug Rabson unsigned long balloon_high; 8112678024SDoug Rabson }; 8212678024SDoug Rabson 8312678024SDoug Rabson static struct balloon_stats balloon_stats; 8412678024SDoug Rabson #define bs balloon_stats 8512678024SDoug Rabson 8612678024SDoug Rabson SYSCTL_DECL(_dev_xen); 8712678024SDoug Rabson SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); 8812678024SDoug Rabson SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, 8912678024SDoug Rabson &bs.current_pages, 0, "Current allocation"); 9012678024SDoug Rabson SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, 9112678024SDoug Rabson &bs.target_pages, 0, "Target allocation"); 9212678024SDoug Rabson SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, 9312678024SDoug Rabson &bs.driver_pages, 0, "Driver pages"); 9412678024SDoug Rabson SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, 9512678024SDoug Rabson &bs.hard_limit, 0, "Xen hard limit"); 9612678024SDoug Rabson SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, 9712678024SDoug Rabson &bs.balloon_low, 0, "Low-mem balloon"); 9812678024SDoug Rabson SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, 9912678024SDoug Rabson &bs.balloon_high, 0, "High-mem balloon"); 10089e0f4d2SKip Macy 10189e0f4d2SKip Macy struct balloon_entry { 10289e0f4d2SKip Macy vm_page_t page; 10389e0f4d2SKip Macy STAILQ_ENTRY(balloon_entry) list; 10489e0f4d2SKip Macy }; 10589e0f4d2SKip Macy 10689e0f4d2SKip Macy /* List of ballooned pages, threaded through the mem_map array. */ 10789e0f4d2SKip Macy static STAILQ_HEAD(,balloon_entry) ballooned_pages; 10889e0f4d2SKip Macy 10989e0f4d2SKip Macy /* Main work function, always executed in process context. */ 11089e0f4d2SKip Macy static void balloon_process(void *unused); 11189e0f4d2SKip Macy 11289e0f4d2SKip Macy #define IPRINTK(fmt, args...) \ 11389e0f4d2SKip Macy printk(KERN_INFO "xen_mem: " fmt, ##args) 11489e0f4d2SKip Macy #define WPRINTK(fmt, args...) \ 11589e0f4d2SKip Macy printk(KERN_WARNING "xen_mem: " fmt, ##args) 11689e0f4d2SKip Macy 11789e0f4d2SKip Macy /* balloon_append: add the given page to the balloon. */ 11889e0f4d2SKip Macy static void 11989e0f4d2SKip Macy balloon_append(vm_page_t page) 12089e0f4d2SKip Macy { 12189e0f4d2SKip Macy struct balloon_entry *entry; 12289e0f4d2SKip Macy 12312678024SDoug Rabson entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK); 12412678024SDoug Rabson entry->page = page; 12589e0f4d2SKip Macy STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); 12612678024SDoug Rabson bs.balloon_low++; 12789e0f4d2SKip Macy } 12889e0f4d2SKip Macy 12989e0f4d2SKip Macy /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 13089e0f4d2SKip Macy static vm_page_t 13189e0f4d2SKip Macy balloon_retrieve(void) 13289e0f4d2SKip Macy { 13389e0f4d2SKip Macy vm_page_t page; 13489e0f4d2SKip Macy struct balloon_entry *entry; 13589e0f4d2SKip Macy 13689e0f4d2SKip Macy if (STAILQ_EMPTY(&ballooned_pages)) 13789e0f4d2SKip Macy return NULL; 13889e0f4d2SKip Macy 13989e0f4d2SKip Macy entry = STAILQ_FIRST(&ballooned_pages); 14089e0f4d2SKip Macy STAILQ_REMOVE_HEAD(&ballooned_pages, list); 14189e0f4d2SKip Macy 14289e0f4d2SKip Macy page = entry->page; 14389e0f4d2SKip Macy free(entry, M_DEVBUF); 14489e0f4d2SKip Macy 14512678024SDoug Rabson bs.balloon_low--; 14689e0f4d2SKip Macy 14789e0f4d2SKip Macy return page; 14889e0f4d2SKip Macy } 14989e0f4d2SKip Macy 15089e0f4d2SKip Macy static unsigned long 15189e0f4d2SKip Macy current_target(void) 15289e0f4d2SKip Macy { 15312678024SDoug Rabson unsigned long target = min(bs.target_pages, bs.hard_limit); 15412678024SDoug Rabson if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) 15512678024SDoug Rabson target = bs.current_pages + bs.balloon_low + bs.balloon_high; 15689e0f4d2SKip Macy return target; 15789e0f4d2SKip Macy } 15889e0f4d2SKip Macy 15912678024SDoug Rabson static unsigned long 16012678024SDoug Rabson minimum_target(void) 16112678024SDoug Rabson { 16212678024SDoug Rabson #ifdef XENHVM 16312678024SDoug Rabson #define max_pfn physmem 1643e33218dSDoug Rabson #else 1653e33218dSDoug Rabson #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 16612678024SDoug Rabson #endif 16712678024SDoug Rabson unsigned long min_pages, curr_pages = current_target(); 16812678024SDoug Rabson 16912678024SDoug Rabson #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 17012678024SDoug Rabson /* Simple continuous piecewiese linear function: 17112678024SDoug Rabson * max MiB -> min MiB gradient 17212678024SDoug Rabson * 0 0 17312678024SDoug Rabson * 16 16 17412678024SDoug Rabson * 32 24 17512678024SDoug Rabson * 128 72 (1/2) 17612678024SDoug Rabson * 512 168 (1/4) 17712678024SDoug Rabson * 2048 360 (1/8) 17812678024SDoug Rabson * 8192 552 (1/32) 17912678024SDoug Rabson * 32768 1320 18012678024SDoug Rabson * 131072 4392 18112678024SDoug Rabson */ 18212678024SDoug Rabson if (max_pfn < MB2PAGES(128)) 18312678024SDoug Rabson min_pages = MB2PAGES(8) + (max_pfn >> 1); 18412678024SDoug Rabson else if (max_pfn < MB2PAGES(512)) 18512678024SDoug Rabson min_pages = MB2PAGES(40) + (max_pfn >> 2); 18612678024SDoug Rabson else if (max_pfn < MB2PAGES(2048)) 18712678024SDoug Rabson min_pages = MB2PAGES(104) + (max_pfn >> 3); 18812678024SDoug Rabson else 18912678024SDoug Rabson min_pages = MB2PAGES(296) + (max_pfn >> 5); 19012678024SDoug Rabson #undef MB2PAGES 19112678024SDoug Rabson 19212678024SDoug Rabson /* Don't enforce growth */ 19312678024SDoug Rabson return min(min_pages, curr_pages); 19412678024SDoug Rabson #ifndef CONFIG_XEN 19512678024SDoug Rabson #undef max_pfn 19612678024SDoug Rabson #endif 19712678024SDoug Rabson } 19812678024SDoug Rabson 19989e0f4d2SKip Macy static int 20089e0f4d2SKip Macy increase_reservation(unsigned long nr_pages) 20189e0f4d2SKip Macy { 20212678024SDoug Rabson unsigned long pfn, i; 20312678024SDoug Rabson struct balloon_entry *entry; 20412678024SDoug Rabson vm_page_t page; 20589e0f4d2SKip Macy long rc; 20689e0f4d2SKip Macy struct xen_memory_reservation reservation = { 20789e0f4d2SKip Macy .address_bits = 0, 20889e0f4d2SKip Macy .extent_order = 0, 20989e0f4d2SKip Macy .domid = DOMID_SELF 21089e0f4d2SKip Macy }; 21189e0f4d2SKip Macy 21212678024SDoug Rabson if (nr_pages > ARRAY_SIZE(frame_list)) 21312678024SDoug Rabson nr_pages = ARRAY_SIZE(frame_list); 21489e0f4d2SKip Macy 21512678024SDoug Rabson mtx_lock(&balloon_lock); 21689e0f4d2SKip Macy 21712678024SDoug Rabson for (entry = STAILQ_FIRST(&ballooned_pages), i = 0; 21812678024SDoug Rabson i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) { 21912678024SDoug Rabson KASSERT(entry, ("ballooned_pages list corrupt")); 22012678024SDoug Rabson page = entry->page; 22112678024SDoug Rabson frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 22212678024SDoug Rabson } 22389e0f4d2SKip Macy 22412678024SDoug Rabson set_xen_guest_handle(reservation.extent_start, frame_list); 22589e0f4d2SKip Macy reservation.nr_extents = nr_pages; 22689e0f4d2SKip Macy rc = HYPERVISOR_memory_op( 22712678024SDoug Rabson XENMEM_populate_physmap, &reservation); 22889e0f4d2SKip Macy if (rc < nr_pages) { 22912678024SDoug Rabson if (rc > 0) { 23089e0f4d2SKip Macy int ret; 23112678024SDoug Rabson 23289e0f4d2SKip Macy /* We hit the Xen hard limit: reprobe. */ 23389e0f4d2SKip Macy reservation.nr_extents = rc; 23489e0f4d2SKip Macy ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 23589e0f4d2SKip Macy &reservation); 23612678024SDoug Rabson KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); 23712678024SDoug Rabson } 23812678024SDoug Rabson if (rc >= 0) 23912678024SDoug Rabson bs.hard_limit = (bs.current_pages + rc - 24012678024SDoug Rabson bs.driver_pages); 24189e0f4d2SKip Macy goto out; 24289e0f4d2SKip Macy } 24389e0f4d2SKip Macy 24489e0f4d2SKip Macy for (i = 0; i < nr_pages; i++) { 24589e0f4d2SKip Macy page = balloon_retrieve(); 24612678024SDoug Rabson KASSERT(page, ("balloon_retrieve failed")); 24789e0f4d2SKip Macy 24889e0f4d2SKip Macy pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 24912678024SDoug Rabson KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || 25012678024SDoug Rabson !phys_to_machine_mapping_valid(pfn)), 25112678024SDoug Rabson ("auto translated physmap but mapping is valid")); 25289e0f4d2SKip Macy 25312678024SDoug Rabson set_phys_to_machine(pfn, frame_list[i]); 25412678024SDoug Rabson 2553e33218dSDoug Rabson #if 0 25612678024SDoug Rabson #ifndef XENHVM 25712678024SDoug Rabson /* Link back into the page tables if not highmem. */ 25812678024SDoug Rabson if (pfn < max_low_pfn) { 25912678024SDoug Rabson int ret; 26012678024SDoug Rabson ret = HYPERVISOR_update_va_mapping( 26112678024SDoug Rabson (unsigned long)__va(pfn << PAGE_SHIFT), 26212678024SDoug Rabson pfn_pte_ma(frame_list[i], PAGE_KERNEL), 26312678024SDoug Rabson 0); 26412678024SDoug Rabson PASSING(ret == 0, 26512678024SDoug Rabson ("HYPERVISOR_update_va_mapping failed")); 26612678024SDoug Rabson } 26712678024SDoug Rabson #endif 2683e33218dSDoug Rabson #endif 26989e0f4d2SKip Macy 27089e0f4d2SKip Macy /* Relinquish the page back to the allocator. */ 27112678024SDoug Rabson vm_page_unwire(page, 0); 27289e0f4d2SKip Macy vm_page_free(page); 27389e0f4d2SKip Macy } 27489e0f4d2SKip Macy 27512678024SDoug Rabson bs.current_pages += nr_pages; 27612678024SDoug Rabson //totalram_pages = bs.current_pages; 27789e0f4d2SKip Macy 27889e0f4d2SKip Macy out: 27912678024SDoug Rabson mtx_unlock(&balloon_lock); 28089e0f4d2SKip Macy 28189e0f4d2SKip Macy return 0; 28289e0f4d2SKip Macy } 28389e0f4d2SKip Macy 28489e0f4d2SKip Macy static int 28589e0f4d2SKip Macy decrease_reservation(unsigned long nr_pages) 28689e0f4d2SKip Macy { 28712678024SDoug Rabson unsigned long pfn, i; 28812678024SDoug Rabson vm_page_t page; 28989e0f4d2SKip Macy int need_sleep = 0; 29089e0f4d2SKip Macy int ret; 29189e0f4d2SKip Macy struct xen_memory_reservation reservation = { 29289e0f4d2SKip Macy .address_bits = 0, 29389e0f4d2SKip Macy .extent_order = 0, 29489e0f4d2SKip Macy .domid = DOMID_SELF 29589e0f4d2SKip Macy }; 29689e0f4d2SKip Macy 29712678024SDoug Rabson if (nr_pages > ARRAY_SIZE(frame_list)) 29812678024SDoug Rabson nr_pages = ARRAY_SIZE(frame_list); 29989e0f4d2SKip Macy 30089e0f4d2SKip Macy for (i = 0; i < nr_pages; i++) { 301*703dec68SAlan Cox if ((page = vm_page_alloc(NULL, 0, 30289e0f4d2SKip Macy VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 30389e0f4d2SKip Macy VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 30489e0f4d2SKip Macy nr_pages = i; 30589e0f4d2SKip Macy need_sleep = 1; 30689e0f4d2SKip Macy break; 30789e0f4d2SKip Macy } 30812678024SDoug Rabson 30989e0f4d2SKip Macy pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 31012678024SDoug Rabson frame_list[i] = PFNTOMFN(pfn); 31112678024SDoug Rabson 31212678024SDoug Rabson #if 0 31312678024SDoug Rabson if (!PageHighMem(page)) { 31412678024SDoug Rabson v = phys_to_virt(pfn << PAGE_SHIFT); 31512678024SDoug Rabson scrub_pages(v, 1); 31612678024SDoug Rabson #ifdef CONFIG_XEN 31712678024SDoug Rabson ret = HYPERVISOR_update_va_mapping( 31812678024SDoug Rabson (unsigned long)v, __pte_ma(0), 0); 31912678024SDoug Rabson BUG_ON(ret); 32012678024SDoug Rabson #endif 32112678024SDoug Rabson } 32212678024SDoug Rabson #endif 32312678024SDoug Rabson #ifdef CONFIG_XEN_SCRUB_PAGES 32412678024SDoug Rabson else { 32512678024SDoug Rabson v = kmap(page); 32612678024SDoug Rabson scrub_pages(v, 1); 32712678024SDoug Rabson kunmap(page); 32812678024SDoug Rabson } 32912678024SDoug Rabson #endif 33089e0f4d2SKip Macy } 33189e0f4d2SKip Macy 33212678024SDoug Rabson #ifdef CONFIG_XEN 33312678024SDoug Rabson /* Ensure that ballooned highmem pages don't have kmaps. */ 33412678024SDoug Rabson kmap_flush_unused(); 33512678024SDoug Rabson flush_tlb_all(); 33612678024SDoug Rabson #endif 33712678024SDoug Rabson 33812678024SDoug Rabson mtx_lock(&balloon_lock); 33989e0f4d2SKip Macy 34089e0f4d2SKip Macy /* No more mappings: invalidate P2M and add to balloon. */ 34189e0f4d2SKip Macy for (i = 0; i < nr_pages; i++) { 34212678024SDoug Rabson pfn = MFNTOPFN(frame_list[i]); 34312678024SDoug Rabson set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 34489e0f4d2SKip Macy balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT)); 34589e0f4d2SKip Macy } 34689e0f4d2SKip Macy 34712678024SDoug Rabson set_xen_guest_handle(reservation.extent_start, frame_list); 34889e0f4d2SKip Macy reservation.nr_extents = nr_pages; 34989e0f4d2SKip Macy ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 35012678024SDoug Rabson KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); 35189e0f4d2SKip Macy 35212678024SDoug Rabson bs.current_pages -= nr_pages; 35312678024SDoug Rabson //totalram_pages = bs.current_pages; 35489e0f4d2SKip Macy 35512678024SDoug Rabson mtx_unlock(&balloon_lock); 35689e0f4d2SKip Macy 35712678024SDoug Rabson return (need_sleep); 35889e0f4d2SKip Macy } 35989e0f4d2SKip Macy 36089e0f4d2SKip Macy /* 36189e0f4d2SKip Macy * We avoid multiple worker processes conflicting via the balloon mutex. 36289e0f4d2SKip Macy * We may of course race updates of the target counts (which are protected 36389e0f4d2SKip Macy * by the balloon lock), or with changes to the Xen hard limit, but we will 36489e0f4d2SKip Macy * recover from these in time. 36589e0f4d2SKip Macy */ 36689e0f4d2SKip Macy static void 36789e0f4d2SKip Macy balloon_process(void *unused) 36889e0f4d2SKip Macy { 36989e0f4d2SKip Macy int need_sleep = 0; 37089e0f4d2SKip Macy long credit; 37189e0f4d2SKip Macy 37212678024SDoug Rabson mtx_lock(&balloon_mutex); 37389e0f4d2SKip Macy for (;;) { 374c4f9a105SAlexander Motin int sleep_time; 375c4f9a105SAlexander Motin 37689e0f4d2SKip Macy do { 37712678024SDoug Rabson credit = current_target() - bs.current_pages; 37889e0f4d2SKip Macy if (credit > 0) 37989e0f4d2SKip Macy need_sleep = (increase_reservation(credit) != 0); 38089e0f4d2SKip Macy if (credit < 0) 38189e0f4d2SKip Macy need_sleep = (decrease_reservation(-credit) != 0); 38289e0f4d2SKip Macy 38389e0f4d2SKip Macy } while ((credit != 0) && !need_sleep); 38489e0f4d2SKip Macy 38589e0f4d2SKip Macy /* Schedule more work if there is some still to be done. */ 38612678024SDoug Rabson if (current_target() != bs.current_pages) 387c4f9a105SAlexander Motin sleep_time = hz; 388c4f9a105SAlexander Motin else 389c4f9a105SAlexander Motin sleep_time = 0; 39089e0f4d2SKip Macy 391c4f9a105SAlexander Motin msleep(balloon_process, &balloon_mutex, 0, "balloon", 392c4f9a105SAlexander Motin sleep_time); 39389e0f4d2SKip Macy } 39412678024SDoug Rabson mtx_unlock(&balloon_mutex); 39589e0f4d2SKip Macy } 39689e0f4d2SKip Macy 39789e0f4d2SKip Macy /* Resets the Xen limit, sets new target, and kicks off processing. */ 39889e0f4d2SKip Macy static void 39989e0f4d2SKip Macy set_new_target(unsigned long target) 40089e0f4d2SKip Macy { 40189e0f4d2SKip Macy /* No need for lock. Not read-modify-write updates. */ 40212678024SDoug Rabson bs.hard_limit = ~0UL; 40312678024SDoug Rabson bs.target_pages = max(target, minimum_target()); 40489e0f4d2SKip Macy wakeup(balloon_process); 40589e0f4d2SKip Macy } 40689e0f4d2SKip Macy 407ff662b5cSJustin T. Gibbs static struct xs_watch target_watch = 40889e0f4d2SKip Macy { 40989e0f4d2SKip Macy .node = "memory/target" 41089e0f4d2SKip Macy }; 41189e0f4d2SKip Macy 41289e0f4d2SKip Macy /* React to a change in the target key */ 41389e0f4d2SKip Macy static void 414ff662b5cSJustin T. Gibbs watch_target(struct xs_watch *watch, 41589e0f4d2SKip Macy const char **vec, unsigned int len) 41689e0f4d2SKip Macy { 41789e0f4d2SKip Macy unsigned long long new_target; 41889e0f4d2SKip Macy int err; 41989e0f4d2SKip Macy 420ff662b5cSJustin T. Gibbs err = xs_scanf(XST_NIL, "memory", "target", NULL, 42112678024SDoug Rabson "%llu", &new_target); 42212678024SDoug Rabson if (err) { 42389e0f4d2SKip Macy /* This is ok (for domain0 at least) - so just return */ 42489e0f4d2SKip Macy return; 42589e0f4d2SKip Macy } 42689e0f4d2SKip Macy 42789e0f4d2SKip Macy /* The given memory/target value is in KiB, so it needs converting to 42889e0f4d2SKip Macy pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 42989e0f4d2SKip Macy */ 43089e0f4d2SKip Macy set_new_target(new_target >> (PAGE_SHIFT - 10)); 43189e0f4d2SKip Macy 43289e0f4d2SKip Macy } 43389e0f4d2SKip Macy 43489e0f4d2SKip Macy static void 43512678024SDoug Rabson balloon_init_watcher(void *arg) 43689e0f4d2SKip Macy { 43789e0f4d2SKip Macy int err; 43889e0f4d2SKip Macy 439ff662b5cSJustin T. Gibbs err = xs_register_watch(&target_watch); 44089e0f4d2SKip Macy if (err) 44189e0f4d2SKip Macy printf("Failed to set balloon watcher\n"); 44289e0f4d2SKip Macy 44389e0f4d2SKip Macy } 44412678024SDoug Rabson SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, 44512678024SDoug Rabson balloon_init_watcher, NULL); 44689e0f4d2SKip Macy 44789e0f4d2SKip Macy static void 44812678024SDoug Rabson balloon_init(void *arg) 44989e0f4d2SKip Macy { 45012678024SDoug Rabson #ifndef XENHVM 45112678024SDoug Rabson vm_page_t page; 4523e33218dSDoug Rabson unsigned long pfn; 4533e33218dSDoug Rabson 4543e33218dSDoug Rabson #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 45512678024SDoug Rabson #endif 45689e0f4d2SKip Macy 45712678024SDoug Rabson if (!is_running_on_xen()) 45812678024SDoug Rabson return; 45989e0f4d2SKip Macy 46012678024SDoug Rabson mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF); 46112678024SDoug Rabson mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); 46289e0f4d2SKip Macy 46312678024SDoug Rabson #ifndef XENHVM 46412678024SDoug Rabson bs.current_pages = min(xen_start_info->nr_pages, max_pfn); 46512678024SDoug Rabson #else 46612678024SDoug Rabson bs.current_pages = physmem; 46712678024SDoug Rabson #endif 46812678024SDoug Rabson bs.target_pages = bs.current_pages; 46912678024SDoug Rabson bs.balloon_low = 0; 47012678024SDoug Rabson bs.balloon_high = 0; 47112678024SDoug Rabson bs.driver_pages = 0UL; 47212678024SDoug Rabson bs.hard_limit = ~0UL; 47389e0f4d2SKip Macy 47412678024SDoug Rabson kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); 47589e0f4d2SKip Macy 47612678024SDoug Rabson #ifndef XENHVM 47789e0f4d2SKip Macy /* Initialise the balloon with excess memory space. */ 47889e0f4d2SKip Macy for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { 47989e0f4d2SKip Macy page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); 48089e0f4d2SKip Macy balloon_append(page); 48189e0f4d2SKip Macy } 4823e33218dSDoug Rabson #undef max_pfn 48312678024SDoug Rabson #endif 48489e0f4d2SKip Macy 48589e0f4d2SKip Macy target_watch.callback = watch_target; 48689e0f4d2SKip Macy 48712678024SDoug Rabson return; 48889e0f4d2SKip Macy } 48912678024SDoug Rabson SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL); 49012678024SDoug Rabson 49112678024SDoug Rabson void balloon_update_driver_allowance(long delta); 49289e0f4d2SKip Macy 49389e0f4d2SKip Macy void 49489e0f4d2SKip Macy balloon_update_driver_allowance(long delta) 49589e0f4d2SKip Macy { 49612678024SDoug Rabson mtx_lock(&balloon_lock); 49712678024SDoug Rabson bs.driver_pages += delta; 49812678024SDoug Rabson mtx_unlock(&balloon_lock); 49989e0f4d2SKip Macy } 50089e0f4d2SKip Macy 50189e0f4d2SKip Macy #if 0 50289e0f4d2SKip Macy static int dealloc_pte_fn( 50389e0f4d2SKip Macy pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 50489e0f4d2SKip Macy { 50589e0f4d2SKip Macy unsigned long mfn = pte_mfn(*pte); 50689e0f4d2SKip Macy int ret; 50789e0f4d2SKip Macy struct xen_memory_reservation reservation = { 50889e0f4d2SKip Macy .extent_start = &mfn, 50989e0f4d2SKip Macy .nr_extents = 1, 51089e0f4d2SKip Macy .extent_order = 0, 51189e0f4d2SKip Macy .domid = DOMID_SELF 51289e0f4d2SKip Macy }; 51389e0f4d2SKip Macy set_pte_at(&init_mm, addr, pte, __pte_ma(0)); 51489e0f4d2SKip Macy set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); 51589e0f4d2SKip Macy ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 51612678024SDoug Rabson KASSERT(ret == 1, ("HYPERVISOR_memory_op failed")); 51789e0f4d2SKip Macy return 0; 51889e0f4d2SKip Macy } 51989e0f4d2SKip Macy 52089e0f4d2SKip Macy #endif 52112678024SDoug Rabson 52212678024SDoug Rabson #if 0 52389e0f4d2SKip Macy vm_page_t 52489e0f4d2SKip Macy balloon_alloc_empty_page_range(unsigned long nr_pages) 52589e0f4d2SKip Macy { 52689e0f4d2SKip Macy vm_page_t pages; 52712678024SDoug Rabson int i, rc; 52889e0f4d2SKip Macy unsigned long *mfn_list; 52989e0f4d2SKip Macy struct xen_memory_reservation reservation = { 53089e0f4d2SKip Macy .address_bits = 0, 53189e0f4d2SKip Macy .extent_order = 0, 53289e0f4d2SKip Macy .domid = DOMID_SELF 53389e0f4d2SKip Macy }; 53489e0f4d2SKip Macy 53589e0f4d2SKip Macy pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4) 53689e0f4d2SKip Macy if (pages == NULL) 53789e0f4d2SKip Macy return NULL; 53889e0f4d2SKip Macy 53989e0f4d2SKip Macy mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK); 54089e0f4d2SKip Macy 54189e0f4d2SKip Macy for (i = 0; i < nr_pages; i++) { 54289e0f4d2SKip Macy mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT); 54389e0f4d2SKip Macy PFNTOMFN(i) = INVALID_P2M_ENTRY; 54489e0f4d2SKip Macy reservation.extent_start = mfn_list; 54589e0f4d2SKip Macy reservation.nr_extents = nr_pages; 54612678024SDoug Rabson rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 54712678024SDoug Rabson &reservation); 54812678024SDoug Rabson KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed")); 54989e0f4d2SKip Macy } 55089e0f4d2SKip Macy 55189e0f4d2SKip Macy current_pages -= nr_pages; 55289e0f4d2SKip Macy 55389e0f4d2SKip Macy wakeup(balloon_process); 55489e0f4d2SKip Macy 55589e0f4d2SKip Macy return pages; 55689e0f4d2SKip Macy } 55789e0f4d2SKip Macy 55889e0f4d2SKip Macy void 55989e0f4d2SKip Macy balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages) 56089e0f4d2SKip Macy { 56112678024SDoug Rabson unsigned long i; 56289e0f4d2SKip Macy 56389e0f4d2SKip Macy for (i = 0; i < nr_pages; i++) 56489e0f4d2SKip Macy balloon_append(page + i); 56589e0f4d2SKip Macy 56689e0f4d2SKip Macy wakeup(balloon_process); 56789e0f4d2SKip Macy } 56889e0f4d2SKip Macy #endif 569