/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>
#include <xen/features.h>
#include <xen/xenstore/xenstorevar.h>

#include <machine/xen/xenvar.h>

static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");

/* Convert from KB (as fetched from xenstore) to number of PAGES */
#define KB_TO_PAGE_SHIFT	(PAGE_SHIFT - 10)

struct mtx balloon_mutex;

/* We increase/decrease in batches which fit in a page */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];

struct balloon_stats {
	/* We aim for 'current allocation' == 'target allocation'. */
	unsigned long current_pages;
	unsigned long target_pages;
	/* We may hit the hard limit in Xen. If we do then we remember it. */
	unsigned long hard_limit;
	/*
	 * Drivers may alter the memory reservation independently, but they
	 * must inform the balloon driver so we avoid hitting the hard limit.
	 */
	unsigned long driver_pages;
	/* Number of pages in high- and low-memory balloons. */
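	/*
	 * In this driver pages only ever enter the low-memory balloon;
	 * balloon_high is initialized to zero, never updated, and is
	 * exported only through the dev.xen.balloon.high_mem sysctl.
	 */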
	unsigned long balloon_low;
	unsigned long balloon_high;
};

static struct balloon_stats balloon_stats;
#define bs balloon_stats

SYSCTL_DECL(_dev_xen);
static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
    &bs.current_pages, 0, "Current allocation");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
    &bs.target_pages, 0, "Target allocation");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
    &bs.driver_pages, 0, "Driver pages");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
    &bs.hard_limit, 0, "Xen hard limit");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
    &bs.balloon_low, 0, "Low-mem balloon");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
    &bs.balloon_high, 0, "High-mem balloon");

/* List of ballooned pages, threaded through the mem_map array. */
static TAILQ_HEAD(,vm_page) ballooned_pages;

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);

#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)

static unsigned long
current_target(void)
{
	unsigned long target = min(bs.target_pages, bs.hard_limit);

	if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
		target = bs.current_pages + bs.balloon_low + bs.balloon_high;
	return (target);
}

static unsigned long
minimum_target(void)
{
#ifdef XENHVM
#define max_pfn realmem
#else
#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
#endif
	unsigned long min_pages, curr_pages = current_target();

#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
	/*
	 * Simple continuous piecewise linear function:
	 *  max MiB -> min MiB	gradient
	 *       0	   0
	 *      16	  16
	 *      32	  24
	 *     128	  72	(1/2)
	 *     512	 168	(1/4)
	 *    2048	 360	(1/8)
	 *    8192	 552	(1/32)
	 *   32768	1320
	 *  131072	4392
	 */
	if (max_pfn < MB2PAGES(128))
		min_pages = MB2PAGES(8) + (max_pfn >> 1);
	else if (max_pfn < MB2PAGES(512))
		min_pages = MB2PAGES(40) + (max_pfn >> 2);
	else if (max_pfn < MB2PAGES(2048))
		min_pages = MB2PAGES(104) + (max_pfn >> 3);
	else
		min_pages = MB2PAGES(296) + (max_pfn >> 5);
#undef MB2PAGES
#undef max_pfn

	/* Don't enforce growth */
	return (min(min_pages, curr_pages));
}

static int
increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i;
	vm_page_t page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	mtx_assert(&balloon_mutex, MA_OWNED);

	if (nr_pages > nitems(frame_list))
		nr_pages = nitems(frame_list);

	for (page = TAILQ_FIRST(&ballooned_pages), i = 0;
	    i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) {
		KASSERT(page != NULL, ("ballooned_pages list corrupt"));
		frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		if (rc > 0) {
			int ret;

			/* We hit the Xen hard limit: reprobe. */
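			/*
			 * Xen granted only 'rc' of the requested extents.
			 * Give those partially granted extents back so the
			 * reservation stays consistent, and record the
			 * discovered limit below.
			 */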
			reservation.nr_extents = rc;
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
			    &reservation);
			KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
		}
		if (rc >= 0)
			bs.hard_limit = (bs.current_pages + rc -
			    bs.driver_pages);
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = TAILQ_FIRST(&ballooned_pages);
		KASSERT(page != NULL, ("Unable to get ballooned page"));
		TAILQ_REMOVE(&ballooned_pages, page, plinks.q);
		bs.balloon_low--;

		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
		KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
		    !phys_to_machine_mapping_valid(pfn)),
		    ("auto translated physmap but mapping is valid"));

		set_phys_to_machine(pfn, frame_list[i]);

		vm_page_free(page);
	}

	bs.current_pages += nr_pages;

out:
	return (0);
}

static int
decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i;
	vm_page_t page;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid        = DOMID_SELF
	};

	mtx_assert(&balloon_mutex, MA_OWNED);

	if (nr_pages > nitems(frame_list))
		nr_pages = nitems(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_ZERO)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		if ((page->flags & PG_ZERO) == 0) {
			/*
			 * Zero the page, or else we might be leaking
			 * important data to other domains on the same
			 * host. Xen doesn't scrub ballooned-out memory
			 * pages; the guest is in charge of making sure
			 * that no information is leaked.
			 */
			pmap_zero_page(page);
		}

		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
		frame_list[i] = PFNTOMFN(pfn);

		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
		bs.balloon_low++;
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));

	bs.current_pages -= nr_pages;

	return (need_sleep);
}

/*
 * We avoid multiple worker processes conflicting with each other via the
 * balloon mutex.  We may of course race with updates of the target counts
 * (which are protected by the balloon lock) or with changes to the Xen hard
 * limit, but we will recover from these in time.
 */
static void
balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	mtx_lock(&balloon_mutex);
	for (;;) {
		int sleep_time;

		do {
			credit = current_target() - bs.current_pages;
			if (credit > 0)
				need_sleep = (increase_reservation(credit) != 0);
			if (credit < 0)
				need_sleep = (decrease_reservation(-credit) != 0);
		} while ((credit != 0) && !need_sleep);

		/* Schedule more work if there is some still to be done. */
		if (current_target() != bs.current_pages)
			sleep_time = hz;
		else
			sleep_time = 0;

		msleep(balloon_process, &balloon_mutex, 0, "balloon",
		    sleep_time);
	}
	mtx_unlock(&balloon_mutex);
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
static void
set_new_target(unsigned long target)
{
	/* No lock needed; these are not read-modify-write updates. */
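	/*
	 * Resetting hard_limit forgets any limit learned earlier;
	 * increase_reservation() will rediscover it if Xen again grants
	 * fewer extents than requested.
	 */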
	bs.hard_limit = ~0UL;
	bs.target_pages = max(target, minimum_target());
	wakeup(balloon_process);
}

static struct xs_watch target_watch =
{
	.node = "memory/target"
};

/* React to a change in the target key */
static void
watch_target(struct xs_watch *watch, const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xs_scanf(XST_NIL, "memory", "target", NULL, "%llu", &new_target);
	if (err) {
		/* This is ok (for domain0 at least) - so just return */
		return;
	}

	/*
	 * The given memory/target value is in KiB, so it needs converting to
	 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
	 */
	set_new_target(new_target >> KB_TO_PAGE_SHIFT);
}

static void
balloon_init_watcher(void *arg)
{
	int err;

	if (!is_running_on_xen())
		return;

	err = xs_register_watch(&target_watch);
	if (err)
		printf("Failed to set balloon watcher\n");
}
SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY,
    balloon_init_watcher, NULL);

static void
balloon_init(void *arg)
{
#ifndef XENHVM
	vm_page_t page;
	unsigned long pfn;

#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
#endif

	if (!is_running_on_xen())
		return;

	mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);

#ifndef XENHVM
	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
#else
	bs.current_pages = realmem;
#endif
	bs.target_pages  = bs.current_pages;
	bs.balloon_low   = 0;
	bs.balloon_high  = 0;
	bs.driver_pages  = 0UL;
	bs.hard_limit    = ~0UL;

	kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");

#ifndef XENHVM
	/* Initialise the balloon with excess memory space. */
	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
		TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
		bs.balloon_low++;
	}
#undef max_pfn
#endif

	target_watch.callback = watch_target;

	return;
}
SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL);

void balloon_update_driver_allowance(long delta);

void
balloon_update_driver_allowance(long delta)
{
	mtx_lock(&balloon_mutex);
	bs.driver_pages += delta;
	mtx_unlock(&balloon_mutex);
}
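
/*
 * Usage sketch (hypothetical, not taken from any in-tree caller): a driver
 * that removes 'n' pages from the domain's reservation on its own would
 * account for them roughly as follows, so the balloon's notion of the Xen
 * hard limit stays accurate:
 *
 *	balloon_update_driver_allowance(n);	-- after claiming the pages
 *	...
 *	balloon_update_driver_allowance(-n);	-- after handing them back
 */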