1 /****************************************************************************** 2 * balloon.c 3 * 4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 * 6 * Copyright (c) 2003, B Dragovic 7 * Copyright (c) 2003-2004, M Williamson, K Fraser 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 9 * 10 * This file may be distributed separately from the Linux kernel, or 11 * incorporated into other software packages, subject to the following license: 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this source file (the "Software"), to deal in the Software without 15 * restriction, including without limitation the rights to use, copy, modify, 16 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 17 * and to permit persons to whom the Software is furnished to do so, subject to 18 * the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 29 * IN THE SOFTWARE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/lock.h> 37 #include <sys/kernel.h> 38 #include <sys/kthread.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/sysctl.h> 42 43 #include <vm/vm.h> 44 #include <vm/vm_page.h> 45 46 #include <xen/xen-os.h> 47 #include <xen/hypervisor.h> 48 #include <xen/features.h> 49 #include <xen/xenstore/xenstorevar.h> 50 51 #include <machine/xen/xenvar.h> 52 53 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); 54 55 /* Convert from KB (as fetched from xenstore) to number of PAGES */ 56 #define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) 57 58 struct mtx balloon_mutex; 59 60 /* We increase/decrease in batches which fit in a page */ 61 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 62 63 struct balloon_stats { 64 /* We aim for 'current allocation' == 'target allocation'. */ 65 unsigned long current_pages; 66 unsigned long target_pages; 67 /* We may hit the hard limit in Xen. If we do then we remember it. */ 68 unsigned long hard_limit; 69 /* 70 * Drivers may alter the memory reservation independently, but they 71 * must inform the balloon driver so we avoid hitting the hard limit. 72 */ 73 unsigned long driver_pages; 74 /* Number of pages in high- and low-memory balloons. */ 75 unsigned long balloon_low; 76 unsigned long balloon_high; 77 }; 78 79 static struct balloon_stats balloon_stats; 80 #define bs balloon_stats 81 82 SYSCTL_DECL(_dev_xen); 83 static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); 84 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, 85 &bs.current_pages, 0, "Current allocation"); 86 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, 87 &bs.target_pages, 0, "Target allocation"); 88 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, 89 &bs.driver_pages, 0, "Driver pages"); 90 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, 91 &bs.hard_limit, 0, "Xen hard limit"); 92 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, 93 &bs.balloon_low, 0, "Low-mem balloon"); 94 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, 95 &bs.balloon_high, 0, "High-mem balloon"); 96 97 struct balloon_entry { 98 vm_page_t page; 99 STAILQ_ENTRY(balloon_entry) list; 100 }; 101 102 /* List of ballooned pages, threaded through the mem_map array. */ 103 static STAILQ_HEAD(,balloon_entry) ballooned_pages; 104 105 /* Main work function, always executed in process context. */ 106 static void balloon_process(void *unused); 107 108 #define IPRINTK(fmt, args...) \ 109 printk(KERN_INFO "xen_mem: " fmt, ##args) 110 #define WPRINTK(fmt, args...) \ 111 printk(KERN_WARNING "xen_mem: " fmt, ##args) 112 113 /* balloon_append: add the given page to the balloon. */ 114 static int 115 balloon_append(vm_page_t page) 116 { 117 struct balloon_entry *entry; 118 119 mtx_assert(&balloon_mutex, MA_OWNED); 120 121 entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_NOWAIT); 122 if (!entry) 123 return (ENOMEM); 124 entry->page = page; 125 STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); 126 bs.balloon_low++; 127 128 return (0); 129 } 130 131 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 132 static vm_page_t 133 balloon_retrieve(void) 134 { 135 vm_page_t page; 136 struct balloon_entry *entry; 137 138 mtx_assert(&balloon_mutex, MA_OWNED); 139 140 if (STAILQ_EMPTY(&ballooned_pages)) 141 return (NULL); 142 143 entry = STAILQ_FIRST(&ballooned_pages); 144 STAILQ_REMOVE_HEAD(&ballooned_pages, list); 145 146 page = entry->page; 147 free(entry, M_BALLOON); 148 149 bs.balloon_low--; 150 151 return (page); 152 } 153 154 static unsigned long 155 current_target(void) 156 { 157 unsigned long target = min(bs.target_pages, bs.hard_limit); 158 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) 159 target = bs.current_pages + bs.balloon_low + bs.balloon_high; 160 return (target); 161 } 162 163 static unsigned long 164 minimum_target(void) 165 { 166 #ifdef XENHVM 167 #define max_pfn realmem 168 #else 169 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 170 #endif 171 unsigned long min_pages, curr_pages = current_target(); 172 173 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 174 /* 175 * Simple continuous piecewiese linear function: 176 * max MiB -> min MiB gradient 177 * 0 0 178 * 16 16 179 * 32 24 180 * 128 72 (1/2) 181 * 512 168 (1/4) 182 * 2048 360 (1/8) 183 * 8192 552 (1/32) 184 * 32768 1320 185 * 131072 4392 186 */ 187 if (max_pfn < MB2PAGES(128)) 188 min_pages = MB2PAGES(8) + (max_pfn >> 1); 189 else if (max_pfn < MB2PAGES(512)) 190 min_pages = MB2PAGES(40) + (max_pfn >> 2); 191 else if (max_pfn < MB2PAGES(2048)) 192 min_pages = MB2PAGES(104) + (max_pfn >> 3); 193 else 194 min_pages = MB2PAGES(296) + (max_pfn >> 5); 195 #undef MB2PAGES 196 #undef max_pfn 197 198 /* Don't enforce growth */ 199 return (min(min_pages, curr_pages)); 200 } 201 202 static int 203 increase_reservation(unsigned long nr_pages) 204 { 205 unsigned long pfn, i; 206 struct balloon_entry *entry; 207 vm_page_t page; 208 long rc; 209 struct xen_memory_reservation reservation = { 210 .address_bits = 0, 211 .extent_order = 0, 212 .domid = DOMID_SELF 213 }; 214 215 mtx_assert(&balloon_mutex, MA_OWNED); 216 217 if (nr_pages > nitems(frame_list)) 218 nr_pages = nitems(frame_list); 219 220 for (entry = STAILQ_FIRST(&ballooned_pages), i = 0; 221 i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) { 222 KASSERT(entry, ("ballooned_pages list corrupt")); 223 page = entry->page; 224 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 225 } 226 227 set_xen_guest_handle(reservation.extent_start, frame_list); 228 reservation.nr_extents = nr_pages; 229 rc = HYPERVISOR_memory_op( 230 XENMEM_populate_physmap, &reservation); 231 if (rc < nr_pages) { 232 if (rc > 0) { 233 int ret; 234 235 /* We hit the Xen hard limit: reprobe. */ 236 reservation.nr_extents = rc; 237 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 238 &reservation); 239 KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); 240 } 241 if (rc >= 0) 242 bs.hard_limit = (bs.current_pages + rc - 243 bs.driver_pages); 244 goto out; 245 } 246 247 for (i = 0; i < nr_pages; i++) { 248 page = balloon_retrieve(); 249 KASSERT(page, ("balloon_retrieve failed")); 250 251 pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 252 KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || 253 !phys_to_machine_mapping_valid(pfn)), 254 ("auto translated physmap but mapping is valid")); 255 256 set_phys_to_machine(pfn, frame_list[i]); 257 258 vm_page_unwire(page, 0); 259 vm_page_free(page); 260 } 261 262 bs.current_pages += nr_pages; 263 264 out: 265 return (0); 266 } 267 268 static int 269 decrease_reservation(unsigned long nr_pages) 270 { 271 unsigned long pfn, i; 272 vm_page_t page; 273 int need_sleep = 0; 274 int ret; 275 struct xen_memory_reservation reservation = { 276 .address_bits = 0, 277 .extent_order = 0, 278 .domid = DOMID_SELF 279 }; 280 281 mtx_assert(&balloon_mutex, MA_OWNED); 282 283 if (nr_pages > nitems(frame_list)) 284 nr_pages = nitems(frame_list); 285 286 for (i = 0; i < nr_pages; i++) { 287 if ((page = vm_page_alloc(NULL, 0, 288 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 289 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 290 nr_pages = i; 291 need_sleep = 1; 292 break; 293 } 294 295 pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 296 frame_list[i] = PFNTOMFN(pfn); 297 298 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 299 if (balloon_append(page) != 0) { 300 vm_page_unwire(page, 0); 301 vm_page_free(page); 302 303 nr_pages = i; 304 need_sleep = 1; 305 break; 306 } 307 } 308 309 set_xen_guest_handle(reservation.extent_start, frame_list); 310 reservation.nr_extents = nr_pages; 311 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 312 KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); 313 314 bs.current_pages -= nr_pages; 315 316 return (need_sleep); 317 } 318 319 /* 320 * We avoid multiple worker processes conflicting via the balloon mutex. 321 * We may of course race updates of the target counts (which are protected 322 * by the balloon lock), or with changes to the Xen hard limit, but we will 323 * recover from these in time. 324 */ 325 static void 326 balloon_process(void *unused) 327 { 328 int need_sleep = 0; 329 long credit; 330 331 mtx_lock(&balloon_mutex); 332 for (;;) { 333 int sleep_time; 334 335 do { 336 credit = current_target() - bs.current_pages; 337 if (credit > 0) 338 need_sleep = (increase_reservation(credit) != 0); 339 if (credit < 0) 340 need_sleep = (decrease_reservation(-credit) != 0); 341 342 } while ((credit != 0) && !need_sleep); 343 344 /* Schedule more work if there is some still to be done. */ 345 if (current_target() != bs.current_pages) 346 sleep_time = hz; 347 else 348 sleep_time = 0; 349 350 msleep(balloon_process, &balloon_mutex, 0, "balloon", 351 sleep_time); 352 } 353 mtx_unlock(&balloon_mutex); 354 } 355 356 /* Resets the Xen limit, sets new target, and kicks off processing. */ 357 static void 358 set_new_target(unsigned long target) 359 { 360 /* No need for lock. Not read-modify-write updates. */ 361 bs.hard_limit = ~0UL; 362 bs.target_pages = max(target, minimum_target()); 363 wakeup(balloon_process); 364 } 365 366 static struct xs_watch target_watch = 367 { 368 .node = "memory/target" 369 }; 370 371 /* React to a change in the target key */ 372 static void 373 watch_target(struct xs_watch *watch, 374 const char **vec, unsigned int len) 375 { 376 unsigned long long new_target; 377 int err; 378 379 err = xs_scanf(XST_NIL, "memory", "target", NULL, 380 "%llu", &new_target); 381 if (err) { 382 /* This is ok (for domain0 at least) - so just return */ 383 return; 384 } 385 386 /* 387 * The given memory/target value is in KiB, so it needs converting to 388 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 389 */ 390 set_new_target(new_target >> KB_TO_PAGE_SHIFT); 391 } 392 393 static void 394 balloon_init_watcher(void *arg) 395 { 396 int err; 397 398 if (!is_running_on_xen()) 399 return; 400 401 err = xs_register_watch(&target_watch); 402 if (err) 403 printf("Failed to set balloon watcher\n"); 404 405 } 406 SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, 407 balloon_init_watcher, NULL); 408 409 static void 410 balloon_init(void *arg) 411 { 412 #ifndef XENHVM 413 vm_page_t page; 414 unsigned long pfn; 415 416 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 417 #endif 418 419 if (!is_running_on_xen()) 420 return; 421 422 mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); 423 424 #ifndef XENHVM 425 bs.current_pages = min(xen_start_info->nr_pages, max_pfn); 426 #else 427 bs.current_pages = realmem; 428 #endif 429 bs.target_pages = bs.current_pages; 430 bs.balloon_low = 0; 431 bs.balloon_high = 0; 432 bs.driver_pages = 0UL; 433 bs.hard_limit = ~0UL; 434 435 kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); 436 437 #ifndef XENHVM 438 /* Initialise the balloon with excess memory space. */ 439 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { 440 page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); 441 balloon_append(page); 442 } 443 #undef max_pfn 444 #endif 445 446 target_watch.callback = watch_target; 447 448 return; 449 } 450 SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL); 451 452 void balloon_update_driver_allowance(long delta); 453 454 void 455 balloon_update_driver_allowance(long delta) 456 { 457 mtx_lock(&balloon_mutex); 458 bs.driver_pages += delta; 459 mtx_unlock(&balloon_mutex); 460 } 461