/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include <machine/xen/xen-os.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>
#include <xen/hypervisor.h>
#include <xen/xenstore/xenstorevar.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");

struct mtx balloon_mutex;

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
struct mtx balloon_lock;

/* We increase/decrease in batches which fit in a page. */
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
#define ARRAY_SIZE(A)	(sizeof(A) / sizeof(A[0]))
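/*
 * Illustrative sizing, assuming 4 KiB pages and 8-byte longs: frame_list
 * then holds PAGE_SIZE / sizeof(unsigned long) == 512 entries, so each
 * reservation pass moves at most 512 pages (2 MiB) in either direction.
 */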
struct balloon_stats {
	/* We aim for 'current allocation' == 'target allocation'. */
	unsigned long current_pages;
	unsigned long target_pages;
	/* We may hit the hard limit in Xen. If we do then we remember it. */
	unsigned long hard_limit;
	/*
	 * Drivers may alter the memory reservation independently, but they
	 * must inform the balloon driver so we avoid hitting the hard limit.
	 */
	unsigned long driver_pages;
	/* Number of pages in high- and low-memory balloons. */
	unsigned long balloon_low;
	unsigned long balloon_high;
};

static struct balloon_stats balloon_stats;
#define bs balloon_stats

SYSCTL_DECL(_dev_xen);
SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
    &bs.current_pages, 0, "Current allocation");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
    &bs.target_pages, 0, "Target allocation");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
    &bs.driver_pages, 0, "Driver pages");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
    &bs.hard_limit, 0, "Xen hard limit");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
    &bs.balloon_low, 0, "Low-mem balloon");
SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
    &bs.balloon_high, 0, "High-mem balloon");

struct balloon_entry {
	vm_page_t page;
	STAILQ_ENTRY(balloon_entry) list;
};

/* List of ballooned pages. */
static STAILQ_HEAD(,balloon_entry) ballooned_pages;

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);

#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)

/* balloon_append: add the given page to the balloon. */
static void
balloon_append(vm_page_t page)
{
	struct balloon_entry *entry;

	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
	entry->page = page;
	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
	bs.balloon_low++;
}
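/*
 * Illustrative note on ordering: entries are pushed and popped at the list
 * head, so after balloon_append(p1) followed by balloon_append(p2), the
 * next balloon_retrieve() (below) returns p2; the balloon behaves as a
 * LIFO stack of pages.
 */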
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static vm_page_t
balloon_retrieve(void)
{
	vm_page_t page;
	struct balloon_entry *entry;

	if (STAILQ_EMPTY(&ballooned_pages))
		return (NULL);

	entry = STAILQ_FIRST(&ballooned_pages);
	STAILQ_REMOVE_HEAD(&ballooned_pages, list);

	page = entry->page;
	/* Free with M_BALLOON: balloon_append() allocated with M_BALLOON. */
	free(entry, M_BALLOON);

	bs.balloon_low--;

	return (page);
}

static void
balloon_alarm(void *unused)
{
	wakeup(balloon_process);
}

static unsigned long
current_target(void)
{
	unsigned long target = min(bs.target_pages, bs.hard_limit);
	if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high))
		target = bs.current_pages + bs.balloon_low + bs.balloon_high;
	return (target);
}

static unsigned long
minimum_target(void)
{
#ifdef XENHVM
#define max_pfn physmem
#else
#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
#endif
	unsigned long min_pages, curr_pages = current_target();

#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
	/*
	 * Simple continuous piecewise linear function:
	 *  max MiB -> min MiB	gradient
	 *       0	   0
	 *      16	  16
	 *      32	  24
	 *     128	  72	(1/2)
	 *     512	 168	(1/4)
	 *    2048	 360	(1/8)
	 *    8192	 552	(1/32)
	 *   32768	1320
	 *  131072	4392
	 */
	if (max_pfn < MB2PAGES(128))
		min_pages = MB2PAGES(8) + (max_pfn >> 1);
	else if (max_pfn < MB2PAGES(512))
		min_pages = MB2PAGES(40) + (max_pfn >> 2);
	else if (max_pfn < MB2PAGES(2048))
		min_pages = MB2PAGES(104) + (max_pfn >> 3);
	else
		min_pages = MB2PAGES(296) + (max_pfn >> 5);
#undef MB2PAGES
#undef max_pfn

	/* Don't enforce growth. */
	return (min(min_pages, curr_pages));
}
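/*
 * Worked example for the function above, assuming 4 KiB pages: a 1024 MiB
 * domain has max_pfn == MB2PAGES(1024), which lands in the 512..2048 MiB
 * band, so min_pages = MB2PAGES(104) + max_pfn / 8, i.e. 104 MiB + 128 MiB
 * = 232 MiB worth of pages - the interpolation between the 512 MiB and
 * 2048 MiB rows of the table.
 */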
static int
increase_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i;
	struct balloon_entry *entry;
	vm_page_t page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	mtx_lock(&balloon_lock);

	for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
	     i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
		KASSERT(entry, ("ballooned_pages list corrupt"));
		page = entry->page;
		frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
	if (rc < nr_pages) {
		if (rc > 0) {
			int ret;

			/* We hit the Xen hard limit: reprobe. */
			reservation.nr_extents = rc;
			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
			    &reservation);
			KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
		}
		if (rc >= 0)
			bs.hard_limit = (bs.current_pages + rc -
			    bs.driver_pages);
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		KASSERT(page, ("balloon_retrieve failed"));

		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
		KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
		    !phys_to_machine_mapping_valid(pfn)),
		    ("auto translated physmap but mapping is valid"));

		set_phys_to_machine(pfn, frame_list[i]);

#if 0
#ifndef XENHVM
		/* Link back into the page tables if not highmem. */
		if (pfn < max_low_pfn) {
			int ret;
			ret = HYPERVISOR_update_va_mapping(
			    (unsigned long)__va(pfn << PAGE_SHIFT),
			    pfn_pte_ma(frame_list[i], PAGE_KERNEL),
			    0);
			KASSERT(ret == 0,
			    ("HYPERVISOR_update_va_mapping failed"));
		}
#endif
#endif

		/* Relinquish the page back to the allocator. */
		vm_page_unwire(page, 0);
		vm_page_free(page);
	}

	bs.current_pages += nr_pages;
	//totalram_pages = bs.current_pages;

 out:
	mtx_unlock(&balloon_lock);

	return (0);
}

static int
decrease_reservation(unsigned long nr_pages)
{
	unsigned long pfn, i;
	vm_page_t page;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > ARRAY_SIZE(frame_list))
		nr_pages = ARRAY_SIZE(frame_list);

	for (i = 0; i < nr_pages; i++) {
		if ((page = vm_page_alloc(NULL, 0,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}

		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
		frame_list[i] = PFNTOMFN(pfn);

#if 0
		if (!PageHighMem(page)) {
			v = phys_to_virt(pfn << PAGE_SHIFT);
			scrub_pages(v, 1);
#ifdef CONFIG_XEN
			ret = HYPERVISOR_update_va_mapping(
			    (unsigned long)v, __pte_ma(0), 0);
			BUG_ON(ret);
#endif
		}
#ifdef CONFIG_XEN_SCRUB_PAGES
		else {
			v = kmap(page);
			scrub_pages(v, 1);
			kunmap(page);
		}
#endif
#endif
	}

#ifdef CONFIG_XEN
	/* Ensure that ballooned highmem pages don't have kmaps. */
	kmap_flush_unused();
	flush_tlb_all();
#endif

	mtx_lock(&balloon_lock);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = MFNTOPFN(frame_list[i]);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
	}

	set_xen_guest_handle(reservation.extent_start, frame_list);
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));

	bs.current_pages -= nr_pages;
	//totalram_pages = bs.current_pages;

	mtx_unlock(&balloon_lock);

	return (need_sleep);
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void
balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	mtx_lock(&balloon_mutex);
	for (;;) {
		do {
			credit = current_target() - bs.current_pages;
			if (credit > 0)
				need_sleep = (increase_reservation(credit) != 0);
			if (credit < 0)
				need_sleep = (decrease_reservation(-credit) != 0);
		} while ((credit != 0) && !need_sleep);

		/* Schedule more work one second from now if there is some
		 * still to be done; timeout(9) takes relative ticks. */
		if (current_target() != bs.current_pages)
			timeout(balloon_alarm, NULL, hz);

		/* Sleep until awakened (timo of 0 means no timeout). */
		msleep(balloon_process, &balloon_mutex, 0, "balloon", 0);
	}
	mtx_unlock(&balloon_mutex);
}
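/*
 * Illustrative credit walk-through, with made-up numbers: if target_pages
 * is 25000 while current_pages is 26000, current_target() returns 25000,
 * credit is -1000, and decrease_reservation(1000) runs in frame_list-sized
 * batches until credit reaches zero or an allocation failure sets
 * need_sleep, in which case the one-second timeout retries later.
 */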
/* Resets the Xen limit, sets new target, and kicks off processing. */
static void
set_new_target(unsigned long target)
{
	/* No need for lock: these are not read-modify-write updates. */
	bs.hard_limit = ~0UL;
	bs.target_pages = max(target, minimum_target());
	wakeup(balloon_process);
}

static struct xs_watch target_watch =
{
	.node = "memory/target"
};

/* React to a change in the target key. */
static void
watch_target(struct xs_watch *watch,
    const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xs_scanf(XST_NIL, "memory", "target", NULL,
	    "%llu", &new_target);
	if (err) {
		/* This is ok (for domain0 at least) - so just return. */
		return;
	}

	/*
	 * The given memory/target value is in KiB, so it needs converting
	 * to pages.  PAGE_SHIFT converts bytes to pages, hence
	 * PAGE_SHIFT - 10.
	 */
	set_new_target(new_target >> (PAGE_SHIFT - 10));
}
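/*
 * Conversion example for watch_target() above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12): a xenstore memory/target of 524288 KiB is shifted
 * right by PAGE_SHIFT - 10 == 2, yielding a target of 131072 pages,
 * i.e. 512 MiB.
 */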
static void
balloon_init_watcher(void *arg)
{
	int err;

	err = xs_register_watch(&target_watch);
	if (err)
		printf("Failed to set balloon watcher\n");
}
SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY,
    balloon_init_watcher, NULL);

static void
balloon_init(void *arg)
{
#ifndef XENHVM
	vm_page_t page;
	unsigned long pfn;

#define max_pfn HYPERVISOR_shared_info->arch.max_pfn
#endif

	if (!is_running_on_xen())
		return;

	mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
	mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);

#ifndef XENHVM
	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
#else
	bs.current_pages = physmem;
#endif
	bs.target_pages = bs.current_pages;
	bs.balloon_low = 0;
	bs.balloon_high = 0;
	bs.driver_pages = 0UL;
	bs.hard_limit = ~0UL;

	kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon");
	// init_timer(&balloon_timer);
	// balloon_timer.data = 0;
	// balloon_timer.function = balloon_alarm;

#ifndef XENHVM
	/* Initialise the balloon with excess memory space. */
	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
		balloon_append(page);
	}
#undef max_pfn
#endif

	target_watch.callback = watch_target;
}
SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL);

void balloon_update_driver_allowance(long delta);

void
balloon_update_driver_allowance(long delta)
{
	mtx_lock(&balloon_lock);
	bs.driver_pages += delta;
	mtx_unlock(&balloon_lock);
}

#if 0
static int dealloc_pte_fn(
	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.extent_start = &mfn,
		.nr_extents = 1,
		.extent_order = 0,
		.domid = DOMID_SELF
	};
	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
	return (0);
}
#endif

#if 0
vm_page_t
balloon_alloc_empty_page_range(unsigned long nr_pages)
{
	vm_page_t pages;
	int i, rc;
	unsigned long pfn;
	unsigned long *mfn_list;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4);
	if (pages == NULL)
		return (NULL);

	mfn_list = malloc(nr_pages * sizeof(unsigned long), M_DEVBUF,
	    M_WAITOK);

	/* Record each page's MFN, then invalidate its P2M entry. */
	for (i = 0; i < nr_pages; i++) {
		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
		mfn_list[i] = PFNTOMFN(pfn);
		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
	}

	/* Return the whole batch to Xen in a single hypercall. */
	reservation.extent_start = mfn_list;
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));

	bs.current_pages -= nr_pages;

	wakeup(balloon_process);

	return (pages);
}

void
balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
{
	unsigned long i;

	for (i = 0; i < nr_pages; i++)
		balloon_append(page + i);

	wakeup(balloon_process);
}
#endif
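/*
 * Usage sketch for balloon_update_driver_allowance() above (illustrative,
 * not taken from a caller in this tree): a driver that claims pages from
 * the guest reservation for its own use, e.g. for grant-table mappings,
 * reports the change so that the balloon's hard-limit arithmetic stays
 * accurate, then reports the opposite delta when the pages are returned:
 *
 *	balloon_update_driver_allowance(npages);	(claim npages)
 *	...
 *	balloon_update_driver_allowance(-npages);	(give them back)
 *
 * Here npages is a hypothetical count supplied by the calling driver.
 */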