1 /****************************************************************************** 2 * balloon.c 3 * 4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 * 6 * Copyright (c) 2003, B Dragovic 7 * Copyright (c) 2003-2004, M Williamson, K Fraser 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 9 * 10 * This file may be distributed separately from the Linux kernel, or 11 * incorporated into other software packages, subject to the following license: 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this source file (the "Software"), to deal in the Software without 15 * restriction, including without limitation the rights to use, copy, modify, 16 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 17 * and to permit persons to whom the Software is furnished to do so, subject to 18 * the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 29 * IN THE SOFTWARE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/lock.h> 37 #include <sys/kernel.h> 38 #include <sys/kthread.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/sysctl.h> 42 43 #include <vm/vm.h> 44 #include <vm/vm_page.h> 45 46 #include <xen/xen-os.h> 47 #include <xen/hypervisor.h> 48 #include <xen/features.h> 49 #include <xen/xenstore/xenstorevar.h> 50 51 #include <machine/xen/xenvar.h> 52 53 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); 54 55 struct mtx balloon_mutex; 56 57 /* 58 * Protects atomic reservation decrease/increase against concurrent increases. 59 * Also protects non-atomic updates of current_pages and driver_pages, and 60 * balloon lists. 61 */ 62 struct mtx balloon_lock; 63 64 /* We increase/decrease in batches which fit in a page */ 65 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 66 #define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0])) 67 68 struct balloon_stats { 69 /* We aim for 'current allocation' == 'target allocation'. */ 70 unsigned long current_pages; 71 unsigned long target_pages; 72 /* We may hit the hard limit in Xen. If we do then we remember it. */ 73 unsigned long hard_limit; 74 /* 75 * Drivers may alter the memory reservation independently, but they 76 * must inform the balloon driver so we avoid hitting the hard limit. 77 */ 78 unsigned long driver_pages; 79 /* Number of pages in high- and low-memory balloons. */ 80 unsigned long balloon_low; 81 unsigned long balloon_high; 82 }; 83 84 static struct balloon_stats balloon_stats; 85 #define bs balloon_stats 86 87 SYSCTL_DECL(_dev_xen); 88 static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); 89 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, 90 &bs.current_pages, 0, "Current allocation"); 91 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, 92 &bs.target_pages, 0, "Target allocation"); 93 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, 94 &bs.driver_pages, 0, "Driver pages"); 95 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, 96 &bs.hard_limit, 0, "Xen hard limit"); 97 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, 98 &bs.balloon_low, 0, "Low-mem balloon"); 99 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, 100 &bs.balloon_high, 0, "High-mem balloon"); 101 102 struct balloon_entry { 103 vm_page_t page; 104 STAILQ_ENTRY(balloon_entry) list; 105 }; 106 107 /* List of ballooned pages, threaded through the mem_map array. */ 108 static STAILQ_HEAD(,balloon_entry) ballooned_pages; 109 110 /* Main work function, always executed in process context. */ 111 static void balloon_process(void *unused); 112 113 #define IPRINTK(fmt, args...) \ 114 printk(KERN_INFO "xen_mem: " fmt, ##args) 115 #define WPRINTK(fmt, args...) \ 116 printk(KERN_WARNING "xen_mem: " fmt, ##args) 117 118 /* balloon_append: add the given page to the balloon. */ 119 static void 120 balloon_append(vm_page_t page) 121 { 122 struct balloon_entry *entry; 123 124 entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK); 125 entry->page = page; 126 STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); 127 bs.balloon_low++; 128 } 129 130 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ 131 static vm_page_t 132 balloon_retrieve(void) 133 { 134 vm_page_t page; 135 struct balloon_entry *entry; 136 137 if (STAILQ_EMPTY(&ballooned_pages)) 138 return NULL; 139 140 entry = STAILQ_FIRST(&ballooned_pages); 141 STAILQ_REMOVE_HEAD(&ballooned_pages, list); 142 143 page = entry->page; 144 free(entry, M_BALLOON); 145 146 bs.balloon_low--; 147 148 return page; 149 } 150 151 static unsigned long 152 current_target(void) 153 { 154 unsigned long target = min(bs.target_pages, bs.hard_limit); 155 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) 156 target = bs.current_pages + bs.balloon_low + bs.balloon_high; 157 return target; 158 } 159 160 static unsigned long 161 minimum_target(void) 162 { 163 #ifdef XENHVM 164 #define max_pfn physmem 165 #else 166 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 167 #endif 168 unsigned long min_pages, curr_pages = current_target(); 169 170 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 171 /* Simple continuous piecewiese linear function: 172 * max MiB -> min MiB gradient 173 * 0 0 174 * 16 16 175 * 32 24 176 * 128 72 (1/2) 177 * 512 168 (1/4) 178 * 2048 360 (1/8) 179 * 8192 552 (1/32) 180 * 32768 1320 181 * 131072 4392 182 */ 183 if (max_pfn < MB2PAGES(128)) 184 min_pages = MB2PAGES(8) + (max_pfn >> 1); 185 else if (max_pfn < MB2PAGES(512)) 186 min_pages = MB2PAGES(40) + (max_pfn >> 2); 187 else if (max_pfn < MB2PAGES(2048)) 188 min_pages = MB2PAGES(104) + (max_pfn >> 3); 189 else 190 min_pages = MB2PAGES(296) + (max_pfn >> 5); 191 #undef MB2PAGES 192 193 /* Don't enforce growth */ 194 return min(min_pages, curr_pages); 195 #ifndef CONFIG_XEN 196 #undef max_pfn 197 #endif 198 } 199 200 static int 201 increase_reservation(unsigned long nr_pages) 202 { 203 unsigned long pfn, i; 204 struct balloon_entry *entry; 205 vm_page_t page; 206 long rc; 207 struct xen_memory_reservation reservation = { 208 .address_bits = 0, 209 .extent_order = 0, 210 .domid = DOMID_SELF 211 }; 212 213 if (nr_pages > ARRAY_SIZE(frame_list)) 214 nr_pages = ARRAY_SIZE(frame_list); 215 216 mtx_lock(&balloon_lock); 217 218 for (entry = STAILQ_FIRST(&ballooned_pages), i = 0; 219 i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) { 220 KASSERT(entry, ("ballooned_pages list corrupt")); 221 page = entry->page; 222 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 223 } 224 225 set_xen_guest_handle(reservation.extent_start, frame_list); 226 reservation.nr_extents = nr_pages; 227 rc = HYPERVISOR_memory_op( 228 XENMEM_populate_physmap, &reservation); 229 if (rc < nr_pages) { 230 if (rc > 0) { 231 int ret; 232 233 /* We hit the Xen hard limit: reprobe. */ 234 reservation.nr_extents = rc; 235 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 236 &reservation); 237 KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); 238 } 239 if (rc >= 0) 240 bs.hard_limit = (bs.current_pages + rc - 241 bs.driver_pages); 242 goto out; 243 } 244 245 for (i = 0; i < nr_pages; i++) { 246 page = balloon_retrieve(); 247 KASSERT(page, ("balloon_retrieve failed")); 248 249 pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 250 KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || 251 !phys_to_machine_mapping_valid(pfn)), 252 ("auto translated physmap but mapping is valid")); 253 254 set_phys_to_machine(pfn, frame_list[i]); 255 256 #if 0 257 #ifndef XENHVM 258 /* Link back into the page tables if not highmem. */ 259 if (pfn < max_low_pfn) { 260 int ret; 261 ret = HYPERVISOR_update_va_mapping( 262 (unsigned long)__va(pfn << PAGE_SHIFT), 263 pfn_pte_ma(frame_list[i], PAGE_KERNEL), 264 0); 265 PASSING(ret == 0, 266 ("HYPERVISOR_update_va_mapping failed")); 267 } 268 #endif 269 #endif 270 271 /* Relinquish the page back to the allocator. */ 272 vm_page_unwire(page, 0); 273 vm_page_free(page); 274 } 275 276 bs.current_pages += nr_pages; 277 //totalram_pages = bs.current_pages; 278 279 out: 280 mtx_unlock(&balloon_lock); 281 282 return 0; 283 } 284 285 static int 286 decrease_reservation(unsigned long nr_pages) 287 { 288 unsigned long pfn, i; 289 vm_page_t page; 290 int need_sleep = 0; 291 int ret; 292 struct xen_memory_reservation reservation = { 293 .address_bits = 0, 294 .extent_order = 0, 295 .domid = DOMID_SELF 296 }; 297 298 if (nr_pages > ARRAY_SIZE(frame_list)) 299 nr_pages = ARRAY_SIZE(frame_list); 300 301 for (i = 0; i < nr_pages; i++) { 302 if ((page = vm_page_alloc(NULL, 0, 303 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 304 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 305 nr_pages = i; 306 need_sleep = 1; 307 break; 308 } 309 310 pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 311 frame_list[i] = PFNTOMFN(pfn); 312 313 #if 0 314 if (!PageHighMem(page)) { 315 v = phys_to_virt(pfn << PAGE_SHIFT); 316 scrub_pages(v, 1); 317 #ifdef CONFIG_XEN 318 ret = HYPERVISOR_update_va_mapping( 319 (unsigned long)v, __pte_ma(0), 0); 320 BUG_ON(ret); 321 #endif 322 } 323 #endif 324 #ifdef CONFIG_XEN_SCRUB_PAGES 325 else { 326 v = kmap(page); 327 scrub_pages(v, 1); 328 kunmap(page); 329 } 330 #endif 331 } 332 333 #ifdef CONFIG_XEN 334 /* Ensure that ballooned highmem pages don't have kmaps. */ 335 kmap_flush_unused(); 336 flush_tlb_all(); 337 #endif 338 339 mtx_lock(&balloon_lock); 340 341 /* No more mappings: invalidate P2M and add to balloon. */ 342 for (i = 0; i < nr_pages; i++) { 343 pfn = MFNTOPFN(frame_list[i]); 344 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 345 balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT)); 346 } 347 348 set_xen_guest_handle(reservation.extent_start, frame_list); 349 reservation.nr_extents = nr_pages; 350 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 351 KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); 352 353 bs.current_pages -= nr_pages; 354 //totalram_pages = bs.current_pages; 355 356 mtx_unlock(&balloon_lock); 357 358 return (need_sleep); 359 } 360 361 /* 362 * We avoid multiple worker processes conflicting via the balloon mutex. 363 * We may of course race updates of the target counts (which are protected 364 * by the balloon lock), or with changes to the Xen hard limit, but we will 365 * recover from these in time. 366 */ 367 static void 368 balloon_process(void *unused) 369 { 370 int need_sleep = 0; 371 long credit; 372 373 mtx_lock(&balloon_mutex); 374 for (;;) { 375 int sleep_time; 376 377 do { 378 credit = current_target() - bs.current_pages; 379 if (credit > 0) 380 need_sleep = (increase_reservation(credit) != 0); 381 if (credit < 0) 382 need_sleep = (decrease_reservation(-credit) != 0); 383 384 } while ((credit != 0) && !need_sleep); 385 386 /* Schedule more work if there is some still to be done. */ 387 if (current_target() != bs.current_pages) 388 sleep_time = hz; 389 else 390 sleep_time = 0; 391 392 msleep(balloon_process, &balloon_mutex, 0, "balloon", 393 sleep_time); 394 } 395 mtx_unlock(&balloon_mutex); 396 } 397 398 /* Resets the Xen limit, sets new target, and kicks off processing. */ 399 static void 400 set_new_target(unsigned long target) 401 { 402 /* No need for lock. Not read-modify-write updates. */ 403 bs.hard_limit = ~0UL; 404 bs.target_pages = max(target, minimum_target()); 405 wakeup(balloon_process); 406 } 407 408 static struct xs_watch target_watch = 409 { 410 .node = "memory/target" 411 }; 412 413 /* React to a change in the target key */ 414 static void 415 watch_target(struct xs_watch *watch, 416 const char **vec, unsigned int len) 417 { 418 unsigned long long new_target; 419 int err; 420 421 err = xs_scanf(XST_NIL, "memory", "target", NULL, 422 "%llu", &new_target); 423 if (err) { 424 /* This is ok (for domain0 at least) - so just return */ 425 return; 426 } 427 428 /* The given memory/target value is in KiB, so it needs converting to 429 pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 430 */ 431 set_new_target(new_target >> (PAGE_SHIFT - 10)); 432 433 } 434 435 static void 436 balloon_init_watcher(void *arg) 437 { 438 int err; 439 440 if (!is_running_on_xen()) 441 return; 442 443 err = xs_register_watch(&target_watch); 444 if (err) 445 printf("Failed to set balloon watcher\n"); 446 447 } 448 SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, 449 balloon_init_watcher, NULL); 450 451 static void 452 balloon_init(void *arg) 453 { 454 #ifndef XENHVM 455 vm_page_t page; 456 unsigned long pfn; 457 458 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 459 #endif 460 461 if (!is_running_on_xen()) 462 return; 463 464 mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF); 465 mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); 466 467 #ifndef XENHVM 468 bs.current_pages = min(xen_start_info->nr_pages, max_pfn); 469 #else 470 bs.current_pages = physmem; 471 #endif 472 bs.target_pages = bs.current_pages; 473 bs.balloon_low = 0; 474 bs.balloon_high = 0; 475 bs.driver_pages = 0UL; 476 bs.hard_limit = ~0UL; 477 478 kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); 479 480 #ifndef XENHVM 481 /* Initialise the balloon with excess memory space. */ 482 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { 483 page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); 484 balloon_append(page); 485 } 486 #undef max_pfn 487 #endif 488 489 target_watch.callback = watch_target; 490 491 return; 492 } 493 SYSINIT(balloon_init, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL); 494 495 void balloon_update_driver_allowance(long delta); 496 497 void 498 balloon_update_driver_allowance(long delta) 499 { 500 mtx_lock(&balloon_lock); 501 bs.driver_pages += delta; 502 mtx_unlock(&balloon_lock); 503 } 504 505 #if 0 506 static int dealloc_pte_fn( 507 pte_t *pte, struct page *pte_page, unsigned long addr, void *data) 508 { 509 unsigned long mfn = pte_mfn(*pte); 510 int ret; 511 struct xen_memory_reservation reservation = { 512 .extent_start = &mfn, 513 .nr_extents = 1, 514 .extent_order = 0, 515 .domid = DOMID_SELF 516 }; 517 set_pte_at(&init_mm, addr, pte, __pte_ma(0)); 518 set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); 519 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 520 KASSERT(ret == 1, ("HYPERVISOR_memory_op failed")); 521 return 0; 522 } 523 524 #endif 525 526 #if 0 527 vm_page_t 528 balloon_alloc_empty_page_range(unsigned long nr_pages) 529 { 530 vm_page_t pages; 531 int i, rc; 532 unsigned long *mfn_list; 533 struct xen_memory_reservation reservation = { 534 .address_bits = 0, 535 .extent_order = 0, 536 .domid = DOMID_SELF 537 }; 538 539 pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4) 540 if (pages == NULL) 541 return NULL; 542 543 mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK); 544 545 for (i = 0; i < nr_pages; i++) { 546 mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT); 547 PFNTOMFN(i) = INVALID_P2M_ENTRY; 548 reservation.extent_start = mfn_list; 549 reservation.nr_extents = nr_pages; 550 rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 551 &reservation); 552 KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed")); 553 } 554 555 current_pages -= nr_pages; 556 557 wakeup(balloon_process); 558 559 return pages; 560 } 561 562 void 563 balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages) 564 { 565 unsigned long i; 566 567 for (i = 0; i < nr_pages; i++) 568 balloon_append(page + i); 569 570 wakeup(balloon_process); 571 } 572 #endif 573