1 /****************************************************************************** 2 * balloon.c 3 * 4 * Xen balloon driver - enables returning/claiming memory to/from Xen. 5 * 6 * Copyright (c) 2003, B Dragovic 7 * Copyright (c) 2003-2004, M Williamson, K Fraser 8 * Copyright (c) 2005 Dan M. Smith, IBM Corporation 9 * 10 * This file may be distributed separately from the Linux kernel, or 11 * incorporated into other software packages, subject to the following license: 12 * 13 * Permission is hereby granted, free of charge, to any person obtaining a copy 14 * of this source file (the "Software"), to deal in the Software without 15 * restriction, including without limitation the rights to use, copy, modify, 16 * merge, publish, distribute, sublicense, and/or sell copies of the Software, 17 * and to permit persons to whom the Software is furnished to do so, subject to 18 * the following conditions: 19 * 20 * The above copyright notice and this permission notice shall be included in 21 * all copies or substantial portions of the Software. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 25 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 28 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 29 * IN THE SOFTWARE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/lock.h> 37 #include <sys/kernel.h> 38 #include <sys/kthread.h> 39 #include <sys/malloc.h> 40 #include <sys/mutex.h> 41 #include <sys/sysctl.h> 42 #include <sys/module.h> 43 44 #include <vm/vm.h> 45 #include <vm/vm_page.h> 46 47 #include <xen/xen-os.h> 48 #include <xen/hypervisor.h> 49 #include <xen/features.h> 50 #include <xen/xenstore/xenstorevar.h> 51 52 #include <machine/xen/xenvar.h> 53 54 static MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); 55 56 /* Convert from KB (as fetched from xenstore) to number of PAGES */ 57 #define KB_TO_PAGE_SHIFT (PAGE_SHIFT - 10) 58 59 struct mtx balloon_mutex; 60 61 /* We increase/decrease in batches which fit in a page */ 62 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 63 64 struct balloon_stats { 65 /* We aim for 'current allocation' == 'target allocation'. */ 66 unsigned long current_pages; 67 unsigned long target_pages; 68 /* We may hit the hard limit in Xen. If we do then we remember it. */ 69 unsigned long hard_limit; 70 /* 71 * Drivers may alter the memory reservation independently, but they 72 * must inform the balloon driver so we avoid hitting the hard limit. 73 */ 74 unsigned long driver_pages; 75 /* Number of pages in high- and low-memory balloons. */ 76 unsigned long balloon_low; 77 unsigned long balloon_high; 78 }; 79 80 static struct balloon_stats balloon_stats; 81 #define bs balloon_stats 82 83 SYSCTL_DECL(_dev_xen); 84 static SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); 85 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, 86 &bs.current_pages, 0, "Current allocation"); 87 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, 88 &bs.target_pages, 0, "Target allocation"); 89 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, 90 &bs.driver_pages, 0, "Driver pages"); 91 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, 92 &bs.hard_limit, 0, "Xen hard limit"); 93 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, 94 &bs.balloon_low, 0, "Low-mem balloon"); 95 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, 96 &bs.balloon_high, 0, "High-mem balloon"); 97 98 /* List of ballooned pages, threaded through the mem_map array. */ 99 static TAILQ_HEAD(,vm_page) ballooned_pages; 100 101 /* Main work function, always executed in process context. */ 102 static void balloon_process(void *unused); 103 104 #define IPRINTK(fmt, args...) \ 105 printk(KERN_INFO "xen_mem: " fmt, ##args) 106 #define WPRINTK(fmt, args...) \ 107 printk(KERN_WARNING "xen_mem: " fmt, ##args) 108 109 static unsigned long 110 current_target(void) 111 { 112 unsigned long target = min(bs.target_pages, bs.hard_limit); 113 if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) 114 target = bs.current_pages + bs.balloon_low + bs.balloon_high; 115 return (target); 116 } 117 118 static unsigned long 119 minimum_target(void) 120 { 121 #ifdef XENHVM 122 #define max_pfn realmem 123 #else 124 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 125 #endif 126 unsigned long min_pages, curr_pages = current_target(); 127 128 #define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) 129 /* 130 * Simple continuous piecewiese linear function: 131 * max MiB -> min MiB gradient 132 * 0 0 133 * 16 16 134 * 32 24 135 * 128 72 (1/2) 136 * 512 168 (1/4) 137 * 2048 360 (1/8) 138 * 8192 552 (1/32) 139 * 32768 1320 140 * 131072 4392 141 */ 142 if (max_pfn < MB2PAGES(128)) 143 min_pages = MB2PAGES(8) + (max_pfn >> 1); 144 else if (max_pfn < MB2PAGES(512)) 145 min_pages = MB2PAGES(40) + (max_pfn >> 2); 146 else if (max_pfn < MB2PAGES(2048)) 147 min_pages = MB2PAGES(104) + (max_pfn >> 3); 148 else 149 min_pages = MB2PAGES(296) + (max_pfn >> 5); 150 #undef MB2PAGES 151 #undef max_pfn 152 153 /* Don't enforce growth */ 154 return (min(min_pages, curr_pages)); 155 } 156 157 static int 158 increase_reservation(unsigned long nr_pages) 159 { 160 unsigned long pfn, i; 161 vm_page_t page; 162 long rc; 163 struct xen_memory_reservation reservation = { 164 .address_bits = 0, 165 .extent_order = 0, 166 .domid = DOMID_SELF 167 }; 168 169 mtx_assert(&balloon_mutex, MA_OWNED); 170 171 if (nr_pages > nitems(frame_list)) 172 nr_pages = nitems(frame_list); 173 174 for (page = TAILQ_FIRST(&ballooned_pages), i = 0; 175 i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) { 176 KASSERT(page != NULL, ("ballooned_pages list corrupt")); 177 frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 178 } 179 180 set_xen_guest_handle(reservation.extent_start, frame_list); 181 reservation.nr_extents = nr_pages; 182 rc = HYPERVISOR_memory_op( 183 XENMEM_populate_physmap, &reservation); 184 if (rc < nr_pages) { 185 if (rc > 0) { 186 int ret; 187 188 /* We hit the Xen hard limit: reprobe. */ 189 reservation.nr_extents = rc; 190 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, 191 &reservation); 192 KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); 193 } 194 if (rc >= 0) 195 bs.hard_limit = (bs.current_pages + rc - 196 bs.driver_pages); 197 goto out; 198 } 199 200 for (i = 0; i < nr_pages; i++) { 201 page = TAILQ_FIRST(&ballooned_pages); 202 KASSERT(page != NULL, ("Unable to get ballooned page")); 203 TAILQ_REMOVE(&ballooned_pages, page, plinks.q); 204 bs.balloon_low--; 205 206 pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 207 KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || 208 !phys_to_machine_mapping_valid(pfn)), 209 ("auto translated physmap but mapping is valid")); 210 211 set_phys_to_machine(pfn, frame_list[i]); 212 213 vm_page_free(page); 214 } 215 216 bs.current_pages += nr_pages; 217 218 out: 219 return (0); 220 } 221 222 static int 223 decrease_reservation(unsigned long nr_pages) 224 { 225 unsigned long pfn, i; 226 vm_page_t page; 227 int need_sleep = 0; 228 int ret; 229 struct xen_memory_reservation reservation = { 230 .address_bits = 0, 231 .extent_order = 0, 232 .domid = DOMID_SELF 233 }; 234 235 mtx_assert(&balloon_mutex, MA_OWNED); 236 237 if (nr_pages > nitems(frame_list)) 238 nr_pages = nitems(frame_list); 239 240 for (i = 0; i < nr_pages; i++) { 241 if ((page = vm_page_alloc(NULL, 0, 242 VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 243 VM_ALLOC_ZERO)) == NULL) { 244 nr_pages = i; 245 need_sleep = 1; 246 break; 247 } 248 249 if ((page->flags & PG_ZERO) == 0) { 250 /* 251 * Zero the page, or else we might be leaking 252 * important data to other domains on the same 253 * host. Xen doesn't scrub ballooned out memory 254 * pages, the guest is in charge of making 255 * sure that no information is leaked. 256 */ 257 pmap_zero_page(page); 258 } 259 260 pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); 261 frame_list[i] = PFNTOMFN(pfn); 262 263 set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 264 TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); 265 bs.balloon_low++; 266 } 267 268 set_xen_guest_handle(reservation.extent_start, frame_list); 269 reservation.nr_extents = nr_pages; 270 ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); 271 KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); 272 273 bs.current_pages -= nr_pages; 274 275 return (need_sleep); 276 } 277 278 /* 279 * We avoid multiple worker processes conflicting via the balloon mutex. 280 * We may of course race updates of the target counts (which are protected 281 * by the balloon lock), or with changes to the Xen hard limit, but we will 282 * recover from these in time. 283 */ 284 static void 285 balloon_process(void *unused) 286 { 287 int need_sleep = 0; 288 long credit; 289 290 mtx_lock(&balloon_mutex); 291 for (;;) { 292 int sleep_time; 293 294 do { 295 credit = current_target() - bs.current_pages; 296 if (credit > 0) 297 need_sleep = (increase_reservation(credit) != 0); 298 if (credit < 0) 299 need_sleep = (decrease_reservation(-credit) != 0); 300 301 } while ((credit != 0) && !need_sleep); 302 303 /* Schedule more work if there is some still to be done. */ 304 if (current_target() != bs.current_pages) 305 sleep_time = hz; 306 else 307 sleep_time = 0; 308 309 msleep(balloon_process, &balloon_mutex, 0, "balloon", 310 sleep_time); 311 } 312 mtx_unlock(&balloon_mutex); 313 } 314 315 /* Resets the Xen limit, sets new target, and kicks off processing. */ 316 static void 317 set_new_target(unsigned long target) 318 { 319 /* No need for lock. Not read-modify-write updates. */ 320 bs.hard_limit = ~0UL; 321 bs.target_pages = max(target, minimum_target()); 322 wakeup(balloon_process); 323 } 324 325 static struct xs_watch target_watch = 326 { 327 .node = "memory/target" 328 }; 329 330 /* React to a change in the target key */ 331 static void 332 watch_target(struct xs_watch *watch, 333 const char **vec, unsigned int len) 334 { 335 unsigned long long new_target; 336 int err; 337 338 err = xs_scanf(XST_NIL, "memory", "target", NULL, 339 "%llu", &new_target); 340 if (err) { 341 /* This is ok (for domain0 at least) - so just return */ 342 return; 343 } 344 345 /* 346 * The given memory/target value is in KiB, so it needs converting to 347 * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. 348 */ 349 set_new_target(new_target >> KB_TO_PAGE_SHIFT); 350 } 351 352 /*------------------ Private Device Attachment Functions --------------------*/ 353 /** 354 * \brief Identify instances of this device type in the system. 355 * 356 * \param driver The driver performing this identify action. 357 * \param parent The NewBus parent device for any devices this method adds. 358 */ 359 static void 360 xenballoon_identify(driver_t *driver __unused, device_t parent) 361 { 362 /* 363 * A single device instance for our driver is always present 364 * in a system operating under Xen. 365 */ 366 BUS_ADD_CHILD(parent, 0, driver->name, 0); 367 } 368 369 /** 370 * \brief Probe for the existance of the Xen Balloon device 371 * 372 * \param dev NewBus device_t for this Xen control instance. 373 * 374 * \return Always returns 0 indicating success. 375 */ 376 static int 377 xenballoon_probe(device_t dev) 378 { 379 380 device_set_desc(dev, "Xen Balloon Device"); 381 return (0); 382 } 383 384 /** 385 * \brief Attach the Xen Balloon device. 386 * 387 * \param dev NewBus device_t for this Xen control instance. 388 * 389 * \return On success, 0. Otherwise an errno value indicating the 390 * type of failure. 391 */ 392 static int 393 xenballoon_attach(device_t dev) 394 { 395 int err; 396 #ifndef XENHVM 397 vm_page_t page; 398 unsigned long pfn; 399 400 #define max_pfn HYPERVISOR_shared_info->arch.max_pfn 401 #endif 402 403 mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); 404 405 #ifndef XENHVM 406 bs.current_pages = min(xen_start_info->nr_pages, max_pfn); 407 #else 408 bs.current_pages = xen_pv_domain() ? 409 HYPERVISOR_start_info->nr_pages : realmem; 410 #endif 411 bs.target_pages = bs.current_pages; 412 bs.balloon_low = 0; 413 bs.balloon_high = 0; 414 bs.driver_pages = 0UL; 415 bs.hard_limit = ~0UL; 416 417 kproc_create(balloon_process, NULL, NULL, 0, 0, "balloon"); 418 419 #ifndef XENHVM 420 /* Initialise the balloon with excess memory space. */ 421 for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { 422 page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); 423 TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q); 424 bs.balloon_low++; 425 } 426 #undef max_pfn 427 #endif 428 429 target_watch.callback = watch_target; 430 431 err = xs_register_watch(&target_watch); 432 if (err) 433 device_printf(dev, 434 "xenballon: failed to set balloon watcher\n"); 435 436 return (err); 437 } 438 439 /*-------------------- Private Device Attachment Data -----------------------*/ 440 static device_method_t xenballoon_methods[] = { 441 /* Device interface */ 442 DEVMETHOD(device_identify, xenballoon_identify), 443 DEVMETHOD(device_probe, xenballoon_probe), 444 DEVMETHOD(device_attach, xenballoon_attach), 445 446 DEVMETHOD_END 447 }; 448 449 DEFINE_CLASS_0(xenballoon, xenballoon_driver, xenballoon_methods, 0); 450 devclass_t xenballoon_devclass; 451 452 DRIVER_MODULE(xenballoon, xenstore, xenballoon_driver, xenballoon_devclass, 453 NULL, NULL); 454