/******************************************************************************
 * balloon.c
 *
 * Xen balloon driver - enables returning/claiming memory to/from Xen.
 *
 * Copyright (c) 2003, B Dragovic
 * Copyright (c) 2003-2004, M Williamson, K Fraser
 * Copyright (c) 2005 Dan M. Smith, IBM Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

#include <machine/hypervisor-ifs.h>
#include <machine/xen-os.h>
#include <machine/xenbus.h>

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and driver_pages, and
 * balloon lists.
 */
struct mtx balloon_lock;
#ifdef notyet

/* We aim for 'current allocation' == 'target allocation'. */
static unsigned long current_pages;
static unsigned long target_pages;

/* VM /proc information for memory */
extern unsigned long totalram_pages;

/* We may hit the hard limit in Xen. If we do then we remember it. */
static unsigned long hard_limit;

/*
 * Drivers may alter the memory reservation independently, but they must
 * inform the balloon driver so that we can avoid hitting the hard limit.
 */
static unsigned long driver_pages;

struct balloon_entry {
	vm_page_t page;
	STAILQ_ENTRY(balloon_entry) list;
};

/* List of ballooned pages, threaded through the mem_map array. */
static STAILQ_HEAD(,balloon_entry) ballooned_pages;

static unsigned long balloon_low, balloon_high;

/* Main work function, always executed in process context. */
static void balloon_process(void *unused);

#define IPRINTK(fmt, args...) \
	printk(KERN_INFO "xen_mem: " fmt, ##args)
#define WPRINTK(fmt, args...) \
	printk(KERN_WARNING "xen_mem: " fmt, ##args)

/* balloon_append: add the given page to the balloon. */
static void
balloon_append(vm_page_t page)
{
	struct balloon_entry *entry;

	entry = malloc(sizeof(struct balloon_entry), M_DEVBUF, M_WAITOK);
	entry->page = page;

	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
	balloon_low++;
}
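/*
 * Illustrative sketch only, kept out of the build like the other disabled
 * code in this file (the helper name is hypothetical): balloon_append()
 * above parks a page on ballooned_pages and bumps balloon_low;
 * balloon_retrieve() below undoes both.  Both insert and remove operate
 * on the list head, so the balloon behaves as a LIFO stack of pages.
 */
#if 0
static void
balloon_list_example(vm_page_t page)
{
	balloon_append(page);			/* page joins the balloon */
	PANIC_IF(balloon_retrieve() != page);	/* head insert, head remove */
}
#endif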
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
static vm_page_t
balloon_retrieve(void)
{
	vm_page_t page;
	struct balloon_entry *entry;

	if (STAILQ_EMPTY(&ballooned_pages))
		return NULL;

	entry = STAILQ_FIRST(&ballooned_pages);
	STAILQ_REMOVE_HEAD(&ballooned_pages, list);

	page = entry->page;
	free(entry, M_DEVBUF);

	balloon_low--;

	return page;
}

/* Matches the timeout(9) handler type; armed from balloon_process(). */
static void
balloon_alarm(void *unused)
{
	wakeup(balloon_process);
}

static unsigned long
current_target(void)
{
	unsigned long target = min(target_pages, hard_limit);

	if (target > (current_pages + balloon_low + balloon_high))
		target = current_pages + balloon_low + balloon_high;
	return target;
}

static int
increase_reservation(unsigned long nr_pages)
{
	unsigned long *mfn_list, pfn, i, flags;
	vm_page_t page;
	long rc;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
		nr_pages = PAGE_SIZE / sizeof(unsigned long);

	mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
	if (mfn_list == NULL)
		return ENOMEM;

	balloon_lock(flags);

	reservation.extent_start = mfn_list;
	reservation.nr_extents = nr_pages;
	rc = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
	if (rc < nr_pages) {
		int ret;

		/* We hit the Xen hard limit: reprobe. */
		reservation.extent_start = mfn_list;
		reservation.nr_extents = rc;
		ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
		    &reservation);
		PANIC_IF(ret != rc);
		hard_limit = current_pages + rc - driver_pages;
		goto out;
	}

	for (i = 0; i < nr_pages; i++) {
		page = balloon_retrieve();
		PANIC_IF(page == NULL);

		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
		PANIC_IF(phys_to_machine_mapping_valid(pfn));

		/* Update P->M and M->P tables. */
		PFNTOMFN(pfn) = mfn_list[i];
		xen_machphys_update(mfn_list[i], pfn);

		/* Relinquish the page back to the allocator. */
		ClearPageReserved(page);
		set_page_count(page, 1);
		vm_page_free(page);
	}

	current_pages += nr_pages;
	totalram_pages = current_pages;

 out:
	balloon_unlock(flags);

	free(mfn_list, M_DEVBUF);

	return 0;
}
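/*
 * For reference, the reservation pattern above reduced to a single extent
 * (a sketch, not compiled; the same shape appears in the disabled
 * dealloc_pte_fn near the end of this file).  extent_start names the MFN
 * array, nr_extents its length, and the hypercall returns how many extents
 * Xen actually transferred, which is why increase_reservation() must
 * handle rc < nr_pages.
 */
#if 0
static long
reserve_one_page_example(unsigned long *mfn)
{
	struct xen_memory_reservation one = {
		.extent_start = mfn,	/* in/out: machine frame number */
		.nr_extents = 1,
		.extent_order = 0,	/* order 0 == a single page */
		.domid = DOMID_SELF
	};

	return HYPERVISOR_memory_op(XENMEM_increase_reservation, &one);
}
#endif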
static int
decrease_reservation(unsigned long nr_pages)
{
	unsigned long *mfn_list, pfn, i, flags;
	vm_page_t page;
	int need_sleep = 0;
	int ret;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	if (nr_pages > (PAGE_SIZE / sizeof(unsigned long)))
		nr_pages = PAGE_SIZE / sizeof(unsigned long);

	mfn_list = (unsigned long *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
	if (mfn_list == NULL)
		return ENOMEM;

	for (i = 0; i < nr_pages; i++) {
		int color = 0;
		if ((page = vm_page_alloc(NULL, color++,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
			nr_pages = i;
			need_sleep = 1;
			break;
		}
		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
		mfn_list[i] = PFNTOMFN(pfn);
	}

	balloon_lock(flags);

	/* No more mappings: invalidate P2M and add to balloon. */
	for (i = 0; i < nr_pages; i++) {
		pfn = MFNTOPFN(mfn_list[i]);
		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
	}

	reservation.extent_start = mfn_list;
	reservation.nr_extents = nr_pages;
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	PANIC_IF(ret != nr_pages);

	current_pages -= nr_pages;
	totalram_pages = current_pages;

	balloon_unlock(flags);

	free(mfn_list, M_DEVBUF);

	return need_sleep;
}

/*
 * We avoid multiple worker processes conflicting via the balloon mutex.
 * We may of course race updates of the target counts (which are protected
 * by the balloon lock), or with changes to the Xen hard limit, but we will
 * recover from these in time.
 */
static void
balloon_process(void *unused)
{
	int need_sleep = 0;
	long credit;

	for (;;) {
		do {
			credit = current_target() - current_pages;
			if (credit > 0)
				need_sleep = (increase_reservation(credit) != 0);
			if (credit < 0)
				need_sleep = (decrease_reservation(-credit) != 0);

#ifndef CONFIG_PREEMPT
			if (need_resched())
				schedule();
#endif
		} while ((credit != 0) && !need_sleep);

		/* Schedule more work if there is some still to be done. */
		if (current_target() != current_pages)
			timeout(balloon_alarm, NULL, ticks + HZ);

		/* Sleep until woken by balloon_alarm() or set_new_target(). */
		mtx_lock(&balloon_lock);
		msleep(balloon_process, &balloon_lock, 0, "balloon", 0);
		mtx_unlock(&balloon_lock);
	}
}

/* Resets the Xen limit, sets new target, and kicks off processing. */
static void
set_new_target(unsigned long target)
{
	/* No need for lock. Not read-modify-write updates. */
	hard_limit = ~0UL;
	target_pages = target;
	wakeup(balloon_process);
}

static struct xenbus_watch target_watch =
{
	.node = "memory/target"
};

/* React to a change in the target key */
static void
watch_target(struct xenbus_watch *watch,
    const char **vec, unsigned int len)
{
	unsigned long long new_target;
	int err;

	err = xenbus_scanf(NULL, "memory", "target", "%llu", &new_target);
	if (err != 1) {
		/* This is OK (for domain0 at least) - so just return. */
		return;
	}

	/*
	 * The given memory/target value is in KiB, so it needs converting
	 * to pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
	 */
	set_new_target(new_target >> (PAGE_SHIFT - 10));
}

static void
balloon_init_watcher(void *arg)
{
	int err;

	err = register_xenbus_watch(&target_watch);
	if (err)
		printf("Failed to set balloon watcher\n");
}

static void
balloon_init(void *arg)
{
	unsigned long pfn;
	vm_page_t page;

	IPRINTK("Initialising balloon driver.\n");

	if (xen_init() < 0)
		return;

	current_pages = min(xen_start_info->nr_pages, max_pfn);
	target_pages = current_pages;
	balloon_low = 0;
	balloon_high = 0;
	driver_pages = 0UL;
	hard_limit = ~0UL;

	/*
	 * No timer initialisation is needed here: balloon_alarm is armed
	 * on demand via timeout(9) from balloon_process().
	 */

	/* Initialise the balloon with excess memory space. */
	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
		balloon_append(page);
	}

	target_watch.callback = watch_target;
}
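/*
 * Worked example for the memory/target conversion in watch_target():
 * with 4 KiB pages (PAGE_SHIFT == 12), a xenstore value of 524288 KiB is
 * shifted right by (PAGE_SHIFT - 10) == 2 bits, giving 131072 pages,
 * i.e. 512 MiB.
 *
 * One plausible way to wire up the two init routines above (a sketch
 * only; the SYSINIT subsystem/order constants are assumptions, not taken
 * from this file, and <sys/kernel.h> would be required):
 */
#if 0
SYSINIT(balloon, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init, NULL);
SYSINIT(balloon_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, balloon_init_watcher, NULL);
#endif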
void
balloon_update_driver_allowance(long delta)
{
	unsigned long flags;

	balloon_lock(flags);
	driver_pages += delta;
	balloon_unlock(flags);
}

#if 0
static int dealloc_pte_fn(
	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
{
	unsigned long mfn = pte_mfn(*pte);
	int ret;
	struct xen_memory_reservation reservation = {
		.extent_start = &mfn,
		.nr_extents = 1,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
	PANIC_IF(ret != 1);
	return 0;
}
#endif

vm_page_t
balloon_alloc_empty_page_range(unsigned long nr_pages)
{
	unsigned long flags, pfn;
	vm_page_t pages;
	int i;
	unsigned long *mfn_list;
	struct xen_memory_reservation reservation = {
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4);
	if (pages == NULL)
		return NULL;

	mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);

	balloon_lock(flags);

	/* Record each page's MFN, then invalidate its P2M entry. */
	for (i = 0; i < nr_pages; i++) {
		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
		mfn_list[i] = PFNTOMFN(pfn);
		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
	}

	/* Hand the whole batch back to Xen in a single hypercall. */
	reservation.extent_start = mfn_list;
	reservation.nr_extents = nr_pages;
	PANIC_IF(HYPERVISOR_memory_op(XENMEM_decrease_reservation,
	    &reservation) != nr_pages);

	current_pages -= nr_pages;

	balloon_unlock(flags);

	free(mfn_list, M_DEVBUF);

	wakeup(balloon_process);

	return pages;
}

void
balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
{
	unsigned long i, flags;

	balloon_lock(flags);
	for (i = 0; i < nr_pages; i++)
		balloon_append(page + i);
	balloon_unlock(flags);

	wakeup(balloon_process);
}

#endif
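/*
 * Usage sketch for the empty-page-range API (illustrative only; the
 * caller name is hypothetical): a driver borrows a pseudo-physical range
 * whose machine frames have been returned to Xen, maps foreign frames
 * into it, and hands it back when done, after which balloon_process()
 * reclaims real memory in the background.
 */
#if 0
static void
borrow_empty_range_example(void)
{
	vm_page_t pages = balloon_alloc_empty_page_range(16);

	if (pages != NULL) {
		/* ... map foreign frames into the range ... */
		balloon_dealloc_empty_page_range(pages, 16);
	}
}
#endif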