/*-
 * Copyright (c)2006,2007,2008,2009 YAMAMOTO Takashi,
 * Copyright (c) 2013 EMC Corp.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * From:
 *	$NetBSD: vmem_impl.h,v 1.2 2013/01/29 21:26:24 para Exp $
 *	$NetBSD: subr_vmem.c,v 1.83 2013/03/06 11:20:10 yamt Exp $
 */

/*
 * reference:
 * -	Magazines and Vmem: Extending the Slab Allocator
 *	to Many CPUs and Arbitrary Resources
 *	http://www.usenix.org/event/usenix01/bonwick.html
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/callout.h>
#include <sys/hash.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/smp.h>
#include <sys/condvar.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/vmem.h>

#include "opt_vm.h"

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/vm_pageout.h>

#define	VMEM_MAXORDER		(sizeof(vmem_size_t) * NBBY)

#define	VMEM_HASHSIZE_MIN	16
#define	VMEM_HASHSIZE_MAX	131072

#define	VMEM_QCACHE_IDX_MAX	16

#define	VMEM_FITMASK	(M_BESTFIT | M_FIRSTFIT)

#define	VMEM_FLAGS						\
    (M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM | M_BESTFIT | M_FIRSTFIT)

#define	BT_FLAGS	(M_NOWAIT | M_WAITOK | M_USE_RESERVE | M_NOVM)
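/*
 * Note: VMEM_FLAGS is the full set of malloc flags callers may pass to the
 * allocation routines below, while BT_FLAGS drops the fit-strategy bits
 * because boundary-tag allocations are satisfied by UMA rather than by a
 * vmem fit (see bt_fill()).
 */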
#define	QC_NAME_MAX	16

/*
 * Data structures private to vmem.
 */
MALLOC_DEFINE(M_VMEM, "vmem", "vmem internal structures");

typedef struct vmem_btag bt_t;

TAILQ_HEAD(vmem_seglist, vmem_btag);
LIST_HEAD(vmem_freelist, vmem_btag);
LIST_HEAD(vmem_hashlist, vmem_btag);

struct qcache {
	uma_zone_t	qc_cache;
	vmem_t 		*qc_vmem;
	vmem_size_t	qc_size;
	char		qc_name[QC_NAME_MAX];
};
typedef struct qcache qcache_t;
#define	QC_POOL_TO_QCACHE(pool)	((qcache_t *)(pool->pr_qcache))

#define	VMEM_NAME_MAX	16

/* vmem arena */
struct vmem {
	struct mtx_padalign	vm_lock;
	struct cv		vm_cv;
	char			vm_name[VMEM_NAME_MAX+1];
	LIST_ENTRY(vmem)	vm_alllist;
	struct vmem_hashlist	vm_hash0[VMEM_HASHSIZE_MIN];
	struct vmem_freelist	vm_freelist[VMEM_MAXORDER];
	struct vmem_seglist	vm_seglist;
	struct vmem_hashlist	*vm_hashlist;
	vmem_size_t		vm_hashsize;

	/* Constant after init */
	vmem_size_t		vm_qcache_max;
	vmem_size_t		vm_quantum_mask;
	vmem_size_t		vm_import_quantum;
	int			vm_quantum_shift;

	/* Written on alloc/free */
	LIST_HEAD(, vmem_btag)	vm_freetags;
	int			vm_nfreetags;
	int			vm_nbusytag;
	vmem_size_t		vm_inuse;
	vmem_size_t		vm_size;

	/* Used on import. */
	vmem_import_t		*vm_importfn;
	vmem_release_t		*vm_releasefn;
	void			*vm_arg;

	/* Space exhaustion callback. */
	vmem_reclaim_t		*vm_reclaimfn;

	/* quantum cache */
	qcache_t		vm_qcache[VMEM_QCACHE_IDX_MAX];
};

/* boundary tag */
struct vmem_btag {
	TAILQ_ENTRY(vmem_btag) bt_seglist;
	union {
		LIST_ENTRY(vmem_btag) u_freelist; /* BT_TYPE_FREE */
		LIST_ENTRY(vmem_btag) u_hashlist; /* BT_TYPE_BUSY */
	} bt_u;
#define	bt_hashlist	bt_u.u_hashlist
#define	bt_freelist	bt_u.u_freelist
	vmem_addr_t	bt_start;
	vmem_size_t	bt_size;
	int		bt_type;
};

#define	BT_TYPE_SPAN		1	/* Allocated from importfn */
#define	BT_TYPE_SPAN_STATIC	2	/* vmem_add() or create. */
#define	BT_TYPE_FREE		3	/* Available space. */
#define	BT_TYPE_BUSY		4	/* Used space. */
#define	BT_ISSPAN_P(bt)	((bt)->bt_type <= BT_TYPE_SPAN_STATIC)

#define	BT_END(bt)	((bt)->bt_start + (bt)->bt_size - 1)

#if defined(DIAGNOSTIC)
static int enable_vmem_check = 1;
SYSCTL_INT(_debug, OID_AUTO, vmem_check, CTLFLAG_RW,
    &enable_vmem_check, 0, "Enable vmem check");
static void vmem_check(vmem_t *);
#endif

static struct callout	vmem_periodic_ch;
static int		vmem_periodic_interval;
static struct task	vmem_periodic_wk;

static struct mtx_padalign vmem_list_lock;
static LIST_HEAD(, vmem) vmem_list = LIST_HEAD_INITIALIZER(vmem_list);

/* ---- misc */
#define	VMEM_CONDVAR_INIT(vm, wchan)	cv_init(&vm->vm_cv, wchan)
#define	VMEM_CONDVAR_DESTROY(vm)	cv_destroy(&vm->vm_cv)
#define	VMEM_CONDVAR_WAIT(vm)		cv_wait(&vm->vm_cv, &vm->vm_lock)
#define	VMEM_CONDVAR_BROADCAST(vm)	cv_broadcast(&vm->vm_cv)

#define	VMEM_LOCK(vm)		mtx_lock(&vm->vm_lock)
#define	VMEM_TRYLOCK(vm)	mtx_trylock(&vm->vm_lock)
#define	VMEM_UNLOCK(vm)		mtx_unlock(&vm->vm_lock)
#define	VMEM_LOCK_INIT(vm, name) mtx_init(&vm->vm_lock, (name), NULL, MTX_DEF)
#define	VMEM_LOCK_DESTROY(vm)	mtx_destroy(&vm->vm_lock)
#define	VMEM_ASSERT_LOCKED(vm)	mtx_assert(&vm->vm_lock, MA_OWNED);

#define	VMEM_ALIGNUP(addr, align)	(-(-(addr) & -(align)))

#define	VMEM_CROSS_P(addr1, addr2, boundary) \
	((((addr1) ^ (addr2)) & -(boundary)) != 0)

#define	ORDER2SIZE(order)	((vmem_size_t)1 << (order))
#define	SIZE2ORDER(size)	((int)flsl(size) - 1)
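/*
 * For example, VMEM_ALIGNUP(13, 8) computes -(-13 & -8) = -(-16) = 16,
 * rounding 13 up to the next multiple of 8.  VMEM_CROSS_P(0x3ff0, 0x4005,
 * 0x1000) is true because the two addresses differ above bit 11 and thus
 * fall in different 4KB "nocross" windows.  SIZE2ORDER(5) is flsl(5) - 1 = 2,
 * the inverse of ORDER2SIZE for exact powers of two.
 */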
/*
 * Maximum number of boundary tags that may be required to satisfy an
 * allocation.  Two may be required to import.  Another two may be
 * required to clip edges.
 */
#define	BT_MAXALLOC	4

/*
 * Max free limits the number of locally cached boundary tags.  We
 * just want to avoid hitting the zone allocator for every call.
 */
#define	BT_MAXFREE	(BT_MAXALLOC * 8)
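/*
 * The worst case above can be read off the code below: vmem_add1() uses one
 * tag for the BT_TYPE_SPAN marker and one for the initial BT_TYPE_FREE
 * segment when importing, and vmem_clip() may allocate one more tag for each
 * trimmed edge of the chosen free segment, for a total of four.
 */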
/* Allocator for boundary tags. */
static uma_zone_t vmem_bt_zone;

/* boot time arena storage. */
static struct vmem kernel_arena_storage;
static struct vmem kmem_arena_storage;
static struct vmem buffer_arena_storage;
static struct vmem transient_arena_storage;
vmem_t *kernel_arena = &kernel_arena_storage;
vmem_t *kmem_arena = &kmem_arena_storage;
vmem_t *buffer_arena = &buffer_arena_storage;
vmem_t *transient_arena = &transient_arena_storage;

#ifdef DEBUG_MEMGUARD
static struct vmem memguard_arena_storage;
vmem_t *memguard_arena = &memguard_arena_storage;
#endif

/*
 * Fill the vmem's boundary tag cache.  We guarantee that boundary tag
 * allocation will not fail once bt_fill() passes.  To do so we cache
 * at least the maximum possible tag allocations in the arena.
 */
static int
bt_fill(vmem_t *vm, int flags)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);

	/*
	 * Only allow the kmem arena to dip into reserve tags.  It is the
	 * vmem where new tags come from.
	 */
	flags &= BT_FLAGS;
	if (vm != kmem_arena)
		flags &= ~M_USE_RESERVE;

	/*
	 * Loop until we meet the reserve.  To minimize the lock shuffle
	 * and prevent simultaneous fills we first try a NOWAIT regardless
	 * of the caller's flags.  Specify M_NOVM so we don't recurse while
	 * holding a vmem lock.
	 */
	while (vm->vm_nfreetags < BT_MAXALLOC) {
		bt = uma_zalloc(vmem_bt_zone,
		    (flags & M_USE_RESERVE) | M_NOWAIT | M_NOVM);
		if (bt == NULL) {
			VMEM_UNLOCK(vm);
			bt = uma_zalloc(vmem_bt_zone, flags);
			VMEM_LOCK(vm);
			if (bt == NULL && (flags & M_NOWAIT) != 0)
				break;
		}
		LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
		vm->vm_nfreetags++;
	}

	if (vm->vm_nfreetags < BT_MAXALLOC)
		return ENOMEM;

	return 0;
}

/*
 * Pop a tag off of the freetag stack.
 */
static bt_t *
bt_alloc(vmem_t *vm)
{
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	bt = LIST_FIRST(&vm->vm_freetags);
	MPASS(bt != NULL);
	LIST_REMOVE(bt, bt_freelist);
	vm->vm_nfreetags--;

	return bt;
}

/*
 * Trim the per-vmem free list.  Returns with the lock released to
 * avoid allocator recursions.
 */
static void
bt_freetrim(vmem_t *vm, int freelimit)
{
	LIST_HEAD(, vmem_btag) freetags;
	bt_t *bt;

	LIST_INIT(&freetags);
	VMEM_ASSERT_LOCKED(vm);
	while (vm->vm_nfreetags > freelimit) {
		bt = LIST_FIRST(&vm->vm_freetags);
		LIST_REMOVE(bt, bt_freelist);
		vm->vm_nfreetags--;
		LIST_INSERT_HEAD(&freetags, bt, bt_freelist);
	}
	VMEM_UNLOCK(vm);
	while ((bt = LIST_FIRST(&freetags)) != NULL) {
		LIST_REMOVE(bt, bt_freelist);
		uma_zfree(vmem_bt_zone, bt);
	}
}

static inline void
bt_free(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(LIST_FIRST(&vm->vm_freetags) != bt);
	LIST_INSERT_HEAD(&vm->vm_freetags, bt, bt_freelist);
	vm->vm_nfreetags++;
}

/*
 * freelist[0] ... [1, 1]
 * freelist[1] ... [2, 3]
 * freelist[2] ... [4, 7]
 * freelist[3] ... [8, 15]
 *  :
 * freelist[n] ... [(1 << n), (1 << (n + 1)) - 1]
 *  :
 */

static struct vmem_freelist *
bt_freehead_tofree(vmem_t *vm, vmem_size_t size)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	const int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}

/*
 * bt_freehead_toalloc: return the freelist for the given size and allocation
 * strategy.
 *
 * For M_FIRSTFIT, return the list in which any blocks are large enough
 * for the requested size.  Otherwise, return the list which can have blocks
 * large enough for the requested size.
 */
static struct vmem_freelist *
bt_freehead_toalloc(vmem_t *vm, vmem_size_t size, int strat)
{
	const vmem_size_t qsize = size >> vm->vm_quantum_shift;
	int idx = SIZE2ORDER(qsize);

	MPASS(size != 0 && qsize != 0);
	MPASS((size & vm->vm_quantum_mask) == 0);

	if (strat == M_FIRSTFIT && ORDER2SIZE(idx) != qsize) {
		idx++;
		/* check too large request? */
	}
	MPASS(idx >= 0);
	MPASS(idx < VMEM_MAXORDER);

	return &vm->vm_freelist[idx];
}
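/*
 * For example, in an arena with a 4KB quantum (vm_quantum_shift == 12), a
 * 20KB request has qsize 5: bt_freehead_tofree() files a free segment of
 * that size on freelist[2] (sizes 4-7 quanta), while an M_FIRSTFIT
 * allocation starts searching at freelist[3], whose segments are all at
 * least 8 quanta and therefore guaranteed to be large enough.
 */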
/* ---- boundary tag hash */

static struct vmem_hashlist *
bt_hashhead(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	unsigned int hash;

	hash = hash32_buf(&addr, sizeof(addr), 0);
	list = &vm->vm_hashlist[hash % vm->vm_hashsize];

	return list;
}

static bt_t *
bt_lookupbusy(vmem_t *vm, vmem_addr_t addr)
{
	struct vmem_hashlist *list;
	bt_t *bt;

	VMEM_ASSERT_LOCKED(vm);
	list = bt_hashhead(vm, addr);
	LIST_FOREACH(bt, list, bt_hashlist) {
		if (bt->bt_start == addr) {
			break;
		}
	}

	return bt;
}

static void
bt_rembusy(vmem_t *vm, bt_t *bt)
{

	VMEM_ASSERT_LOCKED(vm);
	MPASS(vm->vm_nbusytag > 0);
	vm->vm_inuse -= bt->bt_size;
	vm->vm_nbusytag--;
	LIST_REMOVE(bt, bt_hashlist);
}

static void
bt_insbusy(vmem_t *vm, bt_t *bt)
{
	struct vmem_hashlist *list;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_BUSY);

	list = bt_hashhead(vm, bt->bt_start);
	LIST_INSERT_HEAD(list, bt, bt_hashlist);
	vm->vm_nbusytag++;
	vm->vm_inuse += bt->bt_size;
}

/* ---- boundary tag list */

static void
bt_remseg(vmem_t *vm, bt_t *bt)
{

	TAILQ_REMOVE(&vm->vm_seglist, bt, bt_seglist);
	bt_free(vm, bt);
}

static void
bt_insseg(vmem_t *vm, bt_t *bt, bt_t *prev)
{

	TAILQ_INSERT_AFTER(&vm->vm_seglist, prev, bt, bt_seglist);
}

static void
bt_insseg_tail(vmem_t *vm, bt_t *bt)
{

	TAILQ_INSERT_TAIL(&vm->vm_seglist, bt, bt_seglist);
}

static void
bt_remfree(vmem_t *vm, bt_t *bt)
{

	MPASS(bt->bt_type == BT_TYPE_FREE);

	LIST_REMOVE(bt, bt_freelist);
}

static void
bt_insfree(vmem_t *vm, bt_t *bt)
{
	struct vmem_freelist *list;

	list = bt_freehead_tofree(vm, bt->bt_size);
	LIST_INSERT_HEAD(list, bt, bt_freelist);
}

/* ---- vmem internal functions */

/*
 * Import from the arena into the quantum cache in UMA.
 */
static int
qc_import(void *arg, void **store, int cnt, int flags)
{
	qcache_t *qc;
	vmem_addr_t addr;
	int i;

	qc = arg;
	flags |= M_BESTFIT;
	for (i = 0; i < cnt; i++) {
		if (vmem_xalloc(qc->qc_vmem, qc->qc_size, 0, 0, 0,
		    VMEM_ADDR_MIN, VMEM_ADDR_MAX, flags, &addr) != 0)
			break;
		store[i] = (void *)addr;
		/* Only guarantee one allocation. */
		flags &= ~M_WAITOK;
		flags |= M_NOWAIT;
	}
	return i;
}

/*
 * Release memory from the UMA cache to the arena.
 */
static void
qc_release(void *arg, void **store, int cnt)
{
	qcache_t *qc;
	int i;

	qc = arg;
	for (i = 0; i < cnt; i++)
		vmem_xfree(qc->qc_vmem, (vmem_addr_t)store[i], qc->qc_size);
}

static void
qc_init(vmem_t *vm, vmem_size_t qcache_max)
{
	qcache_t *qc;
	vmem_size_t size;
	int qcache_idx_max;
	int i;

	MPASS((qcache_max & vm->vm_quantum_mask) == 0);
	qcache_idx_max = MIN(qcache_max >> vm->vm_quantum_shift,
	    VMEM_QCACHE_IDX_MAX);
	vm->vm_qcache_max = qcache_idx_max << vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++) {
		qc = &vm->vm_qcache[i];
		size = (i + 1) << vm->vm_quantum_shift;
		snprintf(qc->qc_name, sizeof(qc->qc_name), "%s-%zu",
		    vm->vm_name, size);
		qc->qc_vmem = vm;
		qc->qc_size = size;
		qc->qc_cache = uma_zcache_create(qc->qc_name, size,
		    NULL, NULL, NULL, NULL, qc_import, qc_release, qc,
		    UMA_ZONE_VM);
		MPASS(qc->qc_cache);
	}
}
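/*
 * For example, an arena named "foo" with a 4KB quantum and a qcache_max of
 * 32KB gets eight cache zones, "foo-4096" through "foo-32768", one per
 * multiple of the quantum; vmem_alloc() and vmem_free() route requests of
 * at most vm_qcache_max bytes through these UMA caches and fall back to
 * vmem_xalloc()/vmem_xfree() for anything larger.
 */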
static void
qc_destroy(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		uma_zdestroy(vm->vm_qcache[i].qc_cache);
}

static void
qc_drain(vmem_t *vm)
{
	int qcache_idx_max;
	int i;

	qcache_idx_max = vm->vm_qcache_max >> vm->vm_quantum_shift;
	for (i = 0; i < qcache_idx_max; i++)
		zone_drain(vm->vm_qcache[i].qc_cache);
}

#ifndef UMA_MD_SMALL_ALLOC

static struct mtx_padalign vmem_bt_lock;

/*
 * vmem_bt_alloc:  Allocate a new page of boundary tags.
 *
 * On architectures with uma_small_alloc there is no recursion; no address
 * space need be allocated to allocate boundary tags.  For the others, we
 * must handle recursion.  Boundary tags are necessary to allocate new
 * boundary tags.
 *
 * UMA guarantees that enough tags are held in reserve to allocate a new
 * page of kva.  We dip into this reserve by specifying M_USE_RESERVE only
 * when allocating the page to hold new boundary tags.  In this way the
 * reserve is automatically filled by the allocation that uses the reserve.
 *
 * We still have to guarantee that the new tags are allocated atomically since
 * many threads may try concurrently.  The bt_lock provides this guarantee.
 * We convert WAITOK allocations to NOWAIT and then handle the blocking here
 * on failure.  It's ok to return NULL for a WAITOK allocation as UMA will
 * loop again after checking to see if we lost the race to allocate.
 *
 * There is a small race between vmem_bt_alloc() returning the page and the
 * zone lock being acquired to add the page to the zone.  For WAITOK
 * allocations we just pause briefly.  NOWAIT may experience a transient
 * failure.  To alleviate this we permit a small number of simultaneous
 * fills to proceed concurrently so NOWAIT is less likely to fail unless
 * we are really out of KVA.
 */
static void *
vmem_bt_alloc(uma_zone_t zone, int bytes, uint8_t *pflag, int wait)
{
	vmem_addr_t addr;

	*pflag = UMA_SLAB_KMEM;

	/*
	 * Single thread boundary tag allocation so that the address space
	 * and memory are added in one atomic operation.
	 */
	mtx_lock(&vmem_bt_lock);
	if (vmem_xalloc(kmem_arena, bytes, 0, 0, 0, VMEM_ADDR_MIN,
	    VMEM_ADDR_MAX, M_NOWAIT | M_NOVM | M_USE_RESERVE | M_BESTFIT,
	    &addr) == 0) {
		if (kmem_back(kmem_object, addr, bytes,
		    M_NOWAIT | M_USE_RESERVE) == 0) {
			mtx_unlock(&vmem_bt_lock);
			return ((void *)addr);
		}
		vmem_xfree(kmem_arena, addr, bytes);
		mtx_unlock(&vmem_bt_lock);
		/*
		 * Out of memory, not address space.  This may not even be
		 * possible due to M_USE_RESERVE page allocation.
		 */
		if (wait & M_WAITOK)
			VM_WAIT;
		return (NULL);
	}
	mtx_unlock(&vmem_bt_lock);
	/*
	 * We're either out of address space or lost a fill race.
	 */
	if (wait & M_WAITOK)
		pause("btalloc", 1);

	return (NULL);
}
#endif
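/*
 * To summarize the bootstrap chain: boundary tags come from vmem_bt_zone;
 * on platforms without uma_small_alloc, that zone's pages are themselves
 * carved out of kmem_arena by vmem_bt_alloc() above, which is why bt_fill()
 * only lets kmem_arena pass M_USE_RESERVE and why vmem_startup() below
 * pre-seeds the zone with a reserve of tags.
 */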
void
vmem_startup(void)
{

	mtx_init(&vmem_list_lock, "vmem list lock", NULL, MTX_DEF);
	vmem_bt_zone = uma_zcreate("vmem btag",
	    sizeof(struct vmem_btag), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_VM);
#ifndef UMA_MD_SMALL_ALLOC
	mtx_init(&vmem_bt_lock, "btag lock", NULL, MTX_DEF);
	uma_prealloc(vmem_bt_zone, BT_MAXALLOC);
	/*
	 * Reserve enough tags to allocate new tags.  We allow multiple
	 * CPUs to attempt to allocate new tags concurrently to limit
	 * false restarts in UMA.
	 */
	uma_zone_reserve(vmem_bt_zone, BT_MAXALLOC * (mp_ncpus + 1) / 2);
	uma_zone_set_allocf(vmem_bt_zone, vmem_bt_alloc);
#endif
}

/* ---- rehash */

static int
vmem_rehash(vmem_t *vm, vmem_size_t newhashsize)
{
	bt_t *bt;
	int i;
	struct vmem_hashlist *newhashlist;
	struct vmem_hashlist *oldhashlist;
	vmem_size_t oldhashsize;

	MPASS(newhashsize > 0);

	newhashlist = malloc(sizeof(struct vmem_hashlist) * newhashsize,
	    M_VMEM, M_NOWAIT);
	if (newhashlist == NULL)
		return ENOMEM;
	for (i = 0; i < newhashsize; i++) {
		LIST_INIT(&newhashlist[i]);
	}

	VMEM_LOCK(vm);
	oldhashlist = vm->vm_hashlist;
	oldhashsize = vm->vm_hashsize;
	vm->vm_hashlist = newhashlist;
	vm->vm_hashsize = newhashsize;
	if (oldhashlist == NULL) {
		VMEM_UNLOCK(vm);
		return 0;
	}
	for (i = 0; i < oldhashsize; i++) {
		while ((bt = LIST_FIRST(&oldhashlist[i])) != NULL) {
			bt_rembusy(vm, bt);
			bt_insbusy(vm, bt);
		}
	}
	VMEM_UNLOCK(vm);

	if (oldhashlist != vm->vm_hash0) {
		free(oldhashlist, M_VMEM);
	}

	return 0;
}

static void
vmem_periodic_kick(void *dummy)
{

	taskqueue_enqueue(taskqueue_thread, &vmem_periodic_wk);
}

static void
vmem_periodic(void *unused, int pending)
{
	vmem_t *vm;
	vmem_size_t desired;
	vmem_size_t current;

	mtx_lock(&vmem_list_lock);
	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
#ifdef DIAGNOSTIC
		/* Convenient time to verify vmem state. */
		if (enable_vmem_check == 1) {
			VMEM_LOCK(vm);
			vmem_check(vm);
			VMEM_UNLOCK(vm);
		}
#endif
		desired = 1 << flsl(vm->vm_nbusytag);
		desired = MIN(MAX(desired, VMEM_HASHSIZE_MIN),
		    VMEM_HASHSIZE_MAX);
		current = vm->vm_hashsize;

		/* Grow in powers of two.  Shrink less aggressively. */
		if (desired >= current * 2 || desired * 4 <= current)
			vmem_rehash(vm, desired);
	}
	mtx_unlock(&vmem_list_lock);

	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}
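/*
 * For example, an arena holding 3000 busy tags yields desired =
 * 1 << flsl(3000) = 4096 buckets; the table is only rehashed once that
 * target is at least double, or at most a quarter of, the current size,
 * which keeps the periodic task from thrashing the hash table.
 */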
static void
vmem_start_callout(void *unused)
{

	TASK_INIT(&vmem_periodic_wk, 0, vmem_periodic, NULL);
	vmem_periodic_interval = hz * 10;
	callout_init(&vmem_periodic_ch, CALLOUT_MPSAFE);
	callout_reset(&vmem_periodic_ch, vmem_periodic_interval,
	    vmem_periodic_kick, NULL);
}
SYSINIT(vfs, SI_SUB_CONFIGURE, SI_ORDER_ANY, vmem_start_callout, NULL);

static void
vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type)
{
	bt_t *btspan;
	bt_t *btfree;

	MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC);
	MPASS((size & vm->vm_quantum_mask) == 0);

	btspan = bt_alloc(vm);
	btspan->bt_type = type;
	btspan->bt_start = addr;
	btspan->bt_size = size;
	bt_insseg_tail(vm, btspan);

	btfree = bt_alloc(vm);
	btfree->bt_type = BT_TYPE_FREE;
	btfree->bt_start = addr;
	btfree->bt_size = size;
	bt_insseg(vm, btfree, btspan);
	bt_insfree(vm, btfree);

	vm->vm_size += size;
}

static void
vmem_destroy1(vmem_t *vm)
{
	bt_t *bt;

	/*
	 * Drain per-cpu quantum caches.
	 */
	qc_destroy(vm);

	/*
	 * The vmem should now only contain empty segments.
	 */
	VMEM_LOCK(vm);
	MPASS(vm->vm_nbusytag == 0);

	while ((bt = TAILQ_FIRST(&vm->vm_seglist)) != NULL)
		bt_remseg(vm, bt);

	if (vm->vm_hashlist != NULL && vm->vm_hashlist != vm->vm_hash0)
		free(vm->vm_hashlist, M_VMEM);

	bt_freetrim(vm, 0);

	VMEM_CONDVAR_DESTROY(vm);
	VMEM_LOCK_DESTROY(vm);
	free(vm, M_VMEM);
}

static int
vmem_import(vmem_t *vm, vmem_size_t size, vmem_size_t align, int flags)
{
	vmem_addr_t addr;
	int error;

	if (vm->vm_importfn == NULL)
		return EINVAL;

	/*
	 * To make sure we get a span that meets the alignment we double it
	 * and add the size to the tail.  This slightly overestimates.
	 */
	if (align != vm->vm_quantum_mask + 1)
		size = (align * 2) + size;
	size = roundup(size, vm->vm_import_quantum);

	/*
	 * Hide MAXALLOC tags so we're guaranteed to be able to add this
	 * span and the tag we want to allocate from it.
	 */
	MPASS(vm->vm_nfreetags >= BT_MAXALLOC);
	vm->vm_nfreetags -= BT_MAXALLOC;
	VMEM_UNLOCK(vm);
	error = (vm->vm_importfn)(vm->vm_arg, size, flags, &addr);
	VMEM_LOCK(vm);
	vm->vm_nfreetags += BT_MAXALLOC;
	if (error)
		return ENOMEM;

	vmem_add1(vm, addr, size, BT_TYPE_SPAN);

	return 0;
}
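/*
 * As an illustration of the overestimate above: a 100KB request with 64KB
 * alignment from an arena with a 4KB quantum imports at least
 * 2 * 64KB + 100KB = 228KB (then rounded up to vm_import_quantum), enough
 * to guarantee that a 64KB-aligned 100KB range exists somewhere inside
 * whatever span the parent hands back.
 */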
/*
 * vmem_fit: check if a bt can satisfy the given restrictions.
 *
 * It is the caller's responsibility to ensure the region is big enough
 * before calling us.
 */
static int
vmem_fit(const bt_t *bt, vmem_size_t size, vmem_size_t align,
    vmem_size_t phase, vmem_size_t nocross, vmem_addr_t minaddr,
    vmem_addr_t maxaddr, vmem_addr_t *addrp)
{
	vmem_addr_t start;
	vmem_addr_t end;

	MPASS(size > 0);
	MPASS(bt->bt_size >= size); /* caller's responsibility */

	/*
	 * XXX assumption: vmem_addr_t and vmem_size_t are
	 * unsigned integers of the same size.
	 */

	start = bt->bt_start;
	if (start < minaddr) {
		start = minaddr;
	}
	end = BT_END(bt);
	if (end > maxaddr)
		end = maxaddr;
	if (start > end)
		return (ENOMEM);

	start = VMEM_ALIGNUP(start - phase, align) + phase;
	if (start < bt->bt_start)
		start += align;
	if (VMEM_CROSS_P(start, start + size - 1, nocross)) {
		MPASS(align < nocross);
		start = VMEM_ALIGNUP(start - phase, nocross) + phase;
	}
	if (start <= end && end - start >= size - 1) {
		MPASS((start & (align - 1)) == phase);
		MPASS(!VMEM_CROSS_P(start, start + size - 1, nocross));
		MPASS(minaddr <= start);
		MPASS(maxaddr == 0 || start + size - 1 <= maxaddr);
		MPASS(bt->bt_start <= start);
		MPASS(BT_END(bt) - start >= size - 1);
		*addrp = start;

		return (0);
	}
	return (ENOMEM);
}

/*
 * vmem_clip:  Trim the boundary tag edges to the requested start and size.
 */
static void
vmem_clip(vmem_t *vm, bt_t *bt, vmem_addr_t start, vmem_size_t size)
{
	bt_t *btnew;
	bt_t *btprev;

	VMEM_ASSERT_LOCKED(vm);
	MPASS(bt->bt_type == BT_TYPE_FREE);
	MPASS(bt->bt_size >= size);
	bt_remfree(vm, bt);
	if (bt->bt_start != start) {
		btprev = bt_alloc(vm);
		btprev->bt_type = BT_TYPE_FREE;
		btprev->bt_start = bt->bt_start;
		btprev->bt_size = start - bt->bt_start;
		bt->bt_start = start;
		bt->bt_size -= btprev->bt_size;
		bt_insfree(vm, btprev);
		bt_insseg(vm, btprev,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
	}
	MPASS(bt->bt_start == start);
	if (bt->bt_size != size && bt->bt_size - size > vm->vm_quantum_mask) {
		/* split */
		btnew = bt_alloc(vm);
		btnew->bt_type = BT_TYPE_BUSY;
		btnew->bt_start = bt->bt_start;
		btnew->bt_size = size;
		bt->bt_start = bt->bt_start + size;
		bt->bt_size -= size;
		bt_insfree(vm, bt);
		bt_insseg(vm, btnew,
		    TAILQ_PREV(bt, vmem_seglist, bt_seglist));
		bt_insbusy(vm, btnew);
		bt = btnew;
	} else {
		bt->bt_type = BT_TYPE_BUSY;
		bt_insbusy(vm, bt);
	}
	MPASS(bt->bt_size >= size);
	bt->bt_type = BT_TYPE_BUSY;
}

/* ---- vmem API */

void
vmem_set_import(vmem_t *vm, vmem_import_t *importfn,
    vmem_release_t *releasefn, void *arg, vmem_size_t import_quantum)
{

	VMEM_LOCK(vm);
	vm->vm_importfn = importfn;
	vm->vm_releasefn = releasefn;
	vm->vm_arg = arg;
	vm->vm_import_quantum = import_quantum;
	VMEM_UNLOCK(vm);
}

void
vmem_set_reclaim(vmem_t *vm, vmem_reclaim_t *reclaimfn)
{

	VMEM_LOCK(vm);
	vm->vm_reclaimfn = reclaimfn;
	VMEM_UNLOCK(vm);
}
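/*
 * A typical client wires these up right after creating a sub-arena; the
 * sketch below is illustrative only ("foo" is a made-up consumer) and the
 * callback shapes simply mirror how vm_importfn and vm_releasefn are
 * invoked in vmem_import() and vmem_xfree():
 *
 *	foo_arena = vmem_create("foo", 0, 0, PAGE_SIZE, 0, M_WAITOK);
 *	vmem_set_import(foo_arena, foo_import, foo_release, NULL,
 *	    FOO_IMPORT_QUANTUM);
 *
 * where foo_import(void *arg, vmem_size_t size, int flags,
 * vmem_addr_t *addrp) obtains a span of at least 'size' from the backing
 * resource, and foo_release(void *arg, vmem_addr_t addr, vmem_size_t size)
 * returns it once the whole span is free again.
 */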
/*
 * vmem_init: Initializes vmem arena.
 */
vmem_t *
vmem_init(vmem_t *vm, const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{
	int i;

	MPASS(quantum > 0);

	bzero(vm, sizeof(*vm));

	VMEM_CONDVAR_INIT(vm, name);
	VMEM_LOCK_INIT(vm, name);
	vm->vm_nfreetags = 0;
	LIST_INIT(&vm->vm_freetags);
	strlcpy(vm->vm_name, name, sizeof(vm->vm_name));
	vm->vm_quantum_mask = quantum - 1;
	vm->vm_quantum_shift = SIZE2ORDER(quantum);
	MPASS(ORDER2SIZE(vm->vm_quantum_shift) == quantum);
	vm->vm_nbusytag = 0;
	vm->vm_size = 0;
	vm->vm_inuse = 0;
	qc_init(vm, qcache_max);

	TAILQ_INIT(&vm->vm_seglist);
	for (i = 0; i < VMEM_MAXORDER; i++) {
		LIST_INIT(&vm->vm_freelist[i]);
	}
	memset(&vm->vm_hash0, 0, sizeof(vm->vm_hash0));
	vm->vm_hashsize = VMEM_HASHSIZE_MIN;
	vm->vm_hashlist = vm->vm_hash0;

	if (size != 0) {
		if (vmem_add(vm, base, size, flags) != 0) {
			vmem_destroy1(vm);
			return NULL;
		}
	}

	mtx_lock(&vmem_list_lock);
	LIST_INSERT_HEAD(&vmem_list, vm, vm_alllist);
	mtx_unlock(&vmem_list_lock);

	return vm;
}

/*
 * vmem_create: create an arena.
 */
vmem_t *
vmem_create(const char *name, vmem_addr_t base, vmem_size_t size,
    vmem_size_t quantum, vmem_size_t qcache_max, int flags)
{

	vmem_t *vm;

	vm = malloc(sizeof(*vm), M_VMEM, flags & (M_WAITOK|M_NOWAIT));
	if (vm == NULL)
		return (NULL);
	if (vmem_init(vm, name, base, size, quantum, qcache_max,
	    flags) == NULL) {
		free(vm, M_VMEM);
		return (NULL);
	}
	return (vm);
}

void
vmem_destroy(vmem_t *vm)
{

	mtx_lock(&vmem_list_lock);
	LIST_REMOVE(vm, vm_alllist);
	mtx_unlock(&vmem_list_lock);

	vmem_destroy1(vm);
}

vmem_size_t
vmem_roundup_size(vmem_t *vm, vmem_size_t size)
{

	return (size + vm->vm_quantum_mask) & ~vm->vm_quantum_mask;
}
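/*
 * For example, with a 4KB quantum (vm_quantum_mask == 0xfff) a 5000 byte
 * request is rounded up to 8192 bytes; all segment sizes in the arena are
 * kept in whole multiples of the quantum.
 */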
/*
 * vmem_alloc: allocate resource from the arena.
 */
int
vmem_alloc(vmem_t *vm, vmem_size_t size, int flags, vmem_addr_t *addrp)
{
	const int strat __unused = flags & VMEM_FITMASK;
	qcache_t *qc;

	flags &= VMEM_FLAGS;
	MPASS(size > 0);
	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
	if ((flags & M_NOWAIT) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_alloc");

	if (size <= vm->vm_qcache_max) {
		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
		*addrp = (vmem_addr_t)uma_zalloc(qc->qc_cache, flags);
		if (*addrp == 0)
			return (ENOMEM);
		return (0);
	}

	return vmem_xalloc(vm, size, 0, 0, 0, VMEM_ADDR_MIN, VMEM_ADDR_MAX,
	    flags, addrp);
}

int
vmem_xalloc(vmem_t *vm, const vmem_size_t size0, vmem_size_t align,
    const vmem_size_t phase, const vmem_size_t nocross,
    const vmem_addr_t minaddr, const vmem_addr_t maxaddr, int flags,
    vmem_addr_t *addrp)
{
	const vmem_size_t size = vmem_roundup_size(vm, size0);
	struct vmem_freelist *list;
	struct vmem_freelist *first;
	struct vmem_freelist *end;
	vmem_size_t avail;
	bt_t *bt;
	int error;
	int strat;

	flags &= VMEM_FLAGS;
	strat = flags & VMEM_FITMASK;
	MPASS(size0 > 0);
	MPASS(size > 0);
	MPASS(strat == M_BESTFIT || strat == M_FIRSTFIT);
	MPASS((flags & (M_NOWAIT|M_WAITOK)) != (M_NOWAIT|M_WAITOK));
	if ((flags & M_NOWAIT) == 0)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "vmem_xalloc");
	MPASS((align & vm->vm_quantum_mask) == 0);
	MPASS((align & (align - 1)) == 0);
	MPASS((phase & vm->vm_quantum_mask) == 0);
	MPASS((nocross & vm->vm_quantum_mask) == 0);
	MPASS((nocross & (nocross - 1)) == 0);
	MPASS((align == 0 && phase == 0) || phase < align);
	MPASS(nocross == 0 || nocross >= size);
	MPASS(minaddr <= maxaddr);
	MPASS(!VMEM_CROSS_P(phase, phase + size - 1, nocross));

	if (align == 0)
		align = vm->vm_quantum_mask + 1;

	*addrp = 0;
	end = &vm->vm_freelist[VMEM_MAXORDER];
	/*
	 * choose a free block from which we allocate.
	 */
	first = bt_freehead_toalloc(vm, size, strat);
	VMEM_LOCK(vm);
	for (;;) {
		/*
		 * Make sure we have enough tags to complete the
		 * operation.
		 */
		if (vm->vm_nfreetags < BT_MAXALLOC &&
		    bt_fill(vm, flags) != 0) {
			error = ENOMEM;
			break;
		}
		/*
		 * Scan freelists looking for a tag that satisfies the
		 * allocation.  If we're doing BESTFIT we may encounter
		 * sizes below the request.  If we're doing FIRSTFIT we
		 * inspect only the first element from each list.
		 */
		for (list = first; list < end; list++) {
			LIST_FOREACH(bt, list, bt_freelist) {
				if (bt->bt_size >= size) {
					error = vmem_fit(bt, size, align, phase,
					    nocross, minaddr, maxaddr, addrp);
					if (error == 0) {
						vmem_clip(vm, bt, *addrp, size);
						goto out;
					}
				}
				/* FIRST skips to the next list. */
				if (strat == M_FIRSTFIT)
					break;
			}
		}
		/*
		 * Retry if the fast algorithm failed.
		 */
		if (strat == M_FIRSTFIT) {
			strat = M_BESTFIT;
			first = bt_freehead_toalloc(vm, size, strat);
			continue;
		}
		/*
		 * XXX it is possible to fail to meet restrictions with the
		 * imported region.  It is up to the user to specify the
		 * import quantum such that it can satisfy any allocation.
		 */
		if (vmem_import(vm, size, align, flags) == 0)
			continue;

		/*
		 * Try to free some space from the quantum cache or reclaim
		 * functions if available.
		 */
		if (vm->vm_qcache_max != 0 || vm->vm_reclaimfn != NULL) {
			avail = vm->vm_size - vm->vm_inuse;
			VMEM_UNLOCK(vm);
			if (vm->vm_qcache_max != 0)
				qc_drain(vm);
			if (vm->vm_reclaimfn != NULL)
				vm->vm_reclaimfn(vm, flags);
			VMEM_LOCK(vm);
			/* If we were successful retry even NOWAIT. */
			if (vm->vm_size - vm->vm_inuse > avail)
				continue;
		}
		if ((flags & M_NOWAIT) != 0) {
			error = ENOMEM;
			break;
		}
		VMEM_CONDVAR_WAIT(vm);
	}
out:
	VMEM_UNLOCK(vm);
	if (error != 0 && (flags & M_NOWAIT) == 0)
		panic("failed to allocate waiting allocation\n");

	return (error);
}
/*
 * vmem_free: free the resource to the arena.
 */
void
vmem_free(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	qcache_t *qc;
	MPASS(size > 0);

	if (size <= vm->vm_qcache_max) {
		qc = &vm->vm_qcache[(size - 1) >> vm->vm_quantum_shift];
		uma_zfree(qc->qc_cache, (void *)addr);
	} else
		vmem_xfree(vm, addr, size);
}

void
vmem_xfree(vmem_t *vm, vmem_addr_t addr, vmem_size_t size)
{
	bt_t *bt;
	bt_t *t;

	MPASS(size > 0);

	VMEM_LOCK(vm);
	bt = bt_lookupbusy(vm, addr);
	MPASS(bt != NULL);
	MPASS(bt->bt_start == addr);
	MPASS(bt->bt_size == vmem_roundup_size(vm, size) ||
	    bt->bt_size - vmem_roundup_size(vm, size) <= vm->vm_quantum_mask);
	MPASS(bt->bt_type == BT_TYPE_BUSY);
	bt_rembusy(vm, bt);
	bt->bt_type = BT_TYPE_FREE;

	/* coalesce */
	t = TAILQ_NEXT(bt, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		MPASS(BT_END(bt) < t->bt_start);	/* YYY */
		bt->bt_size += t->bt_size;
		bt_remfree(vm, t);
		bt_remseg(vm, t);
	}
	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	if (t != NULL && t->bt_type == BT_TYPE_FREE) {
		MPASS(BT_END(t) < bt->bt_start);	/* YYY */
		bt->bt_size += t->bt_size;
		bt->bt_start = t->bt_start;
		bt_remfree(vm, t);
		bt_remseg(vm, t);
	}

	t = TAILQ_PREV(bt, vmem_seglist, bt_seglist);
	MPASS(t != NULL);
	MPASS(BT_ISSPAN_P(t) || t->bt_type == BT_TYPE_BUSY);
	if (vm->vm_releasefn != NULL && t->bt_type == BT_TYPE_SPAN &&
	    t->bt_size == bt->bt_size) {
		vmem_addr_t spanaddr;
		vmem_size_t spansize;

		MPASS(t->bt_start == bt->bt_start);
		spanaddr = bt->bt_start;
		spansize = bt->bt_size;
		bt_remseg(vm, bt);
		bt_remseg(vm, t);
		vm->vm_size -= spansize;
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
		(*vm->vm_releasefn)(vm->vm_arg, spanaddr, spansize);
	} else {
		bt_insfree(vm, bt);
		VMEM_CONDVAR_BROADCAST(vm);
		bt_freetrim(vm, BT_MAXFREE);
	}
}

/*
 * vmem_add: add a span of memory to the arena.
 */
int
vmem_add(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int flags)
{
	int error;

	error = 0;
	flags &= VMEM_FLAGS;
	VMEM_LOCK(vm);
	if (vm->vm_nfreetags >= BT_MAXALLOC || bt_fill(vm, flags) == 0)
		vmem_add1(vm, addr, size, BT_TYPE_SPAN_STATIC);
	else
		error = ENOMEM;
	VMEM_UNLOCK(vm);

	return (error);
}

/*
 * vmem_size: information about the arena's size.
 */
vmem_size_t
vmem_size(vmem_t *vm, int typemask)
{

	switch (typemask) {
	case VMEM_ALLOC:
		return vm->vm_inuse;
	case VMEM_FREE:
		return vm->vm_size - vm->vm_inuse;
	case VMEM_FREE|VMEM_ALLOC:
		return vm->vm_size;
	default:
		panic("vmem_size");
	}
}
/* ---- debug */

#if defined(DDB) || defined(DIAGNOSTIC)

static void bt_dump(const bt_t *, int (*)(const char *, ...)
    __printflike(1, 2));

static const char *
bt_type_string(int type)
{

	switch (type) {
	case BT_TYPE_BUSY:
		return "busy";
	case BT_TYPE_FREE:
		return "free";
	case BT_TYPE_SPAN:
		return "span";
	case BT_TYPE_SPAN_STATIC:
		return "static span";
	default:
		break;
	}
	return "BOGUS";
}

static void
bt_dump(const bt_t *bt, int (*pr)(const char *, ...))
{

	(*pr)("\t%p: %jx %jx, %d(%s)\n",
	    bt, (intmax_t)bt->bt_start, (intmax_t)bt->bt_size,
	    bt->bt_type, bt_type_string(bt->bt_type));
}

static void
vmem_dump(const vmem_t *vm, int (*pr)(const char *, ...) __printflike(1, 2))
{
	const bt_t *bt;
	int i;

	(*pr)("vmem %p '%s'\n", vm, vm->vm_name);
	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		bt_dump(bt, pr);
	}

	for (i = 0; i < VMEM_MAXORDER; i++) {
		const struct vmem_freelist *fl = &vm->vm_freelist[i];

		if (LIST_EMPTY(fl)) {
			continue;
		}

		(*pr)("freelist[%d]\n", i);
		LIST_FOREACH(bt, fl, bt_freelist) {
			bt_dump(bt, pr);
		}
	}
}

#endif /* defined(DDB) || defined(DIAGNOSTIC) */

#if defined(DDB)
static bt_t *
vmem_whatis_lookup(vmem_t *vm, vmem_addr_t addr)
{
	bt_t *bt;

	TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) {
		if (BT_ISSPAN_P(bt)) {
			continue;
		}
		if (bt->bt_start <= addr && addr <= BT_END(bt)) {
			return bt;
		}
	}

	return NULL;
}

void
vmem_whatis(vmem_addr_t addr, int (*pr)(const char *, ...))
{
	vmem_t *vm;

	LIST_FOREACH(vm, &vmem_list, vm_alllist) {
		bt_t *bt;

		bt = vmem_whatis_lookup(vm, addr);
		if (bt == NULL) {
			continue;
		}
		(*pr)("%p is %p+%zu in VMEM '%s' (%s)\n",
		    (void *)addr, (void *)bt->bt_start,
		    (vmem_size_t)(addr - bt->bt_start), vm->vm_name,
		    (bt->bt_type == BT_TYPE_BUSY) ? "allocated" : "free");
	}
}
"allocated" : "free"); 1417 } 1418 } 1419 1420 void 1421 vmem_printall(const char *modif, int (*pr)(const char *, ...)) 1422 { 1423 const vmem_t *vm; 1424 1425 LIST_FOREACH(vm, &vmem_list, vm_alllist) { 1426 vmem_dump(vm, pr); 1427 } 1428 } 1429 1430 void 1431 vmem_print(vmem_addr_t addr, const char *modif, int (*pr)(const char *, ...)) 1432 { 1433 const vmem_t *vm = (const void *)addr; 1434 1435 vmem_dump(vm, pr); 1436 } 1437 #endif /* defined(DDB) */ 1438 1439 #define vmem_printf printf 1440 1441 #if defined(DIAGNOSTIC) 1442 1443 static bool 1444 vmem_check_sanity(vmem_t *vm) 1445 { 1446 const bt_t *bt, *bt2; 1447 1448 MPASS(vm != NULL); 1449 1450 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1451 if (bt->bt_start > BT_END(bt)) { 1452 printf("corrupted tag\n"); 1453 bt_dump(bt, vmem_printf); 1454 return false; 1455 } 1456 } 1457 TAILQ_FOREACH(bt, &vm->vm_seglist, bt_seglist) { 1458 TAILQ_FOREACH(bt2, &vm->vm_seglist, bt_seglist) { 1459 if (bt == bt2) { 1460 continue; 1461 } 1462 if (BT_ISSPAN_P(bt) != BT_ISSPAN_P(bt2)) { 1463 continue; 1464 } 1465 if (bt->bt_start <= BT_END(bt2) && 1466 bt2->bt_start <= BT_END(bt)) { 1467 printf("overwrapped tags\n"); 1468 bt_dump(bt, vmem_printf); 1469 bt_dump(bt2, vmem_printf); 1470 return false; 1471 } 1472 } 1473 } 1474 1475 return true; 1476 } 1477 1478 static void 1479 vmem_check(vmem_t *vm) 1480 { 1481 1482 if (!vmem_check_sanity(vm)) { 1483 panic("insanity vmem %p", vm); 1484 } 1485 } 1486 1487 #endif /* defined(DIAGNOSTIC) */ 1488