/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Big Theory Statement for the virtual memory allocator.
 *
 * For a more complete description of the main ideas, see:
 *
 *	Jeff Bonwick and Jonathan Adams,
 *
 *	Magazines and vmem: Extending the Slab Allocator to Many CPUs and
 *	Arbitrary Resources.
 *
 *	Proceedings of the 2001 Usenix Conference.
 *	Available as http://www.usenix.org/event/usenix01/bonwick.html
 *
 *
 * 1. General Concepts
 * -------------------
 *
 * 1.1 Overview
 * ------------
 * We divide the kernel address space into a number of logically distinct
 * pieces, or *arenas*: text, data, heap, stack, and so on. Within these
 * arenas we often subdivide further; for example, we use heap addresses
 * not only for the kernel heap (kmem_alloc() space), but also for DVMA,
 * bp_mapin(), /dev/kmem, and even some device mappings like the TOD chip.
 * The kernel address space, therefore, is most accurately described as
 * a tree of arenas in which each node of the tree *imports* some subset
 * of its parent. The virtual memory allocator manages these arenas and
 * supports their natural hierarchical structure.
 *
 * 1.2 Arenas
 * ----------
 * An arena is nothing more than a set of integers. These integers most
 * commonly represent virtual addresses, but in fact they can represent
 * anything at all. For example, we could use an arena containing the
 * integers minpid through maxpid to allocate process IDs. vmem_create()
 * and vmem_destroy() create and destroy vmem arenas. In order to
 * differentiate between arenas used for addresses and arenas used for
 * identifiers, the VMC_IDENTIFIER flag is passed to vmem_create(). This
 * prevents identifier exhaustion from being diagnosed as general memory
 * failure.
 *
 * 1.3 Spans
 * ---------
 * We represent the integers in an arena as a collection of *spans*, or
 * contiguous ranges of integers. For example, the kernel heap consists
 * of just one span: [kernelheap, ekernelheap). Spans can be added to an
 * arena in two ways: explicitly, by vmem_add(), or implicitly, by
 * importing, as described in Section 1.5 below.
 *
 * 1.4 Segments
 * ------------
 * Spans are subdivided into *segments*, each of which is either allocated
 * or free. A segment, like a span, is a contiguous range of integers.
 * Each allocated segment [addr, addr + size) represents exactly one
 * vmem_alloc(size) that returned addr. Free segments represent the space
 * between allocated segments. If two free segments are adjacent, we
 * coalesce them into one larger segment; that is, if segments [a, b) and
 * [b, c) are both free, we merge them into a single segment [a, c).
 * The segments within a span are linked together in increasing-address order
 * so we can easily determine whether coalescing is possible.
 *
 * Segments never cross span boundaries. When all segments within
 * an imported span become free, we return the span to its source.
 *
 * 1.5 Imported Memory
 * -------------------
 * As mentioned in the overview, some arenas are logical subsets of
 * other arenas. For example, kmem_va_arena (a virtual address cache
 * that satisfies most kmem_slab_create() requests) is just a subset
 * of heap_arena (the kernel heap) that provides caching for the most
 * common slab sizes. When kmem_va_arena runs out of virtual memory,
 * it *imports* more from the heap; we say that heap_arena is the
 * *vmem source* for kmem_va_arena. vmem_create() allows you to
 * specify any existing vmem arena as the source for your new arena.
 * Topologically, since every arena is a child of at most one source,
 * the set of all arenas forms a collection of trees.
 *
 * 1.6 Constrained Allocations
 * ---------------------------
 * Some vmem clients are quite picky about the kind of address they want.
 * For example, the DVMA code may need an address that is at a particular
 * phase with respect to some alignment (to get good cache coloring), or
 * that lies within certain limits (the addressable range of a device), or
 * that doesn't cross some boundary (a DMA counter restriction) -- or all
 * of the above. vmem_xalloc() allows the client to specify any or all of
 * these constraints.
 *
 * 1.7 The Vmem Quantum
 * --------------------
 * Every arena has a notion of 'quantum', specified at vmem_create() time,
 * that defines the arena's minimum unit of currency. Most commonly the
 * quantum is either 1 or PAGESIZE, but any power of 2 is legal.
 * All vmem allocations are guaranteed to be quantum-aligned.
 *
 * 1.8 Quantum Caching
 * -------------------
 * A vmem arena may be so hot (frequently used) that the scalability of vmem
 * allocation is a significant concern. We address this by allowing the most
 * common allocation sizes to be serviced by the kernel memory allocator,
 * which provides low-latency per-cpu caching. The qcache_max argument to
 * vmem_create() specifies the largest allocation size to cache.
 *
 * 1.9 Relationship to Kernel Memory Allocator
 * -------------------------------------------
 * Every kmem cache has a vmem arena as its slab supplier. The kernel memory
 * allocator uses vmem_alloc() and vmem_free() to create and destroy slabs.
 *
 *
 * 2. Implementation
 * -----------------
 *
 * 2.1 Segment lists and markers
 * -----------------------------
 * The segment structure (vmem_seg_t) contains two doubly-linked lists.
 *
 * The arena list (vs_anext/vs_aprev) links all segments in the arena.
 * In addition to the allocated and free segments, the arena contains
 * special marker segments at span boundaries. Span markers simplify
 * coalescing and importing logic by making it easy to tell both when
 * we're at a span boundary (so we don't coalesce across it), and when
 * a span is completely free (its neighbors will both be span markers).
 *
 * Imported spans will have vs_import set.
 *
 * The next-of-kin list (vs_knext/vs_kprev) links segments of the same type:
 * (1) for allocated segments, vs_knext is the hash chain linkage;
 * (2) for free segments, vs_knext is the freelist linkage;
 * (3) for span marker segments, vs_knext is the next span marker.
 *
 * 2.2 Allocation hashing
 * ----------------------
 * We maintain a hash table of all allocated segments, hashed by address.
 * This allows vmem_free() to discover the target segment in constant time.
 * vmem_update() periodically resizes hash tables to keep hash chains short.
 *
 * 2.3 Freelist management
 * -----------------------
 * We maintain power-of-2 freelists for free segments, i.e. free segments
 * of size 2^n through 2^(n+1)-1 reside in vmp->vm_freelist[n]. To ensure
 * constant-time allocation, vmem_xalloc() looks not in the first freelist
 * that *might* satisfy the allocation, but in the first freelist that
 * *definitely* satisfies the allocation (unless VM_BESTFIT is specified,
 * or all larger freelists are empty). For example, a 1000-byte allocation
 * will be satisfied not from the 512..1023-byte freelist, whose members
 * *might* contain a 1000-byte segment, but from a 1024-byte or larger
 * freelist, the first member of which will *definitely* satisfy the
 * allocation. This ensures that vmem_xalloc() works in constant time.
 *
 * We maintain a bit map to determine quickly which freelists are non-empty.
 * vmp->vm_freemap & (1 << n) is non-zero iff vmp->vm_freelist[n] is non-empty.
 *
 * The different freelists are linked together into one large freelist,
 * with the freelist heads serving as markers. Freelist markers simplify
 * the maintenance of vm_freemap by making it easy to tell when we're taking
 * the last member of a freelist (both of its neighbors will be markers).
 *
 * 2.4 Vmem Locking
 * ----------------
 * For simplicity, all arena state is protected by a per-arena lock.
 * For very hot arenas, use quantum caching for scalability.
 *
 * 2.5 Vmem Population
 * -------------------
 * Any internal vmem routine that might need to allocate new segment
 * structures must prepare in advance by calling vmem_populate(), which
 * will preallocate enough vmem_seg_t's to get it through the entire
 * operation without dropping the arena lock.
 *
 * 2.6 Auditing
 * ------------
 * If KMF_AUDIT is set in kmem_flags, we audit vmem allocations as well.
 * Since virtual addresses cannot be scribbled on, there is no equivalent
 * in vmem to redzone checking, deadbeef, or other kmem debugging features.
 * Moreover, we do not audit frees because segment coalescing destroys the
 * association between an address and its segment structure. Auditing is
 * thus intended primarily to keep track of who's consuming the arena.
 * Debugging support could certainly be extended in the future if it proves
 * necessary, but we do so much live checking via the allocation hash table
 * that even non-DEBUG systems get quite a bit of sanity checking already.
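 *
 * As a purely illustrative example of Sections 1.2 and 1.7, an identifier
 * arena for process IDs might be created and used roughly as follows.
 * The names pid_arena, minpid, and maxpid are hypothetical, not symbols
 * defined in this file; note that an arena's base must be non-zero,
 * since a span starting at NULL cannot be added:
 *
 *	pid_arena = vmem_create("pid",
 *	    (void *)minpid,			(base: first valid ID, != 0)
 *	    maxpid - minpid + 1,		(size: number of IDs)
 *	    1,					(quantum: a single integer)
 *	    NULL, NULL, NULL, 0,		(no source, no quantum cache)
 *	    VM_SLEEP | VMC_IDENTIFIER);
 *
 *	pid = (pid_t)(uintptr_t)vmem_alloc(pid_arena, 1,
 *	    VM_SLEEP | VM_NEXTFIT);		(cycle through the ID space)
 *	...
 *	vmem_free(pid_arena, (void *)(uintptr_t)pid, 1);
 *
 * VMC_IDENTIFIER marks the arena as holding identifiers rather than
 * addresses (Section 1.2), and VM_NEXTFIT cycles through the ID space
 * so that values are reused as late as possible.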
 */

#include <sys/vmem_impl.h>
#include <sys/kmem.h>
#include <sys/kstat.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/panic.h>

#define	VMEM_INITIAL		10	/* early vmem arenas */
#define	VMEM_SEG_INITIAL	200	/* early segments */

/*
 * Adding a new span to an arena requires two segment structures: one to
 * represent the span, and one to represent the free segment it contains.
 */
#define	VMEM_SEGS_PER_SPAN_CREATE	2

/*
 * Allocating a piece of an existing segment requires 0-2 segment structures
 * depending on how much of the segment we're allocating.
 *
 * To allocate the entire segment, no new segment structures are needed; we
 * simply move the existing segment structure from the freelist to the
 * allocation hash table.
 *
 * To allocate a piece from the left or right end of the segment, we must
 * split the segment into two pieces (allocated part and remainder), so we
 * need one new segment structure to represent the remainder.
 *
 * To allocate from the middle of a segment, we need two new segment
 * structures to represent the remainders on either side of the allocated
 * part.
 */
#define	VMEM_SEGS_PER_EXACT_ALLOC	0
#define	VMEM_SEGS_PER_LEFT_ALLOC	1
#define	VMEM_SEGS_PER_RIGHT_ALLOC	1
#define	VMEM_SEGS_PER_MIDDLE_ALLOC	2

/*
 * vmem_populate() preallocates segment structures for vmem to do its work.
 * It must preallocate enough for the worst case, which is when we must import
 * a new span and then allocate from the middle of it.
 */
#define	VMEM_SEGS_PER_ALLOC_MAX		\
	(VMEM_SEGS_PER_SPAN_CREATE + VMEM_SEGS_PER_MIDDLE_ALLOC)

/*
 * The segment structures themselves are allocated from vmem_seg_arena, so
 * we have a recursion problem when vmem_seg_arena needs to populate itself.
 * We address this by working out the maximum number of segment structures
 * this act will require, and multiplying by the maximum number of threads
 * that we'll allow to do it simultaneously.
 *
 * The worst-case segment consumption to populate vmem_seg_arena is as
 * follows (depicted as a stack trace to indicate why events are occurring):
 *
 * (In order to lower the fragmentation in the heap_arena, we specify a
 * minimum import size for the vmem_metadata_arena which is the same size
 * as the kmem_va quantum cache allocations. This causes the worst-case
 * allocation from the vmem_metadata_arena to be 3 segments.)
 *
 * vmem_alloc(vmem_seg_arena)		-> 2 segs (span create + exact alloc)
 *  segkmem_alloc(vmem_metadata_arena)
 *   vmem_alloc(vmem_metadata_arena)	-> 3 segs (span create + left alloc)
 *    vmem_alloc(heap_arena)		-> 1 seg (left alloc)
 *   page_create()
 *   hat_memload()
 *    kmem_cache_alloc()
 *     kmem_slab_create()
 *      vmem_alloc(hat_memload_arena)	-> 2 segs (span create + exact alloc)
 *       segkmem_alloc(heap_arena)
 *        vmem_alloc(heap_arena)	-> 1 seg (left alloc)
 *        page_create()
 *        hat_memload()		-> (hat layer won't recurse further)
 *
 * The worst-case consumption for each arena is 3 segment structures.
 * Of course, a 3-seg reserve could easily be blown by multiple threads.
 * Therefore, we serialize all allocations from vmem_seg_arena (which is OK
 * because they're rare).
We cannot allow a non-blocking allocation to get 292 * tied up behind a blocking allocation, however, so we use separate locks 293 * for VM_SLEEP and VM_NOSLEEP allocations. In addition, if the system is 294 * panicking then we must keep enough resources for panic_thread to do its 295 * work. Thus we have at most three threads trying to allocate from 296 * vmem_seg_arena, and each thread consumes at most three segment structures, 297 * so we must maintain a 9-seg reserve. 298 */ 299 #define VMEM_POPULATE_RESERVE 9 300 301 /* 302 * vmem_populate() ensures that each arena has VMEM_MINFREE seg structures 303 * so that it can satisfy the worst-case allocation *and* participate in 304 * worst-case allocation from vmem_seg_arena. 305 */ 306 #define VMEM_MINFREE (VMEM_POPULATE_RESERVE + VMEM_SEGS_PER_ALLOC_MAX) 307 308 static vmem_t vmem0[VMEM_INITIAL]; 309 static vmem_t *vmem_populator[VMEM_INITIAL]; 310 static uint32_t vmem_id; 311 static uint32_t vmem_populators; 312 static vmem_seg_t vmem_seg0[VMEM_SEG_INITIAL]; 313 static vmem_seg_t *vmem_segfree; 314 static kmutex_t vmem_list_lock; 315 static kmutex_t vmem_segfree_lock; 316 static kmutex_t vmem_sleep_lock; 317 static kmutex_t vmem_nosleep_lock; 318 static kmutex_t vmem_panic_lock; 319 static vmem_t *vmem_list; 320 static vmem_t *vmem_metadata_arena; 321 static vmem_t *vmem_seg_arena; 322 static vmem_t *vmem_hash_arena; 323 static vmem_t *vmem_vmem_arena; 324 static long vmem_update_interval = 15; /* vmem_update() every 15 seconds */ 325 uint32_t vmem_mtbf; /* mean time between failures [default: off] */ 326 size_t vmem_seg_size = sizeof (vmem_seg_t); 327 328 static vmem_kstat_t vmem_kstat_template = { 329 { "mem_inuse", KSTAT_DATA_UINT64 }, 330 { "mem_import", KSTAT_DATA_UINT64 }, 331 { "mem_total", KSTAT_DATA_UINT64 }, 332 { "vmem_source", KSTAT_DATA_UINT32 }, 333 { "alloc", KSTAT_DATA_UINT64 }, 334 { "free", KSTAT_DATA_UINT64 }, 335 { "wait", KSTAT_DATA_UINT64 }, 336 { "fail", KSTAT_DATA_UINT64 }, 337 { "lookup", KSTAT_DATA_UINT64 }, 338 { "search", KSTAT_DATA_UINT64 }, 339 { "populate_wait", KSTAT_DATA_UINT64 }, 340 { "populate_fail", KSTAT_DATA_UINT64 }, 341 { "contains", KSTAT_DATA_UINT64 }, 342 { "contains_search", KSTAT_DATA_UINT64 }, 343 }; 344 345 /* 346 * Insert/delete from arena list (type 'a') or next-of-kin list (type 'k'). 347 */ 348 #define VMEM_INSERT(vprev, vsp, type) \ 349 { \ 350 vmem_seg_t *vnext = (vprev)->vs_##type##next; \ 351 (vsp)->vs_##type##next = (vnext); \ 352 (vsp)->vs_##type##prev = (vprev); \ 353 (vprev)->vs_##type##next = (vsp); \ 354 (vnext)->vs_##type##prev = (vsp); \ 355 } 356 357 #define VMEM_DELETE(vsp, type) \ 358 { \ 359 vmem_seg_t *vprev = (vsp)->vs_##type##prev; \ 360 vmem_seg_t *vnext = (vsp)->vs_##type##next; \ 361 (vprev)->vs_##type##next = (vnext); \ 362 (vnext)->vs_##type##prev = (vprev); \ 363 } 364 365 /* 366 * Get a vmem_seg_t from the global segfree list. 367 */ 368 static vmem_seg_t * 369 vmem_getseg_global(void) 370 { 371 vmem_seg_t *vsp; 372 373 mutex_enter(&vmem_segfree_lock); 374 if ((vsp = vmem_segfree) != NULL) 375 vmem_segfree = vsp->vs_knext; 376 mutex_exit(&vmem_segfree_lock); 377 378 return (vsp); 379 } 380 381 /* 382 * Put a vmem_seg_t on the global segfree list. 383 */ 384 static void 385 vmem_putseg_global(vmem_seg_t *vsp) 386 { 387 mutex_enter(&vmem_segfree_lock); 388 vsp->vs_knext = vmem_segfree; 389 vmem_segfree = vsp; 390 mutex_exit(&vmem_segfree_lock); 391 } 392 393 /* 394 * Get a vmem_seg_t from vmp's segfree list. 
395 */ 396 static vmem_seg_t * 397 vmem_getseg(vmem_t *vmp) 398 { 399 vmem_seg_t *vsp; 400 401 ASSERT(vmp->vm_nsegfree > 0); 402 403 vsp = vmp->vm_segfree; 404 vmp->vm_segfree = vsp->vs_knext; 405 vmp->vm_nsegfree--; 406 407 return (vsp); 408 } 409 410 /* 411 * Put a vmem_seg_t on vmp's segfree list. 412 */ 413 static void 414 vmem_putseg(vmem_t *vmp, vmem_seg_t *vsp) 415 { 416 vsp->vs_knext = vmp->vm_segfree; 417 vmp->vm_segfree = vsp; 418 vmp->vm_nsegfree++; 419 } 420 421 /* 422 * Add vsp to the appropriate freelist. 423 */ 424 static void 425 vmem_freelist_insert(vmem_t *vmp, vmem_seg_t *vsp) 426 { 427 vmem_seg_t *vprev; 428 429 ASSERT(*VMEM_HASH(vmp, vsp->vs_start) != vsp); 430 431 vprev = (vmem_seg_t *)&vmp->vm_freelist[highbit(VS_SIZE(vsp)) - 1]; 432 vsp->vs_type = VMEM_FREE; 433 vmp->vm_freemap |= VS_SIZE(vprev); 434 VMEM_INSERT(vprev, vsp, k); 435 436 cv_broadcast(&vmp->vm_cv); 437 } 438 439 /* 440 * Take vsp from the freelist. 441 */ 442 static void 443 vmem_freelist_delete(vmem_t *vmp, vmem_seg_t *vsp) 444 { 445 ASSERT(*VMEM_HASH(vmp, vsp->vs_start) != vsp); 446 ASSERT(vsp->vs_type == VMEM_FREE); 447 448 if (vsp->vs_knext->vs_start == 0 && vsp->vs_kprev->vs_start == 0) { 449 /* 450 * The segments on both sides of 'vsp' are freelist heads, 451 * so taking vsp leaves the freelist at vsp->vs_kprev empty. 452 */ 453 ASSERT(vmp->vm_freemap & VS_SIZE(vsp->vs_kprev)); 454 vmp->vm_freemap ^= VS_SIZE(vsp->vs_kprev); 455 } 456 VMEM_DELETE(vsp, k); 457 } 458 459 /* 460 * Add vsp to the allocated-segment hash table and update kstats. 461 */ 462 static void 463 vmem_hash_insert(vmem_t *vmp, vmem_seg_t *vsp) 464 { 465 vmem_seg_t **bucket; 466 467 vsp->vs_type = VMEM_ALLOC; 468 bucket = VMEM_HASH(vmp, vsp->vs_start); 469 vsp->vs_knext = *bucket; 470 *bucket = vsp; 471 472 if (vmem_seg_size == sizeof (vmem_seg_t)) { 473 vsp->vs_depth = (uint8_t)getpcstack(vsp->vs_stack, 474 VMEM_STACK_DEPTH); 475 vsp->vs_thread = curthread; 476 vsp->vs_timestamp = gethrtime(); 477 } else { 478 vsp->vs_depth = 0; 479 } 480 481 vmp->vm_kstat.vk_alloc.value.ui64++; 482 vmp->vm_kstat.vk_mem_inuse.value.ui64 += VS_SIZE(vsp); 483 } 484 485 /* 486 * Remove vsp from the allocated-segment hash table and update kstats. 487 */ 488 static vmem_seg_t * 489 vmem_hash_delete(vmem_t *vmp, uintptr_t addr, size_t size) 490 { 491 vmem_seg_t *vsp, **prev_vspp; 492 493 prev_vspp = VMEM_HASH(vmp, addr); 494 while ((vsp = *prev_vspp) != NULL) { 495 if (vsp->vs_start == addr) { 496 *prev_vspp = vsp->vs_knext; 497 break; 498 } 499 vmp->vm_kstat.vk_lookup.value.ui64++; 500 prev_vspp = &vsp->vs_knext; 501 } 502 503 if (vsp == NULL) 504 panic("vmem_hash_delete(%p, %lx, %lu): bad free", 505 vmp, addr, size); 506 if (VS_SIZE(vsp) != size) 507 panic("vmem_hash_delete(%p, %lx, %lu): wrong size (expect %lu)", 508 vmp, addr, size, VS_SIZE(vsp)); 509 510 vmp->vm_kstat.vk_free.value.ui64++; 511 vmp->vm_kstat.vk_mem_inuse.value.ui64 -= size; 512 513 return (vsp); 514 } 515 516 /* 517 * Create a segment spanning the range [start, end) and add it to the arena. 518 */ 519 static vmem_seg_t * 520 vmem_seg_create(vmem_t *vmp, vmem_seg_t *vprev, uintptr_t start, uintptr_t end) 521 { 522 vmem_seg_t *newseg = vmem_getseg(vmp); 523 524 newseg->vs_start = start; 525 newseg->vs_end = end; 526 newseg->vs_type = 0; 527 newseg->vs_import = 0; 528 529 VMEM_INSERT(vprev, newseg, a); 530 531 return (newseg); 532 } 533 534 /* 535 * Remove segment vsp from the arena. 
536 */ 537 static void 538 vmem_seg_destroy(vmem_t *vmp, vmem_seg_t *vsp) 539 { 540 ASSERT(vsp->vs_type != VMEM_ROTOR); 541 VMEM_DELETE(vsp, a); 542 543 vmem_putseg(vmp, vsp); 544 } 545 546 /* 547 * Add the span [vaddr, vaddr + size) to vmp and update kstats. 548 */ 549 static vmem_seg_t * 550 vmem_span_create(vmem_t *vmp, void *vaddr, size_t size, uint8_t import) 551 { 552 vmem_seg_t *newseg, *span; 553 uintptr_t start = (uintptr_t)vaddr; 554 uintptr_t end = start + size; 555 556 ASSERT(MUTEX_HELD(&vmp->vm_lock)); 557 558 if ((start | end) & (vmp->vm_quantum - 1)) 559 panic("vmem_span_create(%p, %p, %lu): misaligned", 560 vmp, vaddr, size); 561 562 span = vmem_seg_create(vmp, vmp->vm_seg0.vs_aprev, start, end); 563 span->vs_type = VMEM_SPAN; 564 span->vs_import = import; 565 VMEM_INSERT(vmp->vm_seg0.vs_kprev, span, k); 566 567 newseg = vmem_seg_create(vmp, span, start, end); 568 vmem_freelist_insert(vmp, newseg); 569 570 if (import) 571 vmp->vm_kstat.vk_mem_import.value.ui64 += size; 572 vmp->vm_kstat.vk_mem_total.value.ui64 += size; 573 574 return (newseg); 575 } 576 577 /* 578 * Remove span vsp from vmp and update kstats. 579 */ 580 static void 581 vmem_span_destroy(vmem_t *vmp, vmem_seg_t *vsp) 582 { 583 vmem_seg_t *span = vsp->vs_aprev; 584 size_t size = VS_SIZE(vsp); 585 586 ASSERT(MUTEX_HELD(&vmp->vm_lock)); 587 ASSERT(span->vs_type == VMEM_SPAN); 588 589 if (span->vs_import) 590 vmp->vm_kstat.vk_mem_import.value.ui64 -= size; 591 vmp->vm_kstat.vk_mem_total.value.ui64 -= size; 592 593 VMEM_DELETE(span, k); 594 595 vmem_seg_destroy(vmp, vsp); 596 vmem_seg_destroy(vmp, span); 597 } 598 599 /* 600 * Allocate the subrange [addr, addr + size) from segment vsp. 601 * If there are leftovers on either side, place them on the freelist. 602 * Returns a pointer to the segment representing [addr, addr + size). 603 */ 604 static vmem_seg_t * 605 vmem_seg_alloc(vmem_t *vmp, vmem_seg_t *vsp, uintptr_t addr, size_t size) 606 { 607 uintptr_t vs_start = vsp->vs_start; 608 uintptr_t vs_end = vsp->vs_end; 609 size_t vs_size = vs_end - vs_start; 610 size_t realsize = P2ROUNDUP(size, vmp->vm_quantum); 611 uintptr_t addr_end = addr + realsize; 612 613 ASSERT(P2PHASE(vs_start, vmp->vm_quantum) == 0); 614 ASSERT(P2PHASE(addr, vmp->vm_quantum) == 0); 615 ASSERT(vsp->vs_type == VMEM_FREE); 616 ASSERT(addr >= vs_start && addr_end - 1 <= vs_end - 1); 617 ASSERT(addr - 1 <= addr_end - 1); 618 619 /* 620 * If we're allocating from the start of the segment, and the 621 * remainder will be on the same freelist, we can save quite 622 * a bit of work. 623 */ 624 if (P2SAMEHIGHBIT(vs_size, vs_size - realsize) && addr == vs_start) { 625 ASSERT(highbit(vs_size) == highbit(vs_size - realsize)); 626 vsp->vs_start = addr_end; 627 vsp = vmem_seg_create(vmp, vsp->vs_aprev, addr, addr + size); 628 vmem_hash_insert(vmp, vsp); 629 return (vsp); 630 } 631 632 vmem_freelist_delete(vmp, vsp); 633 634 if (vs_end != addr_end) 635 vmem_freelist_insert(vmp, 636 vmem_seg_create(vmp, vsp, addr_end, vs_end)); 637 638 if (vs_start != addr) 639 vmem_freelist_insert(vmp, 640 vmem_seg_create(vmp, vsp->vs_aprev, vs_start, addr)); 641 642 vsp->vs_start = addr; 643 vsp->vs_end = addr + size; 644 645 vmem_hash_insert(vmp, vsp); 646 return (vsp); 647 } 648 649 /* 650 * Returns 1 if we are populating, 0 otherwise. 651 * Call it if we want to prevent recursion from HAT. 
652 */ 653 int 654 vmem_is_populator() 655 { 656 return (mutex_owner(&vmem_sleep_lock) == curthread || 657 mutex_owner(&vmem_nosleep_lock) == curthread || 658 mutex_owner(&vmem_panic_lock) == curthread); 659 } 660 661 /* 662 * Populate vmp's segfree list with VMEM_MINFREE vmem_seg_t structures. 663 */ 664 static int 665 vmem_populate(vmem_t *vmp, int vmflag) 666 { 667 char *p; 668 vmem_seg_t *vsp; 669 ssize_t nseg; 670 size_t size; 671 kmutex_t *lp; 672 int i; 673 674 while (vmp->vm_nsegfree < VMEM_MINFREE && 675 (vsp = vmem_getseg_global()) != NULL) 676 vmem_putseg(vmp, vsp); 677 678 if (vmp->vm_nsegfree >= VMEM_MINFREE) 679 return (1); 680 681 /* 682 * If we're already populating, tap the reserve. 683 */ 684 if (vmem_is_populator()) { 685 ASSERT(vmp->vm_cflags & VMC_POPULATOR); 686 return (1); 687 } 688 689 mutex_exit(&vmp->vm_lock); 690 691 if (panic_thread == curthread) 692 lp = &vmem_panic_lock; 693 else if (vmflag & VM_NOSLEEP) 694 lp = &vmem_nosleep_lock; 695 else 696 lp = &vmem_sleep_lock; 697 698 mutex_enter(lp); 699 700 nseg = VMEM_MINFREE + vmem_populators * VMEM_POPULATE_RESERVE; 701 size = P2ROUNDUP(nseg * vmem_seg_size, vmem_seg_arena->vm_quantum); 702 nseg = size / vmem_seg_size; 703 704 /* 705 * The following vmem_alloc() may need to populate vmem_seg_arena 706 * and all the things it imports from. When doing so, it will tap 707 * each arena's reserve to prevent recursion (see the block comment 708 * above the definition of VMEM_POPULATE_RESERVE). 709 */ 710 p = vmem_alloc(vmem_seg_arena, size, vmflag & VM_KMFLAGS); 711 if (p == NULL) { 712 mutex_exit(lp); 713 mutex_enter(&vmp->vm_lock); 714 vmp->vm_kstat.vk_populate_fail.value.ui64++; 715 return (0); 716 } 717 718 /* 719 * Restock the arenas that may have been depleted during population. 720 */ 721 for (i = 0; i < vmem_populators; i++) { 722 mutex_enter(&vmem_populator[i]->vm_lock); 723 while (vmem_populator[i]->vm_nsegfree < VMEM_POPULATE_RESERVE) 724 vmem_putseg(vmem_populator[i], 725 (vmem_seg_t *)(p + --nseg * vmem_seg_size)); 726 mutex_exit(&vmem_populator[i]->vm_lock); 727 } 728 729 mutex_exit(lp); 730 mutex_enter(&vmp->vm_lock); 731 732 /* 733 * Now take our own segments. 734 */ 735 ASSERT(nseg >= VMEM_MINFREE); 736 while (vmp->vm_nsegfree < VMEM_MINFREE) 737 vmem_putseg(vmp, (vmem_seg_t *)(p + --nseg * vmem_seg_size)); 738 739 /* 740 * Give the remainder to charity. 741 */ 742 while (nseg > 0) 743 vmem_putseg_global((vmem_seg_t *)(p + --nseg * vmem_seg_size)); 744 745 return (1); 746 } 747 748 /* 749 * Advance a walker from its previous position to 'afterme'. 750 * Note: may drop and reacquire vmp->vm_lock. 751 */ 752 static void 753 vmem_advance(vmem_t *vmp, vmem_seg_t *walker, vmem_seg_t *afterme) 754 { 755 vmem_seg_t *vprev = walker->vs_aprev; 756 vmem_seg_t *vnext = walker->vs_anext; 757 vmem_seg_t *vsp = NULL; 758 759 VMEM_DELETE(walker, a); 760 761 if (afterme != NULL) 762 VMEM_INSERT(afterme, walker, a); 763 764 /* 765 * The walker segment's presence may have prevented its neighbors 766 * from coalescing. If so, coalesce them now. 
	 */
	if (vprev->vs_type == VMEM_FREE) {
		if (vnext->vs_type == VMEM_FREE) {
			ASSERT(vprev->vs_end == vnext->vs_start);
			vmem_freelist_delete(vmp, vnext);
			vmem_freelist_delete(vmp, vprev);
			vprev->vs_end = vnext->vs_end;
			vmem_freelist_insert(vmp, vprev);
			vmem_seg_destroy(vmp, vnext);
		}
		vsp = vprev;
	} else if (vnext->vs_type == VMEM_FREE) {
		vsp = vnext;
	}

	/*
	 * vsp could represent a complete imported span,
	 * in which case we must return it to the source.
	 */
	if (vsp != NULL && vsp->vs_aprev->vs_import &&
	    vmp->vm_source_free != NULL &&
	    vsp->vs_aprev->vs_type == VMEM_SPAN &&
	    vsp->vs_anext->vs_type == VMEM_SPAN) {
		void *vaddr = (void *)vsp->vs_start;
		size_t size = VS_SIZE(vsp);
		ASSERT(size == VS_SIZE(vsp->vs_aprev));
		vmem_freelist_delete(vmp, vsp);
		vmem_span_destroy(vmp, vsp);
		mutex_exit(&vmp->vm_lock);
		vmp->vm_source_free(vmp->vm_source, vaddr, size);
		mutex_enter(&vmp->vm_lock);
	}
}

/*
 * VM_NEXTFIT allocations deliberately cycle through all virtual addresses
 * in an arena, so that we avoid reusing addresses for as long as possible.
 * This helps to catch use-after-free bugs. It's also the perfect policy
 * for allocating things like process IDs, where we want to cycle through
 * all values in order.
 */
static void *
vmem_nextfit_alloc(vmem_t *vmp, size_t size, int vmflag)
{
	vmem_seg_t *vsp, *rotor;
	uintptr_t addr;
	size_t realsize = P2ROUNDUP(size, vmp->vm_quantum);
	size_t vs_size;

	mutex_enter(&vmp->vm_lock);

	if (vmp->vm_nsegfree < VMEM_MINFREE && !vmem_populate(vmp, vmflag)) {
		mutex_exit(&vmp->vm_lock);
		return (NULL);
	}

	/*
	 * The common case is that the segment right after the rotor is free,
	 * and large enough that extracting 'size' bytes won't change which
	 * freelist it's on. In this case we can avoid a *lot* of work.
	 * Instead of the normal vmem_seg_alloc(), we just advance the start
	 * address of the victim segment. Instead of moving the rotor, we
	 * create the new segment structure *behind the rotor*, which has
	 * the same effect. And finally, we know we don't have to coalesce
	 * the rotor's neighbors because the new segment lies between them.
	 */
	rotor = &vmp->vm_rotor;
	vsp = rotor->vs_anext;
	if (vsp->vs_type == VMEM_FREE && (vs_size = VS_SIZE(vsp)) > realsize &&
	    P2SAMEHIGHBIT(vs_size, vs_size - realsize)) {
		ASSERT(highbit(vs_size) == highbit(vs_size - realsize));
		addr = vsp->vs_start;
		vsp->vs_start = addr + realsize;
		vmem_hash_insert(vmp,
		    vmem_seg_create(vmp, rotor->vs_aprev, addr, addr + size));
		mutex_exit(&vmp->vm_lock);
		return ((void *)addr);
	}

	/*
	 * Starting at the rotor, look for a segment large enough to
	 * satisfy the allocation.
	 */
	for (;;) {
		vmp->vm_kstat.vk_search.value.ui64++;
		if (vsp->vs_type == VMEM_FREE && VS_SIZE(vsp) >= size)
			break;
		vsp = vsp->vs_anext;
		if (vsp == rotor) {
			/*
			 * We've come full circle. One possibility is that
			 * there's actually enough space, but the rotor itself
			 * is preventing the allocation from succeeding because
			 * it's sitting between two free segments. Therefore,
			 * we advance the rotor and see if that liberates a
			 * suitable segment.
863 */ 864 vmem_advance(vmp, rotor, rotor->vs_anext); 865 vsp = rotor->vs_aprev; 866 if (vsp->vs_type == VMEM_FREE && VS_SIZE(vsp) >= size) 867 break; 868 /* 869 * If there's a lower arena we can import from, or it's 870 * a VM_NOSLEEP allocation, let vmem_xalloc() handle it. 871 * Otherwise, wait until another thread frees something. 872 */ 873 if (vmp->vm_source_alloc != NULL || 874 (vmflag & VM_NOSLEEP)) { 875 mutex_exit(&vmp->vm_lock); 876 return (vmem_xalloc(vmp, size, vmp->vm_quantum, 877 0, 0, NULL, NULL, vmflag & VM_KMFLAGS)); 878 } 879 vmp->vm_kstat.vk_wait.value.ui64++; 880 cv_wait(&vmp->vm_cv, &vmp->vm_lock); 881 vsp = rotor->vs_anext; 882 } 883 } 884 885 /* 886 * We found a segment. Extract enough space to satisfy the allocation. 887 */ 888 addr = vsp->vs_start; 889 vsp = vmem_seg_alloc(vmp, vsp, addr, size); 890 ASSERT(vsp->vs_type == VMEM_ALLOC && 891 vsp->vs_start == addr && vsp->vs_end == addr + size); 892 893 /* 894 * Advance the rotor to right after the newly-allocated segment. 895 * That's where the next VM_NEXTFIT allocation will begin searching. 896 */ 897 vmem_advance(vmp, rotor, vsp); 898 mutex_exit(&vmp->vm_lock); 899 return ((void *)addr); 900 } 901 902 /* 903 * Checks if vmp is guaranteed to have a size-byte buffer somewhere on its 904 * freelist. If size is not a power-of-2, it can return a false-negative. 905 * 906 * Used to decide if a newly imported span is superfluous after re-acquiring 907 * the arena lock. 908 */ 909 static int 910 vmem_canalloc(vmem_t *vmp, size_t size) 911 { 912 int hb; 913 int flist = 0; 914 ASSERT(MUTEX_HELD(&vmp->vm_lock)); 915 916 if ((size & (size - 1)) == 0) 917 flist = lowbit(P2ALIGN(vmp->vm_freemap, size)); 918 else if ((hb = highbit(size)) < VMEM_FREELISTS) 919 flist = lowbit(P2ALIGN(vmp->vm_freemap, 1UL << hb)); 920 921 return (flist); 922 } 923 924 /* 925 * Allocate size bytes at offset phase from an align boundary such that the 926 * resulting segment [addr, addr + size) is a subset of [minaddr, maxaddr) 927 * that does not straddle a nocross-aligned boundary. 928 */ 929 void * 930 vmem_xalloc(vmem_t *vmp, size_t size, size_t align_arg, size_t phase, 931 size_t nocross, void *minaddr, void *maxaddr, int vmflag) 932 { 933 vmem_seg_t *vsp; 934 vmem_seg_t *vbest = NULL; 935 uintptr_t addr, taddr, start, end; 936 uintptr_t align = (align_arg != 0) ? 
align_arg : vmp->vm_quantum; 937 void *vaddr, *xvaddr = NULL; 938 size_t xsize; 939 int hb, flist, resv; 940 uint32_t mtbf; 941 942 if ((align | phase | nocross) & (vmp->vm_quantum - 1)) 943 panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " 944 "parameters not vm_quantum aligned", 945 (void *)vmp, size, align_arg, phase, nocross, 946 minaddr, maxaddr, vmflag); 947 948 if (nocross != 0 && 949 (align > nocross || P2ROUNDUP(phase + size, align) > nocross)) 950 panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " 951 "overconstrained allocation", 952 (void *)vmp, size, align_arg, phase, nocross, 953 minaddr, maxaddr, vmflag); 954 955 if (phase >= align || (align & (align - 1)) != 0 || 956 (nocross & (nocross - 1)) != 0) 957 panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): " 958 "parameters inconsistent or invalid", 959 (void *)vmp, size, align_arg, phase, nocross, 960 minaddr, maxaddr, vmflag); 961 962 if ((mtbf = vmem_mtbf | vmp->vm_mtbf) != 0 && gethrtime() % mtbf == 0 && 963 (vmflag & (VM_NOSLEEP | VM_PANIC)) == VM_NOSLEEP) 964 return (NULL); 965 966 mutex_enter(&vmp->vm_lock); 967 for (;;) { 968 if (vmp->vm_nsegfree < VMEM_MINFREE && 969 !vmem_populate(vmp, vmflag)) 970 break; 971 do_alloc: 972 /* 973 * highbit() returns the highest bit + 1, which is exactly 974 * what we want: we want to search the first freelist whose 975 * members are *definitely* large enough to satisfy our 976 * allocation. However, there are certain cases in which we 977 * want to look at the next-smallest freelist (which *might* 978 * be able to satisfy the allocation): 979 * 980 * (1) The size is exactly a power of 2, in which case 981 * the smaller freelist is always big enough; 982 * 983 * (2) All other freelists are empty; 984 * 985 * (3) We're in the highest possible freelist, which is 986 * always empty (e.g. the 4GB freelist on 32-bit systems); 987 * 988 * (4) We're doing a best-fit or first-fit allocation. 989 */ 990 if ((size & (size - 1)) == 0) { 991 flist = lowbit(P2ALIGN(vmp->vm_freemap, size)); 992 } else { 993 hb = highbit(size); 994 if ((vmp->vm_freemap >> hb) == 0 || 995 hb == VMEM_FREELISTS || 996 (vmflag & (VM_BESTFIT | VM_FIRSTFIT))) 997 hb--; 998 flist = lowbit(P2ALIGN(vmp->vm_freemap, 1UL << hb)); 999 } 1000 1001 for (vbest = NULL, vsp = (flist == 0) ? NULL : 1002 vmp->vm_freelist[flist - 1].vs_knext; 1003 vsp != NULL; vsp = vsp->vs_knext) { 1004 vmp->vm_kstat.vk_search.value.ui64++; 1005 if (vsp->vs_start == 0) { 1006 /* 1007 * We're moving up to a larger freelist, 1008 * so if we've already found a candidate, 1009 * the fit can't possibly get any better. 1010 */ 1011 if (vbest != NULL) 1012 break; 1013 /* 1014 * Find the next non-empty freelist. 
1015 */ 1016 flist = lowbit(P2ALIGN(vmp->vm_freemap, 1017 VS_SIZE(vsp))); 1018 if (flist-- == 0) 1019 break; 1020 vsp = (vmem_seg_t *)&vmp->vm_freelist[flist]; 1021 ASSERT(vsp->vs_knext->vs_type == VMEM_FREE); 1022 continue; 1023 } 1024 if (vsp->vs_end - 1 < (uintptr_t)minaddr) 1025 continue; 1026 if (vsp->vs_start > (uintptr_t)maxaddr - 1) 1027 continue; 1028 start = MAX(vsp->vs_start, (uintptr_t)minaddr); 1029 end = MIN(vsp->vs_end - 1, (uintptr_t)maxaddr - 1) + 1; 1030 taddr = P2PHASEUP(start, align, phase); 1031 if (P2CROSS(taddr, taddr + size - 1, nocross)) 1032 taddr += 1033 P2ROUNDUP(P2NPHASE(taddr, nocross), align); 1034 if ((taddr - start) + size > end - start || 1035 (vbest != NULL && VS_SIZE(vsp) >= VS_SIZE(vbest))) 1036 continue; 1037 vbest = vsp; 1038 addr = taddr; 1039 if (!(vmflag & VM_BESTFIT) || VS_SIZE(vbest) == size) 1040 break; 1041 } 1042 if (vbest != NULL) 1043 break; 1044 ASSERT(xvaddr == NULL); 1045 if (size == 0) 1046 panic("vmem_xalloc(): size == 0"); 1047 if (vmp->vm_source_alloc != NULL && nocross == 0 && 1048 minaddr == NULL && maxaddr == NULL) { 1049 size_t aneeded, asize; 1050 size_t aquantum = MAX(vmp->vm_quantum, 1051 vmp->vm_source->vm_quantum); 1052 size_t aphase = phase; 1053 if (align > aquantum) { 1054 aphase = (P2PHASE(phase, aquantum) != 0) ? 1055 align - vmp->vm_quantum : align - aquantum; 1056 ASSERT(aphase >= phase); 1057 } 1058 aneeded = MAX(size + aphase, vmp->vm_min_import); 1059 asize = P2ROUNDUP(aneeded, aquantum); 1060 1061 /* 1062 * Determine how many segment structures we'll consume. 1063 * The calculation must be precise because if we're 1064 * here on behalf of vmem_populate(), we are taking 1065 * segments from a very limited reserve. 1066 */ 1067 if (size == asize && !(vmp->vm_cflags & VMC_XALLOC)) 1068 resv = VMEM_SEGS_PER_SPAN_CREATE + 1069 VMEM_SEGS_PER_EXACT_ALLOC; 1070 else if (phase == 0 && 1071 align <= vmp->vm_source->vm_quantum) 1072 resv = VMEM_SEGS_PER_SPAN_CREATE + 1073 VMEM_SEGS_PER_LEFT_ALLOC; 1074 else 1075 resv = VMEM_SEGS_PER_ALLOC_MAX; 1076 1077 ASSERT(vmp->vm_nsegfree >= resv); 1078 vmp->vm_nsegfree -= resv; /* reserve our segs */ 1079 mutex_exit(&vmp->vm_lock); 1080 if (vmp->vm_cflags & VMC_XALLOC) { 1081 size_t oasize = asize; 1082 vaddr = ((vmem_ximport_t *) 1083 vmp->vm_source_alloc)(vmp->vm_source, 1084 &asize, vmflag & VM_KMFLAGS); 1085 ASSERT(asize >= oasize); 1086 ASSERT(P2PHASE(asize, 1087 vmp->vm_source->vm_quantum) == 0); 1088 } else { 1089 vaddr = vmp->vm_source_alloc(vmp->vm_source, 1090 asize, vmflag & VM_KMFLAGS); 1091 } 1092 mutex_enter(&vmp->vm_lock); 1093 vmp->vm_nsegfree += resv; /* claim reservation */ 1094 aneeded = size + align - vmp->vm_quantum; 1095 aneeded = P2ROUNDUP(aneeded, vmp->vm_quantum); 1096 if (vaddr != NULL) { 1097 /* 1098 * Since we dropped the vmem lock while 1099 * calling the import function, other 1100 * threads could have imported space 1101 * and made our import unnecessary. In 1102 * order to save space, we return 1103 * excess imports immediately. 1104 */ 1105 if (asize > aneeded && 1106 vmp->vm_source_free != NULL && 1107 vmem_canalloc(vmp, aneeded)) { 1108 ASSERT(resv >= 1109 VMEM_SEGS_PER_MIDDLE_ALLOC); 1110 xvaddr = vaddr; 1111 xsize = asize; 1112 goto do_alloc; 1113 } 1114 vbest = vmem_span_create(vmp, vaddr, asize, 1); 1115 addr = P2PHASEUP(vbest->vs_start, align, phase); 1116 break; 1117 } else if (vmem_canalloc(vmp, aneeded)) { 1118 /* 1119 * Our import failed, but another thread 1120 * added sufficient free memory to the arena 1121 * to satisfy our request. 
Go back and grab it.
				 */
				ASSERT(resv >= VMEM_SEGS_PER_MIDDLE_ALLOC);
				goto do_alloc;
			}
		}

		/*
		 * If the requestor chooses to fail the allocation attempt
		 * rather than reap, wait, and retry, get out of the loop.
		 */
		if (vmflag & VM_ABORT)
			break;
		mutex_exit(&vmp->vm_lock);
		if (vmp->vm_cflags & VMC_IDENTIFIER)
			kmem_reap_idspace();
		else
			kmem_reap();
		mutex_enter(&vmp->vm_lock);
		if (vmflag & VM_NOSLEEP)
			break;
		vmp->vm_kstat.vk_wait.value.ui64++;
		cv_wait(&vmp->vm_cv, &vmp->vm_lock);
	}
	if (vbest != NULL) {
		ASSERT(vbest->vs_type == VMEM_FREE);
		ASSERT(vbest->vs_knext != vbest);
		(void) vmem_seg_alloc(vmp, vbest, addr, size);
		mutex_exit(&vmp->vm_lock);
		if (xvaddr)
			vmp->vm_source_free(vmp->vm_source, xvaddr, xsize);
		ASSERT(P2PHASE(addr, align) == phase);
		ASSERT(!P2CROSS(addr, addr + size - 1, nocross));
		ASSERT(addr >= (uintptr_t)minaddr);
		ASSERT(addr + size - 1 <= (uintptr_t)maxaddr - 1);
		return ((void *)addr);
	}
	vmp->vm_kstat.vk_fail.value.ui64++;
	mutex_exit(&vmp->vm_lock);
	if (vmflag & VM_PANIC)
		panic("vmem_xalloc(%p, %lu, %lu, %lu, %lu, %p, %p, %x): "
		    "cannot satisfy mandatory allocation",
		    (void *)vmp, size, align_arg, phase, nocross,
		    minaddr, maxaddr, vmflag);
	ASSERT(xvaddr == NULL);
	return (NULL);
}

/*
 * Free the segment [vaddr, vaddr + size), where vaddr was a constrained
 * allocation. vmem_xalloc() and vmem_xfree() must always be paired because
 * both routines bypass the quantum caches.
 */
void
vmem_xfree(vmem_t *vmp, void *vaddr, size_t size)
{
	vmem_seg_t *vsp, *vnext, *vprev;

	mutex_enter(&vmp->vm_lock);

	vsp = vmem_hash_delete(vmp, (uintptr_t)vaddr, size);
	vsp->vs_end = P2ROUNDUP(vsp->vs_end, vmp->vm_quantum);

	/*
	 * Attempt to coalesce with the next segment.
	 */
	vnext = vsp->vs_anext;
	if (vnext->vs_type == VMEM_FREE) {
		ASSERT(vsp->vs_end == vnext->vs_start);
		vmem_freelist_delete(vmp, vnext);
		vsp->vs_end = vnext->vs_end;
		vmem_seg_destroy(vmp, vnext);
	}

	/*
	 * Attempt to coalesce with the previous segment.
	 */
	vprev = vsp->vs_aprev;
	if (vprev->vs_type == VMEM_FREE) {
		ASSERT(vprev->vs_end == vsp->vs_start);
		vmem_freelist_delete(vmp, vprev);
		vprev->vs_end = vsp->vs_end;
		vmem_seg_destroy(vmp, vsp);
		vsp = vprev;
	}

	/*
	 * If the entire span is free, return it to the source.
	 */
	if (vsp->vs_aprev->vs_import && vmp->vm_source_free != NULL &&
	    vsp->vs_aprev->vs_type == VMEM_SPAN &&
	    vsp->vs_anext->vs_type == VMEM_SPAN) {
		vaddr = (void *)vsp->vs_start;
		size = VS_SIZE(vsp);
		ASSERT(size == VS_SIZE(vsp->vs_aprev));
		vmem_span_destroy(vmp, vsp);
		mutex_exit(&vmp->vm_lock);
		vmp->vm_source_free(vmp->vm_source, vaddr, size);
	} else {
		vmem_freelist_insert(vmp, vsp);
		mutex_exit(&vmp->vm_lock);
	}
}

/*
 * Allocate size bytes from arena vmp. Returns the allocated address
 * on success, NULL on failure. vmflag specifies VM_SLEEP or VM_NOSLEEP,
 * and may also specify best-fit, first-fit, or next-fit allocation policy
 * instead of the default instant-fit policy. VM_SLEEP allocations are
 * guaranteed to succeed.
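 *
 * For example (my_arena is an illustrative name, not a symbol defined in
 * this file), a caller might carve out and later return a 64K chunk:
 *
 *	void *addr = vmem_alloc(my_arena, 65536, VM_SLEEP);
 *	...
 *	vmem_free(my_arena, addr, 65536);
 *
 * Note that the size passed to vmem_free() must match the size passed to
 * vmem_alloc(); vmem does not track it on the caller's behalf, and
 * vmem_hash_delete() panics on a wrong-sized free.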
1232 */ 1233 void * 1234 vmem_alloc(vmem_t *vmp, size_t size, int vmflag) 1235 { 1236 vmem_seg_t *vsp; 1237 uintptr_t addr; 1238 int hb; 1239 int flist = 0; 1240 uint32_t mtbf; 1241 1242 if (size - 1 < vmp->vm_qcache_max) 1243 return (kmem_cache_alloc(vmp->vm_qcache[(size - 1) >> 1244 vmp->vm_qshift], vmflag & VM_KMFLAGS)); 1245 1246 if ((mtbf = vmem_mtbf | vmp->vm_mtbf) != 0 && gethrtime() % mtbf == 0 && 1247 (vmflag & (VM_NOSLEEP | VM_PANIC)) == VM_NOSLEEP) 1248 return (NULL); 1249 1250 if (vmflag & VM_NEXTFIT) 1251 return (vmem_nextfit_alloc(vmp, size, vmflag)); 1252 1253 if (vmflag & (VM_BESTFIT | VM_FIRSTFIT)) 1254 return (vmem_xalloc(vmp, size, vmp->vm_quantum, 0, 0, 1255 NULL, NULL, vmflag)); 1256 1257 /* 1258 * Unconstrained instant-fit allocation from the segment list. 1259 */ 1260 mutex_enter(&vmp->vm_lock); 1261 1262 if (vmp->vm_nsegfree >= VMEM_MINFREE || vmem_populate(vmp, vmflag)) { 1263 if ((size & (size - 1)) == 0) 1264 flist = lowbit(P2ALIGN(vmp->vm_freemap, size)); 1265 else if ((hb = highbit(size)) < VMEM_FREELISTS) 1266 flist = lowbit(P2ALIGN(vmp->vm_freemap, 1UL << hb)); 1267 } 1268 1269 if (flist-- == 0) { 1270 mutex_exit(&vmp->vm_lock); 1271 return (vmem_xalloc(vmp, size, vmp->vm_quantum, 1272 0, 0, NULL, NULL, vmflag)); 1273 } 1274 1275 ASSERT(size <= (1UL << flist)); 1276 vsp = vmp->vm_freelist[flist].vs_knext; 1277 addr = vsp->vs_start; 1278 (void) vmem_seg_alloc(vmp, vsp, addr, size); 1279 mutex_exit(&vmp->vm_lock); 1280 return ((void *)addr); 1281 } 1282 1283 /* 1284 * Free the segment [vaddr, vaddr + size). 1285 */ 1286 void 1287 vmem_free(vmem_t *vmp, void *vaddr, size_t size) 1288 { 1289 if (size - 1 < vmp->vm_qcache_max) 1290 kmem_cache_free(vmp->vm_qcache[(size - 1) >> vmp->vm_qshift], 1291 vaddr); 1292 else 1293 vmem_xfree(vmp, vaddr, size); 1294 } 1295 1296 /* 1297 * Determine whether arena vmp contains the segment [vaddr, vaddr + size). 1298 */ 1299 int 1300 vmem_contains(vmem_t *vmp, void *vaddr, size_t size) 1301 { 1302 uintptr_t start = (uintptr_t)vaddr; 1303 uintptr_t end = start + size; 1304 vmem_seg_t *vsp; 1305 vmem_seg_t *seg0 = &vmp->vm_seg0; 1306 1307 mutex_enter(&vmp->vm_lock); 1308 vmp->vm_kstat.vk_contains.value.ui64++; 1309 for (vsp = seg0->vs_knext; vsp != seg0; vsp = vsp->vs_knext) { 1310 vmp->vm_kstat.vk_contains_search.value.ui64++; 1311 ASSERT(vsp->vs_type == VMEM_SPAN); 1312 if (start >= vsp->vs_start && end - 1 <= vsp->vs_end - 1) 1313 break; 1314 } 1315 mutex_exit(&vmp->vm_lock); 1316 return (vsp != seg0); 1317 } 1318 1319 /* 1320 * Add the span [vaddr, vaddr + size) to arena vmp. 1321 */ 1322 void * 1323 vmem_add(vmem_t *vmp, void *vaddr, size_t size, int vmflag) 1324 { 1325 if (vaddr == NULL || size == 0) 1326 panic("vmem_add(%p, %p, %lu): bad arguments", vmp, vaddr, size); 1327 1328 ASSERT(!vmem_contains(vmp, vaddr, size)); 1329 1330 mutex_enter(&vmp->vm_lock); 1331 if (vmem_populate(vmp, vmflag)) 1332 (void) vmem_span_create(vmp, vaddr, size, 0); 1333 else 1334 vaddr = NULL; 1335 mutex_exit(&vmp->vm_lock); 1336 return (vaddr); 1337 } 1338 1339 /* 1340 * Walk the vmp arena, applying func to each segment matching typemask. 1341 * If VMEM_REENTRANT is specified, the arena lock is dropped across each 1342 * call to func(); otherwise, it is held for the duration of vmem_walk() 1343 * to ensure a consistent snapshot. Note that VMEM_REENTRANT callbacks 1344 * are *not* necessarily consistent, so they may only be used when a hint 1345 * is adequate. 
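 *
 * As an illustrative sketch (count_seg, my_arena, and total are
 * hypothetical names, not symbols defined in this file), a caller could
 * total up the allocated segments in an arena like this:
 *
 *	static void
 *	count_seg(void *arg, void *start, size_t size)
 *	{
 *		*(size_t *)arg += size;
 *	}
 *
 *	size_t total = 0;
 *	vmem_walk(my_arena, VMEM_ALLOC, count_seg, &total);
 *
 * (For this particular question, vmem_size(my_arena, VMEM_ALLOC) is the
 * cheaper answer; the walker is for callers that need to visit each
 * segment individually.)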
1346 */ 1347 void 1348 vmem_walk(vmem_t *vmp, int typemask, 1349 void (*func)(void *, void *, size_t), void *arg) 1350 { 1351 vmem_seg_t *vsp; 1352 vmem_seg_t *seg0 = &vmp->vm_seg0; 1353 vmem_seg_t walker; 1354 1355 if (typemask & VMEM_WALKER) 1356 return; 1357 1358 bzero(&walker, sizeof (walker)); 1359 walker.vs_type = VMEM_WALKER; 1360 1361 mutex_enter(&vmp->vm_lock); 1362 VMEM_INSERT(seg0, &walker, a); 1363 for (vsp = seg0->vs_anext; vsp != seg0; vsp = vsp->vs_anext) { 1364 if (vsp->vs_type & typemask) { 1365 void *start = (void *)vsp->vs_start; 1366 size_t size = VS_SIZE(vsp); 1367 if (typemask & VMEM_REENTRANT) { 1368 vmem_advance(vmp, &walker, vsp); 1369 mutex_exit(&vmp->vm_lock); 1370 func(arg, start, size); 1371 mutex_enter(&vmp->vm_lock); 1372 vsp = &walker; 1373 } else { 1374 func(arg, start, size); 1375 } 1376 } 1377 } 1378 vmem_advance(vmp, &walker, NULL); 1379 mutex_exit(&vmp->vm_lock); 1380 } 1381 1382 /* 1383 * Return the total amount of memory whose type matches typemask. Thus: 1384 * 1385 * typemask VMEM_ALLOC yields total memory allocated (in use). 1386 * typemask VMEM_FREE yields total memory free (available). 1387 * typemask (VMEM_ALLOC | VMEM_FREE) yields total arena size. 1388 */ 1389 size_t 1390 vmem_size(vmem_t *vmp, int typemask) 1391 { 1392 uint64_t size = 0; 1393 1394 if (typemask & VMEM_ALLOC) 1395 size += vmp->vm_kstat.vk_mem_inuse.value.ui64; 1396 if (typemask & VMEM_FREE) 1397 size += vmp->vm_kstat.vk_mem_total.value.ui64 - 1398 vmp->vm_kstat.vk_mem_inuse.value.ui64; 1399 return ((size_t)size); 1400 } 1401 1402 /* 1403 * Create an arena called name whose initial span is [base, base + size). 1404 * The arena's natural unit of currency is quantum, so vmem_alloc() 1405 * guarantees quantum-aligned results. The arena may import new spans 1406 * by invoking afunc() on source, and may return those spans by invoking 1407 * ffunc() on source. To make small allocations fast and scalable, 1408 * the arena offers high-performance caching for each integer multiple 1409 * of quantum up to qcache_max. 
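 *
 * As an illustrative sketch (my_arena and my_source are hypothetical
 * names, not symbols defined in this file), a caller could stack a new
 * arena on top of an existing one, importing page-sized spans from it
 * and quantum-caching allocations up to 8 pages:
 *
 *	my_arena = vmem_create("my_arena",
 *	    NULL, 0,			(no initial span)
 *	    PAGESIZE,			(quantum)
 *	    vmem_alloc, vmem_free,	(import from / release to source)
 *	    my_source,			(the vmem source arena)
 *	    8 * PAGESIZE,		(qcache_max)
 *	    VM_SLEEP);
 *
 * This parallels the way vmem_init() below stacks vmem_metadata_arena on
 * top of the kernel heap, except that vmem_metadata_arena also passes
 * VMC_NO_QCACHE and VMC_POPULATOR.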
1410 */ 1411 static vmem_t * 1412 vmem_create_common(const char *name, void *base, size_t size, size_t quantum, 1413 void *(*afunc)(vmem_t *, size_t, int), 1414 void (*ffunc)(vmem_t *, void *, size_t), 1415 vmem_t *source, size_t qcache_max, int vmflag) 1416 { 1417 int i; 1418 size_t nqcache; 1419 vmem_t *vmp, *cur, **vmpp; 1420 vmem_seg_t *vsp; 1421 vmem_freelist_t *vfp; 1422 uint32_t id = atomic_add_32_nv(&vmem_id, 1); 1423 1424 if (vmem_vmem_arena != NULL) { 1425 vmp = vmem_alloc(vmem_vmem_arena, sizeof (vmem_t), 1426 vmflag & VM_KMFLAGS); 1427 } else { 1428 ASSERT(id <= VMEM_INITIAL); 1429 vmp = &vmem0[id - 1]; 1430 } 1431 1432 /* An identifier arena must inherit from another identifier arena */ 1433 ASSERT(source == NULL || ((source->vm_cflags & VMC_IDENTIFIER) == 1434 (vmflag & VMC_IDENTIFIER))); 1435 1436 if (vmp == NULL) 1437 return (NULL); 1438 bzero(vmp, sizeof (vmem_t)); 1439 1440 (void) snprintf(vmp->vm_name, VMEM_NAMELEN, "%s", name); 1441 mutex_init(&vmp->vm_lock, NULL, MUTEX_DEFAULT, NULL); 1442 cv_init(&vmp->vm_cv, NULL, CV_DEFAULT, NULL); 1443 vmp->vm_cflags = vmflag; 1444 vmflag &= VM_KMFLAGS; 1445 1446 vmp->vm_quantum = quantum; 1447 vmp->vm_qshift = highbit(quantum) - 1; 1448 nqcache = MIN(qcache_max >> vmp->vm_qshift, VMEM_NQCACHE_MAX); 1449 1450 for (i = 0; i <= VMEM_FREELISTS; i++) { 1451 vfp = &vmp->vm_freelist[i]; 1452 vfp->vs_end = 1UL << i; 1453 vfp->vs_knext = (vmem_seg_t *)(vfp + 1); 1454 vfp->vs_kprev = (vmem_seg_t *)(vfp - 1); 1455 } 1456 1457 vmp->vm_freelist[0].vs_kprev = NULL; 1458 vmp->vm_freelist[VMEM_FREELISTS].vs_knext = NULL; 1459 vmp->vm_freelist[VMEM_FREELISTS].vs_end = 0; 1460 vmp->vm_hash_table = vmp->vm_hash0; 1461 vmp->vm_hash_mask = VMEM_HASH_INITIAL - 1; 1462 vmp->vm_hash_shift = highbit(vmp->vm_hash_mask); 1463 1464 vsp = &vmp->vm_seg0; 1465 vsp->vs_anext = vsp; 1466 vsp->vs_aprev = vsp; 1467 vsp->vs_knext = vsp; 1468 vsp->vs_kprev = vsp; 1469 vsp->vs_type = VMEM_SPAN; 1470 1471 vsp = &vmp->vm_rotor; 1472 vsp->vs_type = VMEM_ROTOR; 1473 VMEM_INSERT(&vmp->vm_seg0, vsp, a); 1474 1475 bcopy(&vmem_kstat_template, &vmp->vm_kstat, sizeof (vmem_kstat_t)); 1476 1477 vmp->vm_id = id; 1478 if (source != NULL) 1479 vmp->vm_kstat.vk_source_id.value.ui32 = source->vm_id; 1480 vmp->vm_source = source; 1481 vmp->vm_source_alloc = afunc; 1482 vmp->vm_source_free = ffunc; 1483 1484 /* 1485 * Some arenas (like vmem_metadata and kmem_metadata) cannot 1486 * use quantum caching to lower fragmentation. Instead, we 1487 * increase their imports, giving a similar effect. 
1488 */ 1489 if (vmp->vm_cflags & VMC_NO_QCACHE) { 1490 vmp->vm_min_import = 1491 VMEM_QCACHE_SLABSIZE(nqcache << vmp->vm_qshift); 1492 nqcache = 0; 1493 } 1494 1495 if (nqcache != 0) { 1496 ASSERT(!(vmflag & VM_NOSLEEP)); 1497 vmp->vm_qcache_max = nqcache << vmp->vm_qshift; 1498 for (i = 0; i < nqcache; i++) { 1499 char buf[VMEM_NAMELEN + 21]; 1500 (void) sprintf(buf, "%s_%lu", vmp->vm_name, 1501 (i + 1) * quantum); 1502 vmp->vm_qcache[i] = kmem_cache_create(buf, 1503 (i + 1) * quantum, quantum, NULL, NULL, NULL, 1504 NULL, vmp, KMC_QCACHE | KMC_NOTOUCH); 1505 } 1506 } 1507 1508 if ((vmp->vm_ksp = kstat_create("vmem", vmp->vm_id, vmp->vm_name, 1509 "vmem", KSTAT_TYPE_NAMED, sizeof (vmem_kstat_t) / 1510 sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL)) != NULL) { 1511 vmp->vm_ksp->ks_data = &vmp->vm_kstat; 1512 kstat_install(vmp->vm_ksp); 1513 } 1514 1515 mutex_enter(&vmem_list_lock); 1516 vmpp = &vmem_list; 1517 while ((cur = *vmpp) != NULL) 1518 vmpp = &cur->vm_next; 1519 *vmpp = vmp; 1520 mutex_exit(&vmem_list_lock); 1521 1522 if (vmp->vm_cflags & VMC_POPULATOR) { 1523 ASSERT(vmem_populators < VMEM_INITIAL); 1524 vmem_populator[atomic_add_32_nv(&vmem_populators, 1) - 1] = vmp; 1525 mutex_enter(&vmp->vm_lock); 1526 (void) vmem_populate(vmp, vmflag | VM_PANIC); 1527 mutex_exit(&vmp->vm_lock); 1528 } 1529 1530 if ((base || size) && vmem_add(vmp, base, size, vmflag) == NULL) { 1531 vmem_destroy(vmp); 1532 return (NULL); 1533 } 1534 1535 return (vmp); 1536 } 1537 1538 vmem_t * 1539 vmem_xcreate(const char *name, void *base, size_t size, size_t quantum, 1540 vmem_ximport_t *afunc, vmem_free_t *ffunc, vmem_t *source, 1541 size_t qcache_max, int vmflag) 1542 { 1543 ASSERT(!(vmflag & (VMC_POPULATOR | VMC_XALLOC))); 1544 vmflag &= ~(VMC_POPULATOR | VMC_XALLOC); 1545 1546 return (vmem_create_common(name, base, size, quantum, 1547 (vmem_alloc_t *)afunc, ffunc, source, qcache_max, 1548 vmflag | VMC_XALLOC)); 1549 } 1550 1551 vmem_t * 1552 vmem_create(const char *name, void *base, size_t size, size_t quantum, 1553 vmem_alloc_t *afunc, vmem_free_t *ffunc, vmem_t *source, 1554 size_t qcache_max, int vmflag) 1555 { 1556 ASSERT(!(vmflag & VMC_XALLOC)); 1557 vmflag &= ~VMC_XALLOC; 1558 1559 return (vmem_create_common(name, base, size, quantum, 1560 afunc, ffunc, source, qcache_max, vmflag)); 1561 } 1562 1563 /* 1564 * Destroy arena vmp. 1565 */ 1566 void 1567 vmem_destroy(vmem_t *vmp) 1568 { 1569 vmem_t *cur, **vmpp; 1570 vmem_seg_t *seg0 = &vmp->vm_seg0; 1571 vmem_seg_t *vsp; 1572 size_t leaked; 1573 int i; 1574 1575 mutex_enter(&vmem_list_lock); 1576 vmpp = &vmem_list; 1577 while ((cur = *vmpp) != vmp) 1578 vmpp = &cur->vm_next; 1579 *vmpp = vmp->vm_next; 1580 mutex_exit(&vmem_list_lock); 1581 1582 for (i = 0; i < VMEM_NQCACHE_MAX; i++) 1583 if (vmp->vm_qcache[i]) 1584 kmem_cache_destroy(vmp->vm_qcache[i]); 1585 1586 leaked = vmem_size(vmp, VMEM_ALLOC); 1587 if (leaked != 0) 1588 cmn_err(CE_WARN, "vmem_destroy('%s'): leaked %lu %s", 1589 vmp->vm_name, leaked, (vmp->vm_cflags & VMC_IDENTIFIER) ? 1590 "identifiers" : "bytes"); 1591 1592 if (vmp->vm_hash_table != vmp->vm_hash0) 1593 vmem_free(vmem_hash_arena, vmp->vm_hash_table, 1594 (vmp->vm_hash_mask + 1) * sizeof (void *)); 1595 1596 /* 1597 * Give back the segment structures for anything that's left in the 1598 * arena, e.g. the primary spans and their free segments. 
1599 */ 1600 VMEM_DELETE(&vmp->vm_rotor, a); 1601 for (vsp = seg0->vs_anext; vsp != seg0; vsp = vsp->vs_anext) 1602 vmem_putseg_global(vsp); 1603 1604 while (vmp->vm_nsegfree > 0) 1605 vmem_putseg_global(vmem_getseg(vmp)); 1606 1607 kstat_delete(vmp->vm_ksp); 1608 1609 mutex_destroy(&vmp->vm_lock); 1610 cv_destroy(&vmp->vm_cv); 1611 vmem_free(vmem_vmem_arena, vmp, sizeof (vmem_t)); 1612 } 1613 1614 /* 1615 * Resize vmp's hash table to keep the average lookup depth near 1.0. 1616 */ 1617 static void 1618 vmem_hash_rescale(vmem_t *vmp) 1619 { 1620 vmem_seg_t **old_table, **new_table, *vsp; 1621 size_t old_size, new_size, h, nseg; 1622 1623 nseg = (size_t)(vmp->vm_kstat.vk_alloc.value.ui64 - 1624 vmp->vm_kstat.vk_free.value.ui64); 1625 1626 new_size = MAX(VMEM_HASH_INITIAL, 1 << (highbit(3 * nseg + 4) - 2)); 1627 old_size = vmp->vm_hash_mask + 1; 1628 1629 if ((old_size >> 1) <= new_size && new_size <= (old_size << 1)) 1630 return; 1631 1632 new_table = vmem_alloc(vmem_hash_arena, new_size * sizeof (void *), 1633 VM_NOSLEEP); 1634 if (new_table == NULL) 1635 return; 1636 bzero(new_table, new_size * sizeof (void *)); 1637 1638 mutex_enter(&vmp->vm_lock); 1639 1640 old_size = vmp->vm_hash_mask + 1; 1641 old_table = vmp->vm_hash_table; 1642 1643 vmp->vm_hash_mask = new_size - 1; 1644 vmp->vm_hash_table = new_table; 1645 vmp->vm_hash_shift = highbit(vmp->vm_hash_mask); 1646 1647 for (h = 0; h < old_size; h++) { 1648 vsp = old_table[h]; 1649 while (vsp != NULL) { 1650 uintptr_t addr = vsp->vs_start; 1651 vmem_seg_t *next_vsp = vsp->vs_knext; 1652 vmem_seg_t **hash_bucket = VMEM_HASH(vmp, addr); 1653 vsp->vs_knext = *hash_bucket; 1654 *hash_bucket = vsp; 1655 vsp = next_vsp; 1656 } 1657 } 1658 1659 mutex_exit(&vmp->vm_lock); 1660 1661 if (old_table != vmp->vm_hash0) 1662 vmem_free(vmem_hash_arena, old_table, 1663 old_size * sizeof (void *)); 1664 } 1665 1666 /* 1667 * Perform periodic maintenance on all vmem arenas. 1668 */ 1669 void 1670 vmem_update(void *dummy) 1671 { 1672 vmem_t *vmp; 1673 1674 mutex_enter(&vmem_list_lock); 1675 for (vmp = vmem_list; vmp != NULL; vmp = vmp->vm_next) { 1676 /* 1677 * If threads are waiting for resources, wake them up 1678 * periodically so they can issue another kmem_reap() 1679 * to reclaim resources cached by the slab allocator. 1680 */ 1681 cv_broadcast(&vmp->vm_cv); 1682 1683 /* 1684 * Rescale the hash table to keep the hash chains short. 1685 */ 1686 vmem_hash_rescale(vmp); 1687 } 1688 mutex_exit(&vmem_list_lock); 1689 1690 (void) timeout(vmem_update, dummy, vmem_update_interval * hz); 1691 } 1692 1693 /* 1694 * Prepare vmem for use. 
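 *
 * The heap parameters are supplied by the platform's startup code; as a
 * rough, illustrative sketch (the exact names and values vary by platform
 * and are not defined here), the call might look like:
 *
 *	heap_arena = vmem_init("heap",
 *	    kernelheap, ekernelheap - kernelheap, PAGESIZE,
 *	    segkmem_alloc, segkmem_free);
 *
 * This creates the top-level heap arena and the metadata arenas that
 * vmem itself needs (vmem_metadata, vmem_seg, vmem_hash, vmem_vmem).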
1695 */ 1696 vmem_t * 1697 vmem_init(const char *heap_name, 1698 void *heap_start, size_t heap_size, size_t heap_quantum, 1699 void *(*heap_alloc)(vmem_t *, size_t, int), 1700 void (*heap_free)(vmem_t *, void *, size_t)) 1701 { 1702 uint32_t id; 1703 int nseg = VMEM_SEG_INITIAL; 1704 vmem_t *heap; 1705 1706 while (--nseg >= 0) 1707 vmem_putseg_global(&vmem_seg0[nseg]); 1708 1709 heap = vmem_create(heap_name, 1710 heap_start, heap_size, heap_quantum, 1711 NULL, NULL, NULL, 0, 1712 VM_SLEEP | VMC_POPULATOR); 1713 1714 vmem_metadata_arena = vmem_create("vmem_metadata", 1715 NULL, 0, heap_quantum, 1716 vmem_alloc, vmem_free, heap, 8 * heap_quantum, 1717 VM_SLEEP | VMC_POPULATOR | VMC_NO_QCACHE); 1718 1719 vmem_seg_arena = vmem_create("vmem_seg", 1720 NULL, 0, heap_quantum, 1721 heap_alloc, heap_free, vmem_metadata_arena, 0, 1722 VM_SLEEP | VMC_POPULATOR); 1723 1724 vmem_hash_arena = vmem_create("vmem_hash", 1725 NULL, 0, 8, 1726 heap_alloc, heap_free, vmem_metadata_arena, 0, 1727 VM_SLEEP); 1728 1729 vmem_vmem_arena = vmem_create("vmem_vmem", 1730 vmem0, sizeof (vmem0), 1, 1731 heap_alloc, heap_free, vmem_metadata_arena, 0, 1732 VM_SLEEP); 1733 1734 for (id = 0; id < vmem_id; id++) 1735 (void) vmem_xalloc(vmem_vmem_arena, sizeof (vmem_t), 1736 1, 0, 0, &vmem0[id], &vmem0[id + 1], 1737 VM_NOSLEEP | VM_BESTFIT | VM_PANIC); 1738 1739 return (heap); 1740 } 1741