1 /*- 2 * SPDX-License-Identifier: (BSD-3-Clause AND MIT-CMU) 3 * 4 * Copyright (c) 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * The Mach Operating System project at Carnegie-Mellon University. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 35 * 36 * 37 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 38 * All rights reserved. 
39 * 40 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 41 * 42 * Permission to use, copy, modify and distribute this software and 43 * its documentation is hereby granted, provided that both the copyright 44 * notice and this permission notice appear in all copies of the 45 * software, derivative works or modified versions, and any portions 46 * thereof, and that both notices appear in supporting documentation. 47 * 48 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 49 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 50 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 51 * 52 * Carnegie Mellon requests users of this software to return to 53 * 54 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 55 * School of Computer Science 56 * Carnegie Mellon University 57 * Pittsburgh PA 15213-3890 58 * 59 * any improvements or extensions that they make and grant Carnegie the 60 * rights to redistribute these changes. 61 */ 62 63 /* 64 * Virtual memory mapping module. 
65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/kernel.h> 73 #include <sys/ktr.h> 74 #include <sys/lock.h> 75 #include <sys/mutex.h> 76 #include <sys/proc.h> 77 #include <sys/vmmeter.h> 78 #include <sys/mman.h> 79 #include <sys/vnode.h> 80 #include <sys/racct.h> 81 #include <sys/resourcevar.h> 82 #include <sys/rwlock.h> 83 #include <sys/file.h> 84 #include <sys/sysctl.h> 85 #include <sys/sysent.h> 86 #include <sys/shm.h> 87 88 #include <vm/vm.h> 89 #include <vm/vm_param.h> 90 #include <vm/pmap.h> 91 #include <vm/vm_map.h> 92 #include <vm/vm_page.h> 93 #include <vm/vm_object.h> 94 #include <vm/vm_pager.h> 95 #include <vm/vm_kern.h> 96 #include <vm/vm_extern.h> 97 #include <vm/vnode_pager.h> 98 #include <vm/swap_pager.h> 99 #include <vm/uma.h> 100 101 /* 102 * Virtual memory maps provide for the mapping, protection, 103 * and sharing of virtual memory objects. In addition, 104 * this module provides for an efficient virtual copy of 105 * memory from one map to another. 106 * 107 * Synchronization is required prior to most operations. 108 * 109 * Maps consist of an ordered doubly-linked list of simple 110 * entries; a self-adjusting binary search tree of these 111 * entries is used to speed up lookups. 112 * 113 * Since portions of maps are specified by start/end addresses, 114 * which may not align with existing map entries, all 115 * routines merely "clip" entries to these start/end values. 116 * [That is, an entry is split into two, bordering at a 117 * start or end value.] Note that these clippings may not 118 * always be necessary (as the two resulting entries are then 119 * not changed); however, the clipping is done for convenience. 120 * 121 * As mentioned above, virtual copy operations are performed 122 * by copying VM object references from one map to 123 * another, and then marking both regions as copy-on-write. 
124 */ 125 126 static struct mtx map_sleep_mtx; 127 static uma_zone_t mapentzone; 128 static uma_zone_t kmapentzone; 129 static uma_zone_t mapzone; 130 static uma_zone_t vmspace_zone; 131 static int vmspace_zinit(void *mem, int size, int flags); 132 static int vm_map_zinit(void *mem, int ize, int flags); 133 static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, 134 vm_offset_t max); 135 static int vm_map_alignspace(vm_map_t map, vm_object_t object, 136 vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, 137 vm_offset_t max_addr, vm_offset_t alignment); 138 static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map); 139 static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry); 140 static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry); 141 static int vm_map_growstack(vm_map_t map, vm_offset_t addr, 142 vm_map_entry_t gap_entry); 143 static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, 144 vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags); 145 #ifdef INVARIANTS 146 static void vm_map_zdtor(void *mem, int size, void *arg); 147 static void vmspace_zdtor(void *mem, int size, void *arg); 148 #endif 149 static int vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, 150 vm_size_t max_ssize, vm_size_t growsize, vm_prot_t prot, vm_prot_t max, 151 int cow); 152 static void vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, 153 vm_offset_t failed_addr); 154 155 #define ENTRY_CHARGED(e) ((e)->cred != NULL || \ 156 ((e)->object.vm_object != NULL && (e)->object.vm_object->cred != NULL && \ 157 !((e)->eflags & MAP_ENTRY_NEEDS_COPY))) 158 159 /* 160 * PROC_VMSPACE_{UN,}LOCK() can be a noop as long as vmspaces are type 161 * stable. 
162 */ 163 #define PROC_VMSPACE_LOCK(p) do { } while (0) 164 #define PROC_VMSPACE_UNLOCK(p) do { } while (0) 165 166 /* 167 * VM_MAP_RANGE_CHECK: [ internal use only ] 168 * 169 * Asserts that the starting and ending region 170 * addresses fall within the valid range of the map. 171 */ 172 #define VM_MAP_RANGE_CHECK(map, start, end) \ 173 { \ 174 if (start < vm_map_min(map)) \ 175 start = vm_map_min(map); \ 176 if (end > vm_map_max(map)) \ 177 end = vm_map_max(map); \ 178 if (start > end) \ 179 start = end; \ 180 } 181 182 /* 183 * vm_map_startup: 184 * 185 * Initialize the vm_map module. Must be called before 186 * any other vm_map routines. 187 * 188 * Map and entry structures are allocated from the general 189 * purpose memory pool with some exceptions: 190 * 191 * - The kernel map and kmem submap are allocated statically. 192 * - Kernel map entries are allocated out of a static pool. 193 * 194 * These restrictions are necessary since malloc() uses the 195 * maps and requires map entries. 
196 */ 197 198 void 199 vm_map_startup(void) 200 { 201 mtx_init(&map_sleep_mtx, "vm map sleep mutex", NULL, MTX_DEF); 202 mapzone = uma_zcreate("MAP", sizeof(struct vm_map), NULL, 203 #ifdef INVARIANTS 204 vm_map_zdtor, 205 #else 206 NULL, 207 #endif 208 vm_map_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 209 uma_prealloc(mapzone, MAX_KMAP); 210 kmapentzone = uma_zcreate("KMAP ENTRY", sizeof(struct vm_map_entry), 211 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 212 UMA_ZONE_MTXCLASS | UMA_ZONE_VM); 213 mapentzone = uma_zcreate("MAP ENTRY", sizeof(struct vm_map_entry), 214 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 215 vmspace_zone = uma_zcreate("VMSPACE", sizeof(struct vmspace), NULL, 216 #ifdef INVARIANTS 217 vmspace_zdtor, 218 #else 219 NULL, 220 #endif 221 vmspace_zinit, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 222 } 223 224 static int 225 vmspace_zinit(void *mem, int size, int flags) 226 { 227 struct vmspace *vm; 228 229 vm = (struct vmspace *)mem; 230 231 vm->vm_map.pmap = NULL; 232 (void)vm_map_zinit(&vm->vm_map, sizeof(vm->vm_map), flags); 233 PMAP_LOCK_INIT(vmspace_pmap(vm)); 234 return (0); 235 } 236 237 static int 238 vm_map_zinit(void *mem, int size, int flags) 239 { 240 vm_map_t map; 241 242 map = (vm_map_t)mem; 243 memset(map, 0, sizeof(*map)); 244 mtx_init(&map->system_mtx, "vm map (system)", NULL, MTX_DEF | MTX_DUPOK); 245 sx_init(&map->lock, "vm map (user)"); 246 return (0); 247 } 248 249 #ifdef INVARIANTS 250 static void 251 vmspace_zdtor(void *mem, int size, void *arg) 252 { 253 struct vmspace *vm; 254 255 vm = (struct vmspace *)mem; 256 257 vm_map_zdtor(&vm->vm_map, sizeof(vm->vm_map), arg); 258 } 259 static void 260 vm_map_zdtor(void *mem, int size, void *arg) 261 { 262 vm_map_t map; 263 264 map = (vm_map_t)mem; 265 KASSERT(map->nentries == 0, 266 ("map %p nentries == %d on free.", 267 map, map->nentries)); 268 KASSERT(map->size == 0, 269 ("map %p size == %lu on free.", 270 map, (unsigned long)map->size)); 271 } 272 #endif /* INVARIANTS */ 273 274 /* 
275 * Allocate a vmspace structure, including a vm_map and pmap, 276 * and initialize those structures. The refcnt is set to 1. 277 * 278 * If 'pinit' is NULL then the embedded pmap is initialized via pmap_pinit(). 279 */ 280 struct vmspace * 281 vmspace_alloc(vm_offset_t min, vm_offset_t max, pmap_pinit_t pinit) 282 { 283 struct vmspace *vm; 284 285 vm = uma_zalloc(vmspace_zone, M_WAITOK); 286 KASSERT(vm->vm_map.pmap == NULL, ("vm_map.pmap must be NULL")); 287 if (!pinit(vmspace_pmap(vm))) { 288 uma_zfree(vmspace_zone, vm); 289 return (NULL); 290 } 291 CTR1(KTR_VM, "vmspace_alloc: %p", vm); 292 _vm_map_init(&vm->vm_map, vmspace_pmap(vm), min, max); 293 vm->vm_refcnt = 1; 294 vm->vm_shm = NULL; 295 vm->vm_swrss = 0; 296 vm->vm_tsize = 0; 297 vm->vm_dsize = 0; 298 vm->vm_ssize = 0; 299 vm->vm_taddr = 0; 300 vm->vm_daddr = 0; 301 vm->vm_maxsaddr = 0; 302 return (vm); 303 } 304 305 #ifdef RACCT 306 static void 307 vmspace_container_reset(struct proc *p) 308 { 309 310 PROC_LOCK(p); 311 racct_set(p, RACCT_DATA, 0); 312 racct_set(p, RACCT_STACK, 0); 313 racct_set(p, RACCT_RSS, 0); 314 racct_set(p, RACCT_MEMLOCK, 0); 315 racct_set(p, RACCT_VMEM, 0); 316 PROC_UNLOCK(p); 317 } 318 #endif 319 320 static inline void 321 vmspace_dofree(struct vmspace *vm) 322 { 323 324 CTR1(KTR_VM, "vmspace_free: %p", vm); 325 326 /* 327 * Make sure any SysV shm is freed, it might not have been in 328 * exit1(). 329 */ 330 shmexit(vm); 331 332 /* 333 * Lock the map, to wait out all other references to it. 334 * Delete all of the mappings and pages they hold, then call 335 * the pmap module to reclaim anything left. 
336 */ 337 (void)vm_map_remove(&vm->vm_map, vm_map_min(&vm->vm_map), 338 vm_map_max(&vm->vm_map)); 339 340 pmap_release(vmspace_pmap(vm)); 341 vm->vm_map.pmap = NULL; 342 uma_zfree(vmspace_zone, vm); 343 } 344 345 void 346 vmspace_free(struct vmspace *vm) 347 { 348 349 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 350 "vmspace_free() called"); 351 352 if (vm->vm_refcnt == 0) 353 panic("vmspace_free: attempt to free already freed vmspace"); 354 355 if (atomic_fetchadd_int(&vm->vm_refcnt, -1) == 1) 356 vmspace_dofree(vm); 357 } 358 359 void 360 vmspace_exitfree(struct proc *p) 361 { 362 struct vmspace *vm; 363 364 PROC_VMSPACE_LOCK(p); 365 vm = p->p_vmspace; 366 p->p_vmspace = NULL; 367 PROC_VMSPACE_UNLOCK(p); 368 KASSERT(vm == &vmspace0, ("vmspace_exitfree: wrong vmspace")); 369 vmspace_free(vm); 370 } 371 372 void 373 vmspace_exit(struct thread *td) 374 { 375 int refcnt; 376 struct vmspace *vm; 377 struct proc *p; 378 379 /* 380 * Release user portion of address space. 381 * This releases references to vnodes, 382 * which could cause I/O if the file has been unlinked. 383 * Need to do this early enough that we can still sleep. 384 * 385 * The last exiting process to reach this point releases as 386 * much of the environment as it can. vmspace_dofree() is the 387 * slower fallback in case another process had a temporary 388 * reference to the vmspace. 
389 */ 390 391 p = td->td_proc; 392 vm = p->p_vmspace; 393 atomic_add_int(&vmspace0.vm_refcnt, 1); 394 refcnt = vm->vm_refcnt; 395 do { 396 if (refcnt > 1 && p->p_vmspace != &vmspace0) { 397 /* Switch now since other proc might free vmspace */ 398 PROC_VMSPACE_LOCK(p); 399 p->p_vmspace = &vmspace0; 400 PROC_VMSPACE_UNLOCK(p); 401 pmap_activate(td); 402 } 403 } while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt - 1)); 404 if (refcnt == 1) { 405 if (p->p_vmspace != vm) { 406 /* vmspace not yet freed, switch back */ 407 PROC_VMSPACE_LOCK(p); 408 p->p_vmspace = vm; 409 PROC_VMSPACE_UNLOCK(p); 410 pmap_activate(td); 411 } 412 pmap_remove_pages(vmspace_pmap(vm)); 413 /* Switch now since this proc will free vmspace */ 414 PROC_VMSPACE_LOCK(p); 415 p->p_vmspace = &vmspace0; 416 PROC_VMSPACE_UNLOCK(p); 417 pmap_activate(td); 418 vmspace_dofree(vm); 419 } 420 #ifdef RACCT 421 if (racct_enable) 422 vmspace_container_reset(p); 423 #endif 424 } 425 426 /* Acquire reference to vmspace owned by another process. */ 427 428 struct vmspace * 429 vmspace_acquire_ref(struct proc *p) 430 { 431 struct vmspace *vm; 432 int refcnt; 433 434 PROC_VMSPACE_LOCK(p); 435 vm = p->p_vmspace; 436 if (vm == NULL) { 437 PROC_VMSPACE_UNLOCK(p); 438 return (NULL); 439 } 440 refcnt = vm->vm_refcnt; 441 do { 442 if (refcnt <= 0) { /* Avoid 0->1 transition */ 443 PROC_VMSPACE_UNLOCK(p); 444 return (NULL); 445 } 446 } while (!atomic_fcmpset_int(&vm->vm_refcnt, &refcnt, refcnt + 1)); 447 if (vm != p->p_vmspace) { 448 PROC_VMSPACE_UNLOCK(p); 449 vmspace_free(vm); 450 return (NULL); 451 } 452 PROC_VMSPACE_UNLOCK(p); 453 return (vm); 454 } 455 456 /* 457 * Switch between vmspaces in an AIO kernel process. 458 * 459 * The AIO kernel processes switch to and from a user process's 460 * vmspace while performing an I/O operation on behalf of a user 461 * process. 
The new vmspace is either the vmspace of a user process 462 * obtained from an active AIO request or the initial vmspace of the 463 * AIO kernel process (when it is idling). Because user processes 464 * will block to drain any active AIO requests before proceeding in 465 * exit() or execve(), the vmspace reference count for these vmspaces 466 * can never be 0. This allows for a much simpler implementation than 467 * the loop in vmspace_acquire_ref() above. Similarly, AIO kernel 468 * processes hold an extra reference on their initial vmspace for the 469 * life of the process so that this guarantee is true for any vmspace 470 * passed as 'newvm'. 471 */ 472 void 473 vmspace_switch_aio(struct vmspace *newvm) 474 { 475 struct vmspace *oldvm; 476 477 /* XXX: Need some way to assert that this is an aio daemon. */ 478 479 KASSERT(newvm->vm_refcnt > 0, 480 ("vmspace_switch_aio: newvm unreferenced")); 481 482 oldvm = curproc->p_vmspace; 483 if (oldvm == newvm) 484 return; 485 486 /* 487 * Point to the new address space and refer to it. 488 */ 489 curproc->p_vmspace = newvm; 490 atomic_add_int(&newvm->vm_refcnt, 1); 491 492 /* Activate the new mapping. */ 493 pmap_activate(curthread); 494 495 /* Remove the daemon's reference to the old address space. 
*/ 496 KASSERT(oldvm->vm_refcnt > 1, 497 ("vmspace_switch_aio: oldvm dropping last reference")); 498 vmspace_free(oldvm); 499 } 500 501 void 502 _vm_map_lock(vm_map_t map, const char *file, int line) 503 { 504 505 if (map->system_map) 506 mtx_lock_flags_(&map->system_mtx, 0, file, line); 507 else 508 sx_xlock_(&map->lock, file, line); 509 map->timestamp++; 510 } 511 512 static void 513 vm_map_process_deferred(void) 514 { 515 struct thread *td; 516 vm_map_entry_t entry, next; 517 vm_object_t object; 518 519 td = curthread; 520 entry = td->td_map_def_user; 521 td->td_map_def_user = NULL; 522 while (entry != NULL) { 523 next = entry->next; 524 if ((entry->eflags & MAP_ENTRY_VN_WRITECNT) != 0) { 525 /* 526 * Decrement the object's writemappings and 527 * possibly the vnode's v_writecount. 528 */ 529 KASSERT((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0, 530 ("Submap with writecount")); 531 object = entry->object.vm_object; 532 KASSERT(object != NULL, ("No object for writecount")); 533 vnode_pager_release_writecount(object, entry->start, 534 entry->end); 535 } 536 vm_map_entry_deallocate(entry, FALSE); 537 entry = next; 538 } 539 } 540 541 void 542 _vm_map_unlock(vm_map_t map, const char *file, int line) 543 { 544 545 if (map->system_map) 546 mtx_unlock_flags_(&map->system_mtx, 0, file, line); 547 else { 548 sx_xunlock_(&map->lock, file, line); 549 vm_map_process_deferred(); 550 } 551 } 552 553 void 554 _vm_map_lock_read(vm_map_t map, const char *file, int line) 555 { 556 557 if (map->system_map) 558 mtx_lock_flags_(&map->system_mtx, 0, file, line); 559 else 560 sx_slock_(&map->lock, file, line); 561 } 562 563 void 564 _vm_map_unlock_read(vm_map_t map, const char *file, int line) 565 { 566 567 if (map->system_map) 568 mtx_unlock_flags_(&map->system_mtx, 0, file, line); 569 else { 570 sx_sunlock_(&map->lock, file, line); 571 vm_map_process_deferred(); 572 } 573 } 574 575 int 576 _vm_map_trylock(vm_map_t map, const char *file, int line) 577 { 578 int error; 579 580 error = 
map->system_map ? 581 !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : 582 !sx_try_xlock_(&map->lock, file, line); 583 if (error == 0) 584 map->timestamp++; 585 return (error == 0); 586 } 587 588 int 589 _vm_map_trylock_read(vm_map_t map, const char *file, int line) 590 { 591 int error; 592 593 error = map->system_map ? 594 !mtx_trylock_flags_(&map->system_mtx, 0, file, line) : 595 !sx_try_slock_(&map->lock, file, line); 596 return (error == 0); 597 } 598 599 /* 600 * _vm_map_lock_upgrade: [ internal use only ] 601 * 602 * Tries to upgrade a read (shared) lock on the specified map to a write 603 * (exclusive) lock. Returns the value "0" if the upgrade succeeds and a 604 * non-zero value if the upgrade fails. If the upgrade fails, the map is 605 * returned without a read or write lock held. 606 * 607 * Requires that the map be read locked. 608 */ 609 int 610 _vm_map_lock_upgrade(vm_map_t map, const char *file, int line) 611 { 612 unsigned int last_timestamp; 613 614 if (map->system_map) { 615 mtx_assert_(&map->system_mtx, MA_OWNED, file, line); 616 } else { 617 if (!sx_try_upgrade_(&map->lock, file, line)) { 618 last_timestamp = map->timestamp; 619 sx_sunlock_(&map->lock, file, line); 620 vm_map_process_deferred(); 621 /* 622 * If the map's timestamp does not change while the 623 * map is unlocked, then the upgrade succeeds. 624 */ 625 sx_xlock_(&map->lock, file, line); 626 if (last_timestamp != map->timestamp) { 627 sx_xunlock_(&map->lock, file, line); 628 return (1); 629 } 630 } 631 } 632 map->timestamp++; 633 return (0); 634 } 635 636 void 637 _vm_map_lock_downgrade(vm_map_t map, const char *file, int line) 638 { 639 640 if (map->system_map) { 641 mtx_assert_(&map->system_mtx, MA_OWNED, file, line); 642 } else 643 sx_downgrade_(&map->lock, file, line); 644 } 645 646 /* 647 * vm_map_locked: 648 * 649 * Returns a non-zero value if the caller holds a write (exclusive) lock 650 * on the specified map and the value "0" otherwise. 
651 */ 652 int 653 vm_map_locked(vm_map_t map) 654 { 655 656 if (map->system_map) 657 return (mtx_owned(&map->system_mtx)); 658 else 659 return (sx_xlocked(&map->lock)); 660 } 661 662 #ifdef INVARIANTS 663 static void 664 _vm_map_assert_locked(vm_map_t map, const char *file, int line) 665 { 666 667 if (map->system_map) 668 mtx_assert_(&map->system_mtx, MA_OWNED, file, line); 669 else 670 sx_assert_(&map->lock, SA_XLOCKED, file, line); 671 } 672 673 #define VM_MAP_ASSERT_LOCKED(map) \ 674 _vm_map_assert_locked(map, LOCK_FILE, LOCK_LINE) 675 #else 676 #define VM_MAP_ASSERT_LOCKED(map) 677 #endif 678 679 /* 680 * _vm_map_unlock_and_wait: 681 * 682 * Atomically releases the lock on the specified map and puts the calling 683 * thread to sleep. The calling thread will remain asleep until either 684 * vm_map_wakeup() is performed on the map or the specified timeout is 685 * exceeded. 686 * 687 * WARNING! This function does not perform deferred deallocations of 688 * objects and map entries. Therefore, the calling thread is expected to 689 * reacquire the map lock after reawakening and later perform an ordinary 690 * unlock operation, such as vm_map_unlock(), before completing its 691 * operation on the map. 692 */ 693 int 694 _vm_map_unlock_and_wait(vm_map_t map, int timo, const char *file, int line) 695 { 696 697 mtx_lock(&map_sleep_mtx); 698 if (map->system_map) 699 mtx_unlock_flags_(&map->system_mtx, 0, file, line); 700 else 701 sx_xunlock_(&map->lock, file, line); 702 return (msleep(&map->root, &map_sleep_mtx, PDROP | PVM, "vmmaps", 703 timo)); 704 } 705 706 /* 707 * vm_map_wakeup: 708 * 709 * Awaken any threads that have slept on the map using 710 * vm_map_unlock_and_wait(). 711 */ 712 void 713 vm_map_wakeup(vm_map_t map) 714 { 715 716 /* 717 * Acquire and release map_sleep_mtx to prevent a wakeup() 718 * from being performed (and lost) between the map unlock 719 * and the msleep() in _vm_map_unlock_and_wait(). 
720 */ 721 mtx_lock(&map_sleep_mtx); 722 mtx_unlock(&map_sleep_mtx); 723 wakeup(&map->root); 724 } 725 726 void 727 vm_map_busy(vm_map_t map) 728 { 729 730 VM_MAP_ASSERT_LOCKED(map); 731 map->busy++; 732 } 733 734 void 735 vm_map_unbusy(vm_map_t map) 736 { 737 738 VM_MAP_ASSERT_LOCKED(map); 739 KASSERT(map->busy, ("vm_map_unbusy: not busy")); 740 if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) { 741 vm_map_modflags(map, 0, MAP_BUSY_WAKEUP); 742 wakeup(&map->busy); 743 } 744 } 745 746 void 747 vm_map_wait_busy(vm_map_t map) 748 { 749 750 VM_MAP_ASSERT_LOCKED(map); 751 while (map->busy) { 752 vm_map_modflags(map, MAP_BUSY_WAKEUP, 0); 753 if (map->system_map) 754 msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0); 755 else 756 sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0); 757 } 758 map->timestamp++; 759 } 760 761 long 762 vmspace_resident_count(struct vmspace *vmspace) 763 { 764 return pmap_resident_count(vmspace_pmap(vmspace)); 765 } 766 767 /* 768 * vm_map_create: 769 * 770 * Creates and returns a new empty VM map with 771 * the given physical map structure, and having 772 * the given lower and upper address bounds. 773 */ 774 vm_map_t 775 vm_map_create(pmap_t pmap, vm_offset_t min, vm_offset_t max) 776 { 777 vm_map_t result; 778 779 result = uma_zalloc(mapzone, M_WAITOK); 780 CTR1(KTR_VM, "vm_map_create: %p", result); 781 _vm_map_init(result, pmap, min, max); 782 return (result); 783 } 784 785 /* 786 * Initialize an existing vm_map structure 787 * such as that in the vmspace structure. 
788 */ 789 static void 790 _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) 791 { 792 793 map->header.next = map->header.prev = &map->header; 794 map->header.eflags = MAP_ENTRY_HEADER; 795 map->needs_wakeup = FALSE; 796 map->system_map = 0; 797 map->pmap = pmap; 798 map->header.end = min; 799 map->header.start = max; 800 map->flags = 0; 801 map->root = NULL; 802 map->timestamp = 0; 803 map->busy = 0; 804 map->anon_loc = 0; 805 } 806 807 void 808 vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) 809 { 810 811 _vm_map_init(map, pmap, min, max); 812 mtx_init(&map->system_mtx, "system map", NULL, MTX_DEF | MTX_DUPOK); 813 sx_init(&map->lock, "user map"); 814 } 815 816 /* 817 * vm_map_entry_dispose: [ internal use only ] 818 * 819 * Inverse of vm_map_entry_create. 820 */ 821 static void 822 vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry) 823 { 824 uma_zfree(map->system_map ? kmapentzone : mapentzone, entry); 825 } 826 827 /* 828 * vm_map_entry_create: [ internal use only ] 829 * 830 * Allocates a VM map entry for insertion. 831 * No entry fields are filled in. 832 */ 833 static vm_map_entry_t 834 vm_map_entry_create(vm_map_t map) 835 { 836 vm_map_entry_t new_entry; 837 838 if (map->system_map) 839 new_entry = uma_zalloc(kmapentzone, M_NOWAIT); 840 else 841 new_entry = uma_zalloc(mapentzone, M_WAITOK); 842 if (new_entry == NULL) 843 panic("vm_map_entry_create: kernel resources exhausted"); 844 return (new_entry); 845 } 846 847 /* 848 * vm_map_entry_set_behavior: 849 * 850 * Set the expected access behavior, either normal, random, or 851 * sequential. 852 */ 853 static inline void 854 vm_map_entry_set_behavior(vm_map_entry_t entry, u_char behavior) 855 { 856 entry->eflags = (entry->eflags & ~MAP_ENTRY_BEHAV_MASK) | 857 (behavior & MAP_ENTRY_BEHAV_MASK); 858 } 859 860 /* 861 * vm_map_entry_set_max_free: 862 * 863 * Set the max_free field in a vm_map_entry. 
864 */ 865 static inline void 866 vm_map_entry_set_max_free(vm_map_entry_t entry) 867 { 868 869 entry->max_free = entry->adj_free; 870 if (entry->left != NULL && entry->left->max_free > entry->max_free) 871 entry->max_free = entry->left->max_free; 872 if (entry->right != NULL && entry->right->max_free > entry->max_free) 873 entry->max_free = entry->right->max_free; 874 } 875 876 /* 877 * vm_map_entry_splay: 878 * 879 * The Sleator and Tarjan top-down splay algorithm with the 880 * following variation. Max_free must be computed bottom-up, so 881 * on the downward pass, maintain the left and right spines in 882 * reverse order. Then, make a second pass up each side to fix 883 * the pointers and compute max_free. The time bound is O(log n) 884 * amortized. 885 * 886 * The new root is the vm_map_entry containing "addr", or else an 887 * adjacent entry (lower or higher) if addr is not in the tree. 888 * 889 * The map must be locked, and leaves it so. 890 * 891 * Returns: the new root. 892 */ 893 static vm_map_entry_t 894 vm_map_entry_splay(vm_offset_t addr, vm_map_entry_t root) 895 { 896 vm_map_entry_t llist, rlist; 897 vm_map_entry_t ltree, rtree; 898 vm_map_entry_t y; 899 900 /* Special case of empty tree. */ 901 if (root == NULL) 902 return (root); 903 904 /* 905 * Pass One: Splay down the tree until we find addr or a NULL 906 * pointer where addr would go. llist and rlist are the two 907 * sides in reverse order (bottom-up), with llist linked by 908 * the right pointer and rlist linked by the left pointer in 909 * the vm_map_entry. Wait until Pass Two to set max_free on 910 * the two spines. 911 */ 912 llist = NULL; 913 rlist = NULL; 914 for (;;) { 915 /* root is never NULL in here. */ 916 if (addr < root->start) { 917 y = root->left; 918 if (y == NULL) 919 break; 920 if (addr < y->start && y->left != NULL) { 921 /* Rotate right and put y on rlist. 
*/ 922 root->left = y->right; 923 y->right = root; 924 vm_map_entry_set_max_free(root); 925 root = y->left; 926 y->left = rlist; 927 rlist = y; 928 } else { 929 /* Put root on rlist. */ 930 root->left = rlist; 931 rlist = root; 932 root = y; 933 } 934 } else if (addr >= root->end) { 935 y = root->right; 936 if (y == NULL) 937 break; 938 if (addr >= y->end && y->right != NULL) { 939 /* Rotate left and put y on llist. */ 940 root->right = y->left; 941 y->left = root; 942 vm_map_entry_set_max_free(root); 943 root = y->right; 944 y->right = llist; 945 llist = y; 946 } else { 947 /* Put root on llist. */ 948 root->right = llist; 949 llist = root; 950 root = y; 951 } 952 } else 953 break; 954 } 955 956 /* 957 * Pass Two: Walk back up the two spines, flip the pointers 958 * and set max_free. The subtrees of the root go at the 959 * bottom of llist and rlist. 960 */ 961 ltree = root->left; 962 while (llist != NULL) { 963 y = llist->right; 964 llist->right = ltree; 965 vm_map_entry_set_max_free(llist); 966 ltree = llist; 967 llist = y; 968 } 969 rtree = root->right; 970 while (rlist != NULL) { 971 y = rlist->left; 972 rlist->left = rtree; 973 vm_map_entry_set_max_free(rlist); 974 rtree = rlist; 975 rlist = y; 976 } 977 978 /* 979 * Final assembly: add ltree and rtree as subtrees of root. 980 */ 981 root->left = ltree; 982 root->right = rtree; 983 vm_map_entry_set_max_free(root); 984 985 return (root); 986 } 987 988 /* 989 * vm_map_entry_{un,}link: 990 * 991 * Insert/remove entries from maps. 
992 */ 993 static void 994 vm_map_entry_link(vm_map_t map, 995 vm_map_entry_t after_where, 996 vm_map_entry_t entry) 997 { 998 999 CTR4(KTR_VM, 1000 "vm_map_entry_link: map %p, nentries %d, entry %p, after %p", map, 1001 map->nentries, entry, after_where); 1002 VM_MAP_ASSERT_LOCKED(map); 1003 KASSERT(after_where->end <= entry->start, 1004 ("vm_map_entry_link: prev end %jx new start %jx overlap", 1005 (uintmax_t)after_where->end, (uintmax_t)entry->start)); 1006 KASSERT(entry->end <= after_where->next->start, 1007 ("vm_map_entry_link: new end %jx next start %jx overlap", 1008 (uintmax_t)entry->end, (uintmax_t)after_where->next->start)); 1009 1010 map->nentries++; 1011 entry->prev = after_where; 1012 entry->next = after_where->next; 1013 entry->next->prev = entry; 1014 after_where->next = entry; 1015 1016 if (after_where != &map->header) { 1017 if (after_where != map->root) 1018 vm_map_entry_splay(after_where->start, map->root); 1019 entry->right = after_where->right; 1020 entry->left = after_where; 1021 after_where->right = NULL; 1022 after_where->adj_free = entry->start - after_where->end; 1023 vm_map_entry_set_max_free(after_where); 1024 } else { 1025 entry->right = map->root; 1026 entry->left = NULL; 1027 } 1028 entry->adj_free = entry->next->start - entry->end; 1029 vm_map_entry_set_max_free(entry); 1030 map->root = entry; 1031 } 1032 1033 static void 1034 vm_map_entry_unlink(vm_map_t map, 1035 vm_map_entry_t entry) 1036 { 1037 vm_map_entry_t next, prev, root; 1038 1039 VM_MAP_ASSERT_LOCKED(map); 1040 if (entry != map->root) 1041 vm_map_entry_splay(entry->start, map->root); 1042 if (entry->left == NULL) 1043 root = entry->right; 1044 else { 1045 root = vm_map_entry_splay(entry->start, entry->left); 1046 root->right = entry->right; 1047 root->adj_free = entry->next->start - root->end; 1048 vm_map_entry_set_max_free(root); 1049 } 1050 map->root = root; 1051 1052 prev = entry->prev; 1053 next = entry->next; 1054 next->prev = prev; 1055 prev->next = next; 1056 
map->nentries--; 1057 CTR3(KTR_VM, "vm_map_entry_unlink: map %p, nentries %d, entry %p", map, 1058 map->nentries, entry); 1059 } 1060 1061 /* 1062 * vm_map_entry_resize_free: 1063 * 1064 * Recompute the amount of free space following a vm_map_entry 1065 * and propagate that value up the tree. Call this function after 1066 * resizing a map entry in-place, that is, without a call to 1067 * vm_map_entry_link() or _unlink(). 1068 * 1069 * The map must be locked, and leaves it so. 1070 */ 1071 static void 1072 vm_map_entry_resize_free(vm_map_t map, vm_map_entry_t entry) 1073 { 1074 1075 /* 1076 * Using splay trees without parent pointers, propagating 1077 * max_free up the tree is done by moving the entry to the 1078 * root and making the change there. 1079 */ 1080 if (entry != map->root) 1081 map->root = vm_map_entry_splay(entry->start, map->root); 1082 1083 entry->adj_free = entry->next->start - entry->end; 1084 vm_map_entry_set_max_free(entry); 1085 } 1086 1087 /* 1088 * vm_map_lookup_entry: [ internal use only ] 1089 * 1090 * Finds the map entry containing (or 1091 * immediately preceding) the specified address 1092 * in the given map; the entry is returned 1093 * in the "entry" parameter. The boolean 1094 * result indicates whether the address is 1095 * actually contained in the map. 1096 */ 1097 boolean_t 1098 vm_map_lookup_entry( 1099 vm_map_t map, 1100 vm_offset_t address, 1101 vm_map_entry_t *entry) /* OUT */ 1102 { 1103 vm_map_entry_t cur; 1104 boolean_t locked; 1105 1106 /* 1107 * If the map is empty, then the map entry immediately preceding 1108 * "address" is the map's header. 1109 */ 1110 cur = map->root; 1111 if (cur == NULL) 1112 *entry = &map->header; 1113 else if (address >= cur->start && cur->end > address) { 1114 *entry = cur; 1115 return (TRUE); 1116 } else if ((locked = vm_map_locked(map)) || 1117 sx_try_upgrade(&map->lock)) { 1118 /* 1119 * Splay requires a write lock on the map. 
However, it only 1120 * restructures the binary search tree; it does not otherwise 1121 * change the map. Thus, the map's timestamp need not change 1122 * on a temporary upgrade. 1123 */ 1124 map->root = cur = vm_map_entry_splay(address, cur); 1125 if (!locked) 1126 sx_downgrade(&map->lock); 1127 1128 /* 1129 * If "address" is contained within a map entry, the new root 1130 * is that map entry. Otherwise, the new root is a map entry 1131 * immediately before or after "address". 1132 */ 1133 if (address >= cur->start) { 1134 *entry = cur; 1135 if (cur->end > address) 1136 return (TRUE); 1137 } else 1138 *entry = cur->prev; 1139 } else 1140 /* 1141 * Since the map is only locked for read access, perform a 1142 * standard binary search tree lookup for "address". 1143 */ 1144 for (;;) { 1145 if (address < cur->start) { 1146 if (cur->left == NULL) { 1147 *entry = cur->prev; 1148 break; 1149 } 1150 cur = cur->left; 1151 } else if (cur->end > address) { 1152 *entry = cur; 1153 return (TRUE); 1154 } else { 1155 if (cur->right == NULL) { 1156 *entry = cur; 1157 break; 1158 } 1159 cur = cur->right; 1160 } 1161 } 1162 return (FALSE); 1163 } 1164 1165 /* 1166 * vm_map_insert: 1167 * 1168 * Inserts the given whole VM object into the target 1169 * map at the specified address range. The object's 1170 * size should match that of the address range. 1171 * 1172 * Requires that the map be locked, and leaves it so. 1173 * 1174 * If object is non-NULL, ref count must be bumped by caller 1175 * prior to making call to account for the new entry. 
1176 */ 1177 int 1178 vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1179 vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max, int cow) 1180 { 1181 vm_map_entry_t new_entry, prev_entry, temp_entry; 1182 struct ucred *cred; 1183 vm_eflags_t protoeflags; 1184 vm_inherit_t inheritance; 1185 1186 VM_MAP_ASSERT_LOCKED(map); 1187 KASSERT(object != kernel_object || 1188 (cow & MAP_COPY_ON_WRITE) == 0, 1189 ("vm_map_insert: kernel object and COW")); 1190 KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0, 1191 ("vm_map_insert: paradoxical MAP_NOFAULT request")); 1192 KASSERT((prot & ~max) == 0, 1193 ("prot %#x is not subset of max_prot %#x", prot, max)); 1194 1195 /* 1196 * Check that the start and end points are not bogus. 1197 */ 1198 if (start < vm_map_min(map) || end > vm_map_max(map) || 1199 start >= end) 1200 return (KERN_INVALID_ADDRESS); 1201 1202 /* 1203 * Find the entry prior to the proposed starting address; if it's part 1204 * of an existing entry, this range is bogus. 1205 */ 1206 if (vm_map_lookup_entry(map, start, &temp_entry)) 1207 return (KERN_NO_SPACE); 1208 1209 prev_entry = temp_entry; 1210 1211 /* 1212 * Assert that the next entry doesn't overlap the end point. 
1213 */ 1214 if (prev_entry->next->start < end) 1215 return (KERN_NO_SPACE); 1216 1217 if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL || 1218 max != VM_PROT_NONE)) 1219 return (KERN_INVALID_ARGUMENT); 1220 1221 protoeflags = 0; 1222 if (cow & MAP_COPY_ON_WRITE) 1223 protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY; 1224 if (cow & MAP_NOFAULT) 1225 protoeflags |= MAP_ENTRY_NOFAULT; 1226 if (cow & MAP_DISABLE_SYNCER) 1227 protoeflags |= MAP_ENTRY_NOSYNC; 1228 if (cow & MAP_DISABLE_COREDUMP) 1229 protoeflags |= MAP_ENTRY_NOCOREDUMP; 1230 if (cow & MAP_STACK_GROWS_DOWN) 1231 protoeflags |= MAP_ENTRY_GROWS_DOWN; 1232 if (cow & MAP_STACK_GROWS_UP) 1233 protoeflags |= MAP_ENTRY_GROWS_UP; 1234 if (cow & MAP_VN_WRITECOUNT) 1235 protoeflags |= MAP_ENTRY_VN_WRITECNT; 1236 if ((cow & MAP_CREATE_GUARD) != 0) 1237 protoeflags |= MAP_ENTRY_GUARD; 1238 if ((cow & MAP_CREATE_STACK_GAP_DN) != 0) 1239 protoeflags |= MAP_ENTRY_STACK_GAP_DN; 1240 if ((cow & MAP_CREATE_STACK_GAP_UP) != 0) 1241 protoeflags |= MAP_ENTRY_STACK_GAP_UP; 1242 if (cow & MAP_INHERIT_SHARE) 1243 inheritance = VM_INHERIT_SHARE; 1244 else 1245 inheritance = VM_INHERIT_DEFAULT; 1246 1247 cred = NULL; 1248 if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) 1249 goto charged; 1250 if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && 1251 ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { 1252 if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) 1253 return (KERN_RESOURCE_SHORTAGE); 1254 KASSERT(object == NULL || 1255 (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || 1256 object->cred == NULL, 1257 ("overcommit: vm_map_insert o %p", object)); 1258 cred = curthread->td_ucred; 1259 } 1260 1261 charged: 1262 /* Expand the kernel pmap, if necessary. 
*/ 1263 if (map == kernel_map && end > kernel_vm_end) 1264 pmap_growkernel(end); 1265 if (object != NULL) { 1266 /* 1267 * OBJ_ONEMAPPING must be cleared unless this mapping 1268 * is trivially proven to be the only mapping for any 1269 * of the object's pages. (Object granularity 1270 * reference counting is insufficient to recognize 1271 * aliases with precision.) 1272 */ 1273 VM_OBJECT_WLOCK(object); 1274 if (object->ref_count > 1 || object->shadow_count != 0) 1275 vm_object_clear_flag(object, OBJ_ONEMAPPING); 1276 VM_OBJECT_WUNLOCK(object); 1277 } else if ((prev_entry->eflags & ~MAP_ENTRY_USER_WIRED) == 1278 protoeflags && 1279 (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 && 1280 prev_entry->end == start && (prev_entry->cred == cred || 1281 (prev_entry->object.vm_object != NULL && 1282 prev_entry->object.vm_object->cred == cred)) && 1283 vm_object_coalesce(prev_entry->object.vm_object, 1284 prev_entry->offset, 1285 (vm_size_t)(prev_entry->end - prev_entry->start), 1286 (vm_size_t)(end - prev_entry->end), cred != NULL && 1287 (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) { 1288 /* 1289 * We were able to extend the object. Determine if we 1290 * can extend the previous map entry to include the 1291 * new range as well. 1292 */ 1293 if (prev_entry->inheritance == inheritance && 1294 prev_entry->protection == prot && 1295 prev_entry->max_protection == max && 1296 prev_entry->wired_count == 0) { 1297 KASSERT((prev_entry->eflags & MAP_ENTRY_USER_WIRED) == 1298 0, ("prev_entry %p has incoherent wiring", 1299 prev_entry)); 1300 if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0) 1301 map->size += end - prev_entry->end; 1302 prev_entry->end = end; 1303 vm_map_entry_resize_free(map, prev_entry); 1304 vm_map_simplify_entry(map, prev_entry); 1305 return (KERN_SUCCESS); 1306 } 1307 1308 /* 1309 * If we can extend the object but cannot extend the 1310 * map entry, we have to create a new map entry. 
We 1311 * must bump the ref count on the extended object to 1312 * account for it. object may be NULL. 1313 */ 1314 object = prev_entry->object.vm_object; 1315 offset = prev_entry->offset + 1316 (prev_entry->end - prev_entry->start); 1317 vm_object_reference(object); 1318 if (cred != NULL && object != NULL && object->cred != NULL && 1319 !(prev_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 1320 /* Object already accounts for this uid. */ 1321 cred = NULL; 1322 } 1323 } 1324 if (cred != NULL) 1325 crhold(cred); 1326 1327 /* 1328 * Create a new entry 1329 */ 1330 new_entry = vm_map_entry_create(map); 1331 new_entry->start = start; 1332 new_entry->end = end; 1333 new_entry->cred = NULL; 1334 1335 new_entry->eflags = protoeflags; 1336 new_entry->object.vm_object = object; 1337 new_entry->offset = offset; 1338 1339 new_entry->inheritance = inheritance; 1340 new_entry->protection = prot; 1341 new_entry->max_protection = max; 1342 new_entry->wired_count = 0; 1343 new_entry->wiring_thread = NULL; 1344 new_entry->read_ahead = VM_FAULT_READ_AHEAD_INIT; 1345 new_entry->next_read = start; 1346 1347 KASSERT(cred == NULL || !ENTRY_CHARGED(new_entry), 1348 ("overcommit: vm_map_insert leaks vm_map %p", new_entry)); 1349 new_entry->cred = cred; 1350 1351 /* 1352 * Insert the new entry into the list 1353 */ 1354 vm_map_entry_link(map, prev_entry, new_entry); 1355 if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0) 1356 map->size += new_entry->end - new_entry->start; 1357 1358 /* 1359 * Try to coalesce the new entry with both the previous and next 1360 * entries in the list. Previously, we only attempted to coalesce 1361 * with the previous entry when object is NULL. Here, we handle the 1362 * other cases, which are less common. 
1363 */ 1364 vm_map_simplify_entry(map, new_entry); 1365 1366 if ((cow & (MAP_PREFAULT | MAP_PREFAULT_PARTIAL)) != 0) { 1367 vm_map_pmap_enter(map, start, prot, object, OFF_TO_IDX(offset), 1368 end - start, cow & MAP_PREFAULT_PARTIAL); 1369 } 1370 1371 return (KERN_SUCCESS); 1372 } 1373 1374 /* 1375 * vm_map_findspace: 1376 * 1377 * Find the first fit (lowest VM address) for "length" free bytes 1378 * beginning at address >= start in the given map. 1379 * 1380 * In a vm_map_entry, "adj_free" is the amount of free space 1381 * adjacent (higher address) to this entry, and "max_free" is the 1382 * maximum amount of contiguous free space in its subtree. This 1383 * allows finding a free region in one path down the tree, so 1384 * O(log n) amortized with splay trees. 1385 * 1386 * The map must be locked, and leaves it so. 1387 * 1388 * Returns: 0 on success, and starting address in *addr, 1389 * 1 if insufficient space. 1390 */ 1391 int 1392 vm_map_findspace(vm_map_t map, vm_offset_t start, vm_size_t length, 1393 vm_offset_t *addr) /* OUT */ 1394 { 1395 vm_map_entry_t entry; 1396 vm_offset_t st; 1397 1398 /* 1399 * Request must fit within min/max VM address and must avoid 1400 * address wrap. 1401 */ 1402 start = MAX(start, vm_map_min(map)); 1403 if (start + length > vm_map_max(map) || start + length < start) 1404 return (1); 1405 1406 /* Empty tree means wide open address space. */ 1407 if (map->root == NULL) { 1408 *addr = start; 1409 return (0); 1410 } 1411 1412 /* 1413 * After splay, if start comes before root node, then there 1414 * must be a gap from start to the root. 1415 */ 1416 map->root = vm_map_entry_splay(start, map->root); 1417 if (start + length <= map->root->start) { 1418 *addr = start; 1419 return (0); 1420 } 1421 1422 /* 1423 * Root is the last node that might begin its gap before 1424 * start, and this is the last comparison where address 1425 * wrap might be a problem. 1426 */ 1427 st = (start > map->root->end) ? 
start : map->root->end; 1428 if (length <= map->root->end + map->root->adj_free - st) { 1429 *addr = st; 1430 return (0); 1431 } 1432 1433 /* With max_free, can immediately tell if no solution. */ 1434 entry = map->root->right; 1435 if (entry == NULL || length > entry->max_free) 1436 return (1); 1437 1438 /* 1439 * Search the right subtree in the order: left subtree, root, 1440 * right subtree (first fit). The previous splay implies that 1441 * all regions in the right subtree have addresses > start. 1442 */ 1443 while (entry != NULL) { 1444 if (entry->left != NULL && entry->left->max_free >= length) 1445 entry = entry->left; 1446 else if (entry->adj_free >= length) { 1447 *addr = entry->end; 1448 return (0); 1449 } else 1450 entry = entry->right; 1451 } 1452 1453 /* Can't get here, so panic if we do. */ 1454 panic("vm_map_findspace: max_free corrupt"); 1455 } 1456 1457 int 1458 vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1459 vm_offset_t start, vm_size_t length, vm_prot_t prot, 1460 vm_prot_t max, int cow) 1461 { 1462 vm_offset_t end; 1463 int result; 1464 1465 end = start + length; 1466 KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || 1467 object == NULL, 1468 ("vm_map_fixed: non-NULL backing object for stack")); 1469 vm_map_lock(map); 1470 VM_MAP_RANGE_CHECK(map, start, end); 1471 if ((cow & MAP_CHECK_EXCL) == 0) 1472 vm_map_delete(map, start, end); 1473 if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { 1474 result = vm_map_stack_locked(map, start, length, sgrowsiz, 1475 prot, max, cow); 1476 } else { 1477 result = vm_map_insert(map, object, offset, start, end, 1478 prot, max, cow); 1479 } 1480 vm_map_unlock(map); 1481 return (result); 1482 } 1483 1484 static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; 1485 static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; 1486 1487 static int cluster_anon = 1; 1488 SYSCTL_INT(_vm, OID_AUTO, cluster_anon, CTLFLAG_RW, 1489 &cluster_anon, 0, 1490 "Cluster anonymous 
mappings: 0 = no, 1 = yes if no hint, 2 = always"); 1491 1492 static bool 1493 clustering_anon_allowed(vm_offset_t addr) 1494 { 1495 1496 switch (cluster_anon) { 1497 case 0: 1498 return (false); 1499 case 1: 1500 return (addr == 0); 1501 case 2: 1502 default: 1503 return (true); 1504 } 1505 } 1506 1507 static long aslr_restarts; 1508 SYSCTL_LONG(_vm, OID_AUTO, aslr_restarts, CTLFLAG_RD, 1509 &aslr_restarts, 0, 1510 "Number of aslr failures"); 1511 1512 #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) 1513 1514 /* 1515 * Searches for the specified amount of free space in the given map with the 1516 * specified alignment. Performs an address-ordered, first-fit search from 1517 * the given address "*addr", with an optional upper bound "max_addr". If the 1518 * parameter "alignment" is zero, then the alignment is computed from the 1519 * given (object, offset) pair so as to enable the greatest possible use of 1520 * superpage mappings. Returns KERN_SUCCESS and the address of the free space 1521 * in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. 1522 * 1523 * The map must be locked. Initially, there must be at least "length" bytes 1524 * of free space at the given address. 1525 */ 1526 static int 1527 vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1528 vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, 1529 vm_offset_t alignment) 1530 { 1531 vm_offset_t aligned_addr, free_addr; 1532 1533 VM_MAP_ASSERT_LOCKED(map); 1534 free_addr = *addr; 1535 KASSERT(!vm_map_findspace(map, free_addr, length, addr) && 1536 free_addr == *addr, ("caller provided insufficient free space")); 1537 for (;;) { 1538 /* 1539 * At the start of every iteration, the free space at address 1540 * "*addr" is at least "length" bytes. 
1541 */ 1542 if (alignment == 0) 1543 pmap_align_superpage(object, offset, addr, length); 1544 else if ((*addr & (alignment - 1)) != 0) { 1545 *addr &= ~(alignment - 1); 1546 *addr += alignment; 1547 } 1548 aligned_addr = *addr; 1549 if (aligned_addr == free_addr) { 1550 /* 1551 * Alignment did not change "*addr", so "*addr" must 1552 * still provide sufficient free space. 1553 */ 1554 return (KERN_SUCCESS); 1555 } 1556 1557 /* 1558 * Test for address wrap on "*addr". A wrapped "*addr" could 1559 * be a valid address, in which case vm_map_findspace() cannot 1560 * be relied upon to fail. 1561 */ 1562 if (aligned_addr < free_addr || 1563 vm_map_findspace(map, aligned_addr, length, addr) || 1564 (max_addr != 0 && *addr + length > max_addr)) 1565 return (KERN_NO_SPACE); 1566 free_addr = *addr; 1567 if (free_addr == aligned_addr) { 1568 /* 1569 * If a successful call to vm_map_findspace() did not 1570 * change "*addr", then "*addr" must still be aligned 1571 * and provide sufficient free space. 1572 */ 1573 return (KERN_SUCCESS); 1574 } 1575 } 1576 } 1577 1578 /* 1579 * vm_map_find finds an unallocated region in the target address 1580 * map with the given length. The search is defined to be 1581 * first-fit from the specified address; the region found is 1582 * returned in the same parameter. 1583 * 1584 * If object is non-NULL, ref count must be bumped by caller 1585 * prior to making call to account for the new entry. 
1586 */ 1587 int 1588 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1589 vm_offset_t *addr, /* IN/OUT */ 1590 vm_size_t length, vm_offset_t max_addr, int find_space, 1591 vm_prot_t prot, vm_prot_t max, int cow) 1592 { 1593 vm_offset_t alignment, curr_min_addr, min_addr; 1594 int gap, pidx, rv, try; 1595 bool cluster, en_aslr, update_anon; 1596 1597 KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || 1598 object == NULL, 1599 ("vm_map_find: non-NULL backing object for stack")); 1600 MPASS((cow & MAP_REMAP) == 0 || (find_space == VMFS_NO_SPACE && 1601 (cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0)); 1602 if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || 1603 (object->flags & OBJ_COLORED) == 0)) 1604 find_space = VMFS_ANY_SPACE; 1605 if (find_space >> 8 != 0) { 1606 KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); 1607 alignment = (vm_offset_t)1 << (find_space >> 8); 1608 } else 1609 alignment = 0; 1610 en_aslr = (map->flags & MAP_ASLR) != 0; 1611 update_anon = cluster = clustering_anon_allowed(*addr) && 1612 (map->flags & MAP_IS_SUB_MAP) == 0 && max_addr == 0 && 1613 find_space != VMFS_NO_SPACE && object == NULL && 1614 (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | 1615 MAP_STACK_GROWS_DOWN)) == 0 && prot != PROT_NONE; 1616 curr_min_addr = min_addr = *addr; 1617 if (en_aslr && min_addr == 0 && !cluster && 1618 find_space != VMFS_NO_SPACE && 1619 (map->flags & MAP_ASLR_IGNSTART) != 0) 1620 curr_min_addr = min_addr = vm_map_min(map); 1621 try = 0; 1622 vm_map_lock(map); 1623 if (cluster) { 1624 curr_min_addr = map->anon_loc; 1625 if (curr_min_addr == 0) 1626 cluster = false; 1627 } 1628 if (find_space != VMFS_NO_SPACE) { 1629 KASSERT(find_space == VMFS_ANY_SPACE || 1630 find_space == VMFS_OPTIMAL_SPACE || 1631 find_space == VMFS_SUPER_SPACE || 1632 alignment != 0, ("unexpected VMFS flag")); 1633 again: 1634 /* 1635 * When creating an anonymous mapping, try clustering 1636 * with an existing anonymous 
mapping first. 1637 * 1638 * We make up to two attempts to find address space 1639 * for a given find_space value. The first attempt may 1640 * apply randomization or may cluster with an existing 1641 * anonymous mapping. If this first attempt fails, 1642 * perform a first-fit search of the available address 1643 * space. 1644 * 1645 * If all tries failed, and find_space is 1646 * VMFS_OPTIMAL_SPACE, fallback to VMFS_ANY_SPACE. 1647 * Again enable clustering and randomization. 1648 */ 1649 try++; 1650 MPASS(try <= 2); 1651 1652 if (try == 2) { 1653 /* 1654 * Second try: we failed either to find a 1655 * suitable region for randomizing the 1656 * allocation, or to cluster with an existing 1657 * mapping. Retry with free run. 1658 */ 1659 curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? 1660 vm_map_min(map) : min_addr; 1661 atomic_add_long(&aslr_restarts, 1); 1662 } 1663 1664 if (try == 1 && en_aslr && !cluster) { 1665 /* 1666 * Find space for allocation, including 1667 * gap needed for later randomization. 1668 */ 1669 pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && 1670 (find_space == VMFS_SUPER_SPACE || find_space == 1671 VMFS_OPTIMAL_SPACE) ? 1 : 0; 1672 gap = vm_map_max(map) > MAP_32BIT_MAX_ADDR && 1673 (max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? 1674 aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; 1675 if (vm_map_findspace(map, curr_min_addr, length + 1676 gap * pagesizes[pidx], addr) || 1677 (max_addr != 0 && *addr + length > max_addr)) 1678 goto again; 1679 /* And randomize the start address. 
*/ 1680 *addr += (arc4random() % gap) * pagesizes[pidx]; 1681 } else if (vm_map_findspace(map, curr_min_addr, length, addr) || 1682 (max_addr != 0 && *addr + length > max_addr)) { 1683 if (cluster) { 1684 cluster = false; 1685 MPASS(try == 1); 1686 goto again; 1687 } 1688 rv = KERN_NO_SPACE; 1689 goto done; 1690 } 1691 1692 if (find_space != VMFS_ANY_SPACE && 1693 (rv = vm_map_alignspace(map, object, offset, addr, length, 1694 max_addr, alignment)) != KERN_SUCCESS) { 1695 if (find_space == VMFS_OPTIMAL_SPACE) { 1696 find_space = VMFS_ANY_SPACE; 1697 curr_min_addr = min_addr; 1698 cluster = update_anon; 1699 try = 0; 1700 goto again; 1701 } 1702 goto done; 1703 } 1704 } else if ((cow & MAP_REMAP) != 0) { 1705 if (*addr < vm_map_min(map) || 1706 *addr + length > vm_map_max(map) || 1707 *addr + length <= length) { 1708 rv = KERN_INVALID_ADDRESS; 1709 goto done; 1710 } 1711 vm_map_delete(map, *addr, *addr + length); 1712 } 1713 if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { 1714 rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, 1715 max, cow); 1716 } else { 1717 rv = vm_map_insert(map, object, offset, *addr, *addr + length, 1718 prot, max, cow); 1719 } 1720 if (rv == KERN_SUCCESS && update_anon) 1721 map->anon_loc = *addr + length; 1722 done: 1723 vm_map_unlock(map); 1724 return (rv); 1725 } 1726 1727 /* 1728 * vm_map_find_min() is a variant of vm_map_find() that takes an 1729 * additional parameter (min_addr) and treats the given address 1730 * (*addr) differently. Specifically, it treats *addr as a hint 1731 * and not as the minimum address where the mapping is created. 1732 * 1733 * This function works in two phases. First, it tries to 1734 * allocate above the hint. If that fails and the hint is 1735 * greater than min_addr, it performs a second pass, replacing 1736 * the hint with min_addr as the minimum address for the 1737 * allocation. 
1738 */ 1739 int 1740 vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 1741 vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, 1742 vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, 1743 int cow) 1744 { 1745 vm_offset_t hint; 1746 int rv; 1747 1748 hint = *addr; 1749 for (;;) { 1750 rv = vm_map_find(map, object, offset, addr, length, max_addr, 1751 find_space, prot, max, cow); 1752 if (rv == KERN_SUCCESS || min_addr >= hint) 1753 return (rv); 1754 *addr = hint = min_addr; 1755 } 1756 } 1757 1758 /* 1759 * A map entry with any of the following flags set must not be merged with 1760 * another entry. 1761 */ 1762 #define MAP_ENTRY_NOMERGE_MASK (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP | \ 1763 MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_IS_SUB_MAP) 1764 1765 static bool 1766 vm_map_mergeable_neighbors(vm_map_entry_t prev, vm_map_entry_t entry) 1767 { 1768 1769 KASSERT((prev->eflags & MAP_ENTRY_NOMERGE_MASK) == 0 || 1770 (entry->eflags & MAP_ENTRY_NOMERGE_MASK) == 0, 1771 ("vm_map_mergeable_neighbors: neither %p nor %p are mergeable", 1772 prev, entry)); 1773 return (prev->end == entry->start && 1774 prev->object.vm_object == entry->object.vm_object && 1775 (prev->object.vm_object == NULL || 1776 prev->offset + (prev->end - prev->start) == entry->offset) && 1777 prev->eflags == entry->eflags && 1778 prev->protection == entry->protection && 1779 prev->max_protection == entry->max_protection && 1780 prev->inheritance == entry->inheritance && 1781 prev->wired_count == entry->wired_count && 1782 prev->cred == entry->cred); 1783 } 1784 1785 static void 1786 vm_map_merged_neighbor_dispose(vm_map_t map, vm_map_entry_t entry) 1787 { 1788 1789 /* 1790 * If the backing object is a vnode object, vm_object_deallocate() 1791 * calls vrele(). However, vrele() does not lock the vnode because 1792 * the vnode has additional references. Thus, the map lock can be 1793 * kept without causing a lock-order reversal with the vnode lock. 
1794 * 1795 * Since we count the number of virtual page mappings in 1796 * object->un_pager.vnp.writemappings, the writemappings value 1797 * should not be adjusted when the entry is disposed of. 1798 */ 1799 if (entry->object.vm_object != NULL) 1800 vm_object_deallocate(entry->object.vm_object); 1801 if (entry->cred != NULL) 1802 crfree(entry->cred); 1803 vm_map_entry_dispose(map, entry); 1804 } 1805 1806 /* 1807 * vm_map_simplify_entry: 1808 * 1809 * Simplify the given map entry by merging with either neighbor. This 1810 * routine also has the ability to merge with both neighbors. 1811 * 1812 * The map must be locked. 1813 * 1814 * This routine guarantees that the passed entry remains valid (though 1815 * possibly extended). When merging, this routine may delete one or 1816 * both neighbors. 1817 */ 1818 void 1819 vm_map_simplify_entry(vm_map_t map, vm_map_entry_t entry) 1820 { 1821 vm_map_entry_t next, prev; 1822 1823 if ((entry->eflags & MAP_ENTRY_NOMERGE_MASK) != 0) 1824 return; 1825 prev = entry->prev; 1826 if (vm_map_mergeable_neighbors(prev, entry)) { 1827 vm_map_entry_unlink(map, prev); 1828 entry->start = prev->start; 1829 entry->offset = prev->offset; 1830 if (entry->prev != &map->header) 1831 vm_map_entry_resize_free(map, entry->prev); 1832 vm_map_merged_neighbor_dispose(map, prev); 1833 } 1834 next = entry->next; 1835 if (vm_map_mergeable_neighbors(entry, next)) { 1836 vm_map_entry_unlink(map, next); 1837 entry->end = next->end; 1838 vm_map_entry_resize_free(map, entry); 1839 vm_map_merged_neighbor_dispose(map, next); 1840 } 1841 } 1842 1843 /* 1844 * vm_map_clip_start: [ internal use only ] 1845 * 1846 * Asserts that the given entry begins at or after 1847 * the specified address; if necessary, 1848 * it splits the entry into two. 
1849 */ 1850 #define vm_map_clip_start(map, entry, startaddr) \ 1851 { \ 1852 if (startaddr > entry->start) \ 1853 _vm_map_clip_start(map, entry, startaddr); \ 1854 } 1855 1856 /* 1857 * This routine is called only when it is known that 1858 * the entry must be split. 1859 */ 1860 static void 1861 _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start) 1862 { 1863 vm_map_entry_t new_entry; 1864 1865 VM_MAP_ASSERT_LOCKED(map); 1866 KASSERT(entry->end > start && entry->start < start, 1867 ("_vm_map_clip_start: invalid clip of entry %p", entry)); 1868 1869 /* 1870 * Split off the front portion -- note that we must insert the new 1871 * entry BEFORE this one, so that this entry has the specified 1872 * starting address. 1873 */ 1874 vm_map_simplify_entry(map, entry); 1875 1876 /* 1877 * If there is no object backing this entry, we might as well create 1878 * one now. If we defer it, an object can get created after the map 1879 * is clipped, and individual objects will be created for the split-up 1880 * map. This is a bit of a hack, but is also about the best place to 1881 * put this improvement. 
1882 */ 1883 if (entry->object.vm_object == NULL && !map->system_map && 1884 (entry->eflags & MAP_ENTRY_GUARD) == 0) { 1885 vm_object_t object; 1886 object = vm_object_allocate(OBJT_DEFAULT, 1887 atop(entry->end - entry->start)); 1888 entry->object.vm_object = object; 1889 entry->offset = 0; 1890 if (entry->cred != NULL) { 1891 object->cred = entry->cred; 1892 object->charge = entry->end - entry->start; 1893 entry->cred = NULL; 1894 } 1895 } else if (entry->object.vm_object != NULL && 1896 ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 1897 entry->cred != NULL) { 1898 VM_OBJECT_WLOCK(entry->object.vm_object); 1899 KASSERT(entry->object.vm_object->cred == NULL, 1900 ("OVERCOMMIT: vm_entry_clip_start: both cred e %p", entry)); 1901 entry->object.vm_object->cred = entry->cred; 1902 entry->object.vm_object->charge = entry->end - entry->start; 1903 VM_OBJECT_WUNLOCK(entry->object.vm_object); 1904 entry->cred = NULL; 1905 } 1906 1907 new_entry = vm_map_entry_create(map); 1908 *new_entry = *entry; 1909 1910 new_entry->end = start; 1911 entry->offset += (start - entry->start); 1912 entry->start = start; 1913 if (new_entry->cred != NULL) 1914 crhold(entry->cred); 1915 1916 vm_map_entry_link(map, entry->prev, new_entry); 1917 1918 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 1919 vm_object_reference(new_entry->object.vm_object); 1920 /* 1921 * The object->un_pager.vnp.writemappings for the 1922 * object of MAP_ENTRY_VN_WRITECNT type entry shall be 1923 * kept as is here. The virtual pages are 1924 * re-distributed among the clipped entries, so the sum is 1925 * left the same. 1926 */ 1927 } 1928 } 1929 1930 /* 1931 * vm_map_clip_end: [ internal use only ] 1932 * 1933 * Asserts that the given entry ends at or before 1934 * the specified address; if necessary, 1935 * it splits the entry into two. 
1936 */ 1937 #define vm_map_clip_end(map, entry, endaddr) \ 1938 { \ 1939 if ((endaddr) < (entry->end)) \ 1940 _vm_map_clip_end((map), (entry), (endaddr)); \ 1941 } 1942 1943 /* 1944 * This routine is called only when it is known that 1945 * the entry must be split. 1946 */ 1947 static void 1948 _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end) 1949 { 1950 vm_map_entry_t new_entry; 1951 1952 VM_MAP_ASSERT_LOCKED(map); 1953 KASSERT(entry->start < end && entry->end > end, 1954 ("_vm_map_clip_end: invalid clip of entry %p", entry)); 1955 1956 /* 1957 * If there is no object backing this entry, we might as well create 1958 * one now. If we defer it, an object can get created after the map 1959 * is clipped, and individual objects will be created for the split-up 1960 * map. This is a bit of a hack, but is also about the best place to 1961 * put this improvement. 1962 */ 1963 if (entry->object.vm_object == NULL && !map->system_map && 1964 (entry->eflags & MAP_ENTRY_GUARD) == 0) { 1965 vm_object_t object; 1966 object = vm_object_allocate(OBJT_DEFAULT, 1967 atop(entry->end - entry->start)); 1968 entry->object.vm_object = object; 1969 entry->offset = 0; 1970 if (entry->cred != NULL) { 1971 object->cred = entry->cred; 1972 object->charge = entry->end - entry->start; 1973 entry->cred = NULL; 1974 } 1975 } else if (entry->object.vm_object != NULL && 1976 ((entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) && 1977 entry->cred != NULL) { 1978 VM_OBJECT_WLOCK(entry->object.vm_object); 1979 KASSERT(entry->object.vm_object->cred == NULL, 1980 ("OVERCOMMIT: vm_entry_clip_end: both cred e %p", entry)); 1981 entry->object.vm_object->cred = entry->cred; 1982 entry->object.vm_object->charge = entry->end - entry->start; 1983 VM_OBJECT_WUNLOCK(entry->object.vm_object); 1984 entry->cred = NULL; 1985 } 1986 1987 /* 1988 * Create a new entry and insert it AFTER the specified entry 1989 */ 1990 new_entry = vm_map_entry_create(map); 1991 *new_entry = *entry; 1992 1993 
new_entry->start = entry->end = end; 1994 new_entry->offset += (end - entry->start); 1995 if (new_entry->cred != NULL) 1996 crhold(entry->cred); 1997 1998 vm_map_entry_link(map, entry, new_entry); 1999 2000 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { 2001 vm_object_reference(new_entry->object.vm_object); 2002 } 2003 } 2004 2005 /* 2006 * vm_map_submap: [ kernel use only ] 2007 * 2008 * Mark the given range as handled by a subordinate map. 2009 * 2010 * This range must have been created with vm_map_find, 2011 * and no other operations may have been performed on this 2012 * range prior to calling vm_map_submap. 2013 * 2014 * Only a limited number of operations can be performed 2015 * within this rage after calling vm_map_submap: 2016 * vm_fault 2017 * [Don't try vm_map_copy!] 2018 * 2019 * To remove a submapping, one must first remove the 2020 * range from the superior map, and then destroy the 2021 * submap (if desired). [Better yet, don't try it.] 2022 */ 2023 int 2024 vm_map_submap( 2025 vm_map_t map, 2026 vm_offset_t start, 2027 vm_offset_t end, 2028 vm_map_t submap) 2029 { 2030 vm_map_entry_t entry; 2031 int result; 2032 2033 result = KERN_INVALID_ARGUMENT; 2034 2035 vm_map_lock(submap); 2036 submap->flags |= MAP_IS_SUB_MAP; 2037 vm_map_unlock(submap); 2038 2039 vm_map_lock(map); 2040 2041 VM_MAP_RANGE_CHECK(map, start, end); 2042 2043 if (vm_map_lookup_entry(map, start, &entry)) { 2044 vm_map_clip_start(map, entry, start); 2045 } else 2046 entry = entry->next; 2047 2048 vm_map_clip_end(map, entry, end); 2049 2050 if ((entry->start == start) && (entry->end == end) && 2051 ((entry->eflags & MAP_ENTRY_COW) == 0) && 2052 (entry->object.vm_object == NULL)) { 2053 entry->object.sub_map = submap; 2054 entry->eflags |= MAP_ENTRY_IS_SUB_MAP; 2055 result = KERN_SUCCESS; 2056 } 2057 vm_map_unlock(map); 2058 2059 if (result != KERN_SUCCESS) { 2060 vm_map_lock(submap); 2061 submap->flags &= ~MAP_IS_SUB_MAP; 2062 vm_map_unlock(submap); 2063 } 2064 return (result); 
/*
 * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified
 */
#define	MAX_INIT_PT	96

/*
 *	vm_map_pmap_enter:
 *
 *	Preload the specified map's pmap with mappings to the specified
 *	object's memory-resident pages.  No further physical pages are
 *	allocated, and no further virtual pages are retrieved from secondary
 *	storage.  If the specified flags include MAP_PREFAULT_PARTIAL, then a
 *	limited number of page mappings are created at the low-end of the
 *	specified address range.  (For this purpose, a superpage mapping
 *	counts as one page mapping.)  Otherwise, all resident pages within
 *	the specified address range are mapped.
 *
 *	"addr" is the virtual address that corresponds to page index
 *	"pindex" within "object", and "size" is the length of the range in
 *	bytes.  Nothing is done if "prot" grants neither read nor execute
 *	access or if "object" is NULL.  If "flags" includes
 *	MAP_PREFAULT_MADVISE, the scan stops as soon as
 *	vm_page_count_severe() reports true.
 */
static void
vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
    vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags)
{
	vm_offset_t start;
	vm_page_t p, p_start;
	vm_pindex_t mask, psize, threshold, tmpidx;

	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 || object == NULL)
		return;
	VM_OBJECT_RLOCK(object);
	if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
		/*
		 * Trade the read lock for a write lock before calling
		 * pmap_object_init_pt().  The object is unlocked in between,
		 * so its type is tested again once the write lock is held.
		 */
		VM_OBJECT_RUNLOCK(object);
		VM_OBJECT_WLOCK(object);
		if (object->type == OBJT_DEVICE || object->type == OBJT_SG) {
			pmap_object_init_pt(map->pmap, addr, object, pindex,
			    size);
			VM_OBJECT_WUNLOCK(object);
			return;
		}
		/* No longer a device/SG object: continue read-locked. */
		VM_OBJECT_LOCK_DOWNGRADE(object);
	}

	/*
	 * Clamp the number of pages to scan so that the range does not
	 * extend beyond the end of the object.
	 */
	psize = atop(size);
	if (psize + pindex > object->size) {
		if (object->size < pindex) {
			VM_OBJECT_RUNLOCK(object);
			return;
		}
		psize = object->size - pindex;
	}

	/*
	 * "p_start" and "start" describe the run of consecutive fully valid
	 * pages that is currently being accumulated: its first page and the
	 * virtual address of that page.  "p_start" is NULL when no run is
	 * open.
	 */
	start = 0;
	p_start = NULL;
	threshold = MAX_INIT_PT;

	p = vm_page_find_least(object, pindex);
	/*
	 * Assert: the variable p is either (1) the page with the
	 * least pindex greater than or equal to the parameter pindex
	 * or (2) NULL.
	 */
	for (;
	     p != NULL && (tmpidx = p->pindex - pindex) < psize;
	     p = TAILQ_NEXT(p, listq)) {
		/*
		 * don't allow an madvise to blow away our really
		 * free pages allocating pv entries.
		 *
		 * Shrinking psize to tmpidx makes the final
		 * pmap_enter_object() call below end the open run at the
		 * current page.
		 */
		if (((flags & MAP_PREFAULT_MADVISE) != 0 &&
		    vm_page_count_severe()) ||
		    ((flags & MAP_PREFAULT_PARTIAL) != 0 &&
		    tmpidx >= threshold)) {
			psize = tmpidx;
			break;
		}
		if (p->valid == VM_PAGE_BITS_ALL) {
			/* Open a new run if none is in progress. */
			if (p_start == NULL) {
				start = addr + ptoa(tmpidx);
				p_start = p;
			}
			/* Jump ahead if a superpage mapping is possible. */
			if (p->psind > 0 && ((addr + ptoa(tmpidx)) &
			    (pagesizes[p->psind] - 1)) == 0) {
				mask = atop(pagesizes[p->psind]) - 1;
				if (tmpidx + mask < psize &&
				    vm_page_ps_test(p, PS_ALL_VALID, NULL)) {
					/*
					 * Step p to the last page of the
					 * superpage so that TAILQ_NEXT()
					 * resumes after it, and raise the
					 * threshold so that the superpage
					 * counts as a single mapping.
					 * NOTE(review): "p += mask" assumes
					 * the constituent vm_page structures
					 * are contiguous in memory -- confirm.
					 */
					p += mask;
					threshold += mask;
				}
			}
		} else if (p_start != NULL) {
			/*
			 * A page that is not fully valid ends the open run;
			 * enter the mappings accumulated so far.
			 */
			pmap_enter_object(map->pmap, start, addr +
			    ptoa(tmpidx), p_start, prot);
			p_start = NULL;
		}
	}
	/* Enter the run that was still open when the scan ended. */
	if (p_start != NULL)
		pmap_enter_object(map->pmap, start, addr + ptoa(psize),
		    p_start, prot);
	VM_OBJECT_RUNLOCK(object);
}
vm_map_wire() may 2191 * need to fault pages into the map and will drop the map lock while 2192 * doing so, and the VM object may end up in an inconsistent state if we 2193 * update the protection on the map entry in between faults. 2194 */ 2195 vm_map_wait_busy(map); 2196 2197 VM_MAP_RANGE_CHECK(map, start, end); 2198 2199 if (vm_map_lookup_entry(map, start, &entry)) { 2200 vm_map_clip_start(map, entry, start); 2201 } else { 2202 entry = entry->next; 2203 } 2204 2205 /* 2206 * Make a first pass to check for protection violations. 2207 */ 2208 for (current = entry; current->start < end; current = current->next) { 2209 if ((current->eflags & MAP_ENTRY_GUARD) != 0) 2210 continue; 2211 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 2212 vm_map_unlock(map); 2213 return (KERN_INVALID_ARGUMENT); 2214 } 2215 if ((new_prot & current->max_protection) != new_prot) { 2216 vm_map_unlock(map); 2217 return (KERN_PROTECTION_FAILURE); 2218 } 2219 } 2220 2221 /* 2222 * Do an accounting pass for private read-only mappings that 2223 * now will do cow due to allowed write (e.g. 
debugger sets 2224 * breakpoint on text segment) 2225 */ 2226 for (current = entry; current->start < end; current = current->next) { 2227 2228 vm_map_clip_end(map, current, end); 2229 2230 if (set_max || 2231 ((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 || 2232 ENTRY_CHARGED(current) || 2233 (current->eflags & MAP_ENTRY_GUARD) != 0) { 2234 continue; 2235 } 2236 2237 cred = curthread->td_ucred; 2238 obj = current->object.vm_object; 2239 2240 if (obj == NULL || (current->eflags & MAP_ENTRY_NEEDS_COPY)) { 2241 if (!swap_reserve(current->end - current->start)) { 2242 vm_map_unlock(map); 2243 return (KERN_RESOURCE_SHORTAGE); 2244 } 2245 crhold(cred); 2246 current->cred = cred; 2247 continue; 2248 } 2249 2250 VM_OBJECT_WLOCK(obj); 2251 if (obj->type != OBJT_DEFAULT && obj->type != OBJT_SWAP) { 2252 VM_OBJECT_WUNLOCK(obj); 2253 continue; 2254 } 2255 2256 /* 2257 * Charge for the whole object allocation now, since 2258 * we cannot distinguish between non-charged and 2259 * charged clipped mapping of the same object later. 2260 */ 2261 KASSERT(obj->charge == 0, 2262 ("vm_map_protect: object %p overcharged (entry %p)", 2263 obj, current)); 2264 if (!swap_reserve(ptoa(obj->size))) { 2265 VM_OBJECT_WUNLOCK(obj); 2266 vm_map_unlock(map); 2267 return (KERN_RESOURCE_SHORTAGE); 2268 } 2269 2270 crhold(cred); 2271 obj->cred = cred; 2272 obj->charge = ptoa(obj->size); 2273 VM_OBJECT_WUNLOCK(obj); 2274 } 2275 2276 /* 2277 * Go back and fix up protections. [Note that clipping is not 2278 * necessary the second time.] 
2279 */ 2280 for (current = entry; current->start < end; current = current->next) { 2281 if ((current->eflags & MAP_ENTRY_GUARD) != 0) 2282 continue; 2283 2284 old_prot = current->protection; 2285 2286 if (set_max) 2287 current->protection = 2288 (current->max_protection = new_prot) & 2289 old_prot; 2290 else 2291 current->protection = new_prot; 2292 2293 /* 2294 * For user wired map entries, the normal lazy evaluation of 2295 * write access upgrades through soft page faults is 2296 * undesirable. Instead, immediately copy any pages that are 2297 * copy-on-write and enable write access in the physical map. 2298 */ 2299 if ((current->eflags & MAP_ENTRY_USER_WIRED) != 0 && 2300 (current->protection & VM_PROT_WRITE) != 0 && 2301 (old_prot & VM_PROT_WRITE) == 0) 2302 vm_fault_copy_entry(map, map, current, current, NULL); 2303 2304 /* 2305 * When restricting access, update the physical map. Worry 2306 * about copy-on-write here. 2307 */ 2308 if ((old_prot & ~current->protection) != 0) { 2309 #define MASK(entry) (((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \ 2310 VM_PROT_ALL) 2311 pmap_protect(map->pmap, current->start, 2312 current->end, 2313 current->protection & MASK(current)); 2314 #undef MASK 2315 } 2316 vm_map_simplify_entry(map, current); 2317 } 2318 vm_map_unlock(map); 2319 return (KERN_SUCCESS); 2320 } 2321 2322 /* 2323 * vm_map_madvise: 2324 * 2325 * This routine traverses a processes map handling the madvise 2326 * system call. Advisories are classified as either those effecting 2327 * the vm_map_entry structure, or those effecting the underlying 2328 * objects. 2329 */ 2330 int 2331 vm_map_madvise( 2332 vm_map_t map, 2333 vm_offset_t start, 2334 vm_offset_t end, 2335 int behav) 2336 { 2337 vm_map_entry_t current, entry; 2338 bool modify_map; 2339 2340 /* 2341 * Some madvise calls directly modify the vm_map_entry, in which case 2342 * we need to use an exclusive lock on the map and we need to perform 2343 * various clipping operations. 
Otherwise we only need a read-lock 2344 * on the map. 2345 */ 2346 switch(behav) { 2347 case MADV_NORMAL: 2348 case MADV_SEQUENTIAL: 2349 case MADV_RANDOM: 2350 case MADV_NOSYNC: 2351 case MADV_AUTOSYNC: 2352 case MADV_NOCORE: 2353 case MADV_CORE: 2354 if (start == end) 2355 return (0); 2356 modify_map = true; 2357 vm_map_lock(map); 2358 break; 2359 case MADV_WILLNEED: 2360 case MADV_DONTNEED: 2361 case MADV_FREE: 2362 if (start == end) 2363 return (0); 2364 modify_map = false; 2365 vm_map_lock_read(map); 2366 break; 2367 default: 2368 return (EINVAL); 2369 } 2370 2371 /* 2372 * Locate starting entry and clip if necessary. 2373 */ 2374 VM_MAP_RANGE_CHECK(map, start, end); 2375 2376 if (vm_map_lookup_entry(map, start, &entry)) { 2377 if (modify_map) 2378 vm_map_clip_start(map, entry, start); 2379 } else { 2380 entry = entry->next; 2381 } 2382 2383 if (modify_map) { 2384 /* 2385 * madvise behaviors that are implemented in the vm_map_entry. 2386 * 2387 * We clip the vm_map_entry so that behavioral changes are 2388 * limited to the specified address range. 
2389 */ 2390 for (current = entry; current->start < end; 2391 current = current->next) { 2392 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 2393 continue; 2394 2395 vm_map_clip_end(map, current, end); 2396 2397 switch (behav) { 2398 case MADV_NORMAL: 2399 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_NORMAL); 2400 break; 2401 case MADV_SEQUENTIAL: 2402 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_SEQUENTIAL); 2403 break; 2404 case MADV_RANDOM: 2405 vm_map_entry_set_behavior(current, MAP_ENTRY_BEHAV_RANDOM); 2406 break; 2407 case MADV_NOSYNC: 2408 current->eflags |= MAP_ENTRY_NOSYNC; 2409 break; 2410 case MADV_AUTOSYNC: 2411 current->eflags &= ~MAP_ENTRY_NOSYNC; 2412 break; 2413 case MADV_NOCORE: 2414 current->eflags |= MAP_ENTRY_NOCOREDUMP; 2415 break; 2416 case MADV_CORE: 2417 current->eflags &= ~MAP_ENTRY_NOCOREDUMP; 2418 break; 2419 default: 2420 break; 2421 } 2422 vm_map_simplify_entry(map, current); 2423 } 2424 vm_map_unlock(map); 2425 } else { 2426 vm_pindex_t pstart, pend; 2427 2428 /* 2429 * madvise behaviors that are implemented in the underlying 2430 * vm_object. 2431 * 2432 * Since we don't clip the vm_map_entry, we have to clip 2433 * the vm_object pindex and count. 2434 */ 2435 for (current = entry; current->start < end; 2436 current = current->next) { 2437 vm_offset_t useEnd, useStart; 2438 2439 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) 2440 continue; 2441 2442 pstart = OFF_TO_IDX(current->offset); 2443 pend = pstart + atop(current->end - current->start); 2444 useStart = current->start; 2445 useEnd = current->end; 2446 2447 if (current->start < start) { 2448 pstart += atop(start - current->start); 2449 useStart = start; 2450 } 2451 if (current->end > end) { 2452 pend -= atop(current->end - end); 2453 useEnd = end; 2454 } 2455 2456 if (pstart >= pend) 2457 continue; 2458 2459 /* 2460 * Perform the pmap_advise() before clearing 2461 * PGA_REFERENCED in vm_page_advise(). 
Otherwise, a 2462 * concurrent pmap operation, such as pmap_remove(), 2463 * could clear a reference in the pmap and set 2464 * PGA_REFERENCED on the page before the pmap_advise() 2465 * had completed. Consequently, the page would appear 2466 * referenced based upon an old reference that 2467 * occurred before this pmap_advise() ran. 2468 */ 2469 if (behav == MADV_DONTNEED || behav == MADV_FREE) 2470 pmap_advise(map->pmap, useStart, useEnd, 2471 behav); 2472 2473 vm_object_madvise(current->object.vm_object, pstart, 2474 pend, behav); 2475 2476 /* 2477 * Pre-populate paging structures in the 2478 * WILLNEED case. For wired entries, the 2479 * paging structures are already populated. 2480 */ 2481 if (behav == MADV_WILLNEED && 2482 current->wired_count == 0) { 2483 vm_map_pmap_enter(map, 2484 useStart, 2485 current->protection, 2486 current->object.vm_object, 2487 pstart, 2488 ptoa(pend - pstart), 2489 MAP_PREFAULT_MADVISE 2490 ); 2491 } 2492 } 2493 vm_map_unlock_read(map); 2494 } 2495 return (0); 2496 } 2497 2498 2499 /* 2500 * vm_map_inherit: 2501 * 2502 * Sets the inheritance of the specified address 2503 * range in the target map. Inheritance 2504 * affects how the map will be shared with 2505 * child maps at the time of vmspace_fork. 
2506 */ 2507 int 2508 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 2509 vm_inherit_t new_inheritance) 2510 { 2511 vm_map_entry_t entry; 2512 vm_map_entry_t temp_entry; 2513 2514 switch (new_inheritance) { 2515 case VM_INHERIT_NONE: 2516 case VM_INHERIT_COPY: 2517 case VM_INHERIT_SHARE: 2518 case VM_INHERIT_ZERO: 2519 break; 2520 default: 2521 return (KERN_INVALID_ARGUMENT); 2522 } 2523 if (start == end) 2524 return (KERN_SUCCESS); 2525 vm_map_lock(map); 2526 VM_MAP_RANGE_CHECK(map, start, end); 2527 if (vm_map_lookup_entry(map, start, &temp_entry)) { 2528 entry = temp_entry; 2529 vm_map_clip_start(map, entry, start); 2530 } else 2531 entry = temp_entry->next; 2532 while (entry->start < end) { 2533 vm_map_clip_end(map, entry, end); 2534 if ((entry->eflags & MAP_ENTRY_GUARD) == 0 || 2535 new_inheritance != VM_INHERIT_ZERO) 2536 entry->inheritance = new_inheritance; 2537 vm_map_simplify_entry(map, entry); 2538 entry = entry->next; 2539 } 2540 vm_map_unlock(map); 2541 return (KERN_SUCCESS); 2542 } 2543 2544 /* 2545 * vm_map_unwire: 2546 * 2547 * Implements both kernel and user unwiring. 2548 */ 2549 int 2550 vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, 2551 int flags) 2552 { 2553 vm_map_entry_t entry, first_entry, tmp_entry; 2554 vm_offset_t saved_start; 2555 unsigned int last_timestamp; 2556 int rv; 2557 boolean_t need_wakeup, result, user_unwire; 2558 2559 if (start == end) 2560 return (KERN_SUCCESS); 2561 user_unwire = (flags & VM_MAP_WIRE_USER) ? 
TRUE : FALSE; 2562 vm_map_lock(map); 2563 VM_MAP_RANGE_CHECK(map, start, end); 2564 if (!vm_map_lookup_entry(map, start, &first_entry)) { 2565 if (flags & VM_MAP_WIRE_HOLESOK) 2566 first_entry = first_entry->next; 2567 else { 2568 vm_map_unlock(map); 2569 return (KERN_INVALID_ADDRESS); 2570 } 2571 } 2572 last_timestamp = map->timestamp; 2573 entry = first_entry; 2574 while (entry->start < end) { 2575 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 2576 /* 2577 * We have not yet clipped the entry. 2578 */ 2579 saved_start = (start >= entry->start) ? start : 2580 entry->start; 2581 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 2582 if (vm_map_unlock_and_wait(map, 0)) { 2583 /* 2584 * Allow interruption of user unwiring? 2585 */ 2586 } 2587 vm_map_lock(map); 2588 if (last_timestamp+1 != map->timestamp) { 2589 /* 2590 * Look again for the entry because the map was 2591 * modified while it was unlocked. 2592 * Specifically, the entry may have been 2593 * clipped, merged, or deleted. 2594 */ 2595 if (!vm_map_lookup_entry(map, saved_start, 2596 &tmp_entry)) { 2597 if (flags & VM_MAP_WIRE_HOLESOK) 2598 tmp_entry = tmp_entry->next; 2599 else { 2600 if (saved_start == start) { 2601 /* 2602 * First_entry has been deleted. 2603 */ 2604 vm_map_unlock(map); 2605 return (KERN_INVALID_ADDRESS); 2606 } 2607 end = saved_start; 2608 rv = KERN_INVALID_ADDRESS; 2609 goto done; 2610 } 2611 } 2612 if (entry == first_entry) 2613 first_entry = tmp_entry; 2614 else 2615 first_entry = NULL; 2616 entry = tmp_entry; 2617 } 2618 last_timestamp = map->timestamp; 2619 continue; 2620 } 2621 vm_map_clip_start(map, entry, start); 2622 vm_map_clip_end(map, entry, end); 2623 /* 2624 * Mark the entry in case the map lock is released. (See 2625 * above.) 
2626 */ 2627 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 2628 entry->wiring_thread == NULL, 2629 ("owned map entry %p", entry)); 2630 entry->eflags |= MAP_ENTRY_IN_TRANSITION; 2631 entry->wiring_thread = curthread; 2632 /* 2633 * Check the map for holes in the specified region. 2634 * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 2635 */ 2636 if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && 2637 (entry->end < end && entry->next->start > entry->end)) { 2638 end = entry->end; 2639 rv = KERN_INVALID_ADDRESS; 2640 goto done; 2641 } 2642 /* 2643 * If system unwiring, require that the entry is system wired. 2644 */ 2645 if (!user_unwire && 2646 vm_map_entry_system_wired_count(entry) == 0) { 2647 end = entry->end; 2648 rv = KERN_INVALID_ARGUMENT; 2649 goto done; 2650 } 2651 entry = entry->next; 2652 } 2653 rv = KERN_SUCCESS; 2654 done: 2655 need_wakeup = FALSE; 2656 if (first_entry == NULL) { 2657 result = vm_map_lookup_entry(map, start, &first_entry); 2658 if (!result && (flags & VM_MAP_WIRE_HOLESOK)) 2659 first_entry = first_entry->next; 2660 else 2661 KASSERT(result, ("vm_map_unwire: lookup failed")); 2662 } 2663 for (entry = first_entry; entry->start < end; entry = entry->next) { 2664 /* 2665 * If VM_MAP_WIRE_HOLESOK was specified, an empty 2666 * space in the unwired region could have been mapped 2667 * while the map lock was dropped for draining 2668 * MAP_ENTRY_IN_TRANSITION. Moreover, another thread 2669 * could be simultaneously wiring this new mapping 2670 * entry. Detect these cases and skip any entries 2671 * marked as in transition by us. 
2672 */ 2673 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 2674 entry->wiring_thread != curthread) { 2675 KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, 2676 ("vm_map_unwire: !HOLESOK and new/changed entry")); 2677 continue; 2678 } 2679 2680 if (rv == KERN_SUCCESS && (!user_unwire || 2681 (entry->eflags & MAP_ENTRY_USER_WIRED))) { 2682 if (user_unwire) 2683 entry->eflags &= ~MAP_ENTRY_USER_WIRED; 2684 if (entry->wired_count == 1) 2685 vm_map_entry_unwire(map, entry); 2686 else 2687 entry->wired_count--; 2688 } 2689 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 2690 ("vm_map_unwire: in-transition flag missing %p", entry)); 2691 KASSERT(entry->wiring_thread == curthread, 2692 ("vm_map_unwire: alien wire %p", entry)); 2693 entry->eflags &= ~MAP_ENTRY_IN_TRANSITION; 2694 entry->wiring_thread = NULL; 2695 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 2696 entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 2697 need_wakeup = TRUE; 2698 } 2699 vm_map_simplify_entry(map, entry); 2700 } 2701 vm_map_unlock(map); 2702 if (need_wakeup) 2703 vm_map_wakeup(map); 2704 return (rv); 2705 } 2706 2707 /* 2708 * vm_map_wire_entry_failure: 2709 * 2710 * Handle a wiring failure on the given entry. 2711 * 2712 * The map should be locked. 2713 */ 2714 static void 2715 vm_map_wire_entry_failure(vm_map_t map, vm_map_entry_t entry, 2716 vm_offset_t failed_addr) 2717 { 2718 2719 VM_MAP_ASSERT_LOCKED(map); 2720 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 && 2721 entry->wired_count == 1, 2722 ("vm_map_wire_entry_failure: entry %p isn't being wired", entry)); 2723 KASSERT(failed_addr < entry->end, 2724 ("vm_map_wire_entry_failure: entry %p was fully wired", entry)); 2725 2726 /* 2727 * If any pages at the start of this entry were successfully wired, 2728 * then unwire them. 
2729 */ 2730 if (failed_addr > entry->start) { 2731 pmap_unwire(map->pmap, entry->start, failed_addr); 2732 vm_object_unwire(entry->object.vm_object, entry->offset, 2733 failed_addr - entry->start, PQ_ACTIVE); 2734 } 2735 2736 /* 2737 * Assign an out-of-range value to represent the failure to wire this 2738 * entry. 2739 */ 2740 entry->wired_count = -1; 2741 } 2742 2743 /* 2744 * vm_map_wire: 2745 * 2746 * Implements both kernel and user wiring. 2747 */ 2748 int 2749 vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, 2750 int flags) 2751 { 2752 vm_map_entry_t entry, first_entry, tmp_entry; 2753 vm_offset_t faddr, saved_end, saved_start; 2754 unsigned int last_timestamp; 2755 int rv; 2756 boolean_t need_wakeup, result, user_wire; 2757 vm_prot_t prot; 2758 2759 if (start == end) 2760 return (KERN_SUCCESS); 2761 prot = 0; 2762 if (flags & VM_MAP_WIRE_WRITE) 2763 prot |= VM_PROT_WRITE; 2764 user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; 2765 vm_map_lock(map); 2766 VM_MAP_RANGE_CHECK(map, start, end); 2767 if (!vm_map_lookup_entry(map, start, &first_entry)) { 2768 if (flags & VM_MAP_WIRE_HOLESOK) 2769 first_entry = first_entry->next; 2770 else { 2771 vm_map_unlock(map); 2772 return (KERN_INVALID_ADDRESS); 2773 } 2774 } 2775 last_timestamp = map->timestamp; 2776 entry = first_entry; 2777 while (entry->start < end) { 2778 if (entry->eflags & MAP_ENTRY_IN_TRANSITION) { 2779 /* 2780 * We have not yet clipped the entry. 2781 */ 2782 saved_start = (start >= entry->start) ? start : 2783 entry->start; 2784 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 2785 if (vm_map_unlock_and_wait(map, 0)) { 2786 /* 2787 * Allow interruption of user wiring? 2788 */ 2789 } 2790 vm_map_lock(map); 2791 if (last_timestamp + 1 != map->timestamp) { 2792 /* 2793 * Look again for the entry because the map was 2794 * modified while it was unlocked. 2795 * Specifically, the entry may have been 2796 * clipped, merged, or deleted. 
2797 */ 2798 if (!vm_map_lookup_entry(map, saved_start, 2799 &tmp_entry)) { 2800 if (flags & VM_MAP_WIRE_HOLESOK) 2801 tmp_entry = tmp_entry->next; 2802 else { 2803 if (saved_start == start) { 2804 /* 2805 * first_entry has been deleted. 2806 */ 2807 vm_map_unlock(map); 2808 return (KERN_INVALID_ADDRESS); 2809 } 2810 end = saved_start; 2811 rv = KERN_INVALID_ADDRESS; 2812 goto done; 2813 } 2814 } 2815 if (entry == first_entry) 2816 first_entry = tmp_entry; 2817 else 2818 first_entry = NULL; 2819 entry = tmp_entry; 2820 } 2821 last_timestamp = map->timestamp; 2822 continue; 2823 } 2824 vm_map_clip_start(map, entry, start); 2825 vm_map_clip_end(map, entry, end); 2826 /* 2827 * Mark the entry in case the map lock is released. (See 2828 * above.) 2829 */ 2830 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 && 2831 entry->wiring_thread == NULL, 2832 ("owned map entry %p", entry)); 2833 entry->eflags |= MAP_ENTRY_IN_TRANSITION; 2834 entry->wiring_thread = curthread; 2835 if ((entry->protection & (VM_PROT_READ | VM_PROT_EXECUTE)) == 0 2836 || (entry->protection & prot) != prot) { 2837 entry->eflags |= MAP_ENTRY_WIRE_SKIPPED; 2838 if ((flags & VM_MAP_WIRE_HOLESOK) == 0) { 2839 end = entry->end; 2840 rv = KERN_INVALID_ADDRESS; 2841 goto done; 2842 } 2843 goto next_entry; 2844 } 2845 if (entry->wired_count == 0) { 2846 entry->wired_count++; 2847 saved_start = entry->start; 2848 saved_end = entry->end; 2849 2850 /* 2851 * Release the map lock, relying on the in-transition 2852 * mark. Mark the map busy for fork. 2853 */ 2854 vm_map_busy(map); 2855 vm_map_unlock(map); 2856 2857 faddr = saved_start; 2858 do { 2859 /* 2860 * Simulate a fault to get the page and enter 2861 * it into the physical map. 
2862 */ 2863 if ((rv = vm_fault(map, faddr, VM_PROT_NONE, 2864 VM_FAULT_WIRE)) != KERN_SUCCESS) 2865 break; 2866 } while ((faddr += PAGE_SIZE) < saved_end); 2867 vm_map_lock(map); 2868 vm_map_unbusy(map); 2869 if (last_timestamp + 1 != map->timestamp) { 2870 /* 2871 * Look again for the entry because the map was 2872 * modified while it was unlocked. The entry 2873 * may have been clipped, but NOT merged or 2874 * deleted. 2875 */ 2876 result = vm_map_lookup_entry(map, saved_start, 2877 &tmp_entry); 2878 KASSERT(result, ("vm_map_wire: lookup failed")); 2879 if (entry == first_entry) 2880 first_entry = tmp_entry; 2881 else 2882 first_entry = NULL; 2883 entry = tmp_entry; 2884 while (entry->end < saved_end) { 2885 /* 2886 * In case of failure, handle entries 2887 * that were not fully wired here; 2888 * fully wired entries are handled 2889 * later. 2890 */ 2891 if (rv != KERN_SUCCESS && 2892 faddr < entry->end) 2893 vm_map_wire_entry_failure(map, 2894 entry, faddr); 2895 entry = entry->next; 2896 } 2897 } 2898 last_timestamp = map->timestamp; 2899 if (rv != KERN_SUCCESS) { 2900 vm_map_wire_entry_failure(map, entry, faddr); 2901 end = entry->end; 2902 goto done; 2903 } 2904 } else if (!user_wire || 2905 (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 2906 entry->wired_count++; 2907 } 2908 /* 2909 * Check the map for holes in the specified region. 2910 * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 
2911 */ 2912 next_entry: 2913 if ((flags & VM_MAP_WIRE_HOLESOK) == 0 && 2914 entry->end < end && entry->next->start > entry->end) { 2915 end = entry->end; 2916 rv = KERN_INVALID_ADDRESS; 2917 goto done; 2918 } 2919 entry = entry->next; 2920 } 2921 rv = KERN_SUCCESS; 2922 done: 2923 need_wakeup = FALSE; 2924 if (first_entry == NULL) { 2925 result = vm_map_lookup_entry(map, start, &first_entry); 2926 if (!result && (flags & VM_MAP_WIRE_HOLESOK)) 2927 first_entry = first_entry->next; 2928 else 2929 KASSERT(result, ("vm_map_wire: lookup failed")); 2930 } 2931 for (entry = first_entry; entry->start < end; entry = entry->next) { 2932 /* 2933 * If VM_MAP_WIRE_HOLESOK was specified, an empty 2934 * space in the unwired region could have been mapped 2935 * while the map lock was dropped for faulting in the 2936 * pages or draining MAP_ENTRY_IN_TRANSITION. 2937 * Moreover, another thread could be simultaneously 2938 * wiring this new mapping entry. Detect these cases 2939 * and skip any entries marked as in transition not by us. 2940 */ 2941 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 || 2942 entry->wiring_thread != curthread) { 2943 KASSERT((flags & VM_MAP_WIRE_HOLESOK) != 0, 2944 ("vm_map_wire: !HOLESOK and new/changed entry")); 2945 continue; 2946 } 2947 2948 if ((entry->eflags & MAP_ENTRY_WIRE_SKIPPED) != 0) 2949 goto next_entry_done; 2950 2951 if (rv == KERN_SUCCESS) { 2952 if (user_wire) 2953 entry->eflags |= MAP_ENTRY_USER_WIRED; 2954 } else if (entry->wired_count == -1) { 2955 /* 2956 * Wiring failed on this entry. Thus, unwiring is 2957 * unnecessary. 2958 */ 2959 entry->wired_count = 0; 2960 } else if (!user_wire || 2961 (entry->eflags & MAP_ENTRY_USER_WIRED) == 0) { 2962 /* 2963 * Undo the wiring. Wiring succeeded on this entry 2964 * but failed on a later entry. 
2965 */ 2966 if (entry->wired_count == 1) 2967 vm_map_entry_unwire(map, entry); 2968 else 2969 entry->wired_count--; 2970 } 2971 next_entry_done: 2972 KASSERT((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0, 2973 ("vm_map_wire: in-transition flag missing %p", entry)); 2974 KASSERT(entry->wiring_thread == curthread, 2975 ("vm_map_wire: alien wire %p", entry)); 2976 entry->eflags &= ~(MAP_ENTRY_IN_TRANSITION | 2977 MAP_ENTRY_WIRE_SKIPPED); 2978 entry->wiring_thread = NULL; 2979 if (entry->eflags & MAP_ENTRY_NEEDS_WAKEUP) { 2980 entry->eflags &= ~MAP_ENTRY_NEEDS_WAKEUP; 2981 need_wakeup = TRUE; 2982 } 2983 vm_map_simplify_entry(map, entry); 2984 } 2985 vm_map_unlock(map); 2986 if (need_wakeup) 2987 vm_map_wakeup(map); 2988 return (rv); 2989 } 2990 2991 /* 2992 * vm_map_sync 2993 * 2994 * Push any dirty cached pages in the address range to their pager. 2995 * If syncio is TRUE, dirty pages are written synchronously. 2996 * If invalidate is TRUE, any cached pages are freed as well. 2997 * 2998 * If the size of the region from start to end is zero, we are 2999 * supposed to flush all modified pages within the region containing 3000 * start. Unfortunately, a region can be split or coalesced with 3001 * neighboring regions, making it difficult to determine what the 3002 * original region was. Therefore, we approximate this requirement by 3003 * flushing the current region containing start. 3004 * 3005 * Returns an error if any part of the specified range is not mapped. 
3006 */ 3007 int 3008 vm_map_sync( 3009 vm_map_t map, 3010 vm_offset_t start, 3011 vm_offset_t end, 3012 boolean_t syncio, 3013 boolean_t invalidate) 3014 { 3015 vm_map_entry_t current; 3016 vm_map_entry_t entry; 3017 vm_size_t size; 3018 vm_object_t object; 3019 vm_ooffset_t offset; 3020 unsigned int last_timestamp; 3021 boolean_t failed; 3022 3023 vm_map_lock_read(map); 3024 VM_MAP_RANGE_CHECK(map, start, end); 3025 if (!vm_map_lookup_entry(map, start, &entry)) { 3026 vm_map_unlock_read(map); 3027 return (KERN_INVALID_ADDRESS); 3028 } else if (start == end) { 3029 start = entry->start; 3030 end = entry->end; 3031 } 3032 /* 3033 * Make a first pass to check for user-wired memory and holes. 3034 */ 3035 for (current = entry; current->start < end; current = current->next) { 3036 if (invalidate && (current->eflags & MAP_ENTRY_USER_WIRED)) { 3037 vm_map_unlock_read(map); 3038 return (KERN_INVALID_ARGUMENT); 3039 } 3040 if (end > current->end && 3041 current->end != current->next->start) { 3042 vm_map_unlock_read(map); 3043 return (KERN_INVALID_ADDRESS); 3044 } 3045 } 3046 3047 if (invalidate) 3048 pmap_remove(map->pmap, start, end); 3049 failed = FALSE; 3050 3051 /* 3052 * Make a second pass, cleaning/uncaching pages from the indicated 3053 * objects as we go. 3054 */ 3055 for (current = entry; current->start < end;) { 3056 offset = current->offset + (start - current->start); 3057 size = (end <= current->end ? 
end : current->end) - start; 3058 if (current->eflags & MAP_ENTRY_IS_SUB_MAP) { 3059 vm_map_t smap; 3060 vm_map_entry_t tentry; 3061 vm_size_t tsize; 3062 3063 smap = current->object.sub_map; 3064 vm_map_lock_read(smap); 3065 (void) vm_map_lookup_entry(smap, offset, &tentry); 3066 tsize = tentry->end - offset; 3067 if (tsize < size) 3068 size = tsize; 3069 object = tentry->object.vm_object; 3070 offset = tentry->offset + (offset - tentry->start); 3071 vm_map_unlock_read(smap); 3072 } else { 3073 object = current->object.vm_object; 3074 } 3075 vm_object_reference(object); 3076 last_timestamp = map->timestamp; 3077 vm_map_unlock_read(map); 3078 if (!vm_object_sync(object, offset, size, syncio, invalidate)) 3079 failed = TRUE; 3080 start += size; 3081 vm_object_deallocate(object); 3082 vm_map_lock_read(map); 3083 if (last_timestamp == map->timestamp || 3084 !vm_map_lookup_entry(map, start, ¤t)) 3085 current = current->next; 3086 } 3087 3088 vm_map_unlock_read(map); 3089 return (failed ? KERN_FAILURE : KERN_SUCCESS); 3090 } 3091 3092 /* 3093 * vm_map_entry_unwire: [ internal use only ] 3094 * 3095 * Make the region specified by this entry pageable. 3096 * 3097 * The map in question should be locked. 3098 * [This is the reason for this routine's existence.] 3099 */ 3100 static void 3101 vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry) 3102 { 3103 3104 VM_MAP_ASSERT_LOCKED(map); 3105 KASSERT(entry->wired_count > 0, 3106 ("vm_map_entry_unwire: entry %p isn't wired", entry)); 3107 pmap_unwire(map->pmap, entry->start, entry->end); 3108 vm_object_unwire(entry->object.vm_object, entry->offset, entry->end - 3109 entry->start, PQ_ACTIVE); 3110 entry->wired_count = 0; 3111 } 3112 3113 static void 3114 vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map) 3115 { 3116 3117 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) 3118 vm_object_deallocate(entry->object.vm_object); 3119 uma_zfree(system_map ? 
/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 *
 *	The entry is unlinked from the map, the map's size and the swap
 *	reservation charged to the entry's credential are released, and
 *	the pages that the entry covered are removed from its object when
 *	the object is still referenced elsewhere and meets the conditions
 *	tested below.  On a system map the entry is then freed at once;
 *	otherwise it is pushed onto curthread->td_map_def_user.
 */
static void
vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
{
	vm_object_t object;
	vm_pindex_t offidxstart, offidxend, count, size1;
	vm_size_t size;

	vm_map_entry_unlink(map, entry);
	object = entry->object.vm_object;

	/*
	 * Guard entries carry no credential and no object, and they are
	 * not subtracted from map->size; free them right away.
	 */
	if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
		MPASS(entry->cred == NULL);
		MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
		MPASS(object == NULL);
		vm_map_entry_deallocate(entry, map->system_map);
		return;
	}

	size = entry->end - entry->start;
	map->size -= size;

	/* Give back the swap reservation charged to the entry itself. */
	if (entry->cred != NULL) {
		swap_release_by_cred(size, entry->cred);
		crfree(entry->cred);
	}

	if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
	    (object != NULL)) {
		KASSERT(entry->cred == NULL || object->cred == NULL ||
		    (entry->eflags & MAP_ENTRY_NEEDS_COPY),
		    ("OVERCOMMIT vm_map_entry_delete: both cred %p", entry));
		/* Page index range [offidxstart, offidxend) of the entry. */
		count = atop(size);
		offidxstart = OFF_TO_IDX(entry->offset);
		offidxend = offidxstart + count;
		VM_OBJECT_WLOCK(object);
		/*
		 * Only trim the object here if other references remain and
		 * it is either the kernel object or flagged OBJ_ONEMAPPING
		 * without OBJ_NOSPLIT.
		 */
		if (object->ref_count != 1 && ((object->flags & (OBJ_NOSPLIT |
		    OBJ_ONEMAPPING)) == OBJ_ONEMAPPING ||
		    object == kernel_object)) {
			vm_object_collapse(object);

			/*
			 * The option OBJPR_NOTMAPPED can be passed here
			 * because vm_map_delete() already performed
			 * pmap_remove() on the only mapping to this range
			 * of pages.
			 */
			vm_object_page_remove(object, offidxstart, offidxend,
			    OBJPR_NOTMAPPED);
			if (object->type == OBJT_SWAP)
				swap_pager_freespace(object, offidxstart,
				    count);
			/*
			 * If the removed range reaches the end of the
			 * object, shrink the object to the start of the
			 * range and release the charge for the pages that
			 * were cut off.
			 */
			if (offidxend >= object->size &&
			    offidxstart < object->size) {
				size1 = object->size;
				object->size = offidxstart;
				if (object->cred != NULL) {
					/* size1: number of pages trimmed. */
					size1 -= object->size;
					KASSERT(object->charge >= ptoa(size1),
					    ("object %p charge < 0", object));
					swap_release_by_cred(ptoa(size1),
					    object->cred);
					object->charge -= ptoa(size1);
				}
			}
		}
		VM_OBJECT_WUNLOCK(object);
	} else
		/*
		 * Sub-map entry or entry without an object: clear the
		 * union member.  NOTE(review): presumably so that later
		 * code does not treat a sub-map pointer as an object --
		 * confirm.
		 */
		entry->object.vm_object = NULL;
	if (map->system_map)
		vm_map_entry_deallocate(entry, TRUE);
	else {
		/*
		 * Push the entry onto the current thread's td_map_def_user
		 * list, reusing entry->next as the link.  NOTE(review): the
		 * consumer of this list is not visible here; presumably the
		 * entries are freed after the map lock is dropped -- confirm.
		 */
		entry->next = curthread->td_map_def_user;
		curthread->td_map_def_user = entry;
	}
}
3239 */ 3240 if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) != 0 || 3241 (vm_map_pmap(map) != kernel_pmap && 3242 vm_map_entry_system_wired_count(entry) != 0)) { 3243 unsigned int last_timestamp; 3244 vm_offset_t saved_start; 3245 vm_map_entry_t tmp_entry; 3246 3247 saved_start = entry->start; 3248 entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; 3249 last_timestamp = map->timestamp; 3250 (void) vm_map_unlock_and_wait(map, 0); 3251 vm_map_lock(map); 3252 if (last_timestamp + 1 != map->timestamp) { 3253 /* 3254 * Look again for the entry because the map was 3255 * modified while it was unlocked. 3256 * Specifically, the entry may have been 3257 * clipped, merged, or deleted. 3258 */ 3259 if (!vm_map_lookup_entry(map, saved_start, 3260 &tmp_entry)) 3261 entry = tmp_entry->next; 3262 else { 3263 entry = tmp_entry; 3264 vm_map_clip_start(map, entry, 3265 saved_start); 3266 } 3267 } 3268 continue; 3269 } 3270 vm_map_clip_end(map, entry, end); 3271 3272 next = entry->next; 3273 3274 /* 3275 * Unwire before removing addresses from the pmap; otherwise, 3276 * unwiring will put the entries back in the pmap. 3277 */ 3278 if (entry->wired_count != 0) 3279 vm_map_entry_unwire(map, entry); 3280 3281 /* 3282 * Remove mappings for the pages, but only if the 3283 * mappings could exist. For instance, it does not 3284 * make sense to call pmap_remove() for guard entries. 3285 */ 3286 if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || 3287 entry->object.vm_object != NULL) 3288 pmap_remove(map->pmap, entry->start, entry->end); 3289 3290 if (entry->end == map->anon_loc) 3291 map->anon_loc = entry->start; 3292 3293 /* 3294 * Delete the entry only after removing all pmap 3295 * entries pointing to its pages. (Otherwise, its 3296 * page frames may be reallocated, and any modify bits 3297 * will be set in the wrong object!) 
3298 */ 3299 vm_map_entry_delete(map, entry); 3300 entry = next; 3301 } 3302 return (KERN_SUCCESS); 3303 } 3304 3305 /* 3306 * vm_map_remove: 3307 * 3308 * Remove the given address range from the target map. 3309 * This is the exported form of vm_map_delete. 3310 */ 3311 int 3312 vm_map_remove(vm_map_t map, vm_offset_t start, vm_offset_t end) 3313 { 3314 int result; 3315 3316 vm_map_lock(map); 3317 VM_MAP_RANGE_CHECK(map, start, end); 3318 result = vm_map_delete(map, start, end); 3319 vm_map_unlock(map); 3320 return (result); 3321 } 3322 3323 /* 3324 * vm_map_check_protection: 3325 * 3326 * Assert that the target map allows the specified privilege on the 3327 * entire address region given. The entire region must be allocated. 3328 * 3329 * WARNING! This code does not and should not check whether the 3330 * contents of the region is accessible. For example a smaller file 3331 * might be mapped into a larger address space. 3332 * 3333 * NOTE! This code is also called by munmap(). 3334 * 3335 * The map must be locked. A read lock is sufficient. 3336 */ 3337 boolean_t 3338 vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end, 3339 vm_prot_t protection) 3340 { 3341 vm_map_entry_t entry; 3342 vm_map_entry_t tmp_entry; 3343 3344 if (!vm_map_lookup_entry(map, start, &tmp_entry)) 3345 return (FALSE); 3346 entry = tmp_entry; 3347 3348 while (start < end) { 3349 /* 3350 * No holes allowed! 3351 */ 3352 if (start < entry->start) 3353 return (FALSE); 3354 /* 3355 * Check protection associated with entry. 3356 */ 3357 if ((entry->protection & protection) != protection) 3358 return (FALSE); 3359 /* go to next entry */ 3360 start = entry->end; 3361 entry = entry->next; 3362 } 3363 return (TRUE); 3364 } 3365 3366 /* 3367 * vm_map_copy_entry: 3368 * 3369 * Copies the contents of the source entry to the destination 3370 * entry. The entries *must* be aligned properly. 
3371 */ 3372 static void 3373 vm_map_copy_entry( 3374 vm_map_t src_map, 3375 vm_map_t dst_map, 3376 vm_map_entry_t src_entry, 3377 vm_map_entry_t dst_entry, 3378 vm_ooffset_t *fork_charge) 3379 { 3380 vm_object_t src_object; 3381 vm_map_entry_t fake_entry; 3382 vm_offset_t size; 3383 struct ucred *cred; 3384 int charged; 3385 3386 VM_MAP_ASSERT_LOCKED(dst_map); 3387 3388 if ((dst_entry->eflags|src_entry->eflags) & MAP_ENTRY_IS_SUB_MAP) 3389 return; 3390 3391 if (src_entry->wired_count == 0 || 3392 (src_entry->protection & VM_PROT_WRITE) == 0) { 3393 /* 3394 * If the source entry is marked needs_copy, it is already 3395 * write-protected. 3396 */ 3397 if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0 && 3398 (src_entry->protection & VM_PROT_WRITE) != 0) { 3399 pmap_protect(src_map->pmap, 3400 src_entry->start, 3401 src_entry->end, 3402 src_entry->protection & ~VM_PROT_WRITE); 3403 } 3404 3405 /* 3406 * Make a copy of the object. 3407 */ 3408 size = src_entry->end - src_entry->start; 3409 if ((src_object = src_entry->object.vm_object) != NULL) { 3410 VM_OBJECT_WLOCK(src_object); 3411 charged = ENTRY_CHARGED(src_entry); 3412 if (src_object->handle == NULL && 3413 (src_object->type == OBJT_DEFAULT || 3414 src_object->type == OBJT_SWAP)) { 3415 vm_object_collapse(src_object); 3416 if ((src_object->flags & (OBJ_NOSPLIT | 3417 OBJ_ONEMAPPING)) == OBJ_ONEMAPPING) { 3418 vm_object_split(src_entry); 3419 src_object = 3420 src_entry->object.vm_object; 3421 } 3422 } 3423 vm_object_reference_locked(src_object); 3424 vm_object_clear_flag(src_object, OBJ_ONEMAPPING); 3425 if (src_entry->cred != NULL && 3426 !(src_entry->eflags & MAP_ENTRY_NEEDS_COPY)) { 3427 KASSERT(src_object->cred == NULL, 3428 ("OVERCOMMIT: vm_map_copy_entry: cred %p", 3429 src_object)); 3430 src_object->cred = src_entry->cred; 3431 src_object->charge = size; 3432 } 3433 VM_OBJECT_WUNLOCK(src_object); 3434 dst_entry->object.vm_object = src_object; 3435 if (charged) { 3436 cred = curthread->td_ucred; 3437 
crhold(cred); 3438 dst_entry->cred = cred; 3439 *fork_charge += size; 3440 if (!(src_entry->eflags & 3441 MAP_ENTRY_NEEDS_COPY)) { 3442 crhold(cred); 3443 src_entry->cred = cred; 3444 *fork_charge += size; 3445 } 3446 } 3447 src_entry->eflags |= MAP_ENTRY_COW | 3448 MAP_ENTRY_NEEDS_COPY; 3449 dst_entry->eflags |= MAP_ENTRY_COW | 3450 MAP_ENTRY_NEEDS_COPY; 3451 dst_entry->offset = src_entry->offset; 3452 if (src_entry->eflags & MAP_ENTRY_VN_WRITECNT) { 3453 /* 3454 * MAP_ENTRY_VN_WRITECNT cannot 3455 * indicate write reference from 3456 * src_entry, since the entry is 3457 * marked as needs copy. Allocate a 3458 * fake entry that is used to 3459 * decrement object->un_pager.vnp.writecount 3460 * at the appropriate time. Attach 3461 * fake_entry to the deferred list. 3462 */ 3463 fake_entry = vm_map_entry_create(dst_map); 3464 fake_entry->eflags = MAP_ENTRY_VN_WRITECNT; 3465 src_entry->eflags &= ~MAP_ENTRY_VN_WRITECNT; 3466 vm_object_reference(src_object); 3467 fake_entry->object.vm_object = src_object; 3468 fake_entry->start = src_entry->start; 3469 fake_entry->end = src_entry->end; 3470 fake_entry->next = curthread->td_map_def_user; 3471 curthread->td_map_def_user = fake_entry; 3472 } 3473 3474 pmap_copy(dst_map->pmap, src_map->pmap, 3475 dst_entry->start, dst_entry->end - dst_entry->start, 3476 src_entry->start); 3477 } else { 3478 dst_entry->object.vm_object = NULL; 3479 dst_entry->offset = 0; 3480 if (src_entry->cred != NULL) { 3481 dst_entry->cred = curthread->td_ucred; 3482 crhold(dst_entry->cred); 3483 *fork_charge += size; 3484 } 3485 } 3486 } else { 3487 /* 3488 * We don't want to make writeable wired pages copy-on-write. 3489 * Immediately copy these pages into the new map by simulating 3490 * page faults. The new pages are pageable. 
3491 */ 3492 vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry, 3493 fork_charge); 3494 } 3495 } 3496 3497 /* 3498 * vmspace_map_entry_forked: 3499 * Update the newly-forked vmspace each time a map entry is inherited 3500 * or copied. The values for vm_dsize and vm_tsize are approximate 3501 * (and mostly-obsolete ideas in the face of mmap(2) et al.) 3502 */ 3503 static void 3504 vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2, 3505 vm_map_entry_t entry) 3506 { 3507 vm_size_t entrysize; 3508 vm_offset_t newend; 3509 3510 if ((entry->eflags & MAP_ENTRY_GUARD) != 0) 3511 return; 3512 entrysize = entry->end - entry->start; 3513 vm2->vm_map.size += entrysize; 3514 if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) { 3515 vm2->vm_ssize += btoc(entrysize); 3516 } else if (entry->start >= (vm_offset_t)vm1->vm_daddr && 3517 entry->start < (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)) { 3518 newend = MIN(entry->end, 3519 (vm_offset_t)vm1->vm_daddr + ctob(vm1->vm_dsize)); 3520 vm2->vm_dsize += btoc(newend - entry->start); 3521 } else if (entry->start >= (vm_offset_t)vm1->vm_taddr && 3522 entry->start < (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)) { 3523 newend = MIN(entry->end, 3524 (vm_offset_t)vm1->vm_taddr + ctob(vm1->vm_tsize)); 3525 vm2->vm_tsize += btoc(newend - entry->start); 3526 } 3527 } 3528 3529 /* 3530 * vmspace_fork: 3531 * Create a new process vmspace structure and vm_map 3532 * based on those of an existing process. The new map 3533 * is based on the old map, according to the inheritance 3534 * values on the regions in that map. 3535 * 3536 * XXX It might be worth coalescing the entries added to the new vmspace. 3537 * 3538 * The source map must not be locked. 
3539 */ 3540 struct vmspace * 3541 vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) 3542 { 3543 struct vmspace *vm2; 3544 vm_map_t new_map, old_map; 3545 vm_map_entry_t new_entry, old_entry; 3546 vm_object_t object; 3547 int error, locked; 3548 vm_inherit_t inh; 3549 3550 old_map = &vm1->vm_map; 3551 /* Copy immutable fields of vm1 to vm2. */ 3552 vm2 = vmspace_alloc(vm_map_min(old_map), vm_map_max(old_map), 3553 pmap_pinit); 3554 if (vm2 == NULL) 3555 return (NULL); 3556 3557 vm2->vm_taddr = vm1->vm_taddr; 3558 vm2->vm_daddr = vm1->vm_daddr; 3559 vm2->vm_maxsaddr = vm1->vm_maxsaddr; 3560 vm_map_lock(old_map); 3561 if (old_map->busy) 3562 vm_map_wait_busy(old_map); 3563 new_map = &vm2->vm_map; 3564 locked = vm_map_trylock(new_map); /* trylock to silence WITNESS */ 3565 KASSERT(locked, ("vmspace_fork: lock failed")); 3566 3567 error = pmap_vmspace_copy(new_map->pmap, old_map->pmap); 3568 if (error != 0) { 3569 sx_xunlock(&old_map->lock); 3570 sx_xunlock(&new_map->lock); 3571 vm_map_process_deferred(); 3572 vmspace_free(vm2); 3573 return (NULL); 3574 } 3575 3576 new_map->anon_loc = old_map->anon_loc; 3577 3578 old_entry = old_map->header.next; 3579 3580 while (old_entry != &old_map->header) { 3581 if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP) 3582 panic("vm_map_fork: encountered a submap"); 3583 3584 inh = old_entry->inheritance; 3585 if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 && 3586 inh != VM_INHERIT_NONE) 3587 inh = VM_INHERIT_COPY; 3588 3589 switch (inh) { 3590 case VM_INHERIT_NONE: 3591 break; 3592 3593 case VM_INHERIT_SHARE: 3594 /* 3595 * Clone the entry, creating the shared object if necessary. 
3596 */ 3597 object = old_entry->object.vm_object; 3598 if (object == NULL) { 3599 object = vm_object_allocate(OBJT_DEFAULT, 3600 atop(old_entry->end - old_entry->start)); 3601 old_entry->object.vm_object = object; 3602 old_entry->offset = 0; 3603 if (old_entry->cred != NULL) { 3604 object->cred = old_entry->cred; 3605 object->charge = old_entry->end - 3606 old_entry->start; 3607 old_entry->cred = NULL; 3608 } 3609 } 3610 3611 /* 3612 * Add the reference before calling vm_object_shadow 3613 * to insure that a shadow object is created. 3614 */ 3615 vm_object_reference(object); 3616 if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) { 3617 vm_object_shadow(&old_entry->object.vm_object, 3618 &old_entry->offset, 3619 old_entry->end - old_entry->start); 3620 old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 3621 /* Transfer the second reference too. */ 3622 vm_object_reference( 3623 old_entry->object.vm_object); 3624 3625 /* 3626 * As in vm_map_simplify_entry(), the 3627 * vnode lock will not be acquired in 3628 * this call to vm_object_deallocate(). 3629 */ 3630 vm_object_deallocate(object); 3631 object = old_entry->object.vm_object; 3632 } 3633 VM_OBJECT_WLOCK(object); 3634 vm_object_clear_flag(object, OBJ_ONEMAPPING); 3635 if (old_entry->cred != NULL) { 3636 KASSERT(object->cred == NULL, ("vmspace_fork both cred")); 3637 object->cred = old_entry->cred; 3638 object->charge = old_entry->end - old_entry->start; 3639 old_entry->cred = NULL; 3640 } 3641 3642 /* 3643 * Assert the correct state of the vnode 3644 * v_writecount while the object is locked, to 3645 * not relock it later for the assertion 3646 * correctness. 
3647 */ 3648 if (old_entry->eflags & MAP_ENTRY_VN_WRITECNT && 3649 object->type == OBJT_VNODE) { 3650 KASSERT(((struct vnode *)object->handle)-> 3651 v_writecount > 0, 3652 ("vmspace_fork: v_writecount %p", object)); 3653 KASSERT(object->un_pager.vnp.writemappings > 0, 3654 ("vmspace_fork: vnp.writecount %p", 3655 object)); 3656 } 3657 VM_OBJECT_WUNLOCK(object); 3658 3659 /* 3660 * Clone the entry, referencing the shared object. 3661 */ 3662 new_entry = vm_map_entry_create(new_map); 3663 *new_entry = *old_entry; 3664 new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | 3665 MAP_ENTRY_IN_TRANSITION); 3666 new_entry->wiring_thread = NULL; 3667 new_entry->wired_count = 0; 3668 if (new_entry->eflags & MAP_ENTRY_VN_WRITECNT) { 3669 vnode_pager_update_writecount(object, 3670 new_entry->start, new_entry->end); 3671 } 3672 3673 /* 3674 * Insert the entry into the new map -- we know we're 3675 * inserting at the end of the new map. 3676 */ 3677 vm_map_entry_link(new_map, new_map->header.prev, 3678 new_entry); 3679 vmspace_map_entry_forked(vm1, vm2, new_entry); 3680 3681 /* 3682 * Update the physical map 3683 */ 3684 pmap_copy(new_map->pmap, old_map->pmap, 3685 new_entry->start, 3686 (old_entry->end - old_entry->start), 3687 old_entry->start); 3688 break; 3689 3690 case VM_INHERIT_COPY: 3691 /* 3692 * Clone the entry and link into the map. 3693 */ 3694 new_entry = vm_map_entry_create(new_map); 3695 *new_entry = *old_entry; 3696 /* 3697 * Copied entry is COW over the old object. 
3698 */ 3699 new_entry->eflags &= ~(MAP_ENTRY_USER_WIRED | 3700 MAP_ENTRY_IN_TRANSITION | MAP_ENTRY_VN_WRITECNT); 3701 new_entry->wiring_thread = NULL; 3702 new_entry->wired_count = 0; 3703 new_entry->object.vm_object = NULL; 3704 new_entry->cred = NULL; 3705 vm_map_entry_link(new_map, new_map->header.prev, 3706 new_entry); 3707 vmspace_map_entry_forked(vm1, vm2, new_entry); 3708 vm_map_copy_entry(old_map, new_map, old_entry, 3709 new_entry, fork_charge); 3710 break; 3711 3712 case VM_INHERIT_ZERO: 3713 /* 3714 * Create a new anonymous mapping entry modelled from 3715 * the old one. 3716 */ 3717 new_entry = vm_map_entry_create(new_map); 3718 memset(new_entry, 0, sizeof(*new_entry)); 3719 3720 new_entry->start = old_entry->start; 3721 new_entry->end = old_entry->end; 3722 new_entry->eflags = old_entry->eflags & 3723 ~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION | 3724 MAP_ENTRY_VN_WRITECNT); 3725 new_entry->protection = old_entry->protection; 3726 new_entry->max_protection = old_entry->max_protection; 3727 new_entry->inheritance = VM_INHERIT_ZERO; 3728 3729 vm_map_entry_link(new_map, new_map->header.prev, 3730 new_entry); 3731 vmspace_map_entry_forked(vm1, vm2, new_entry); 3732 3733 new_entry->cred = curthread->td_ucred; 3734 crhold(new_entry->cred); 3735 *fork_charge += (new_entry->end - new_entry->start); 3736 3737 break; 3738 } 3739 old_entry = old_entry->next; 3740 } 3741 /* 3742 * Use inlined vm_map_unlock() to postpone handling the deferred 3743 * map entries, which cannot be done until both old_map and 3744 * new_map locks are released. 3745 */ 3746 sx_xunlock(&old_map->lock); 3747 sx_xunlock(&new_map->lock); 3748 vm_map_process_deferred(); 3749 3750 return (vm2); 3751 } 3752 3753 /* 3754 * Create a process's stack for exec_new_vmspace(). This function is never 3755 * asked to wire the newly created stack. 
3756 */ 3757 int 3758 vm_map_stack(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 3759 vm_prot_t prot, vm_prot_t max, int cow) 3760 { 3761 vm_size_t growsize, init_ssize; 3762 rlim_t vmemlim; 3763 int rv; 3764 3765 MPASS((map->flags & MAP_WIREFUTURE) == 0); 3766 growsize = sgrowsiz; 3767 init_ssize = (max_ssize < growsize) ? max_ssize : growsize; 3768 vm_map_lock(map); 3769 vmemlim = lim_cur(curthread, RLIMIT_VMEM); 3770 /* If we would blow our VMEM resource limit, no go */ 3771 if (map->size + init_ssize > vmemlim) { 3772 rv = KERN_NO_SPACE; 3773 goto out; 3774 } 3775 rv = vm_map_stack_locked(map, addrbos, max_ssize, growsize, prot, 3776 max, cow); 3777 out: 3778 vm_map_unlock(map); 3779 return (rv); 3780 } 3781 3782 static int stack_guard_page = 1; 3783 SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN, 3784 &stack_guard_page, 0, 3785 "Specifies the number of guard pages for a stack that grows"); 3786 3787 static int 3788 vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, 3789 vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow) 3790 { 3791 vm_map_entry_t new_entry, prev_entry; 3792 vm_offset_t bot, gap_bot, gap_top, top; 3793 vm_size_t init_ssize, sgp; 3794 int orient, rv; 3795 3796 /* 3797 * The stack orientation is piggybacked with the cow argument. 3798 * Extract it into orient and mask the cow argument so that we 3799 * don't pass it around further. 
3800 */ 3801 orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP); 3802 KASSERT(orient != 0, ("No stack grow direction")); 3803 KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP), 3804 ("bi-dir stack")); 3805 3806 if (addrbos < vm_map_min(map) || 3807 addrbos + max_ssize > vm_map_max(map) || 3808 addrbos + max_ssize <= addrbos) 3809 return (KERN_INVALID_ADDRESS); 3810 sgp = (vm_size_t)stack_guard_page * PAGE_SIZE; 3811 if (sgp >= max_ssize) 3812 return (KERN_INVALID_ARGUMENT); 3813 3814 init_ssize = growsize; 3815 if (max_ssize < init_ssize + sgp) 3816 init_ssize = max_ssize - sgp; 3817 3818 /* If addr is already mapped, no go */ 3819 if (vm_map_lookup_entry(map, addrbos, &prev_entry)) 3820 return (KERN_NO_SPACE); 3821 3822 /* 3823 * If we can't accommodate max_ssize in the current mapping, no go. 3824 */ 3825 if (prev_entry->next->start < addrbos + max_ssize) 3826 return (KERN_NO_SPACE); 3827 3828 /* 3829 * We initially map a stack of only init_ssize. We will grow as 3830 * needed later. Depending on the orientation of the stack (i.e. 3831 * the grow direction) we either map at the top of the range, the 3832 * bottom of the range or in the middle. 3833 * 3834 * Note: we would normally expect prot and max to be VM_PROT_ALL, 3835 * and cow to be 0. Possibly we should eliminate these as input 3836 * parameters, and just pass these values here in the insert call. 
3837 */ 3838 if (orient == MAP_STACK_GROWS_DOWN) { 3839 bot = addrbos + max_ssize - init_ssize; 3840 top = bot + init_ssize; 3841 gap_bot = addrbos; 3842 gap_top = bot; 3843 } else /* if (orient == MAP_STACK_GROWS_UP) */ { 3844 bot = addrbos; 3845 top = bot + init_ssize; 3846 gap_bot = top; 3847 gap_top = addrbos + max_ssize; 3848 } 3849 rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow); 3850 if (rv != KERN_SUCCESS) 3851 return (rv); 3852 new_entry = prev_entry->next; 3853 KASSERT(new_entry->end == top || new_entry->start == bot, 3854 ("Bad entry start/end for new stack entry")); 3855 KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 || 3856 (new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0, 3857 ("new entry lacks MAP_ENTRY_GROWS_DOWN")); 3858 KASSERT((orient & MAP_STACK_GROWS_UP) == 0 || 3859 (new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0, 3860 ("new entry lacks MAP_ENTRY_GROWS_UP")); 3861 rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE, 3862 VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ? 3863 MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP)); 3864 if (rv != KERN_SUCCESS) 3865 (void)vm_map_delete(map, bot, top); 3866 return (rv); 3867 } 3868 3869 /* 3870 * Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we 3871 * successfully grow the stack. 3872 */ 3873 static int 3874 vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry) 3875 { 3876 vm_map_entry_t stack_entry; 3877 struct proc *p; 3878 struct vmspace *vm; 3879 struct ucred *cred; 3880 vm_offset_t gap_end, gap_start, grow_start; 3881 size_t grow_amount, guard, max_grow; 3882 rlim_t lmemlim, stacklim, vmemlim; 3883 int rv, rv1; 3884 bool gap_deleted, grow_down, is_procstack; 3885 #ifdef notyet 3886 uint64_t limit; 3887 #endif 3888 #ifdef RACCT 3889 int error; 3890 #endif 3891 3892 p = curproc; 3893 vm = p->p_vmspace; 3894 3895 /* 3896 * Disallow stack growth when the access is performed by a 3897 * debugger or AIO daemon. 
The reason is that the wrong 3898 * resource limits are applied. 3899 */ 3900 if (map != &p->p_vmspace->vm_map || p->p_textvp == NULL) 3901 return (KERN_FAILURE); 3902 3903 MPASS(!map->system_map); 3904 3905 guard = stack_guard_page * PAGE_SIZE; 3906 lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK); 3907 stacklim = lim_cur(curthread, RLIMIT_STACK); 3908 vmemlim = lim_cur(curthread, RLIMIT_VMEM); 3909 retry: 3910 /* If addr is not in a hole for a stack grow area, no need to grow. */ 3911 if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry)) 3912 return (KERN_FAILURE); 3913 if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0) 3914 return (KERN_SUCCESS); 3915 if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) { 3916 stack_entry = gap_entry->next; 3917 if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 || 3918 stack_entry->start != gap_entry->end) 3919 return (KERN_FAILURE); 3920 grow_amount = round_page(stack_entry->start - addr); 3921 grow_down = true; 3922 } else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) { 3923 stack_entry = gap_entry->prev; 3924 if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 || 3925 stack_entry->end != gap_entry->start) 3926 return (KERN_FAILURE); 3927 grow_amount = round_page(addr + 1 - stack_entry->end); 3928 grow_down = false; 3929 } else { 3930 return (KERN_FAILURE); 3931 } 3932 max_grow = gap_entry->end - gap_entry->start; 3933 if (guard > max_grow) 3934 return (KERN_NO_SPACE); 3935 max_grow -= guard; 3936 if (grow_amount > max_grow) 3937 return (KERN_NO_SPACE); 3938 3939 /* 3940 * If this is the main process stack, see if we're over the stack 3941 * limit. 
3942 */ 3943 is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr && 3944 addr < (vm_offset_t)p->p_sysent->sv_usrstack; 3945 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) 3946 return (KERN_NO_SPACE); 3947 3948 #ifdef RACCT 3949 if (racct_enable) { 3950 PROC_LOCK(p); 3951 if (is_procstack && racct_set(p, RACCT_STACK, 3952 ctob(vm->vm_ssize) + grow_amount)) { 3953 PROC_UNLOCK(p); 3954 return (KERN_NO_SPACE); 3955 } 3956 PROC_UNLOCK(p); 3957 } 3958 #endif 3959 3960 grow_amount = roundup(grow_amount, sgrowsiz); 3961 if (grow_amount > max_grow) 3962 grow_amount = max_grow; 3963 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) { 3964 grow_amount = trunc_page((vm_size_t)stacklim) - 3965 ctob(vm->vm_ssize); 3966 } 3967 3968 #ifdef notyet 3969 PROC_LOCK(p); 3970 limit = racct_get_available(p, RACCT_STACK); 3971 PROC_UNLOCK(p); 3972 if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit)) 3973 grow_amount = limit - ctob(vm->vm_ssize); 3974 #endif 3975 3976 if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) { 3977 if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) { 3978 rv = KERN_NO_SPACE; 3979 goto out; 3980 } 3981 #ifdef RACCT 3982 if (racct_enable) { 3983 PROC_LOCK(p); 3984 if (racct_set(p, RACCT_MEMLOCK, 3985 ptoa(pmap_wired_count(map->pmap)) + grow_amount)) { 3986 PROC_UNLOCK(p); 3987 rv = KERN_NO_SPACE; 3988 goto out; 3989 } 3990 PROC_UNLOCK(p); 3991 } 3992 #endif 3993 } 3994 3995 /* If we would blow our VMEM resource limit, no go */ 3996 if (map->size + grow_amount > vmemlim) { 3997 rv = KERN_NO_SPACE; 3998 goto out; 3999 } 4000 #ifdef RACCT 4001 if (racct_enable) { 4002 PROC_LOCK(p); 4003 if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) { 4004 PROC_UNLOCK(p); 4005 rv = KERN_NO_SPACE; 4006 goto out; 4007 } 4008 PROC_UNLOCK(p); 4009 } 4010 #endif 4011 4012 if (vm_map_lock_upgrade(map)) { 4013 gap_entry = NULL; 4014 vm_map_lock_read(map); 4015 goto retry; 4016 } 4017 4018 if (grow_down) { 4019 grow_start 
= gap_entry->end - grow_amount; 4020 if (gap_entry->start + grow_amount == gap_entry->end) { 4021 gap_start = gap_entry->start; 4022 gap_end = gap_entry->end; 4023 vm_map_entry_delete(map, gap_entry); 4024 gap_deleted = true; 4025 } else { 4026 MPASS(gap_entry->start < gap_entry->end - grow_amount); 4027 gap_entry->end -= grow_amount; 4028 vm_map_entry_resize_free(map, gap_entry); 4029 gap_deleted = false; 4030 } 4031 rv = vm_map_insert(map, NULL, 0, grow_start, 4032 grow_start + grow_amount, 4033 stack_entry->protection, stack_entry->max_protection, 4034 MAP_STACK_GROWS_DOWN); 4035 if (rv != KERN_SUCCESS) { 4036 if (gap_deleted) { 4037 rv1 = vm_map_insert(map, NULL, 0, gap_start, 4038 gap_end, VM_PROT_NONE, VM_PROT_NONE, 4039 MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN); 4040 MPASS(rv1 == KERN_SUCCESS); 4041 } else { 4042 gap_entry->end += grow_amount; 4043 vm_map_entry_resize_free(map, gap_entry); 4044 } 4045 } 4046 } else { 4047 grow_start = stack_entry->end; 4048 cred = stack_entry->cred; 4049 if (cred == NULL && stack_entry->object.vm_object != NULL) 4050 cred = stack_entry->object.vm_object->cred; 4051 if (cred != NULL && !swap_reserve_by_cred(grow_amount, cred)) 4052 rv = KERN_NO_SPACE; 4053 /* Grow the underlying object if applicable. 
*/ 4054 else if (stack_entry->object.vm_object == NULL || 4055 vm_object_coalesce(stack_entry->object.vm_object, 4056 stack_entry->offset, 4057 (vm_size_t)(stack_entry->end - stack_entry->start), 4058 (vm_size_t)grow_amount, cred != NULL)) { 4059 if (gap_entry->start + grow_amount == gap_entry->end) 4060 vm_map_entry_delete(map, gap_entry); 4061 else 4062 gap_entry->start += grow_amount; 4063 stack_entry->end += grow_amount; 4064 map->size += grow_amount; 4065 vm_map_entry_resize_free(map, stack_entry); 4066 rv = KERN_SUCCESS; 4067 } else 4068 rv = KERN_FAILURE; 4069 } 4070 if (rv == KERN_SUCCESS && is_procstack) 4071 vm->vm_ssize += btoc(grow_amount); 4072 4073 /* 4074 * Heed the MAP_WIREFUTURE flag if it was set for this process. 4075 */ 4076 if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) { 4077 vm_map_unlock(map); 4078 vm_map_wire(map, grow_start, grow_start + grow_amount, 4079 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); 4080 vm_map_lock_read(map); 4081 } else 4082 vm_map_lock_downgrade(map); 4083 4084 out: 4085 #ifdef RACCT 4086 if (racct_enable && rv != KERN_SUCCESS) { 4087 PROC_LOCK(p); 4088 error = racct_set(p, RACCT_VMEM, map->size); 4089 KASSERT(error == 0, ("decreasing RACCT_VMEM failed")); 4090 if (!old_mlock) { 4091 error = racct_set(p, RACCT_MEMLOCK, 4092 ptoa(pmap_wired_count(map->pmap))); 4093 KASSERT(error == 0, ("decreasing RACCT_MEMLOCK failed")); 4094 } 4095 error = racct_set(p, RACCT_STACK, ctob(vm->vm_ssize)); 4096 KASSERT(error == 0, ("decreasing RACCT_STACK failed")); 4097 PROC_UNLOCK(p); 4098 } 4099 #endif 4100 4101 return (rv); 4102 } 4103 4104 /* 4105 * Unshare the specified VM space for exec. If other processes are 4106 * mapped to it, then create a new one. The new vmspace is null. 
4107 */ 4108 int 4109 vmspace_exec(struct proc *p, vm_offset_t minuser, vm_offset_t maxuser) 4110 { 4111 struct vmspace *oldvmspace = p->p_vmspace; 4112 struct vmspace *newvmspace; 4113 4114 KASSERT((curthread->td_pflags & TDP_EXECVMSPC) == 0, 4115 ("vmspace_exec recursed")); 4116 newvmspace = vmspace_alloc(minuser, maxuser, pmap_pinit); 4117 if (newvmspace == NULL) 4118 return (ENOMEM); 4119 newvmspace->vm_swrss = oldvmspace->vm_swrss; 4120 /* 4121 * This code is written like this for prototype purposes. The 4122 * goal is to avoid running down the vmspace here, but let the 4123 * other process's that are still using the vmspace to finally 4124 * run it down. Even though there is little or no chance of blocking 4125 * here, it is a good idea to keep this form for future mods. 4126 */ 4127 PROC_VMSPACE_LOCK(p); 4128 p->p_vmspace = newvmspace; 4129 PROC_VMSPACE_UNLOCK(p); 4130 if (p == curthread->td_proc) 4131 pmap_activate(curthread); 4132 curthread->td_pflags |= TDP_EXECVMSPC; 4133 return (0); 4134 } 4135 4136 /* 4137 * Unshare the specified VM space for forcing COW. This 4138 * is called by rfork, for the (RFMEM|RFPROC) == 0 case. 
4139 */ 4140 int 4141 vmspace_unshare(struct proc *p) 4142 { 4143 struct vmspace *oldvmspace = p->p_vmspace; 4144 struct vmspace *newvmspace; 4145 vm_ooffset_t fork_charge; 4146 4147 if (oldvmspace->vm_refcnt == 1) 4148 return (0); 4149 fork_charge = 0; 4150 newvmspace = vmspace_fork(oldvmspace, &fork_charge); 4151 if (newvmspace == NULL) 4152 return (ENOMEM); 4153 if (!swap_reserve_by_cred(fork_charge, p->p_ucred)) { 4154 vmspace_free(newvmspace); 4155 return (ENOMEM); 4156 } 4157 PROC_VMSPACE_LOCK(p); 4158 p->p_vmspace = newvmspace; 4159 PROC_VMSPACE_UNLOCK(p); 4160 if (p == curthread->td_proc) 4161 pmap_activate(curthread); 4162 vmspace_free(oldvmspace); 4163 return (0); 4164 } 4165 4166 /* 4167 * vm_map_lookup: 4168 * 4169 * Finds the VM object, offset, and 4170 * protection for a given virtual address in the 4171 * specified map, assuming a page fault of the 4172 * type specified. 4173 * 4174 * Leaves the map in question locked for read; return 4175 * values are guaranteed until a vm_map_lookup_done 4176 * call is performed. Note that the map argument 4177 * is in/out; the returned map must be used in 4178 * the call to vm_map_lookup_done. 4179 * 4180 * A handle (out_entry) is returned for use in 4181 * vm_map_lookup_done, to make that fast. 4182 * 4183 * If a lookup is requested with "write protection" 4184 * specified, the map may be changed to perform virtual 4185 * copying operations, although the data referenced will 4186 * remain the same. 
4187 */ 4188 int 4189 vm_map_lookup(vm_map_t *var_map, /* IN/OUT */ 4190 vm_offset_t vaddr, 4191 vm_prot_t fault_typea, 4192 vm_map_entry_t *out_entry, /* OUT */ 4193 vm_object_t *object, /* OUT */ 4194 vm_pindex_t *pindex, /* OUT */ 4195 vm_prot_t *out_prot, /* OUT */ 4196 boolean_t *wired) /* OUT */ 4197 { 4198 vm_map_entry_t entry; 4199 vm_map_t map = *var_map; 4200 vm_prot_t prot; 4201 vm_prot_t fault_type = fault_typea; 4202 vm_object_t eobject; 4203 vm_size_t size; 4204 struct ucred *cred; 4205 4206 RetryLookup: 4207 4208 vm_map_lock_read(map); 4209 4210 RetryLookupLocked: 4211 /* 4212 * Lookup the faulting address. 4213 */ 4214 if (!vm_map_lookup_entry(map, vaddr, out_entry)) { 4215 vm_map_unlock_read(map); 4216 return (KERN_INVALID_ADDRESS); 4217 } 4218 4219 entry = *out_entry; 4220 4221 /* 4222 * Handle submaps. 4223 */ 4224 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 4225 vm_map_t old_map = map; 4226 4227 *var_map = map = entry->object.sub_map; 4228 vm_map_unlock_read(old_map); 4229 goto RetryLookup; 4230 } 4231 4232 /* 4233 * Check whether this task is allowed to have this page. 
4234 */ 4235 prot = entry->protection; 4236 if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) { 4237 fault_typea &= ~VM_PROT_FAULT_LOOKUP; 4238 if (prot == VM_PROT_NONE && map != kernel_map && 4239 (entry->eflags & MAP_ENTRY_GUARD) != 0 && 4240 (entry->eflags & (MAP_ENTRY_STACK_GAP_DN | 4241 MAP_ENTRY_STACK_GAP_UP)) != 0 && 4242 vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS) 4243 goto RetryLookupLocked; 4244 } 4245 fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 4246 if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) { 4247 vm_map_unlock_read(map); 4248 return (KERN_PROTECTION_FAILURE); 4249 } 4250 KASSERT((prot & VM_PROT_WRITE) == 0 || (entry->eflags & 4251 (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY)) != 4252 (MAP_ENTRY_USER_WIRED | MAP_ENTRY_NEEDS_COPY), 4253 ("entry %p flags %x", entry, entry->eflags)); 4254 if ((fault_typea & VM_PROT_COPY) != 0 && 4255 (entry->max_protection & VM_PROT_WRITE) == 0 && 4256 (entry->eflags & MAP_ENTRY_COW) == 0) { 4257 vm_map_unlock_read(map); 4258 return (KERN_PROTECTION_FAILURE); 4259 } 4260 4261 /* 4262 * If this page is not pageable, we have to get it for all possible 4263 * accesses. 4264 */ 4265 *wired = (entry->wired_count != 0); 4266 if (*wired) 4267 fault_type = entry->protection; 4268 size = entry->end - entry->start; 4269 /* 4270 * If the entry was copy-on-write, we either ... 4271 */ 4272 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 4273 /* 4274 * If we want to write the page, we may as well handle that 4275 * now since we've got the map locked. 4276 * 4277 * If we don't need to write the page, we just demote the 4278 * permissions allowed. 4279 */ 4280 if ((fault_type & VM_PROT_WRITE) != 0 || 4281 (fault_typea & VM_PROT_COPY) != 0) { 4282 /* 4283 * Make a new object, and place it in the object 4284 * chain. Note that no new references have appeared 4285 * -- one just moved from the map to the new 4286 * object. 
4287 */ 4288 if (vm_map_lock_upgrade(map)) 4289 goto RetryLookup; 4290 4291 if (entry->cred == NULL) { 4292 /* 4293 * The debugger owner is charged for 4294 * the memory. 4295 */ 4296 cred = curthread->td_ucred; 4297 crhold(cred); 4298 if (!swap_reserve_by_cred(size, cred)) { 4299 crfree(cred); 4300 vm_map_unlock(map); 4301 return (KERN_RESOURCE_SHORTAGE); 4302 } 4303 entry->cred = cred; 4304 } 4305 vm_object_shadow(&entry->object.vm_object, 4306 &entry->offset, size); 4307 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 4308 eobject = entry->object.vm_object; 4309 if (eobject->cred != NULL) { 4310 /* 4311 * The object was not shadowed. 4312 */ 4313 swap_release_by_cred(size, entry->cred); 4314 crfree(entry->cred); 4315 entry->cred = NULL; 4316 } else if (entry->cred != NULL) { 4317 VM_OBJECT_WLOCK(eobject); 4318 eobject->cred = entry->cred; 4319 eobject->charge = size; 4320 VM_OBJECT_WUNLOCK(eobject); 4321 entry->cred = NULL; 4322 } 4323 4324 vm_map_lock_downgrade(map); 4325 } else { 4326 /* 4327 * We're attempting to read a copy-on-write page -- 4328 * don't allow writes. 4329 */ 4330 prot &= ~VM_PROT_WRITE; 4331 } 4332 } 4333 4334 /* 4335 * Create an object if necessary. 4336 */ 4337 if (entry->object.vm_object == NULL && 4338 !map->system_map) { 4339 if (vm_map_lock_upgrade(map)) 4340 goto RetryLookup; 4341 entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT, 4342 atop(size)); 4343 entry->offset = 0; 4344 if (entry->cred != NULL) { 4345 VM_OBJECT_WLOCK(entry->object.vm_object); 4346 entry->object.vm_object->cred = entry->cred; 4347 entry->object.vm_object->charge = size; 4348 VM_OBJECT_WUNLOCK(entry->object.vm_object); 4349 entry->cred = NULL; 4350 } 4351 vm_map_lock_downgrade(map); 4352 } 4353 4354 /* 4355 * Return the object/offset from this entry. If the entry was 4356 * copy-on-write or empty, it has been fixed up. 
4357 */ 4358 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 4359 *object = entry->object.vm_object; 4360 4361 *out_prot = prot; 4362 return (KERN_SUCCESS); 4363 } 4364 4365 /* 4366 * vm_map_lookup_locked: 4367 * 4368 * Lookup the faulting address. A version of vm_map_lookup that returns 4369 * KERN_FAILURE instead of blocking on map lock or memory allocation. 4370 */ 4371 int 4372 vm_map_lookup_locked(vm_map_t *var_map, /* IN/OUT */ 4373 vm_offset_t vaddr, 4374 vm_prot_t fault_typea, 4375 vm_map_entry_t *out_entry, /* OUT */ 4376 vm_object_t *object, /* OUT */ 4377 vm_pindex_t *pindex, /* OUT */ 4378 vm_prot_t *out_prot, /* OUT */ 4379 boolean_t *wired) /* OUT */ 4380 { 4381 vm_map_entry_t entry; 4382 vm_map_t map = *var_map; 4383 vm_prot_t prot; 4384 vm_prot_t fault_type = fault_typea; 4385 4386 /* 4387 * Lookup the faulting address. 4388 */ 4389 if (!vm_map_lookup_entry(map, vaddr, out_entry)) 4390 return (KERN_INVALID_ADDRESS); 4391 4392 entry = *out_entry; 4393 4394 /* 4395 * Fail if the entry refers to a submap. 4396 */ 4397 if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) 4398 return (KERN_FAILURE); 4399 4400 /* 4401 * Check whether this task is allowed to have this page. 4402 */ 4403 prot = entry->protection; 4404 fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; 4405 if ((fault_type & prot) != fault_type) 4406 return (KERN_PROTECTION_FAILURE); 4407 4408 /* 4409 * If this page is not pageable, we have to get it for all possible 4410 * accesses. 4411 */ 4412 *wired = (entry->wired_count != 0); 4413 if (*wired) 4414 fault_type = entry->protection; 4415 4416 if (entry->eflags & MAP_ENTRY_NEEDS_COPY) { 4417 /* 4418 * Fail if the entry was copy-on-write for a write fault. 4419 */ 4420 if (fault_type & VM_PROT_WRITE) 4421 return (KERN_FAILURE); 4422 /* 4423 * We're attempting to read a copy-on-write page -- 4424 * don't allow writes. 4425 */ 4426 prot &= ~VM_PROT_WRITE; 4427 } 4428 4429 /* 4430 * Fail if an object should be created. 
4431 */ 4432 if (entry->object.vm_object == NULL && !map->system_map) 4433 return (KERN_FAILURE); 4434 4435 /* 4436 * Return the object/offset from this entry. If the entry was 4437 * copy-on-write or empty, it has been fixed up. 4438 */ 4439 *pindex = OFF_TO_IDX((vaddr - entry->start) + entry->offset); 4440 *object = entry->object.vm_object; 4441 4442 *out_prot = prot; 4443 return (KERN_SUCCESS); 4444 } 4445 4446 /* 4447 * vm_map_lookup_done: 4448 * 4449 * Releases locks acquired by a vm_map_lookup 4450 * (according to the handle returned by that lookup). 4451 */ 4452 void 4453 vm_map_lookup_done(vm_map_t map, vm_map_entry_t entry) 4454 { 4455 /* 4456 * Unlock the main-level map 4457 */ 4458 vm_map_unlock_read(map); 4459 } 4460 4461 vm_offset_t 4462 vm_map_max_KBI(const struct vm_map *map) 4463 { 4464 4465 return (vm_map_max(map)); 4466 } 4467 4468 vm_offset_t 4469 vm_map_min_KBI(const struct vm_map *map) 4470 { 4471 4472 return (vm_map_min(map)); 4473 } 4474 4475 pmap_t 4476 vm_map_pmap_KBI(vm_map_t map) 4477 { 4478 4479 return (map->pmap); 4480 } 4481 4482 #include "opt_ddb.h" 4483 #ifdef DDB 4484 #include <sys/kernel.h> 4485 4486 #include <ddb/ddb.h> 4487 4488 static void 4489 vm_map_print(vm_map_t map) 4490 { 4491 vm_map_entry_t entry; 4492 4493 db_iprintf("Task map %p: pmap=%p, nentries=%d, version=%u\n", 4494 (void *)map, 4495 (void *)map->pmap, map->nentries, map->timestamp); 4496 4497 db_indent += 2; 4498 for (entry = map->header.next; entry != &map->header; 4499 entry = entry->next) { 4500 db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n", 4501 (void *)entry, (void *)entry->start, (void *)entry->end, 4502 entry->eflags); 4503 { 4504 static char *inheritance_name[4] = 4505 {"share", "copy", "none", "donate_copy"}; 4506 4507 db_iprintf(" prot=%x/%x/%s", 4508 entry->protection, 4509 entry->max_protection, 4510 inheritance_name[(int)(unsigned char)entry->inheritance]); 4511 if (entry->wired_count != 0) 4512 db_printf(", wired"); 4513 } 4514 if 
(entry->eflags & MAP_ENTRY_IS_SUB_MAP) { 4515 db_printf(", share=%p, offset=0x%jx\n", 4516 (void *)entry->object.sub_map, 4517 (uintmax_t)entry->offset); 4518 if ((entry->prev == &map->header) || 4519 (entry->prev->object.sub_map != 4520 entry->object.sub_map)) { 4521 db_indent += 2; 4522 vm_map_print((vm_map_t)entry->object.sub_map); 4523 db_indent -= 2; 4524 } 4525 } else { 4526 if (entry->cred != NULL) 4527 db_printf(", ruid %d", entry->cred->cr_ruid); 4528 db_printf(", object=%p, offset=0x%jx", 4529 (void *)entry->object.vm_object, 4530 (uintmax_t)entry->offset); 4531 if (entry->object.vm_object && entry->object.vm_object->cred) 4532 db_printf(", obj ruid %d charge %jx", 4533 entry->object.vm_object->cred->cr_ruid, 4534 (uintmax_t)entry->object.vm_object->charge); 4535 if (entry->eflags & MAP_ENTRY_COW) 4536 db_printf(", copy (%s)", 4537 (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); 4538 db_printf("\n"); 4539 4540 if ((entry->prev == &map->header) || 4541 (entry->prev->object.vm_object != 4542 entry->object.vm_object)) { 4543 db_indent += 2; 4544 vm_object_print((db_expr_t)(intptr_t) 4545 entry->object.vm_object, 4546 0, 0, (char *)0); 4547 db_indent -= 2; 4548 } 4549 } 4550 } 4551 db_indent -= 2; 4552 } 4553 4554 DB_SHOW_COMMAND(map, map) 4555 { 4556 4557 if (!have_addr) { 4558 db_printf("usage: show map <addr>\n"); 4559 return; 4560 } 4561 vm_map_print((vm_map_t)addr); 4562 } 4563 4564 DB_SHOW_COMMAND(procvm, procvm) 4565 { 4566 struct proc *p; 4567 4568 if (have_addr) { 4569 p = db_lookup_proc(addr); 4570 } else { 4571 p = curproc; 4572 } 4573 4574 db_printf("p = %p, vmspace = %p, map = %p, pmap = %p\n", 4575 (void *)p, (void *)p->p_vmspace, (void *)&p->p_vmspace->vm_map, 4576 (void *)vmspace_pmap(p->p_vmspace)); 4577 4578 vm_map_print((vm_map_t)&p->p_vmspace->vm_map); 4579 } 4580 4581 #endif /* DDB */ 4582