1 /* 2 * Copyright (c) 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_map.c 8.3 (Berkeley) 1/12/94 37 * 38 * 39 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 40 * All rights reserved. 41 * 42 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 43 * 44 * Permission to use, copy, modify and distribute this software and 45 * its documentation is hereby granted, provided that both the copyright 46 * notice and this permission notice appear in all copies of the 47 * software, derivative works or modified versions, and any portions 48 * thereof, and that both notices appear in supporting documentation. 49 * 50 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 51 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 52 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 53 * 54 * Carnegie Mellon requests users of this software to return to 55 * 56 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 57 * School of Computer Science 58 * Carnegie Mellon University 59 * Pittsburgh PA 15213-3890 60 * 61 * any improvements or extensions that they make and grant Carnegie the 62 * rights to redistribute these changes. 63 * 64 * $Id: vm_map.c,v 1.93 1997/10/12 20:26:30 phk Exp $ 65 */ 66 67 /* 68 * Virtual memory mapping module. 
69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/malloc.h> 74 #include <sys/proc.h> 75 #include <sys/vmmeter.h> 76 #include <sys/mman.h> 77 78 #ifdef SMP 79 #include <machine/smp.h> 80 #endif 81 82 #include <vm/vm.h> 83 #include <vm/vm_param.h> 84 #include <vm/vm_prot.h> 85 #include <vm/vm_inherit.h> 86 #include <sys/lock.h> 87 #include <vm/pmap.h> 88 #include <vm/vm_map.h> 89 #include <vm/vm_page.h> 90 #include <vm/vm_object.h> 91 #include <vm/vm_kern.h> 92 #include <vm/vm_extern.h> 93 #include <vm/default_pager.h> 94 #include <vm/vm_zone.h> 95 96 static MALLOC_DEFINE(M_VMMAP, "VM map", "VM map structures"); 97 98 /* 99 * Virtual memory maps provide for the mapping, protection, 100 * and sharing of virtual memory objects. In addition, 101 * this module provides for an efficient virtual copy of 102 * memory from one map to another. 103 * 104 * Synchronization is required prior to most operations. 105 * 106 * Maps consist of an ordered doubly-linked list of simple 107 * entries; a single hint is used to speed up lookups. 108 * 109 * In order to properly represent the sharing of virtual 110 * memory regions among maps, the map structure is bi-level. 111 * Top-level ("address") maps refer to regions of sharable 112 * virtual memory. These regions are implemented as 113 * ("sharing") maps, which then refer to the actual virtual 114 * memory objects. When two address maps "share" memory, 115 * their top-level maps both have references to the same 116 * sharing map. When memory is virtual-copied from one 117 * address map to another, the references in the sharing 118 * maps are actually copied -- no copying occurs at the 119 * virtual memory object level. 120 * 121 * Since portions of maps are specified by start/end addreses, 122 * which may not align with existing map entries, all 123 * routines merely "clip" entries to these start/end values. 124 * [That is, an entry is split into two, bordering at a 125 * start or end value.] 
Note that these clippings may not 126 * always be necessary (as the two resulting entries are then 127 * not changed); however, the clipping is done for convenience. 128 * No attempt is currently made to "glue back together" two 129 * abutting entries. 130 * 131 * As mentioned above, virtual copy operations are performed 132 * by copying VM object references from one sharing map to 133 * another, and then marking both regions as copy-on-write. 134 * It is important to note that only one writeable reference 135 * to a VM object region exists in any map -- this means that 136 * shadow object creation can be delayed until a write operation 137 * occurs. 138 */ 139 140 /* 141 * vm_map_startup: 142 * 143 * Initialize the vm_map module. Must be called before 144 * any other vm_map routines. 145 * 146 * Map and entry structures are allocated from the general 147 * purpose memory pool with some exceptions: 148 * 149 * - The kernel map and kmem submap are allocated statically. 150 * - Kernel map entries are allocated out of a static pool. 151 * 152 * These restrictions are necessary since malloc() uses the 153 * maps and requires map entries. 
 */

extern char kstack[];
extern int inmprotect;

/*
 * Zone bootstrap state: the map, kernel-map-entry and map-entry zones are
 * seeded from the static arrays below via zbootinit() so that maps and map
 * entries can be allocated before the general-purpose allocator is usable.
 * zinitna() later attaches backing objects to them (see vm_init2()).
 */
static int kentry_count;	/* NOTE(review): not referenced in this chunk */
static struct vm_zone kmapentzone_store, mapentzone_store, mapzone_store;
static vm_zone_t mapentzone, kmapentzone, mapzone;
static struct vm_object kmapentobj, mapentobj, mapobj;
#define MAP_ENTRY_INIT	128
struct vm_map_entry map_entry_init[MAX_MAPENT];
struct vm_map_entry kmap_entry_init[MAX_KMAPENT];
struct vm_map map_init[MAX_KMAP];

/* Forward declarations for internal helpers. */
static void _vm_map_clip_end __P((vm_map_t, vm_map_entry_t, vm_offset_t));
static void _vm_map_clip_start __P((vm_map_t, vm_map_entry_t, vm_offset_t));
static vm_map_entry_t vm_map_entry_create __P((vm_map_t));
static void vm_map_entry_delete __P((vm_map_t, vm_map_entry_t));
static void vm_map_entry_dispose __P((vm_map_t, vm_map_entry_t));
static void vm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
static void vm_map_copy_entry __P((vm_map_t, vm_map_t, vm_map_entry_t,
		vm_map_entry_t));

/*
 * vm_map_startup:
 *
 *	Bootstrap the three zones (maps, kernel map entries, map entries)
 *	out of the static storage declared above.  Must be called before
 *	any other vm_map routine.
 */
void
vm_map_startup()
{
	mapzone = &mapzone_store;
	zbootinit(mapzone, "MAP", sizeof (struct vm_map),
		map_init, MAX_KMAP);
	kmapentzone = &kmapentzone_store;
	zbootinit(kmapentzone, "KMAP ENTRY", sizeof (struct vm_map_entry),
		kmap_entry_init, MAX_KMAPENT);
	mapentzone = &mapentzone_store;
	zbootinit(mapentzone, "MAP ENTRY", sizeof (struct vm_map_entry),
		map_entry_init, MAX_MAPENT);
}

/*
 * Allocate a vmspace structure, including a vm_map and pmap,
 * and initialize those structures.  The refcnt is set to 1.
 * The remaining fields must be initialized by the caller.
 */
struct vmspace *
vmspace_alloc(min, max, pageable)
	vm_offset_t min, max;
	int pageable;
{
	register struct vmspace *vm;

	MALLOC(vm, struct vmspace *, sizeof(struct vmspace), M_VMMAP, M_WAITOK);
	/* Zero only the head of the structure, up to vm_startcopy. */
	bzero(vm, (caddr_t) &vm->vm_startcopy - (caddr_t) vm);
	vm_map_init(&vm->vm_map, min, max, pageable);
	pmap_pinit(&vm->vm_pmap);
	vm->vm_map.pmap = &vm->vm_pmap;		/* XXX */
	vm->vm_refcnt = 1;
	return (vm);
}

/*
 * vm_init2:
 *
 *	Second-stage VM initialization: attach backing objects to the zones
 *	bootstrapped in vm_map_startup() and run the second-stage pmap and
 *	object initializers.  The kernel map entry zone is given a budget of
 *	cnt.v_page_count / 4 and the ZONE_INTERRUPT flag.
 */
void
vm_init2(void) {
	zinitna(kmapentzone, &kmapentobj,
		NULL, 0, cnt.v_page_count / 4, ZONE_INTERRUPT, 1);
	zinitna(mapentzone, &mapentobj,
		NULL, 0, 0, 0, 1);
	zinitna(mapzone, &mapobj,
		NULL, 0, 0, 0, 1);
	pmap_init2();
	vm_object_init2();
}

/*
 * vmspace_free:
 *
 *	Drop one reference to a vmspace; on the last reference tear down
 *	its map and pmap and free the structure.
 */
void
vmspace_free(vm)
	register struct vmspace *vm;
{

	if (vm->vm_refcnt == 0)
		panic("vmspace_free: attempt to free already freed vmspace");

	if (--vm->vm_refcnt == 0) {

		/*
		 * Lock the map, to wait out all other references to it.
		 * Delete all of the mappings and pages they hold, then call
		 * the pmap module to reclaim anything left.
		 */
		vm_map_lock(&vm->vm_map);
		(void) vm_map_delete(&vm->vm_map, vm->vm_map.min_offset,
		    vm->vm_map.max_offset);
		vm_map_unlock(&vm->vm_map);

		/* Sleep until we hold the last map reference, then drop it. */
		while( vm->vm_map.ref_count != 1)
			tsleep(&vm->vm_map.ref_count, PVM, "vmsfre", 0);
		--vm->vm_map.ref_count;
		pmap_release(&vm->vm_pmap);
		FREE(vm, M_VMMAP);
	} else {
		/* Not last: wake anyone sleeping on ref_count above. */
		wakeup(&vm->vm_map.ref_count);
	}
}

/*
 * vm_map_create:
 *
 *	Creates and returns a new empty VM map with
 *	the given physical map structure, and having
 *	the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(pmap, min, max, pageable)
	pmap_t pmap;
	vm_offset_t min, max;
	boolean_t pageable;
{
	register vm_map_t result;

	result = zalloc(mapzone);
	vm_map_init(result, min, max, pageable);
	result->pmap = pmap;
	return (result);
}

/*
 * Initialize an existing vm_map structure
 * such as that in the vmspace structure.
 * The pmap is set elsewhere.
 */
void
vm_map_init(map, min, max, pageable)
	register struct vm_map *map;
	vm_offset_t min, max;
	boolean_t pageable;
{
	/* Empty circular entry list; both hints point at the header. */
	map->header.next = map->header.prev = &map->header;
	map->nentries = 0;
	map->size = 0;
	map->ref_count = 1;
	map->is_main_map = TRUE;
	map->system_map = 0;
	map->min_offset = min;
	map->max_offset = max;
	map->entries_pageable = pageable;
	map->first_free = &map->header;
	map->hint = &map->header;
	map->timestamp = 0;
	lockinit(&map->lock, PVM, "thrd_sleep", 0, 0);
	simple_lock_init(&map->ref_lock);
}

/*
 * vm_map_entry_dispose:	[ internal use only ]
 *
 *	Inverse of vm_map_entry_create.  System maps (and any entry freed
 *	before mapentzone exists) use the kernel map entry zone.
 */
static void
vm_map_entry_dispose(map, entry)
	vm_map_t map;
	vm_map_entry_t entry;
{
	zfree((map->system_map || !mapentzone) ? kmapentzone : mapentzone, entry);
}

/*
 * vm_map_entry_create:	[ internal use only ]
 *
 *	Allocates a VM map entry for insertion.
 *	No entry fields are filled in.
 */
static vm_map_entry_t
vm_map_entry_create(map)
	vm_map_t map;
{
	return zalloc((map->system_map || !mapentzone) ? kmapentzone : mapentzone);
}

/*
 * vm_map_entry_{un,}link:
 *
 *	Insert/remove entries from maps.
 */
#define	vm_map_entry_link(map, after_where, entry) \
		{ \
		(map)->nentries++; \
		(entry)->prev = (after_where); \
		(entry)->next = (after_where)->next; \
		(entry)->prev->next = (entry); \
		(entry)->next->prev = (entry); \
		}
#define	vm_map_entry_unlink(map, entry) \
		{ \
		(map)->nentries--; \
		(entry)->next->prev = (entry)->prev; \
		(entry)->prev->next = (entry)->next; \
		}

/*
 * vm_map_reference:
 *
 *	Creates another valid reference to the given map.
 *
 */
void
vm_map_reference(map)
	register vm_map_t map;
{
	if (map == NULL)
		return;

	map->ref_count++;
}

/*
 * vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(map)
	register vm_map_t map;
{
	register int c;

	if (map == NULL)
		return;

	c = map->ref_count;

	if (c == 0)
		panic("vm_map_deallocate: deallocating already freed map");

	if (c != 1) {
		/* Not the last reference: drop it and wake any waiters. */
		--map->ref_count;
		wakeup(&map->ref_count);
		return;
	}
	/*
	 * Lock the map, to wait out all other references to it.
	 */

	vm_map_lock_drain_interlock(map);
	(void) vm_map_delete(map, map->min_offset, map->max_offset);
	--map->ref_count;
	if( map->ref_count != 0) {
		/* The map was re-referenced while we drained; keep it. */
		vm_map_unlock(map);
		return;
	}

	pmap_destroy(map->pmap);

	vm_map_unlock(map);

	zfree(mapzone, map);
}

/*
 * SAVE_HINT:
 *
 *	Saves the specified entry as the hint for
 *	future lookups.
 */
#define	SAVE_HINT(map,value) \
		(map)->hint = (value);

/*
 * vm_map_lookup_entry:	[ internal use only ]
 *
 *	Finds the map entry containing (or
 *	immediately preceding) the specified address
 *	in the given map; the entry is returned
 *	in the "entry" parameter.
 * The boolean
 * result indicates whether the address is
 * actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(map, address, entry)
	register vm_map_t map;
	register vm_offset_t address;
	vm_map_entry_t *entry;	/* OUT */
{
	register vm_map_entry_t cur;
	register vm_map_entry_t last;

	/*
	 * Start looking either from the head of the list, or from the hint.
	 */

	cur = map->hint;

	if (cur == &map->header)
		cur = cur->next;

	if (address >= cur->start) {
		/*
		 * Go from hint to end of list.
		 *
		 * But first, make a quick check to see if we are already looking
		 * at the entry we want (which is usually the case). Note also
		 * that we don't need to save the hint here... it is the same
		 * hint (unless we are at the header, in which case the hint
		 * didn't buy us anything anyway).
		 */
		last = &map->header;
		if ((cur != last) && (cur->end > address)) {
			*entry = cur;
			return (TRUE);
		}
	} else {
		/*
		 * Go from start to hint, *inclusively*
		 */
		last = cur->next;
		cur = map->header.next;
	}

	/*
	 * Search linearly
	 */

	while (cur != last) {
		if (cur->end > address) {
			if (address >= cur->start) {
				/*
				 * Save this lookup for future hints, and
				 * return
				 */

				*entry = cur;
				SAVE_HINT(map, cur);
				return (TRUE);
			}
			break;
		}
		cur = cur->next;
	}
	/* Miss: return the entry immediately preceding the address. */
	*entry = cur->prev;
	SAVE_HINT(map, *entry);
	return (FALSE);
}

/*
 * vm_map_insert:
 *
 *	Inserts the given whole VM object into the target
 *	map at the specified address range.  The object's
 *	size should match that of the address range.
 *
 *	Requires that the map be locked, and leaves it so.
 */
int
vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
	vm_offset_t start, vm_offset_t end, vm_prot_t prot, vm_prot_t max,
	int cow)
{
	register vm_map_entry_t new_entry;
	register vm_map_entry_t prev_entry;
	vm_map_entry_t temp_entry;
	vm_object_t prev_object;
	u_char protoeflags;

	/* MAP_NOFAULT regions by definition have no backing object. */
	if ((object != NULL) && (cow & MAP_NOFAULT)) {
		panic("vm_map_insert: paradoxical MAP_NOFAULT request");
	}

	/*
	 * Check that the start and end points are not bogus.
	 */

	if ((start < map->min_offset) || (end > map->max_offset) ||
	    (start >= end))
		return (KERN_INVALID_ADDRESS);

	/*
	 * Find the entry prior to the proposed starting address; if it's part
	 * of an existing entry, this range is bogus.
	 */

	if (vm_map_lookup_entry(map, start, &temp_entry))
		return (KERN_NO_SPACE);

	prev_entry = temp_entry;

	/*
	 * Assert that the next entry doesn't overlap the end point.
	 */

	if ((prev_entry->next != &map->header) &&
	    (prev_entry->next->start < end))
		return (KERN_NO_SPACE);

	/* Translate the MAP_* cow flags into prototype entry eflags. */
	protoeflags = 0;
	if (cow & MAP_COPY_NEEDED)
		protoeflags |= MAP_ENTRY_NEEDS_COPY;

	if (cow & MAP_COPY_ON_WRITE)
		protoeflags |= MAP_ENTRY_COW;

	if (cow & MAP_NOFAULT)
		protoeflags |= MAP_ENTRY_NOFAULT;

	/*
	 * See if we can avoid creating a new entry by extending one of our
	 * neighbors.  Or at least extend the object.
	 */

	if ((object == NULL) &&
	    (prev_entry != &map->header) &&
	    (( prev_entry->eflags & (MAP_ENTRY_IS_A_MAP | MAP_ENTRY_IS_SUB_MAP)) == 0) &&
	    (prev_entry->end == start) &&
	    (prev_entry->wired_count == 0)) {


		if ((protoeflags == prev_entry->eflags) &&
		    ((cow & MAP_NOFAULT) ||
		     vm_object_coalesce(prev_entry->object.vm_object,
			OFF_TO_IDX(prev_entry->offset),
			(vm_size_t) (prev_entry->end - prev_entry->start),
			(vm_size_t) (end - prev_entry->end)))) {

			/*
			 * Coalesced the two objects.  Can we extend the
			 * previous map entry to include the new range?
			 */
			if ((prev_entry->inheritance == VM_INHERIT_DEFAULT) &&
			    (prev_entry->protection == prot) &&
			    (prev_entry->max_protection == max)) {

				map->size += (end - prev_entry->end);
				prev_entry->end = end;
				if ((cow & MAP_NOFAULT) == 0) {
					prev_object = prev_entry->object.vm_object;
					default_pager_convert_to_swapq(prev_object);
				}
				return (KERN_SUCCESS);
			}
			else {
				/*
				 * Object was coalesced but the entry
				 * attributes differ: fall through and build
				 * a new entry sharing the grown object.
				 */
				object = prev_entry->object.vm_object;
				offset = prev_entry->offset + (prev_entry->end -
				    prev_entry->start);

				vm_object_reference(object);
			}
		}
	}

	/*
	 * Create a new entry
	 */

	new_entry = vm_map_entry_create(map);
	new_entry->start = start;
	new_entry->end = end;

	new_entry->eflags = protoeflags;
	new_entry->object.vm_object = object;
	new_entry->offset = offset;

	if (map->is_main_map) {
		new_entry->inheritance = VM_INHERIT_DEFAULT;
		new_entry->protection = prot;
		new_entry->max_protection = max;
		new_entry->wired_count = 0;
	}
	/*
	 * Insert the new entry into the list
	 */

	vm_map_entry_link(map, prev_entry, new_entry);
	map->size += new_entry->end - new_entry->start;

	/*
	 * Update the free space hint
	 */
	if ((map->first_free == prev_entry) &&
	    (prev_entry->end >= new_entry->start))
		map->first_free = new_entry;

	default_pager_convert_to_swapq(object);
	return (KERN_SUCCESS);
}

/*
 * Find sufficient space for `length' bytes in the given map, starting at
 * `start'.  The map must be locked.  Returns 0 on success, 1 on no space.
 */
int
vm_map_findspace(map, start, length, addr)
	register vm_map_t map;
	register vm_offset_t start;
	vm_size_t length;
	vm_offset_t *addr;
{
	register vm_map_entry_t entry, next;
	register vm_offset_t end;

	if (start < map->min_offset)
		start = map->min_offset;
	if (start > map->max_offset)
		return (1);

	/*
	 * Look for the first possible address; if there's already something
	 * at this address, we have to start after it.
	 */
	if (start == map->min_offset) {
		if ((entry = map->first_free) != &map->header)
			start = entry->end;
	} else {
		vm_map_entry_t tmp;

		if (vm_map_lookup_entry(map, start, &tmp))
			start = tmp->end;
		entry = tmp;
	}

	/*
	 * Look through the rest of the map, trying to fit a new region in the
	 * gap between existing regions, or after the very last region.
	 */
	for (;; start = (entry = next)->end) {
		/*
		 * Find the end of the proposed new region.  Be sure we didn't
		 * go beyond the end of the map, or wrap around the address;
		 * if so, we lose.  Otherwise, if this is the last entry, or
		 * if the proposed new region fits before the next entry, we
		 * win.
		 */
		end = start + length;
		if (end > map->max_offset || end < start)
			return (1);
		next = entry->next;
		if (next == &map->header || next->start >= end)
			break;
	}
	SAVE_HINT(map, entry);
	*addr = start;
	if (map == kernel_map) {
		vm_offset_t ksize;
		/* Grow the kernel page tables if the new range needs them. */
		if ((ksize = round_page(start + length)) > kernel_vm_end) {
			pmap_growkernel(ksize);
		}
	}
	return (0);
}

/*
 * vm_map_find finds an unallocated region in the target address
 * map with the given length.
The search is defined to be 702 * first-fit from the specified address; the region found is 703 * returned in the same parameter. 704 * 705 */ 706 int 707 vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, 708 vm_offset_t *addr, /* IN/OUT */ 709 vm_size_t length, boolean_t find_space, vm_prot_t prot, 710 vm_prot_t max, int cow) 711 { 712 register vm_offset_t start; 713 int result, s = 0; 714 715 start = *addr; 716 717 if (map == kmem_map || map == mb_map) 718 s = splvm(); 719 720 vm_map_lock(map); 721 if (find_space) { 722 if (vm_map_findspace(map, start, length, addr)) { 723 vm_map_unlock(map); 724 if (map == kmem_map || map == mb_map) 725 splx(s); 726 return (KERN_NO_SPACE); 727 } 728 start = *addr; 729 } 730 result = vm_map_insert(map, object, offset, 731 start, start + length, prot, max, cow); 732 vm_map_unlock(map); 733 734 if (map == kmem_map || map == mb_map) 735 splx(s); 736 737 return (result); 738 } 739 740 /* 741 * vm_map_simplify_entry: 742 * 743 * Simplify the given map entry by merging with either neighbor. 
 */
void
vm_map_simplify_entry(map, entry)
	vm_map_t map;
	vm_map_entry_t entry;
{
	vm_map_entry_t next, prev;
	vm_size_t prevsize, esize;

	/* Share maps and submaps are never merged. */
	if (entry->eflags & (MAP_ENTRY_IS_SUB_MAP|MAP_ENTRY_IS_A_MAP))
		return;

	prev = entry->prev;
	if (prev != &map->header) {
		prevsize = prev->end - prev->start;
		/*
		 * The previous entry may be absorbed when it is virtually
		 * contiguous, backed by the same object at the adjoining
		 * offset, and identical in every attribute.
		 */
		if ( (prev->end == entry->start) &&
		     (prev->object.vm_object == entry->object.vm_object) &&
		     (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
		     (!prev->object.vm_object ||
			(prev->offset + prevsize == entry->offset)) &&
		     (prev->eflags == entry->eflags) &&
		     (prev->protection == entry->protection) &&
		     (prev->max_protection == entry->max_protection) &&
		     (prev->inheritance == entry->inheritance) &&
		     (prev->wired_count == entry->wired_count)) {
			if (map->first_free == prev)
				map->first_free = entry;
			if (map->hint == prev)
				map->hint = entry;
			vm_map_entry_unlink(map, prev);
			entry->start = prev->start;
			entry->offset = prev->offset;
			/* Drop the absorbed entry's object reference. */
			if (prev->object.vm_object)
				vm_object_deallocate(prev->object.vm_object);
			vm_map_entry_dispose(map, prev);
		}
	}

	next = entry->next;
	if (next != &map->header) {
		esize = entry->end - entry->start;
		/* Symmetric check against the following entry. */
		if ((entry->end == next->start) &&
		    (next->object.vm_object == entry->object.vm_object) &&
		    (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) &&
		    (!entry->object.vm_object ||
			(entry->offset + esize == next->offset)) &&
		    (next->eflags == entry->eflags) &&
		    (next->protection == entry->protection) &&
		    (next->max_protection == entry->max_protection) &&
		    (next->inheritance == entry->inheritance) &&
		    (next->wired_count == entry->wired_count)) {
			if (map->first_free == next)
				map->first_free = entry;
			if (map->hint == next)
				map->hint = entry;
			vm_map_entry_unlink(map, next);
			entry->end = next->end;
			if (next->object.vm_object)
				vm_object_deallocate(next->object.vm_object);
			vm_map_entry_dispose(map, next);
		}
	}
}
/*
 * vm_map_clip_start:	[ internal use only ]
 *
 *	Asserts that the given entry begins at or after
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */
#define	vm_map_clip_start(map, entry, startaddr) \
{ \
	if (startaddr > entry->start) \
		_vm_map_clip_start(map, entry, startaddr); \
}

/*
 * This routine is called only when it is known that
 * the entry must be split.
 */
static void
_vm_map_clip_start(map, entry, start)
	register vm_map_t map;
	register vm_map_entry_t entry;
	register vm_offset_t start;
{
	register vm_map_entry_t new_entry;

	/*
	 * Split off the front portion -- note that we must insert the new
	 * entry BEFORE this one, so that this entry has the specified
	 * starting address.
	 */

	vm_map_simplify_entry(map, entry);

	/*
	 * If there is no object backing this entry, we might as well create
	 * one now.  If we defer it, an object can get created after the map
	 * is clipped, and individual objects will be created for the split-up
	 * map.  This is a bit of a hack, but is also about the best place to
	 * put this improvement.
	 */

	if (entry->object.vm_object == NULL) {
		vm_object_t object;

		object = vm_object_allocate(OBJT_DEFAULT,
			OFF_TO_IDX(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
	}

	new_entry = vm_map_entry_create(map);
	*new_entry = *entry;

	new_entry->end = start;
	entry->offset += (start - entry->start);
	entry->start = start;

	vm_map_entry_link(map, entry->prev, new_entry);

	/* The clone holds an extra reference on the backing map/object. */
	if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
		vm_map_reference(new_entry->object.share_map);
	else
		vm_object_reference(new_entry->object.vm_object);
}

/*
 * vm_map_clip_end:	[ internal use only ]
 *
 *	Asserts that the given entry ends at or before
 *	the specified address; if necessary,
 *	it splits the entry into two.
 */

#define	vm_map_clip_end(map, entry, endaddr) \
{ \
	if (endaddr < entry->end) \
		_vm_map_clip_end(map, entry, endaddr); \
}

/*
 * This routine is called only when it is known that
 * the entry must be split.
 */
static void
_vm_map_clip_end(map, entry, end)
	register vm_map_t map;
	register vm_map_entry_t entry;
	register vm_offset_t end;
{
	register vm_map_entry_t new_entry;

	/*
	 * If there is no object backing this entry, we might as well create
	 * one now.  If we defer it, an object can get created after the map
	 * is clipped, and individual objects will be created for the split-up
	 * map.  This is a bit of a hack, but is also about the best place to
	 * put this improvement.
	 */

	if (entry->object.vm_object == NULL) {
		vm_object_t object;

		object = vm_object_allocate(OBJT_DEFAULT,
			OFF_TO_IDX(entry->end - entry->start));
		entry->object.vm_object = object;
		entry->offset = 0;
	}

	/*
	 * Create a new entry and insert it AFTER the specified entry
	 */

	new_entry = vm_map_entry_create(map);
	*new_entry = *entry;

	new_entry->start = entry->end = end;
	new_entry->offset += (end - entry->start);

	vm_map_entry_link(map, entry, new_entry);

	/* The clone holds an extra reference on the backing map/object. */
	if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
		vm_map_reference(new_entry->object.share_map);
	else
		vm_object_reference(new_entry->object.vm_object);
}

/*
 * VM_MAP_RANGE_CHECK:	[ internal use only ]
 *
 *	Asserts that the starting and ending region
 *	addresses fall within the valid range of the map.
 */
#define	VM_MAP_RANGE_CHECK(map, start, end) \
{ \
	if (start < vm_map_min(map)) \
		start = vm_map_min(map); \
	if (end > vm_map_max(map)) \
		end = vm_map_max(map); \
	if (start > end) \
		start = end; \
}

/*
 * vm_map_submap:	[ kernel use only ]
 *
 *	Mark the given range as handled by a subordinate map.
 *
 *	This range must have been created with vm_map_find,
 *	and no other operations may have been performed on this
 *	range prior to calling vm_map_submap.
 *
 *	Only a limited number of operations can be performed
 *	within this range after calling vm_map_submap:
 *	vm_fault
 *	[Don't try vm_map_copy!]
 *
 *	To remove a submapping, one must first remove the
 *	range from the superior map, and then destroy the
 *	submap (if desired).  [Better yet, don't try it.]
 */
int
vm_map_submap(map, start, end, submap)
	register vm_map_t map;
	register vm_offset_t start;
	register vm_offset_t end;
	vm_map_t submap;
{
	vm_map_entry_t entry;
	register int result = KERN_INVALID_ARGUMENT;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else
		entry = entry->next;

	vm_map_clip_end(map, entry, end);

	/* Only an exact-fit, object-less, non-COW entry may become a submap. */
	if ((entry->start == start) && (entry->end == end) &&
	    ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_COW)) == 0) &&
	    (entry->object.vm_object == NULL)) {
		entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
		vm_map_reference(entry->object.sub_map = submap);
		result = KERN_SUCCESS;
	}
	vm_map_unlock(map);

	return (result);
}

/*
 * vm_map_protect:
 *
 *	Sets the protection of the specified address
 *	region in the target map.  If "set_max" is
 *	specified, the maximum protection is to be set;
 *	otherwise, only the current protection is affected.
 */
int
vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
	       vm_prot_t new_prot, boolean_t set_max)
{
	register vm_map_entry_t current;
	vm_map_entry_t entry;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	if (vm_map_lookup_entry(map, start, &entry)) {
		vm_map_clip_start(map, entry, start);
	} else {
		entry = entry->next;
	}

	/*
	 * Make a first pass to check for protection violations.
	 */

	current = entry;
	while ((current != &map->header) && (current->start < end)) {
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_unlock(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if ((new_prot & current->max_protection) != new_prot) {
			vm_map_unlock(map);
			return (KERN_PROTECTION_FAILURE);
		}
		current = current->next;
	}

	/*
	 * Go back and fix up protections. [Note that clipping is not
	 * necessary the second time.]
	 */

	current = entry;

	while ((current != &map->header) && (current->start < end)) {
		vm_prot_t old_prot;

		vm_map_clip_end(map, current, end);

		old_prot = current->protection;
		if (set_max)
			current->protection =
			    (current->max_protection = new_prot) &
			    old_prot;
		else
			current->protection = new_prot;

		/*
		 * Update physical map if necessary. Worry about copy-on-write
		 * here -- CHECK THIS XXX
		 */

		if (current->protection != old_prot) {
#define MASK(entry)	(((entry)->eflags & MAP_ENTRY_COW) ? ~VM_PROT_WRITE : \
							VM_PROT_ALL)
#define	max(a,b)	((a) > (b) ? (a) : (b))

			if (current->eflags & MAP_ENTRY_IS_A_MAP) {
				vm_map_entry_t share_entry;
				vm_offset_t share_end;

				/*
				 * Walk the overlapped portion of the share
				 * map and update the pmap range by range.
				 */
				vm_map_lock(current->object.share_map);
				(void) vm_map_lookup_entry(
				    current->object.share_map,
				    current->offset,
				    &share_entry);
				share_end = current->offset +
				    (current->end - current->start);
				while ((share_entry !=
					&current->object.share_map->header) &&
				    (share_entry->start < share_end)) {

					pmap_protect(map->pmap,
					    (max(share_entry->start,
						    current->offset) -
						current->offset +
						current->start),
					    min(share_entry->end,
						share_end) -
					    current->offset +
					    current->start,
					    current->protection &
					    MASK(share_entry));

					share_entry = share_entry->next;
				}
				vm_map_unlock(current->object.share_map);
			} else
				/*
				 * NOTE(review): MASK(entry) uses the first
				 * entry of the range rather than `current';
				 * looks suspicious -- confirm intent.
				 */
				pmap_protect(map->pmap, current->start,
				    current->end,
				    current->protection & MASK(entry));
#undef max
#undef MASK
		}

		vm_map_simplify_entry(map, current);

		current = current->next;
	}

	vm_map_unlock(map);
	return (KERN_SUCCESS);
}

/*
 * vm_map_madvise:
 *
 *	This routine traverses a process's map handling the madvise
 *	system call.
1124 */ 1125 void 1126 vm_map_madvise(map, pmap, start, end, advise) 1127 vm_map_t map; 1128 pmap_t pmap; 1129 vm_offset_t start, end; 1130 int advise; 1131 { 1132 register vm_map_entry_t current; 1133 vm_map_entry_t entry; 1134 1135 vm_map_lock(map); 1136 1137 VM_MAP_RANGE_CHECK(map, start, end); 1138 1139 if (vm_map_lookup_entry(map, start, &entry)) { 1140 vm_map_clip_start(map, entry, start); 1141 } else 1142 entry = entry->next; 1143 1144 for(current = entry; 1145 (current != &map->header) && (current->start < end); 1146 current = current->next) { 1147 vm_size_t size = current->end - current->start; 1148 1149 if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) { 1150 continue; 1151 } 1152 1153 /* 1154 * Create an object if needed 1155 */ 1156 if (current->object.vm_object == NULL) { 1157 vm_object_t object; 1158 object = vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(size)); 1159 current->object.vm_object = object; 1160 current->offset = 0; 1161 } 1162 1163 vm_map_clip_end(map, current, end); 1164 switch (advise) { 1165 case MADV_NORMAL: 1166 current->object.vm_object->behavior = OBJ_NORMAL; 1167 break; 1168 case MADV_SEQUENTIAL: 1169 current->object.vm_object->behavior = OBJ_SEQUENTIAL; 1170 break; 1171 case MADV_RANDOM: 1172 current->object.vm_object->behavior = OBJ_RANDOM; 1173 break; 1174 /* 1175 * Right now, we could handle DONTNEED and WILLNEED with common code. 1176 * They are mostly the same, except for the potential async reads (NYI). 1177 */ 1178 case MADV_FREE: 1179 case MADV_DONTNEED: 1180 { 1181 vm_pindex_t pindex; 1182 int count; 1183 size = current->end - current->start; 1184 pindex = OFF_TO_IDX(entry->offset); 1185 count = OFF_TO_IDX(size); 1186 /* 1187 * MADV_DONTNEED removes the page from all 1188 * pmaps, so pmap_remove is not necessary. 
1189 */ 1190 vm_object_madvise(current->object.vm_object, 1191 pindex, count, advise); 1192 } 1193 break; 1194 1195 case MADV_WILLNEED: 1196 { 1197 vm_pindex_t pindex; 1198 int count; 1199 size = current->end - current->start; 1200 pindex = OFF_TO_IDX(current->offset); 1201 count = OFF_TO_IDX(size); 1202 vm_object_madvise(current->object.vm_object, 1203 pindex, count, advise); 1204 pmap_object_init_pt(pmap, current->start, 1205 current->object.vm_object, pindex, 1206 (count << PAGE_SHIFT), 0); 1207 } 1208 break; 1209 1210 default: 1211 break; 1212 } 1213 } 1214 1215 vm_map_simplify_entry(map, entry); 1216 vm_map_unlock(map); 1217 return; 1218 } 1219 1220 1221 /* 1222 * vm_map_inherit: 1223 * 1224 * Sets the inheritance of the specified address 1225 * range in the target map. Inheritance 1226 * affects how the map will be shared with 1227 * child maps at the time of vm_map_fork. 1228 */ 1229 int 1230 vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, 1231 vm_inherit_t new_inheritance) 1232 { 1233 register vm_map_entry_t entry; 1234 vm_map_entry_t temp_entry; 1235 1236 switch (new_inheritance) { 1237 case VM_INHERIT_NONE: 1238 case VM_INHERIT_COPY: 1239 case VM_INHERIT_SHARE: 1240 break; 1241 default: 1242 return (KERN_INVALID_ARGUMENT); 1243 } 1244 1245 vm_map_lock(map); 1246 1247 VM_MAP_RANGE_CHECK(map, start, end); 1248 1249 if (vm_map_lookup_entry(map, start, &temp_entry)) { 1250 entry = temp_entry; 1251 vm_map_clip_start(map, entry, start); 1252 } else 1253 entry = temp_entry->next; 1254 1255 while ((entry != &map->header) && (entry->start < end)) { 1256 vm_map_clip_end(map, entry, end); 1257 1258 entry->inheritance = new_inheritance; 1259 1260 entry = entry->next; 1261 } 1262 1263 vm_map_simplify_entry(map, temp_entry); 1264 vm_map_unlock(map); 1265 return (KERN_SUCCESS); 1266 } 1267 1268 /* 1269 * Implement the semantics of mlock 1270 */ 1271 int 1272 vm_map_user_pageable(map, start, end, new_pageable) 1273 register vm_map_t map; 1274 register 
vm_offset_t start; 1275 register vm_offset_t end; 1276 register boolean_t new_pageable; 1277 { 1278 register vm_map_entry_t entry; 1279 vm_map_entry_t start_entry; 1280 register vm_offset_t failed = 0; 1281 int rv; 1282 1283 vm_map_lock(map); 1284 VM_MAP_RANGE_CHECK(map, start, end); 1285 1286 if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) { 1287 vm_map_unlock(map); 1288 return (KERN_INVALID_ADDRESS); 1289 } 1290 1291 if (new_pageable) { 1292 1293 entry = start_entry; 1294 vm_map_clip_start(map, entry, start); 1295 1296 /* 1297 * Now decrement the wiring count for each region. If a region 1298 * becomes completely unwired, unwire its physical pages and 1299 * mappings. 1300 */ 1301 vm_map_set_recursive(map); 1302 1303 entry = start_entry; 1304 while ((entry != &map->header) && (entry->start < end)) { 1305 if (entry->eflags & MAP_ENTRY_USER_WIRED) { 1306 vm_map_clip_end(map, entry, end); 1307 entry->eflags &= ~MAP_ENTRY_USER_WIRED; 1308 entry->wired_count--; 1309 if (entry->wired_count == 0) 1310 vm_fault_unwire(map, entry->start, entry->end); 1311 } 1312 entry = entry->next; 1313 } 1314 vm_map_simplify_entry(map, start_entry); 1315 vm_map_clear_recursive(map); 1316 } else { 1317 1318 /* 1319 * Because of the possiblity of blocking, etc. We restart 1320 * through the process's map entries from beginning so that 1321 * we don't end up depending on a map entry that could have 1322 * changed. 
1323 */ 1324 rescan: 1325 1326 entry = start_entry; 1327 1328 while ((entry != &map->header) && (entry->start < end)) { 1329 1330 if (entry->eflags & MAP_ENTRY_USER_WIRED) { 1331 entry = entry->next; 1332 continue; 1333 } 1334 1335 if (entry->wired_count != 0) { 1336 entry->wired_count++; 1337 entry->eflags |= MAP_ENTRY_USER_WIRED; 1338 entry = entry->next; 1339 continue; 1340 } 1341 1342 /* Here on entry being newly wired */ 1343 1344 if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) { 1345 int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY; 1346 if (copyflag && ((entry->protection & VM_PROT_WRITE) != 0)) { 1347 1348 vm_object_shadow(&entry->object.vm_object, 1349 &entry->offset, 1350 OFF_TO_IDX(entry->end 1351 - entry->start)); 1352 entry->eflags &= ~MAP_ENTRY_NEEDS_COPY; 1353 1354 } else if (entry->object.vm_object == NULL) { 1355 1356 entry->object.vm_object = 1357 vm_object_allocate(OBJT_DEFAULT, 1358 OFF_TO_IDX(entry->end - entry->start)); 1359 entry->offset = (vm_offset_t) 0; 1360 1361 } 1362 default_pager_convert_to_swapq(entry->object.vm_object); 1363 } 1364 1365 vm_map_clip_start(map, entry, start); 1366 vm_map_clip_end(map, entry, end); 1367 1368 entry->wired_count++; 1369 entry->eflags |= MAP_ENTRY_USER_WIRED; 1370 1371 /* First we need to allow map modifications */ 1372 vm_map_set_recursive(map); 1373 vm_map_lock_downgrade(map); 1374 1375 rv = vm_fault_user_wire(map, entry->start, entry->end); 1376 if (rv) { 1377 1378 entry->wired_count--; 1379 entry->eflags &= ~MAP_ENTRY_USER_WIRED; 1380 1381 vm_map_clear_recursive(map); 1382 vm_map_unlock(map); 1383 1384 (void) vm_map_user_pageable(map, start, entry->start, TRUE); 1385 return rv; 1386 } 1387 1388 vm_map_clear_recursive(map); 1389 vm_map_lock_upgrade(map); 1390 1391 goto rescan; 1392 } 1393 } 1394 vm_map_unlock(map); 1395 return KERN_SUCCESS; 1396 } 1397 1398 /* 1399 * vm_map_pageable: 1400 * 1401 * Sets the pageability of the specified address 1402 * range in the target map. 
 *	Regions specified
 *	as not pageable require locked-down physical
 *	memory and physical page maps.
 *
 *	The map must not be locked, but a reference
 *	must remain to the map throughout the call.
 */
int
vm_map_pageable(map, start, end, new_pageable)
	register vm_map_t map;
	register vm_offset_t start;
	register vm_offset_t end;
	register boolean_t new_pageable;
{
	register vm_map_entry_t entry;
	vm_map_entry_t start_entry;
	register vm_offset_t failed = 0;	/* start of entry whose wiring faulted */
	int rv;

	vm_map_lock(map);

	VM_MAP_RANGE_CHECK(map, start, end);

	/*
	 * Only one pageability change may take place at one time, since
	 * vm_fault assumes it will be called only once for each
	 * wiring/unwiring. Therefore, we have to make sure we're actually
	 * changing the pageability for the entire region. We do so before
	 * making any changes.
	 */

	if (vm_map_lookup_entry(map, start, &start_entry) == FALSE) {
		vm_map_unlock(map);
		return (KERN_INVALID_ADDRESS);
	}
	entry = start_entry;

	/*
	 * Actions are rather different for wiring and unwiring, so we have
	 * two separate cases.
	 */

	if (new_pageable) {

		vm_map_clip_start(map, entry, start);

		/*
		 * Unwiring. First ensure that the range to be unwired is
		 * really wired down and that there are no holes.
		 */
		while ((entry != &map->header) && (entry->start < end)) {

			if (entry->wired_count == 0 ||
			    (entry->end < end &&
				(entry->next == &map->header ||
				    entry->next->start > entry->end))) {
				vm_map_unlock(map);
				return (KERN_INVALID_ARGUMENT);
			}
			entry = entry->next;
		}

		/*
		 * Now decrement the wiring count for each region. If a region
		 * becomes completely unwired, unwire its physical pages and
		 * mappings.
		 */
		vm_map_set_recursive(map);

		entry = start_entry;
		while ((entry != &map->header) && (entry->start < end)) {
			vm_map_clip_end(map, entry, end);

			entry->wired_count--;
			if (entry->wired_count == 0)
				vm_fault_unwire(map, entry->start, entry->end);

			entry = entry->next;
		}
		vm_map_simplify_entry(map, start_entry);
		vm_map_clear_recursive(map);
	} else {
		/*
		 * Wiring.  We must do this in two passes:
		 *
		 * 1.  Holding the write lock, we create any shadow or zero-fill
		 * objects that need to be created. Then we clip each map
		 * entry to the region to be wired and increment its wiring
		 * count.  We create objects before clipping the map entries
		 * to avoid object proliferation.
		 *
		 * 2.  We downgrade to a read lock, and call vm_fault_wire to
		 * fault in the pages for any newly wired area (wired_count is
		 * 1).
		 *
		 * Downgrading to a read lock for vm_fault_wire avoids a possible
		 * deadlock with another process that may have faulted on one
		 * of the pages to be wired (it would mark the page busy,
		 * blocking us, then in turn block on the map lock that we
		 * hold).  Because of problems in the recursive lock package,
		 * we cannot upgrade to a write lock in vm_map_lookup.  Thus,
		 * any actions that require the write lock must be done
		 * beforehand.  Because we keep the read lock on the map, the
		 * copy-on-write status of the entries we modify here cannot
		 * change.
		 */

		/*
		 * Pass 1.
		 */
		while ((entry != &map->header) && (entry->start < end)) {
			if (entry->wired_count == 0) {

				/*
				 * Perform actions of vm_map_lookup that need
				 * the write lock on the map: create a shadow
				 * object for a copy-on-write region, or an
				 * object for a zero-fill region.
				 *
				 * We don't have to do this for entries that
				 * point to sharing maps, because we won't
				 * hold the lock on the sharing map.
				 */
				if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) == 0) {
					int copyflag = entry->eflags & MAP_ENTRY_NEEDS_COPY;
					if (copyflag &&
					    ((entry->protection & VM_PROT_WRITE) != 0)) {

						vm_object_shadow(&entry->object.vm_object,
						    &entry->offset,
						    OFF_TO_IDX(entry->end
							- entry->start));
						entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
					} else if (entry->object.vm_object == NULL) {
						entry->object.vm_object =
						    vm_object_allocate(OBJT_DEFAULT,
							OFF_TO_IDX(entry->end - entry->start));
						entry->offset = (vm_offset_t) 0;
					}
					default_pager_convert_to_swapq(entry->object.vm_object);
				}
			}
			vm_map_clip_start(map, entry, start);
			vm_map_clip_end(map, entry, end);
			entry->wired_count++;

			/*
			 * Check for holes
			 */
			if (entry->end < end &&
			    (entry->next == &map->header ||
				entry->next->start > entry->end)) {
				/*
				 * Found one.  Object creation actions do not
				 * need to be undone, but the wired counts
				 * need to be restored.
				 */
				while (entry != &map->header && entry->end > start) {
					entry->wired_count--;
					entry = entry->prev;
				}
				vm_map_unlock(map);
				return (KERN_INVALID_ARGUMENT);
			}
			entry = entry->next;
		}

		/*
		 * Pass 2.
		 */

		/*
		 * HACK HACK HACK HACK
		 *
		 * If we are wiring in the kernel map or a submap of it,
		 * unlock the map to avoid deadlocks.  We trust that the
		 * kernel is well-behaved, and therefore will not do
		 * anything destructive to this region of the map while
		 * we have it unlocked.  We cannot trust user processes
		 * to do the same.
		 *
		 * HACK HACK HACK HACK
		 */
		if (vm_map_pmap(map) == kernel_pmap) {
			vm_map_unlock(map);	/* trust me ... */
		} else {
			vm_map_set_recursive(map);
			vm_map_lock_downgrade(map);
		}

		rv = 0;
		entry = start_entry;
		while (entry != &map->header && entry->start < end) {
			/*
			 * If vm_fault_wire fails for any page we need to undo
			 * what has been done.  We decrement the wiring count
			 * for those pages which have not yet been wired (now)
			 * and unwire those that have (later).
			 *
			 * XXX this violates the locking protocol on the map,
			 * needs to be fixed.
			 */
			if (rv)
				entry->wired_count--;
			else if (entry->wired_count == 1) {
				rv = vm_fault_wire(map, entry->start, entry->end);
				if (rv) {
					failed = entry->start;
					entry->wired_count--;
				}
			}
			entry = entry->next;
		}

		if (vm_map_pmap(map) == kernel_pmap) {
			vm_map_lock(map);
		} else {
			vm_map_clear_recursive(map);
		}
		if (rv) {
			/* Unwire the part that did get wired, up to "failed". */
			vm_map_unlock(map);
			(void) vm_map_pageable(map, start, failed, TRUE);
			return (rv);
		}
		vm_map_simplify_entry(map, start_entry);
	}

	vm_map_unlock(map);

	return (KERN_SUCCESS);
}

/*
 *	vm_map_clean
 *
 *	Push any dirty cached pages in the address range to their pager.
 *	If syncio is TRUE, dirty pages are written synchronously.
 *	If invalidate is TRUE, any cached pages are freed as well.
 *
 *	Returns an error if any part of the specified range is not mapped.
 */
int
vm_map_clean(map, start, end, syncio, invalidate)
	vm_map_t map;
	vm_offset_t start;
	vm_offset_t end;
	boolean_t syncio;
	boolean_t invalidate;
{
	register vm_map_entry_t current;
	vm_map_entry_t entry;
	vm_size_t size;
	vm_object_t object;
	vm_ooffset_t offset;

	vm_map_lock_read(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	if (!vm_map_lookup_entry(map, start, &entry)) {
		vm_map_unlock_read(map);
		return (KERN_INVALID_ADDRESS);
	}
	/*
	 * Make a first pass to check for holes.
	 */
	for (current = entry; current->start < end; current = current->next) {
		if (current->eflags & MAP_ENTRY_IS_SUB_MAP) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ARGUMENT);
		}
		if (end > current->end &&
		    (current->next == &map->header ||
			current->end != current->next->start)) {
			vm_map_unlock_read(map);
			return (KERN_INVALID_ADDRESS);
		}
	}

	/*
	 * Make a second pass, cleaning/uncaching pages from the indicated
	 * objects as we go.
	 */
	for (current = entry; current->start < end; current = current->next) {
		/* Object offset and length of the piece of this entry in range. */
		offset = current->offset + (start - current->start);
		size = (end <= current->end ? end : current->end) - start;
		if (current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
			/* Indirect through the sharing map to the real object. */
			register vm_map_t smap;
			vm_map_entry_t tentry;
			vm_size_t tsize;

			smap = current->object.share_map;
			vm_map_lock_read(smap);
			(void) vm_map_lookup_entry(smap, offset, &tentry);
			tsize = tentry->end - offset;
			if (tsize < size)
				size = tsize;
			object = tentry->object.vm_object;
			offset = tentry->offset + (offset - tentry->start);
			vm_map_unlock_read(smap);
		} else {
			object = current->object.vm_object;
		}
		/*
		 * Note that there is absolutely no sense in writing out
		 * anonymous objects, so we track down the vnode object
		 * to write out.
		 * We invalidate (remove) all pages from the address space
		 * anyway, for semantic correctness.
		 *
		 * NOTE(review): "object" may be NULL here -- it is only
		 * NULL-checked below, after this loop dereferences it.
		 * Confirm entries in a cleanable range always carry an
		 * object.
		 *
		 * NOTE(review): offset is adjusted by the NEW object's
		 * backing_object_offset (read after the pointer is
		 * advanced); verify this is the intended field ordering.
		 */
		while (object->backing_object) {
			object = object->backing_object;
			offset += object->backing_object_offset;
			if (object->size < OFF_TO_IDX( offset + size))
				size = IDX_TO_OFF(object->size) - offset;
		}
		if (invalidate)
			pmap_remove(vm_map_pmap(map), current->start,
			    current->start + size);
		if (object && (object->type == OBJT_VNODE)) {
			/*
			 * Flush pages if writing is allowed. XXX should we continue
			 * on an error?
			 *
			 * XXX Doing async I/O and then removing all the pages from
			 * the object before it completes is probably a very bad
			 * idea.
			 */
			if (current->protection & VM_PROT_WRITE) {
				vm_object_page_clean(object,
				    OFF_TO_IDX(offset),
				    OFF_TO_IDX(offset + size),
				    (syncio||invalidate)?1:0, TRUE);
				if (invalidate)
					vm_object_page_remove(object,
					    OFF_TO_IDX(offset),
					    OFF_TO_IDX(offset + size),
					    FALSE);
			}
		}
		start += size;
	}

	vm_map_unlock_read(map);
	return (KERN_SUCCESS);
}

/*
 *	vm_map_entry_unwire:	[ internal use only ]
 *
 *	Make the region specified by this entry pageable.
 *
 *	The map in question should be locked.
 *	[This is the reason for this routine's existence.]
 */
static void
vm_map_entry_unwire(map, entry)
	vm_map_t map;
	register vm_map_entry_t entry;
{
	/* Unconditionally drop every wiring, not just one reference. */
	vm_fault_unwire(map, entry->start, entry->end);
	entry->wired_count = 0;
}

/*
 *	vm_map_entry_delete:	[ internal use only ]
 *
 *	Deallocate the given entry from the target map.
 */
static void
vm_map_entry_delete(map, entry)
	register vm_map_t map;
	register vm_map_entry_t entry;
{
	vm_map_entry_unlink(map, entry);
	map->size -= entry->end - entry->start;

	/* Drop the reference on whichever backing store the entry used. */
	if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
		vm_map_deallocate(entry->object.share_map);
	} else {
		vm_object_deallocate(entry->object.vm_object);
	}

	vm_map_entry_dispose(map, entry);
}

/*
 *	vm_map_delete:	[ internal use only ]
 *
 *	Deallocates the given address range from the target
 *	map.
 *
 *	When called with a sharing map, removes pages from
 *	that region from all physical maps.
 */
int
vm_map_delete(map, start, end)
	register vm_map_t map;
	vm_offset_t start;
	register vm_offset_t end;
{
	register vm_map_entry_t entry;
	vm_map_entry_t first_entry;

	/*
	 * Find the start of the region, and clip it
	 */

	if (!vm_map_lookup_entry(map, start, &first_entry))
		entry = first_entry->next;
	else {
		entry = first_entry;
		vm_map_clip_start(map, entry, start);

		/*
		 * Fix the lookup hint now, rather than each time though the
		 * loop.
		 */

		SAVE_HINT(map, entry->prev);
	}

	/*
	 * Save the free space hint
	 */

	if (entry == &map->header) {
		map->first_free = &map->header;
	} else if (map->first_free->start >= start)
		map->first_free = entry->prev;

	/*
	 * Step through all entries in this region
	 */

	while ((entry != &map->header) && (entry->start < end)) {
		vm_map_entry_t next;
		vm_offset_t s, e;
		vm_object_t object;
		vm_ooffset_t offset;

		vm_map_clip_end(map, entry, end);

		next = entry->next;
		s = entry->start;
		e = entry->end;
		offset = entry->offset;

		/*
		 * Unwire before removing addresses from the pmap; otherwise,
		 * unwiring will put the entries back in the pmap.
		 */

		object = entry->object.vm_object;
		if (entry->wired_count != 0)
			vm_map_entry_unwire(map, entry);

		/*
		 * If this is a sharing map, we must remove *all* references
		 * to this data, since we can't find all of the physical maps
		 * which are sharing it.
		 */

		if (object == kernel_object || object == kmem_object) {
			vm_object_page_remove(object, OFF_TO_IDX(offset),
			    OFF_TO_IDX(offset + (e - s)), FALSE);
		} else if (!map->is_main_map) {
			vm_object_pmap_remove(object,
			    OFF_TO_IDX(offset),
			    OFF_TO_IDX(offset + (e - s)));
		} else {
			pmap_remove(map->pmap, s, e);
		}

		/*
		 * Delete the entry (which may delete the object) only after
		 * removing all pmap entries pointing to its pages.
		 * (Otherwise, its page frames may be reallocated, and any
		 * modify bits will be set in the wrong object!)
		 */

		vm_map_entry_delete(map, entry);
		entry = next;
	}
	return (KERN_SUCCESS);
}

/*
 *	vm_map_remove:
 *
 *	Remove the given address range from the target map.
 *	This is the exported form of vm_map_delete.
 */
int
vm_map_remove(map, start, end)
	register vm_map_t map;
	register vm_offset_t start;
	register vm_offset_t end;
{
	register int result, s = 0;

	/*
	 * kmem_map and mb_map can be manipulated from interrupt level;
	 * block those interrupts while the maps are being modified.
	 */
	if (map == kmem_map || map == mb_map)
		s = splvm();

	vm_map_lock(map);
	VM_MAP_RANGE_CHECK(map, start, end);
	result = vm_map_delete(map, start, end);
	vm_map_unlock(map);

	if (map == kmem_map || map == mb_map)
		splx(s);

	return (result);
}

/*
 *	vm_map_check_protection:
 *
 *	Assert that the target map allows the specified
 *	privilege on the entire address region given.
 *	The entire region must be allocated.
 */
boolean_t
vm_map_check_protection(vm_map_t map, vm_offset_t start, vm_offset_t end,
			vm_prot_t protection)
{
	register vm_map_entry_t entry;
	vm_map_entry_t tmp_entry;

	/*
	 * NOTE(review): no map lock is taken here -- presumably the
	 * caller holds (at least) a read lock on the map; confirm.
	 */
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		return (FALSE);
	}
	entry = tmp_entry;

	/* Walk entry by entry; every byte of [start, end) must be covered. */
	while (start < end) {
		if (entry == &map->header) {
			return (FALSE);
		}
		/*
		 * No holes allowed!
		 */

		if (start < entry->start) {
			return (FALSE);
		}
		/*
		 * Check protection associated with entry.
		 */

		if ((entry->protection & protection) != protection) {
			return (FALSE);
		}
		/* go to next entry */

		start = entry->end;
		entry = entry->next;
	}
	return (TRUE);
}

/*
 *	vm_map_copy_entry:
 *
 *	Copies the contents of the source entry to the destination
 *	entry.  The entries *must* be aligned properly.
 */
static void
vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry)
	vm_map_t src_map, dst_map;
	register vm_map_entry_t src_entry, dst_entry;
{
	/* Share maps and submaps are never copied entry-by-entry. */
	if ((dst_entry->eflags|src_entry->eflags) &
	    (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
		return;

	if (src_entry->wired_count == 0) {

		/*
		 * If the source entry is marked needs_copy, it is already
		 * write-protected.
		 */
		if ((src_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) {

			boolean_t su;

			/*
			 * If the source entry has only one mapping, we can
			 * just protect the virtual address range.
			 */
			if (!(su = src_map->is_main_map)) {
				su = (src_map->ref_count == 1);
			}
			if (su) {
				/* Single user: write-protect in place. */
				pmap_protect(src_map->pmap,
				    src_entry->start,
				    src_entry->end,
				    src_entry->protection & ~VM_PROT_WRITE);
			} else {
				/* Shared: remove write access in every pmap. */
				vm_object_pmap_copy(src_entry->object.vm_object,
				    OFF_TO_IDX(src_entry->offset),
				    OFF_TO_IDX(src_entry->offset + (src_entry->end
					- src_entry->start)));
			}
		}

		/*
		 * Make a copy of the object.
		 */
		if (src_entry->object.vm_object) {
			if ((src_entry->object.vm_object->handle == NULL) &&
			    (src_entry->object.vm_object->type == OBJT_DEFAULT ||
			     src_entry->object.vm_object->type == OBJT_SWAP))
				vm_object_collapse(src_entry->object.vm_object);
			++src_entry->object.vm_object->ref_count;
			/* Both entries now reference the same object, COW. */
			src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
			dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY);
			dst_entry->object.vm_object =
			    src_entry->object.vm_object;
			dst_entry->offset = src_entry->offset;
		} else {
			dst_entry->object.vm_object = NULL;
			dst_entry->offset = 0;
		}

		pmap_copy(dst_map->pmap, src_map->pmap, dst_entry->start,
		    dst_entry->end - dst_entry->start, src_entry->start);
	} else {
		/*
		 * Of course, wired down pages can't be set copy-on-write.
		 * Cause wired pages to be copied into the new map by
		 * simulating faults (the new pages are pageable)
		 */
		vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry);
	}
}

/*
 * vmspace_fork:
 * Create a new process vmspace structure and vm_map
 * based on those of an existing process.  The new map
 * is based on the old map, according to the inheritance
 * values on the regions in that map.
 *
 * The source map must not be locked.
 */
struct vmspace *
vmspace_fork(vm1)
	register struct vmspace *vm1;
{
	register struct vmspace *vm2;
	vm_map_t old_map = &vm1->vm_map;
	vm_map_t new_map;
	vm_map_entry_t old_entry;
	vm_map_entry_t new_entry;
	pmap_t new_pmap;
	vm_object_t object;

	vm_map_lock(old_map);

	vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset,
	    old_map->entries_pageable);
	/* Copy the statically-sized tail of the vmspace wholesale. */
	bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
	    (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
	new_pmap = &vm2->vm_pmap;	/* XXX */
	new_map = &vm2->vm_map;	/* XXX */

	old_entry = old_map->header.next;

	/* Clone each entry according to its inheritance attribute. */
	while (old_entry != &old_map->header) {
		if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
			panic("vm_map_fork: encountered a submap");

		switch (old_entry->inheritance) {
		case VM_INHERIT_NONE:
			/* Not inherited: the child simply lacks this range. */
			break;

		case VM_INHERIT_SHARE:
			/*
			 * Clone the entry, creating the shared object if necessary.
			 */
			object = old_entry->object.vm_object;
			if (object == NULL) {
				object = vm_object_allocate(OBJT_DEFAULT,
					OFF_TO_IDX(old_entry->end - old_entry->start));
				old_entry->object.vm_object = object;
				old_entry->offset = (vm_offset_t) 0;
			} else if (old_entry->eflags & MAP_ENTRY_NEEDS_COPY) {
				/* Resolve pending COW before sharing. */
				vm_object_shadow(&old_entry->object.vm_object,
					&old_entry->offset,
					OFF_TO_IDX(old_entry->end - old_entry->start));
				old_entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
				object = old_entry->object.vm_object;
			}

			/*
			 * Clone the entry, referencing the sharing map.
			 */
			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			new_entry->wired_count = 0;
			++object->ref_count;

			/*
			 * Insert the entry into the new map -- we know we're
			 * inserting at the end of the new map.
			 */

			vm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);

			/*
			 * Update the physical map
			 */

			pmap_copy(new_map->pmap, old_map->pmap,
			    new_entry->start,
			    (old_entry->end - old_entry->start),
			    old_entry->start);
			break;

		case VM_INHERIT_COPY:
			/*
			 * Clone the entry and link into the map.
			 */
			new_entry = vm_map_entry_create(new_map);
			*new_entry = *old_entry;
			new_entry->wired_count = 0;
			new_entry->object.vm_object = NULL;
			new_entry->eflags &= ~MAP_ENTRY_IS_A_MAP;
			vm_map_entry_link(new_map, new_map->header.prev,
			    new_entry);
			/* Set up COW sharing of the object with the parent. */
			vm_map_copy_entry(old_map, new_map, old_entry,
			    new_entry);
			break;
		}
		old_entry = old_entry->next;
	}

	new_map->size = old_map->size;
	vm_map_unlock(old_map);

	return (vm2);
}

/*
 * Unshare the specified VM space for exec.  If other processes are
 * mapped to it, then create a new one.  The new vmspace is null.
 */

void
vmspace_exec(struct proc *p) {
	struct vmspace *oldvmspace = p->p_vmspace;
	struct vmspace *newvmspace;
	vm_map_t map = &p->p_vmspace->vm_map;

	/* Fresh, empty vmspace covering the same address range. */
	newvmspace = vmspace_alloc(map->min_offset, map->max_offset,
	    map->entries_pageable);
	bcopy(&oldvmspace->vm_startcopy, &newvmspace->vm_startcopy,
	    (caddr_t) (newvmspace + 1) - (caddr_t) &newvmspace->vm_startcopy);
	/*
	 * This code is written like this for prototype purposes.  The
	 * goal is to avoid running down the vmspace here, but let the
	 * other process's that are still using the vmspace to finally
	 * run it down.  Even though there is little or no chance of blocking
	 * here, it is a good idea to keep this form for future mods.
	 */
	vm_map_reference(&oldvmspace->vm_map);
	vmspace_free(oldvmspace);
	p->p_vmspace = newvmspace;
	if (p == curproc)
		pmap_activate(p);
	vm_map_deallocate(&oldvmspace->vm_map);
}

/*
 * Unshare the specified VM space for forcing COW.  This
 * is called by rfork, for the (RFMEM|RFPROC) == 0 case.
 */

void
vmspace_unshare(struct proc *p) {
	struct vmspace *oldvmspace = p->p_vmspace;
	struct vmspace *newvmspace;

	/* Already private; nothing to unshare. */
	if (oldvmspace->vm_refcnt == 1)
		return;
	newvmspace = vmspace_fork(oldvmspace);
	/* Same deferred-teardown dance as vmspace_exec() above. */
	vm_map_reference(&oldvmspace->vm_map);
	vmspace_free(oldvmspace);
	p->p_vmspace = newvmspace;
	if (p == curproc)
		pmap_activate(p);
	vm_map_deallocate(&oldvmspace->vm_map);
}


/*
 *	vm_map_lookup:
 *
 *	Finds the VM object, offset, and
 *	protection for a given virtual address in the
 *	specified map, assuming a page fault of the
 *	type specified.
 *
 *	Leaves the map in question locked for read; return
 *	values are guaranteed until a vm_map_lookup_done
 *	call is performed.  Note that the map argument
 *	is in/out; the returned map must be used in
 *	the call to vm_map_lookup_done.
 *
 *	A handle (out_entry) is returned for use in
 *	vm_map_lookup_done, to make that fast.
 *
 *	If a lookup is requested with "write protection"
 *	specified, the map may be changed to perform virtual
 *	copying operations, although the data referenced will
 *	remain the same.
 */
int
vm_map_lookup(vm_map_t *var_map,	/* IN/OUT */
    vm_offset_t vaddr,
    vm_prot_t fault_type,
    vm_map_entry_t *out_entry,		/* OUT */
    vm_object_t *object,		/* OUT */
    vm_pindex_t *pindex,		/* OUT */
    vm_prot_t *out_prot,		/* OUT */
    boolean_t *wired,			/* OUT */
    boolean_t *single_use)		/* OUT */
{
	vm_map_t share_map;
	vm_offset_t share_offset;
	register vm_map_entry_t entry;
	register vm_map_t map = *var_map;
	register vm_prot_t prot;
	register boolean_t su;

	/*
	 * Restart point: taken after descending into a submap, or after a
	 * failed read->write lock upgrade (which loses the lock).
	 */
RetryLookup:;

	/*
	 * Lookup the faulting address.
	 */

	vm_map_lock_read(map);

	/* Local helper: release the read lock and bail with a status. */
#define	RETURN(why) \
	{ \
		vm_map_unlock_read(map); \
		return(why); \
	}

	/*
	 * If the map has an interesting hint, try it before calling full
	 * blown lookup routine.
	 */

	entry = map->hint;

	*out_entry = entry;

	if ((entry == &map->header) ||
	    (vaddr < entry->start) || (vaddr >= entry->end)) {
		vm_map_entry_t tmp_entry;

		/*
		 * Entry was either not a valid hint, or the vaddr was not
		 * contained in the entry, so do a full lookup.
		 */
		if (!vm_map_lookup_entry(map, vaddr, &tmp_entry))
			RETURN(KERN_INVALID_ADDRESS);

		entry = tmp_entry;
		*out_entry = entry;
	}

	/*
	 * Handle submaps: switch to the submap and restart the lookup
	 * there.  The caller's map pointer is updated so that the matching
	 * vm_map_lookup_done() unlocks the right map.
	 */

	if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) {
		vm_map_t old_map = map;

		*var_map = map = entry->object.sub_map;
		vm_map_unlock_read(old_map);
		goto RetryLookup;
	}

	/*
	 * Check whether this task is allowed to have this page.
	 * Note the special case for MAP_ENTRY_COW
	 * pages with an override.  This is to implement a forced
	 * COW for debuggers.
	 */

	prot = entry->protection;
	/*
	 * The protection check is skipped only when all three hold: the
	 * caller asked for an override write, the entry is COW, and it is
	 * not wired.
	 */
	if ((fault_type & VM_PROT_OVERRIDE_WRITE) == 0 ||
	    (entry->eflags & MAP_ENTRY_COW) == 0 ||
	    (entry->wired_count != 0)) {
		if ((fault_type & (prot)) !=
		    (fault_type & ~VM_PROT_OVERRIDE_WRITE))
			RETURN(KERN_PROTECTION_FAILURE);
	}

	/*
	 * If this page is not pageable, we have to get it for all possible
	 * accesses.
	 */

	*wired = (entry->wired_count != 0);
	if (*wired)
		prot = fault_type = entry->protection;

	/*
	 * If we don't already have a VM object, track it down.
	 */

	su = (entry->eflags & MAP_ENTRY_IS_A_MAP) == 0;
	if (su) {
		share_map = map;
		share_offset = vaddr;
	} else {
		vm_map_entry_t share_entry;

		/*
		 * Compute the sharing map, and offset into it.
		 */

		share_map = entry->object.share_map;
		share_offset = (vaddr - entry->start) + entry->offset;

		/*
		 * Look for the backing store object and offset
		 */

		vm_map_lock_read(share_map);

		if (!vm_map_lookup_entry(share_map, share_offset,
			&share_entry)) {
			/* Drop the share map lock before RETURN unlocks map. */
			vm_map_unlock_read(share_map);
			RETURN(KERN_INVALID_ADDRESS);
		}
		entry = share_entry;
	}

	/*
	 * If the entry was copy-on-write, we either ...
	 */

	if (entry->eflags & MAP_ENTRY_NEEDS_COPY) {
		/*
		 * If we want to write the page, we may as well handle that
		 * now since we've got the sharing map locked.
		 *
		 * If we don't need to write the page, we just demote the
		 * permissions allowed.
		 */

		if (fault_type & VM_PROT_WRITE) {
			/*
			 * Make a new object, and place it in the object
			 * chain.  Note that no new references have appeared
			 * -- one just moved from the share map to the new
			 * object.
			 */

			/*
			 * A failed upgrade means the read lock was lost;
			 * restart the whole lookup from scratch.
			 */
			if (vm_map_lock_upgrade(share_map)) {
				if (share_map != map)
					vm_map_unlock_read(map);

				goto RetryLookup;
			}
			vm_object_shadow(
			    &entry->object.vm_object,
			    &entry->offset,
			    OFF_TO_IDX(entry->end - entry->start));

			entry->eflags &= ~MAP_ENTRY_NEEDS_COPY;
			vm_map_lock_downgrade(share_map);
		} else {
			/*
			 * We're attempting to read a copy-on-write page --
			 * don't allow writes.
			 */

			prot &= (~VM_PROT_WRITE);
		}
	}
	/*
	 * Create an object if necessary.
	 */
	if (entry->object.vm_object == NULL) {

		if (vm_map_lock_upgrade(share_map)) {
			if (share_map != map)
				vm_map_unlock_read(map);
			goto RetryLookup;
		}
		entry->object.vm_object = vm_object_allocate(OBJT_DEFAULT,
		    OFF_TO_IDX(entry->end - entry->start));
		entry->offset = 0;
		vm_map_lock_downgrade(share_map);
	}

	/*
	 * NOTE(review): after the block above the object appears to be
	 * non-NULL always (vm_object_allocate's failure behavior isn't
	 * visible here), so this guard looks redundant -- confirm before
	 * simplifying.
	 */
	if (entry->object.vm_object != NULL)
		default_pager_convert_to_swapq(entry->object.vm_object);
	/*
	 * Return the object/offset from this entry.  If the entry was
	 * copy-on-write or empty, it has been fixed up.
	 */

	*pindex = OFF_TO_IDX((share_offset - entry->start) + entry->offset);
	*object = entry->object.vm_object;

	/*
	 * Return whether this is the only map sharing this data.
	 */

	if (!su) {
		su = (share_map->ref_count == 1);
	}
	*out_prot = prot;
	*single_use = su;

	return (KERN_SUCCESS);

#undef RETURN
}

/*
 * vm_map_lookup_done:
 *
 *	Releases locks acquired by a vm_map_lookup
 *	(according to the handle returned by that lookup).
 */

void
vm_map_lookup_done(map, entry)
	register vm_map_t map;
	vm_map_entry_t entry;
{
	/*
	 * If this entry references a map, unlock it first.
 */

	/* Release the share map's read lock taken by vm_map_lookup(). */
	if (entry->eflags & MAP_ENTRY_IS_A_MAP)
		vm_map_unlock_read(entry->object.share_map);

	/*
	 * Unlock the main-level map
	 */

	vm_map_unlock_read(map);
}

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

/*
 * vm_map_print:	[ debug ]
 *
 *	DDB "show map" command: dump a map header and each of its entries,
 *	recursing into share maps / backing objects that differ from the
 *	previous entry's.  NOTE(review): pointers are printed via (int)
 *	casts with %x, which truncates on 64-bit platforms -- acceptable
 *	for this era's debugger code, but worth revisiting.
 */
DB_SHOW_COMMAND(map, vm_map_print)
{
	/* XXX convert args. */
	register vm_map_t map = (vm_map_t)addr;
	boolean_t full = have_addr;

	register vm_map_entry_t entry;

	db_iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n",
	    (map->is_main_map ? "Task" : "Share"),
	    (int) map, (int) (map->pmap), map->ref_count, map->nentries,
	    map->timestamp);

	/* Without an explicit address argument, don't recurse for detail. */
	if (!full && db_indent)
		return;

	db_indent += 2;
	for (entry = map->header.next; entry != &map->header;
	    entry = entry->next) {
		db_iprintf("map entry 0x%x: start=0x%x, end=0x%x, ",
		    (int) entry, (int) entry->start, (int) entry->end);
		if (map->is_main_map) {
			/* Indexed by entry->inheritance; order matters. */
			static char *inheritance_name[4] =
			{"share", "copy", "none", "donate_copy"};

			db_printf("prot=%x/%x/%s, ",
			    entry->protection,
			    entry->max_protection,
			    inheritance_name[entry->inheritance]);
			if (entry->wired_count != 0)
				db_printf("wired, ");
		}
		if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) {
			db_printf("share=0x%x, offset=0x%x\n",
			    (int) entry->object.share_map,
			    (int) entry->offset);
			/*
			 * Only recurse when this share map differs from the
			 * previous entry's, to avoid reprinting it.
			 */
			if ((entry->prev == &map->header) ||
			    ((entry->prev->eflags & MAP_ENTRY_IS_A_MAP) == 0) ||
			    (entry->prev->object.share_map !=
				entry->object.share_map)) {
				db_indent += 2;
				vm_map_print((int)entry->object.share_map,
				    full, 0, (char *)0);
				db_indent -= 2;
			}
		} else {
			db_printf("object=0x%x, offset=0x%x",
			    (int) entry->object.vm_object,
			    (int) entry->offset);
			if (entry->eflags & MAP_ENTRY_COW)
				db_printf(", copy (%s)",
				    (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done");
			db_printf("\n");

			/* Same dedup logic as above, for backing objects. */
			if ((entry->prev == &map->header) ||
			    (entry->prev->eflags & MAP_ENTRY_IS_A_MAP) ||
			    (entry->prev->object.vm_object !=
				entry->object.vm_object)) {
				db_indent += 2;
				vm_object_print((int)entry->object.vm_object,
				    full, 0, (char *)0);
				db_indent -= 2;
			}
		}
	}
	db_indent -= 2;
}
#endif /* DDB */