1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 37 * $FreeBSD$ 38 */ 39 40 /* 41 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 42 * All rights reserved. 43 * 44 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 45 * 46 * Permission to use, copy, modify and distribute this software and 47 * its documentation is hereby granted, provided that both the copyright 48 * notice and this permission notice appear in all copies of the 49 * software, derivative works or modified versions, and any portions 50 * thereof, and that both notices appear in supporting documentation. 51 * 52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 53 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 55 * 56 * Carnegie Mellon requests users of this software to return to 57 * 58 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 59 * School of Computer Science 60 * Carnegie Mellon University 61 * Pittsburgh PA 15213-3890 62 * 63 * any improvements or extensions that they make and grant Carnegie the 64 * rights to redistribute these changes. 65 */ 66 67 /* 68 * Resident memory management module. 
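 *
 * Editor's note (summary, added for orientation): this file maintains the
 * vm_page_array of per-physical-page structures, the vm_page_buckets
 * object/pindex hash used by vm_page_lookup(), and the paging queues in
 * vm_page_queues[] (PQ_FREE, PQ_CACHE, PQ_INACTIVE, PQ_ACTIVE), together
 * with the allocation, wiring, and valid/dirty-bit helpers built on them.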
69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/malloc.h> 74 #include <sys/proc.h> 75 #include <sys/vmmeter.h> 76 #include <sys/vnode.h> 77 78 #include <vm/vm.h> 79 #include <vm/vm_param.h> 80 #include <sys/lock.h> 81 #include <vm/vm_kern.h> 82 #include <vm/vm_object.h> 83 #include <vm/vm_page.h> 84 #include <vm/vm_pageout.h> 85 #include <vm/vm_pager.h> 86 #include <vm/vm_extern.h> 87 88 static void vm_page_queue_init __P((void)); 89 static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t)); 90 91 /* 92 * Associated with page of user-allocatable memory is a 93 * page structure. 94 */ 95 96 static struct vm_page **vm_page_buckets; /* Array of buckets */ 97 static int vm_page_bucket_count; /* How big is array? */ 98 static int vm_page_hash_mask; /* Mask for hash function */ 99 static volatile int vm_page_bucket_generation; 100 101 struct vpgqueues vm_page_queues[PQ_COUNT]; 102 103 static void 104 vm_page_queue_init(void) { 105 int i; 106 107 for(i=0;i<PQ_L2_SIZE;i++) { 108 vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count; 109 } 110 vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; 111 112 vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; 113 for(i=0;i<PQ_L2_SIZE;i++) { 114 vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count; 115 } 116 for(i=0;i<PQ_COUNT;i++) { 117 TAILQ_INIT(&vm_page_queues[i].pl); 118 } 119 } 120 121 vm_page_t vm_page_array = 0; 122 static int vm_page_array_size = 0; 123 long first_page = 0; 124 int vm_page_zero_count = 0; 125 126 static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex)); 127 static void vm_page_free_wakeup __P((void)); 128 129 /* 130 * vm_set_page_size: 131 * 132 * Sets the page size, perhaps based upon the memory 133 * size. Must be called before any use of page-size 134 * dependent functions. 135 */ 136 void 137 vm_set_page_size() 138 { 139 if (cnt.v_page_size == 0) 140 cnt.v_page_size = PAGE_SIZE; 141 if (((cnt.v_page_size - 1) & cnt.v_page_size) != 0) 142 panic("vm_set_page_size: page size not a power of two"); 143 } 144 145 /* 146 * vm_page_startup: 147 * 148 * Initializes the resident memory module. 149 * 150 * Allocates memory for the page cells, and 151 * for the object/offset-to-page hash table headers. 152 * Each page cell is initialized and placed on the free list. 153 */ 154 155 vm_offset_t 156 vm_page_startup(starta, enda, vaddr) 157 register vm_offset_t starta; 158 vm_offset_t enda; 159 register vm_offset_t vaddr; 160 { 161 register vm_offset_t mapped; 162 register vm_page_t m; 163 register struct vm_page **bucket; 164 vm_size_t npages, page_range; 165 register vm_offset_t new_start; 166 int i; 167 vm_offset_t pa; 168 int nblocks; 169 vm_offset_t first_managed_page; 170 171 /* the biggest memory array is the second group of pages */ 172 vm_offset_t start; 173 vm_offset_t biggestone, biggestsize; 174 175 vm_offset_t total; 176 177 total = 0; 178 biggestsize = 0; 179 biggestone = 0; 180 nblocks = 0; 181 vaddr = round_page(vaddr); 182 183 for (i = 0; phys_avail[i + 1]; i += 2) { 184 phys_avail[i] = round_page(phys_avail[i]); 185 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 186 } 187 188 for (i = 0; phys_avail[i + 1]; i += 2) { 189 int size = phys_avail[i + 1] - phys_avail[i]; 190 191 if (size > biggestsize) { 192 biggestone = i; 193 biggestsize = size; 194 } 195 ++nblocks; 196 total += size; 197 } 198 199 start = phys_avail[biggestone]; 200 201 /* 202 * Initialize the queue headers for the free queue, the active queue 203 * and the inactive queue. 
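         * (Editor's note: PQ_FREE and PQ_CACHE are really arrays of
         * PQ_L2_SIZE page-color sub-queues, which is why vm_page_queue_init()
         * above points each color sub-queue's cnt at the shared
         * v_free_count or v_cache_count counter.)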
204 */ 205 206 vm_page_queue_init(); 207 208 /* 209 * Allocate (and initialize) the hash table buckets. 210 * 211 * The number of buckets MUST BE a power of 2, and the actual value is 212 * the next power of 2 greater than the number of physical pages in 213 * the system. 214 * 215 * We make the hash table approximately 2x the number of pages to 216 * reduce the chain length. This is about the same size using the 217 * singly-linked list as the 1x hash table we were using before 218 * using TAILQ but the chain length will be smaller. 219 * 220 * Note: This computation can be tweaked if desired. 221 */ 222 vm_page_buckets = (struct vm_page **)vaddr; 223 bucket = vm_page_buckets; 224 if (vm_page_bucket_count == 0) { 225 vm_page_bucket_count = 1; 226 while (vm_page_bucket_count < atop(total)) 227 vm_page_bucket_count <<= 1; 228 } 229 vm_page_bucket_count <<= 1; 230 vm_page_hash_mask = vm_page_bucket_count - 1; 231 232 /* 233 * Validate these addresses. 234 */ 235 236 new_start = start + vm_page_bucket_count * sizeof(struct vm_page *); 237 new_start = round_page(new_start); 238 mapped = round_page(vaddr); 239 vaddr = pmap_map(mapped, start, new_start, 240 VM_PROT_READ | VM_PROT_WRITE); 241 start = new_start; 242 vaddr = round_page(vaddr); 243 bzero((caddr_t) mapped, vaddr - mapped); 244 245 for (i = 0; i < vm_page_bucket_count; i++) { 246 *bucket = NULL; 247 bucket++; 248 } 249 250 /* 251 * Compute the number of pages of memory that will be available for 252 * use (taking into account the overhead of a page structure per 253 * page). 254 */ 255 256 first_page = phys_avail[0] / PAGE_SIZE; 257 258 page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page; 259 npages = (total - (page_range * sizeof(struct vm_page)) - 260 (start - phys_avail[biggestone])) / PAGE_SIZE; 261 262 /* 263 * Initialize the mem entry structures now, and put them in the free 264 * queue. 265 */ 266 vm_page_array = (vm_page_t) vaddr; 267 mapped = vaddr; 268 269 /* 270 * Validate these addresses. 271 */ 272 new_start = round_page(start + page_range * sizeof(struct vm_page)); 273 mapped = pmap_map(mapped, start, new_start, 274 VM_PROT_READ | VM_PROT_WRITE); 275 start = new_start; 276 277 first_managed_page = start / PAGE_SIZE; 278 279 /* 280 * Clear all of the page structures 281 */ 282 bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); 283 vm_page_array_size = page_range; 284 285 /* 286 * Construct the free queue(s) in descending order (by physical 287 * address) so that the first 16MB of physical memory is allocated 288 * last rather than first. On large-memory machines, this avoids 289 * the exhaustion of low physical memory before isa_dmainit has run. 290 */ 291 cnt.v_page_count = 0; 292 cnt.v_free_count = 0; 293 for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { 294 if (i == biggestone) 295 pa = ptoa(first_managed_page); 296 else 297 pa = phys_avail[i]; 298 while (pa < phys_avail[i + 1] && npages-- > 0) { 299 ++cnt.v_page_count; 300 ++cnt.v_free_count; 301 m = PHYS_TO_VM_PAGE(pa); 302 m->phys_addr = pa; 303 m->flags = 0; 304 m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK; 305 m->queue = m->pc + PQ_FREE; 306 TAILQ_INSERT_HEAD(&vm_page_queues[m->queue].pl, m, pageq); 307 vm_page_queues[m->queue].lcnt++; 308 pa += PAGE_SIZE; 309 } 310 } 311 return (mapped); 312 } 313 314 /* 315 * vm_page_hash: 316 * 317 * Distributes the object/offset key pair among hash buckets. 318 * 319 * NOTE: This macro depends on vm_page_bucket_count being a power of 2. 320 * This routine may not block. 
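 *
 * Editor's example: on a machine with roughly 128MB of managed memory and
 * 4K pages, atop(total) is about 32768, so vm_page_startup() sizes the
 * table to 65536 buckets and vm_page_hash_mask becomes 0xffff; the hash
 * below then reduces to ((object pointer + pindex) ^ object->hash_rand)
 * truncated to 16 bits.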
321 * 322 * We try to randomize the hash based on the object to spread the pages 323 * out in the hash table without it costing us too much. 324 */ 325 static __inline int 326 vm_page_hash(object, pindex) 327 vm_object_t object; 328 vm_pindex_t pindex; 329 { 330 int i = ((uintptr_t)object + pindex) ^ object->hash_rand; 331 332 return(i & vm_page_hash_mask); 333 } 334 335 /* 336 * vm_page_insert: [ internal use only ] 337 * 338 * Inserts the given mem entry into the object and object list. 339 * 340 * The pagetables are not updated but will presumably fault the page 341 * in if necessary, or if a kernel page the caller will at some point 342 * enter the page into the kernel's pmap. We are not allowed to block 343 * here so we *can't* do this anyway. 344 * 345 * The object and page must be locked, and must be splhigh. 346 * This routine may not block. 347 */ 348 349 void 350 vm_page_insert(m, object, pindex) 351 register vm_page_t m; 352 register vm_object_t object; 353 register vm_pindex_t pindex; 354 { 355 register struct vm_page **bucket; 356 357 if (m->object != NULL) 358 panic("vm_page_insert: already inserted"); 359 360 /* 361 * Record the object/offset pair in this page 362 */ 363 364 m->object = object; 365 m->pindex = pindex; 366 367 /* 368 * Insert it into the object_object/offset hash table 369 */ 370 371 bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; 372 m->hnext = *bucket; 373 *bucket = m; 374 vm_page_bucket_generation++; 375 376 /* 377 * Now link into the object's list of backed pages. 378 */ 379 380 TAILQ_INSERT_TAIL(&object->memq, m, listq); 381 object->generation++; 382 383 /* 384 * show that the object has one more resident page. 385 */ 386 387 object->resident_page_count++; 388 389 /* 390 * Since we are inserting a new and possibly dirty page, 391 * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags. 392 */ 393 if (m->flags & PG_WRITEABLE) 394 vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); 395 } 396 397 /* 398 * vm_page_remove: 399 * NOTE: used by device pager as well -wfj 400 * 401 * Removes the given mem entry from the object/offset-page 402 * table and the object page list, but do not invalidate/terminate 403 * the backing store. 404 * 405 * The object and page must be locked, and at splhigh. 406 * The underlying pmap entry (if any) is NOT removed here. 407 * This routine may not block. 408 */ 409 410 void 411 vm_page_remove(m) 412 vm_page_t m; 413 { 414 vm_object_t object; 415 416 if (m->object == NULL) 417 return; 418 419 #if !defined(MAX_PERF) 420 if ((m->flags & PG_BUSY) == 0) { 421 panic("vm_page_remove: page not busy"); 422 } 423 #endif 424 425 /* 426 * Basically destroy the page. 427 */ 428 429 vm_page_wakeup(m); 430 431 object = m->object; 432 433 /* 434 * Remove from the object_object/offset hash table. The object 435 * must be on the hash queue, we will panic if it isn't 436 * 437 * Note: we must NULL-out m->hnext to prevent loops in detached 438 * buffers with vm_page_lookup(). 439 */ 440 441 { 442 struct vm_page **bucket; 443 444 bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)]; 445 while (*bucket != m) { 446 #if !defined(MAX_PERF) 447 if (*bucket == NULL) 448 panic("vm_page_remove(): page not found in hash"); 449 #endif 450 bucket = &(*bucket)->hnext; 451 } 452 *bucket = m->hnext; 453 m->hnext = NULL; 454 vm_page_bucket_generation++; 455 } 456 457 /* 458 * Now remove from the object's list of backed pages. 
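         *
         * The vm_page_bucket_generation bump above is also what lets the
         * unlocked scan in vm_page_lookup() notice that a bucket chain
         * changed underneath it and retry.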
459 */ 460 461 TAILQ_REMOVE(&object->memq, m, listq); 462 463 /* 464 * And show that the object has one fewer resident page. 465 */ 466 467 object->resident_page_count--; 468 object->generation++; 469 470 m->object = NULL; 471 } 472 473 /* 474 * vm_page_lookup: 475 * 476 * Returns the page associated with the object/offset 477 * pair specified; if none is found, NULL is returned. 478 * 479 * NOTE: the code below does not lock. It will operate properly if 480 * an interrupt makes a change, but the generation algorithm will not 481 * operate properly in an SMP environment where both cpu's are able to run 482 * kernel code simultaniously. 483 * 484 * The object must be locked. No side effects. 485 * This routine may not block. 486 * This is a critical path routine 487 */ 488 489 vm_page_t 490 vm_page_lookup(object, pindex) 491 register vm_object_t object; 492 register vm_pindex_t pindex; 493 { 494 register vm_page_t m; 495 register struct vm_page **bucket; 496 int generation; 497 498 /* 499 * Search the hash table for this object/offset pair 500 */ 501 502 retry: 503 generation = vm_page_bucket_generation; 504 bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; 505 for (m = *bucket; m != NULL; m = m->hnext) { 506 if ((m->object == object) && (m->pindex == pindex)) { 507 if (vm_page_bucket_generation != generation) 508 goto retry; 509 return (m); 510 } 511 } 512 if (vm_page_bucket_generation != generation) 513 goto retry; 514 return (NULL); 515 } 516 517 /* 518 * vm_page_rename: 519 * 520 * Move the given memory entry from its 521 * current object to the specified target object/offset. 522 * 523 * The object must be locked. 524 * This routine may not block. 525 * 526 * Note: this routine will raise itself to splvm(), the caller need not. 527 * 528 * Note: swap associated with the page must be invalidated by the move. We 529 * have to do this for several reasons: (1) we aren't freeing the 530 * page, (2) we are dirtying the page, (3) the VM system is probably 531 * moving the page from object A to B, and will then later move 532 * the backing store from A to B and we can't have a conflict. 533 * 534 * Note: we *always* dirty the page. It is necessary both for the 535 * fact that we moved it, and because we may be invalidating 536 * swap. If the page is on the cache, we have to deactivate it 537 * or vm_page_dirty() will panic. Dirty pages are not allowed 538 * on the cache. 539 */ 540 541 void 542 vm_page_rename(m, new_object, new_pindex) 543 register vm_page_t m; 544 register vm_object_t new_object; 545 vm_pindex_t new_pindex; 546 { 547 int s; 548 549 s = splvm(); 550 vm_page_remove(m); 551 vm_page_insert(m, new_object, new_pindex); 552 if (m->queue - m->pc == PQ_CACHE) 553 vm_page_deactivate(m); 554 vm_page_dirty(m); 555 splx(s); 556 } 557 558 /* 559 * vm_page_unqueue_nowakeup: 560 * 561 * vm_page_unqueue() without any wakeup 562 * 563 * This routine must be called at splhigh(). 564 * This routine may not block. 565 */ 566 567 void 568 vm_page_unqueue_nowakeup(m) 569 vm_page_t m; 570 { 571 int queue = m->queue; 572 struct vpgqueues *pq; 573 if (queue != PQ_NONE) { 574 pq = &vm_page_queues[queue]; 575 m->queue = PQ_NONE; 576 TAILQ_REMOVE(&pq->pl, m, pageq); 577 (*pq->cnt)--; 578 pq->lcnt--; 579 } 580 } 581 582 /* 583 * vm_page_unqueue: 584 * 585 * Remove a page from its queue. 586 * 587 * This routine must be called at splhigh(). 588 * This routine may not block. 
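 *
 * Unlike vm_page_unqueue_nowakeup() above, this variant may wake the
 * pagedaemon when it removes a page from a PQ_CACHE queue while the
 * paging targets are no longer being met.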
589 */ 590 591 void 592 vm_page_unqueue(m) 593 vm_page_t m; 594 { 595 int queue = m->queue; 596 struct vpgqueues *pq; 597 if (queue != PQ_NONE) { 598 m->queue = PQ_NONE; 599 pq = &vm_page_queues[queue]; 600 TAILQ_REMOVE(&pq->pl, m, pageq); 601 (*pq->cnt)--; 602 pq->lcnt--; 603 if ((queue - m->pc) == PQ_CACHE) { 604 if (vm_paging_needed()) 605 pagedaemon_wakeup(); 606 } 607 } 608 } 609 610 #if PQ_L2_SIZE > 1 611 612 /* 613 * vm_page_list_find: 614 * 615 * Find a page on the specified queue with color optimization. 616 * 617 * The page coloring optimization attempts to locate a page 618 * that does not overload other nearby pages in the object in 619 * the cpu's L1 or L2 caches. We need this optmization because 620 * cpu caches tend to be physical caches, while object spaces tend 621 * to be virtual. 622 * 623 * This routine must be called at splvm(). 624 * This routine may not block. 625 * 626 * This routine may only be called from the vm_page_list_find() macro 627 * in vm_page.h 628 */ 629 vm_page_t 630 _vm_page_list_find(basequeue, index) 631 int basequeue, index; 632 { 633 int i; 634 vm_page_t m = NULL; 635 struct vpgqueues *pq; 636 637 pq = &vm_page_queues[basequeue]; 638 639 /* 640 * Note that for the first loop, index+i and index-i wind up at the 641 * same place. Even though this is not totally optimal, we've already 642 * blown it by missing the cache case so we do not care. 643 */ 644 645 for(i = PQ_L2_SIZE / 2; i > 0; --i) { 646 if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl)) != NULL) 647 break; 648 649 if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl)) != NULL) 650 break; 651 } 652 return(m); 653 } 654 655 #endif 656 657 /* 658 * vm_page_select_cache: 659 * 660 * Find a page on the cache queue with color optimization. As pages 661 * might be found, but not applicable, they are deactivated. This 662 * keeps us from using potentially busy cached pages. 663 * 664 * This routine must be called at splvm(). 665 * This routine may not block. 666 */ 667 vm_page_t 668 vm_page_select_cache(object, pindex) 669 vm_object_t object; 670 vm_pindex_t pindex; 671 { 672 vm_page_t m; 673 674 while (TRUE) { 675 m = vm_page_list_find( 676 PQ_CACHE, 677 (pindex + object->pg_color) & PQ_L2_MASK, 678 FALSE 679 ); 680 if (m && ((m->flags & PG_BUSY) || m->busy || 681 m->hold_count || m->wire_count)) { 682 vm_page_deactivate(m); 683 continue; 684 } 685 return m; 686 } 687 } 688 689 /* 690 * vm_page_select_free: 691 * 692 * Find a free or zero page, with specified preference. We attempt to 693 * inline the nominal case and fall back to _vm_page_select_free() 694 * otherwise. 695 * 696 * This routine must be called at splvm(). 697 * This routine may not block. 698 */ 699 700 static __inline vm_page_t 701 vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zero) 702 { 703 vm_page_t m; 704 705 m = vm_page_list_find( 706 PQ_FREE, 707 (pindex + object->pg_color) & PQ_L2_MASK, 708 prefer_zero 709 ); 710 return(m); 711 } 712 713 /* 714 * vm_page_alloc: 715 * 716 * Allocate and return a memory cell associated 717 * with this VM object/offset pair. 718 * 719 * page_req classes: 720 * VM_ALLOC_NORMAL normal process request 721 * VM_ALLOC_SYSTEM system *really* needs a page 722 * VM_ALLOC_INTERRUPT interrupt time request 723 * VM_ALLOC_ZERO zero page 724 * 725 * Object must be locked. 726 * This routine may not block. 727 * 728 * Additional special handling is required when called from an 729 * interrupt (VM_ALLOC_INTERRUPT). 
We are not allowed to mess with 730 * the page cache in this case. 731 */ 732 733 vm_page_t 734 vm_page_alloc(object, pindex, page_req) 735 vm_object_t object; 736 vm_pindex_t pindex; 737 int page_req; 738 { 739 register vm_page_t m = NULL; 740 int s; 741 742 KASSERT(!vm_page_lookup(object, pindex), 743 ("vm_page_alloc: page already allocated")); 744 745 /* 746 * The pager is allowed to eat deeper into the free page list. 747 */ 748 749 if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) { 750 page_req = VM_ALLOC_SYSTEM; 751 }; 752 753 s = splvm(); 754 755 loop: 756 if (cnt.v_free_count > cnt.v_free_reserved) { 757 /* 758 * Allocate from the free queue if there are plenty of pages 759 * in it. 760 */ 761 if (page_req == VM_ALLOC_ZERO) 762 m = vm_page_select_free(object, pindex, TRUE); 763 else 764 m = vm_page_select_free(object, pindex, FALSE); 765 } else if ( 766 (page_req == VM_ALLOC_SYSTEM && 767 cnt.v_cache_count == 0 && 768 cnt.v_free_count > cnt.v_interrupt_free_min) || 769 (page_req == VM_ALLOC_INTERRUPT && cnt.v_free_count > 0) 770 ) { 771 /* 772 * Interrupt or system, dig deeper into the free list. 773 */ 774 m = vm_page_select_free(object, pindex, FALSE); 775 } else if (page_req != VM_ALLOC_INTERRUPT) { 776 /* 777 * Allocateable from cache (non-interrupt only). On success, 778 * we must free the page and try again, thus ensuring that 779 * cnt.v_*_free_min counters are replenished. 780 */ 781 m = vm_page_select_cache(object, pindex); 782 if (m == NULL) { 783 splx(s); 784 #if defined(DIAGNOSTIC) 785 if (cnt.v_cache_count > 0) 786 printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count); 787 #endif 788 vm_pageout_deficit++; 789 pagedaemon_wakeup(); 790 return (NULL); 791 } 792 KASSERT(m->dirty == 0, ("Found dirty cache page %p", m)); 793 vm_page_busy(m); 794 vm_page_protect(m, VM_PROT_NONE); 795 vm_page_free(m); 796 goto loop; 797 } else { 798 /* 799 * Not allocateable from cache from interrupt, give up. 800 */ 801 splx(s); 802 vm_pageout_deficit++; 803 pagedaemon_wakeup(); 804 return (NULL); 805 } 806 807 /* 808 * At this point we had better have found a good page. 809 */ 810 811 KASSERT( 812 m != NULL, 813 ("vm_page_alloc(): missing page on free queue\n") 814 ); 815 816 /* 817 * Remove from free queue 818 */ 819 820 vm_page_unqueue_nowakeup(m); 821 822 /* 823 * Initialize structure. Only the PG_ZERO flag is inherited. 824 */ 825 826 if (m->flags & PG_ZERO) { 827 vm_page_zero_count--; 828 m->flags = PG_ZERO | PG_BUSY; 829 } else { 830 m->flags = PG_BUSY; 831 } 832 m->wire_count = 0; 833 m->hold_count = 0; 834 m->act_count = 0; 835 m->busy = 0; 836 m->valid = 0; 837 KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m)); 838 839 /* 840 * vm_page_insert() is safe prior to the splx(). Note also that 841 * inserting a page here does not insert it into the pmap (which 842 * could cause us to block allocating memory). We cannot block 843 * anywhere. 844 */ 845 846 vm_page_insert(m, object, pindex); 847 848 /* 849 * Don't wakeup too often - wakeup the pageout daemon when 850 * we would be nearly out of memory. 
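         *
         * Editor's note: the page returned below is left PG_BUSY and fully
         * invalid (m->valid == 0); the caller is expected to fill it in and
         * then clear the busy state, typically via vm_page_wakeup().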
851 */ 852 if (vm_paging_needed() || cnt.v_free_count < cnt.v_pageout_free_min) 853 pagedaemon_wakeup(); 854 855 splx(s); 856 857 return (m); 858 } 859 860 /* 861 * vm_wait: (also see VM_WAIT macro) 862 * 863 * Block until free pages are available for allocation 864 */ 865 866 void 867 vm_wait() 868 { 869 int s; 870 871 s = splvm(); 872 if (curproc == pageproc) { 873 vm_pageout_pages_needed = 1; 874 tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0); 875 } else { 876 if (!vm_pages_needed) { 877 vm_pages_needed++; 878 wakeup(&vm_pages_needed); 879 } 880 tsleep(&cnt.v_free_count, PVM, "vmwait", 0); 881 } 882 splx(s); 883 } 884 885 /* 886 * vm_await: (also see VM_AWAIT macro) 887 * 888 * asleep on an event that will signal when free pages are available 889 * for allocation. 890 */ 891 892 void 893 vm_await() 894 { 895 int s; 896 897 s = splvm(); 898 if (curproc == pageproc) { 899 vm_pageout_pages_needed = 1; 900 asleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0); 901 } else { 902 if (!vm_pages_needed) { 903 vm_pages_needed++; 904 wakeup(&vm_pages_needed); 905 } 906 asleep(&cnt.v_free_count, PVM, "vmwait", 0); 907 } 908 splx(s); 909 } 910 911 #if 0 912 /* 913 * vm_page_sleep: 914 * 915 * Block until page is no longer busy. 916 */ 917 918 int 919 vm_page_sleep(vm_page_t m, char *msg, char *busy) { 920 int slept = 0; 921 if ((busy && *busy) || (m->flags & PG_BUSY)) { 922 int s; 923 s = splvm(); 924 if ((busy && *busy) || (m->flags & PG_BUSY)) { 925 vm_page_flag_set(m, PG_WANTED); 926 tsleep(m, PVM, msg, 0); 927 slept = 1; 928 } 929 splx(s); 930 } 931 return slept; 932 } 933 934 #endif 935 936 #if 0 937 938 /* 939 * vm_page_asleep: 940 * 941 * Similar to vm_page_sleep(), but does not block. Returns 0 if 942 * the page is not busy, or 1 if the page is busy. 943 * 944 * This routine has the side effect of calling asleep() if the page 945 * was busy (1 returned). 946 */ 947 948 int 949 vm_page_asleep(vm_page_t m, char *msg, char *busy) { 950 int slept = 0; 951 if ((busy && *busy) || (m->flags & PG_BUSY)) { 952 int s; 953 s = splvm(); 954 if ((busy && *busy) || (m->flags & PG_BUSY)) { 955 vm_page_flag_set(m, PG_WANTED); 956 asleep(m, PVM, msg, 0); 957 slept = 1; 958 } 959 splx(s); 960 } 961 return slept; 962 } 963 964 #endif 965 966 /* 967 * vm_page_activate: 968 * 969 * Put the specified page on the active list (if appropriate). 970 * Ensure that act_count is at least ACT_INIT but do not otherwise 971 * mess with it. 972 * 973 * The page queues must be locked. 974 * This routine may not block. 975 */ 976 void 977 vm_page_activate(m) 978 register vm_page_t m; 979 { 980 int s; 981 982 s = splvm(); 983 if (m->queue != PQ_ACTIVE) { 984 if ((m->queue - m->pc) == PQ_CACHE) 985 cnt.v_reactivated++; 986 987 vm_page_unqueue(m); 988 989 if (m->wire_count == 0) { 990 m->queue = PQ_ACTIVE; 991 vm_page_queues[PQ_ACTIVE].lcnt++; 992 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq); 993 if (m->act_count < ACT_INIT) 994 m->act_count = ACT_INIT; 995 cnt.v_active_count++; 996 } 997 } else { 998 if (m->act_count < ACT_INIT) 999 m->act_count = ACT_INIT; 1000 } 1001 1002 splx(s); 1003 } 1004 1005 /* 1006 * vm_page_free_wakeup: 1007 * 1008 * Helper routine for vm_page_free_toq() and vm_page_cache(). This 1009 * routine is called when a page has been added to the cache or free 1010 * queues. 1011 * 1012 * This routine may not block. 
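 * This is the wakeup side of vm_wait()/vm_await(): those routines
 * sleep on &vm_pageout_pages_needed (for the pagedaemon itself) or on
 * &cnt.v_free_count, which are exactly the channels woken up here.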
1013 * This routine must be called at splvm() 1014 */ 1015 static __inline void 1016 vm_page_free_wakeup() 1017 { 1018 /* 1019 * if pageout daemon needs pages, then tell it that there are 1020 * some free. 1021 */ 1022 if (vm_pageout_pages_needed) { 1023 wakeup(&vm_pageout_pages_needed); 1024 vm_pageout_pages_needed = 0; 1025 } 1026 /* 1027 * wakeup processes that are waiting on memory if we hit a 1028 * high water mark. And wakeup scheduler process if we have 1029 * lots of memory. this process will swapin processes. 1030 */ 1031 if (vm_pages_needed && vm_page_count_min()) { 1032 wakeup(&cnt.v_free_count); 1033 vm_pages_needed = 0; 1034 } 1035 } 1036 1037 /* 1038 * vm_page_free_toq: 1039 * 1040 * Returns the given page to the PQ_FREE list, 1041 * disassociating it with any VM object. 1042 * 1043 * Object and page must be locked prior to entry. 1044 * This routine may not block. 1045 */ 1046 1047 void 1048 vm_page_free_toq(vm_page_t m) 1049 { 1050 int s; 1051 struct vpgqueues *pq; 1052 vm_object_t object = m->object; 1053 1054 s = splvm(); 1055 1056 cnt.v_tfree++; 1057 1058 #if !defined(MAX_PERF) 1059 if (m->busy || ((m->queue - m->pc) == PQ_FREE) || 1060 (m->hold_count != 0)) { 1061 printf( 1062 "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n", 1063 (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0, 1064 m->hold_count); 1065 if ((m->queue - m->pc) == PQ_FREE) 1066 panic("vm_page_free: freeing free page"); 1067 else 1068 panic("vm_page_free: freeing busy page"); 1069 } 1070 #endif 1071 1072 /* 1073 * unqueue, then remove page. Note that we cannot destroy 1074 * the page here because we do not want to call the pager's 1075 * callback routine until after we've put the page on the 1076 * appropriate free queue. 1077 */ 1078 1079 vm_page_unqueue_nowakeup(m); 1080 vm_page_remove(m); 1081 1082 /* 1083 * If fictitious remove object association and 1084 * return, otherwise delay object association removal. 1085 */ 1086 1087 if ((m->flags & PG_FICTITIOUS) != 0) { 1088 splx(s); 1089 return; 1090 } 1091 1092 m->valid = 0; 1093 vm_page_undirty(m); 1094 1095 if (m->wire_count != 0) { 1096 #if !defined(MAX_PERF) 1097 if (m->wire_count > 1) { 1098 panic("vm_page_free: invalid wire count (%d), pindex: 0x%lx", 1099 m->wire_count, (long)m->pindex); 1100 } 1101 #endif 1102 panic("vm_page_free: freeing wired page\n"); 1103 } 1104 1105 /* 1106 * If we've exhausted the object's resident pages we want to free 1107 * it up. 1108 */ 1109 1110 if (object && 1111 (object->type == OBJT_VNODE) && 1112 ((object->flags & OBJ_DEAD) == 0) 1113 ) { 1114 struct vnode *vp = (struct vnode *)object->handle; 1115 1116 if (vp && VSHOULDFREE(vp)) { 1117 if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) { 1118 TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist); 1119 vp->v_flag |= VTBFREE; 1120 } 1121 } 1122 } 1123 1124 #ifdef __alpha__ 1125 pmap_page_is_free(m); 1126 #endif 1127 1128 m->queue = PQ_FREE + m->pc; 1129 pq = &vm_page_queues[m->queue]; 1130 pq->lcnt++; 1131 ++(*pq->cnt); 1132 1133 /* 1134 * Put zero'd pages on the end ( where we look for zero'd pages 1135 * first ) and non-zerod pages at the head. 1136 */ 1137 1138 if (m->flags & PG_ZERO) { 1139 TAILQ_INSERT_TAIL(&pq->pl, m, pageq); 1140 ++vm_page_zero_count; 1141 } else { 1142 TAILQ_INSERT_HEAD(&pq->pl, m, pageq); 1143 } 1144 1145 vm_page_free_wakeup(); 1146 1147 splx(s); 1148 } 1149 1150 /* 1151 * vm_page_wire: 1152 * 1153 * Mark this page as wired down by yet 1154 * another map, removing it from paging queues 1155 * as necessary. 
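 * Only the 0->1 wire transition removes the page from its paging
 * queue and bumps cnt.v_wire_count; later wirings simply increment
 * m->wire_count.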
 *
 * The page queues must be locked.
 * This routine may not block.
 */
void
vm_page_wire(m)
        register vm_page_t m;
{
        int s;

        s = splvm();
        if (m->wire_count == 0) {
                vm_page_unqueue(m);
                cnt.v_wire_count++;
        }
        m->wire_count++;
        splx(s);
        vm_page_flag_set(m, PG_MAPPED);
}

/*
 * vm_page_unwire:
 *
 * Release one wiring of this page, potentially
 * enabling it to be paged again.
 *
 * Many pages placed on the inactive queue should actually go
 * into the cache, but it is difficult to figure out which.  What
 * we do instead, if the inactive target is well met, is to put
 * clean pages at the head of the inactive queue instead of the tail.
 * This will cause them to be moved to the cache more quickly and
 * if not actively re-referenced, freed more quickly.  If we just
 * stick these pages at the end of the inactive queue, heavy filesystem
 * meta-data accesses can cause an unnecessary paging load on memory bound
 * processes.  This optimization causes one-time-use metadata to be
 * reused more quickly.
 *
 * A number of routines use vm_page_unwire() to guarantee that the page
 * will go into either the inactive or active queues, and will NEVER
 * be placed in the cache - for example, just after dirtying a page.
 * Dirty pages in the cache are not allowed.
 *
 * The page queues must be locked.
 * This routine may not block.
 */
void
vm_page_unwire(m, activate)
        register vm_page_t m;
        int activate;
{
        int s;

        s = splvm();

        if (m->wire_count > 0) {
                m->wire_count--;
                if (m->wire_count == 0) {
                        cnt.v_wire_count--;
                        if (activate) {
                                TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
                                m->queue = PQ_ACTIVE;
                                vm_page_queues[PQ_ACTIVE].lcnt++;
                                cnt.v_active_count++;
                        } else {
                                TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                                m->queue = PQ_INACTIVE;
                                vm_page_queues[PQ_INACTIVE].lcnt++;
                                cnt.v_inactive_count++;
                        }
                }
        } else {
#if !defined(MAX_PERF)
                panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
        }
        splx(s);
}

/*
 * Move the specified page to the inactive queue.  If the page has
 * any associated swap, the swap is deallocated.
 *
 * Normally athead is 0 resulting in LRU operation.  athead is set
 * to 1 if we want this page to be 'as if it were placed in the cache',
 * except without unmapping it from the process address space.
 *
 * This routine may not block.
 */
static __inline void
_vm_page_deactivate(vm_page_t m, int athead)
{
        int s;

        /*
         * Ignore if already inactive.
         */
        if (m->queue == PQ_INACTIVE)
                return;

        s = splvm();
        if (m->wire_count == 0) {
                if ((m->queue - m->pc) == PQ_CACHE)
                        cnt.v_reactivated++;
                vm_page_unqueue(m);
                if (athead)
                        TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                else
                        TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
                m->queue = PQ_INACTIVE;
                vm_page_queues[PQ_INACTIVE].lcnt++;
                cnt.v_inactive_count++;
        }
        splx(s);
}

void
vm_page_deactivate(vm_page_t m)
{
        _vm_page_deactivate(m, 0);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 *
 * This routine may not block.
 */
void
vm_page_cache(m)
        register vm_page_t m;
{
        int s;

#if !defined(MAX_PERF)
        if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
                printf("vm_page_cache: attempting to cache busy page\n");
                return;
        }
#endif
        if ((m->queue - m->pc) == PQ_CACHE)
                return;

        /*
         * Remove all pmaps and indicate that the page is not
         * writeable or mapped.
         */

        vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
        if (m->dirty != 0) {
                panic("vm_page_cache: caching a dirty page, pindex: %ld",
                        (long)m->pindex);
        }
#endif
        s = splvm();
        vm_page_unqueue_nowakeup(m);
        m->queue = PQ_CACHE + m->pc;
        vm_page_queues[m->queue].lcnt++;
        TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq);
        cnt.v_cache_count++;
        vm_page_free_wakeup();
        splx(s);
}

/*
 * vm_page_dontneed
 *
 * Cache, deactivate, or do nothing as appropriate.  This routine
 * is typically used by madvise() MADV_DONTNEED.
 *
 * Generally speaking we want to move the page into the cache so
 * it gets reused quickly.  However, this can result in a silly syndrome
 * due to the page recycling too quickly.  Small objects will not be
 * fully cached.  On the other hand, if we move the page to the inactive
 * queue we wind up with a problem whereby very large objects
 * unnecessarily blow away our inactive and cache queues.
 *
 * The solution is to move the pages based on a fixed weighting.  We
 * either leave them alone, deactivate them, or move them to the cache,
 * where moving them to the cache has the highest weighting.
 * By forcing some pages into other queues we eventually force the
 * system to balance the queues, potentially recovering other unrelated
 * space from active.  The idea is to not force this to happen too
 * often.
 */

void
vm_page_dontneed(m)
        vm_page_t m;
{
        static int dnweight;
        int dnw;
        int head;

        dnw = ++dnweight;

        /*
         * Occasionally leave the page alone.
         */

        if ((dnw & 0x01F0) == 0 ||
            m->queue == PQ_INACTIVE ||
            m->queue - m->pc == PQ_CACHE
        ) {
                if (m->act_count >= ACT_INIT)
                        --m->act_count;
                return;
        }

        if (m->dirty == 0)
                vm_page_test_dirty(m);

        if (m->dirty || (dnw & 0x0070) == 0) {
                /*
                 * Deactivate the page 3 times out of 32.
                 */
                head = 0;
        } else {
                /*
                 * Cache the page 28 times out of every 32.  Note that
                 * the page is deactivated instead of cached, but placed
                 * at the head of the queue instead of the tail.
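                 *
                 * Editor's note on the weighting: for clean pages,
                 * (dnw & 0x01F0) == 0 holds for 16 of every 512 calls
                 * (1/32, the "leave it alone" case above), and
                 * (dnw & 0x0070) == 0 for 16 of every 128 calls (4/32);
                 * since the first case is a subset of the second, that
                 * leaves 3/32 deactivations and 28/32 head-of-inactive
                 * placements, matching the counts quoted in these comments.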
1380 */ 1381 head = 1; 1382 } 1383 _vm_page_deactivate(m, head); 1384 } 1385 1386 /* 1387 * Grab a page, waiting until we are waken up due to the page 1388 * changing state. We keep on waiting, if the page continues 1389 * to be in the object. If the page doesn't exist, allocate it. 1390 * 1391 * This routine may block. 1392 */ 1393 vm_page_t 1394 vm_page_grab(object, pindex, allocflags) 1395 vm_object_t object; 1396 vm_pindex_t pindex; 1397 int allocflags; 1398 { 1399 1400 vm_page_t m; 1401 int s, generation; 1402 1403 retrylookup: 1404 if ((m = vm_page_lookup(object, pindex)) != NULL) { 1405 if (m->busy || (m->flags & PG_BUSY)) { 1406 generation = object->generation; 1407 1408 s = splvm(); 1409 while ((object->generation == generation) && 1410 (m->busy || (m->flags & PG_BUSY))) { 1411 vm_page_flag_set(m, PG_WANTED | PG_REFERENCED); 1412 tsleep(m, PVM, "pgrbwt", 0); 1413 if ((allocflags & VM_ALLOC_RETRY) == 0) { 1414 splx(s); 1415 return NULL; 1416 } 1417 } 1418 splx(s); 1419 goto retrylookup; 1420 } else { 1421 vm_page_busy(m); 1422 return m; 1423 } 1424 } 1425 1426 m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY); 1427 if (m == NULL) { 1428 VM_WAIT; 1429 if ((allocflags & VM_ALLOC_RETRY) == 0) 1430 return NULL; 1431 goto retrylookup; 1432 } 1433 1434 return m; 1435 } 1436 1437 /* 1438 * Mapping function for valid bits or for dirty bits in 1439 * a page. May not block. 1440 * 1441 * Inputs are required to range within a page. 1442 */ 1443 1444 __inline int 1445 vm_page_bits(int base, int size) 1446 { 1447 int first_bit; 1448 int last_bit; 1449 1450 KASSERT( 1451 base + size <= PAGE_SIZE, 1452 ("vm_page_bits: illegal base/size %d/%d", base, size) 1453 ); 1454 1455 if (size == 0) /* handle degenerate case */ 1456 return(0); 1457 1458 first_bit = base >> DEV_BSHIFT; 1459 last_bit = (base + size - 1) >> DEV_BSHIFT; 1460 1461 return ((2 << last_bit) - (1 << first_bit)); 1462 } 1463 1464 /* 1465 * vm_page_set_validclean: 1466 * 1467 * Sets portions of a page valid and clean. The arguments are expected 1468 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 1469 * of any partial chunks touched by the range. The invalid portion of 1470 * such chunks will be zero'd. 1471 * 1472 * This routine may not block. 1473 * 1474 * (base + size) must be less then or equal to PAGE_SIZE. 1475 */ 1476 void 1477 vm_page_set_validclean(m, base, size) 1478 vm_page_t m; 1479 int base; 1480 int size; 1481 { 1482 int pagebits; 1483 int frag; 1484 int endoff; 1485 1486 if (size == 0) /* handle degenerate case */ 1487 return; 1488 1489 /* 1490 * If the base is not DEV_BSIZE aligned and the valid 1491 * bit is clear, we have to zero out a portion of the 1492 * first block. 1493 */ 1494 1495 if ((frag = base & ~(DEV_BSIZE - 1)) != base && 1496 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0 1497 ) { 1498 pmap_zero_page_area( 1499 VM_PAGE_TO_PHYS(m), 1500 frag, 1501 base - frag 1502 ); 1503 } 1504 1505 /* 1506 * If the ending offset is not DEV_BSIZE aligned and the 1507 * valid bit is clear, we have to zero out a portion of 1508 * the last block. 1509 */ 1510 1511 endoff = base + size; 1512 1513 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && 1514 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0 1515 ) { 1516 pmap_zero_page_area( 1517 VM_PAGE_TO_PHYS(m), 1518 endoff, 1519 DEV_BSIZE - (endoff & (DEV_BSIZE - 1)) 1520 ); 1521 } 1522 1523 /* 1524 * Set valid, clear dirty bits. If validating the entire 1525 * page we can safely clear the pmap modify bit. 
We also 1526 * use this opportunity to clear the PG_NOSYNC flag. If a process 1527 * takes a write fault on a MAP_NOSYNC memory area the flag will 1528 * be set again. 1529 */ 1530 1531 pagebits = vm_page_bits(base, size); 1532 m->valid |= pagebits; 1533 m->dirty &= ~pagebits; 1534 if (base == 0 && size == PAGE_SIZE) { 1535 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 1536 vm_page_flag_clear(m, PG_NOSYNC); 1537 } 1538 } 1539 1540 #if 0 1541 1542 void 1543 vm_page_set_dirty(m, base, size) 1544 vm_page_t m; 1545 int base; 1546 int size; 1547 { 1548 m->dirty |= vm_page_bits(base, size); 1549 } 1550 1551 #endif 1552 1553 void 1554 vm_page_clear_dirty(m, base, size) 1555 vm_page_t m; 1556 int base; 1557 int size; 1558 { 1559 m->dirty &= ~vm_page_bits(base, size); 1560 } 1561 1562 /* 1563 * vm_page_set_invalid: 1564 * 1565 * Invalidates DEV_BSIZE'd chunks within a page. Both the 1566 * valid and dirty bits for the effected areas are cleared. 1567 * 1568 * May not block. 1569 */ 1570 void 1571 vm_page_set_invalid(m, base, size) 1572 vm_page_t m; 1573 int base; 1574 int size; 1575 { 1576 int bits; 1577 1578 bits = vm_page_bits(base, size); 1579 m->valid &= ~bits; 1580 m->dirty &= ~bits; 1581 m->object->generation++; 1582 } 1583 1584 /* 1585 * vm_page_zero_invalid() 1586 * 1587 * The kernel assumes that the invalid portions of a page contain 1588 * garbage, but such pages can be mapped into memory by user code. 1589 * When this occurs, we must zero out the non-valid portions of the 1590 * page so user code sees what it expects. 1591 * 1592 * Pages are most often semi-valid when the end of a file is mapped 1593 * into memory and the file's size is not page aligned. 1594 */ 1595 1596 void 1597 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) 1598 { 1599 int b; 1600 int i; 1601 1602 /* 1603 * Scan the valid bits looking for invalid sections that 1604 * must be zerod. Invalid sub-DEV_BSIZE'd areas ( where the 1605 * valid bit may be set ) have already been zerod by 1606 * vm_page_set_validclean(). 1607 */ 1608 1609 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { 1610 if (i == (PAGE_SIZE / DEV_BSIZE) || 1611 (m->valid & (1 << i)) 1612 ) { 1613 if (i > b) { 1614 pmap_zero_page_area( 1615 VM_PAGE_TO_PHYS(m), 1616 b << DEV_BSHIFT, 1617 (i - b) << DEV_BSHIFT 1618 ); 1619 } 1620 b = i + 1; 1621 } 1622 } 1623 1624 /* 1625 * setvalid is TRUE when we can safely set the zero'd areas 1626 * as being valid. We can do this if there are no cache consistancy 1627 * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. 1628 */ 1629 1630 if (setvalid) 1631 m->valid = VM_PAGE_BITS_ALL; 1632 } 1633 1634 /* 1635 * vm_page_is_valid: 1636 * 1637 * Is (partial) page valid? Note that the case where size == 0 1638 * will return FALSE in the degenerate case where the page is 1639 * entirely invalid, and TRUE otherwise. 1640 * 1641 * May not block. 1642 */ 1643 1644 int 1645 vm_page_is_valid(m, base, size) 1646 vm_page_t m; 1647 int base; 1648 int size; 1649 { 1650 int bits = vm_page_bits(base, size); 1651 1652 if (m->valid && ((m->valid & bits) == bits)) 1653 return 1; 1654 else 1655 return 0; 1656 } 1657 1658 /* 1659 * update dirty bits from pmap/mmu. May not block. 1660 */ 1661 1662 void 1663 vm_page_test_dirty(m) 1664 vm_page_t m; 1665 { 1666 if ((m->dirty != VM_PAGE_BITS_ALL) && 1667 pmap_is_modified(VM_PAGE_TO_PHYS(m))) { 1668 vm_page_dirty(m); 1669 } 1670 } 1671 1672 /* 1673 * This interface is for merging with malloc() someday. 
1674 * Even if we never implement compaction so that contiguous allocation 1675 * works after initialization time, malloc()'s data structures are good 1676 * for statistics and for allocations of less than a page. 1677 */ 1678 void * 1679 contigmalloc1(size, type, flags, low, high, alignment, boundary, map) 1680 unsigned long size; /* should be size_t here and for malloc() */ 1681 struct malloc_type *type; 1682 int flags; 1683 unsigned long low; 1684 unsigned long high; 1685 unsigned long alignment; 1686 unsigned long boundary; 1687 vm_map_t map; 1688 { 1689 int i, s, start; 1690 vm_offset_t addr, phys, tmp_addr; 1691 int pass; 1692 vm_page_t pga = vm_page_array; 1693 1694 size = round_page(size); 1695 #if !defined(MAX_PERF) 1696 if (size == 0) 1697 panic("contigmalloc1: size must not be 0"); 1698 if ((alignment & (alignment - 1)) != 0) 1699 panic("contigmalloc1: alignment must be a power of 2"); 1700 if ((boundary & (boundary - 1)) != 0) 1701 panic("contigmalloc1: boundary must be a power of 2"); 1702 #endif 1703 1704 start = 0; 1705 for (pass = 0; pass <= 1; pass++) { 1706 s = splvm(); 1707 again: 1708 /* 1709 * Find first page in array that is free, within range, aligned, and 1710 * such that the boundary won't be crossed. 1711 */ 1712 for (i = start; i < cnt.v_page_count; i++) { 1713 int pqtype; 1714 phys = VM_PAGE_TO_PHYS(&pga[i]); 1715 pqtype = pga[i].queue - pga[i].pc; 1716 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) && 1717 (phys >= low) && (phys < high) && 1718 ((phys & (alignment - 1)) == 0) && 1719 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) 1720 break; 1721 } 1722 1723 /* 1724 * If the above failed or we will exceed the upper bound, fail. 1725 */ 1726 if ((i == cnt.v_page_count) || 1727 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { 1728 vm_page_t m, next; 1729 1730 again1: 1731 for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl); 1732 m != NULL; 1733 m = next) { 1734 1735 KASSERT(m->queue == PQ_INACTIVE, 1736 ("contigmalloc1: page %p is not PQ_INACTIVE", m)); 1737 1738 next = TAILQ_NEXT(m, pageq); 1739 if (vm_page_sleep_busy(m, TRUE, "vpctw0")) 1740 goto again1; 1741 vm_page_test_dirty(m); 1742 if (m->dirty) { 1743 if (m->object->type == OBJT_VNODE) { 1744 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1745 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC); 1746 VOP_UNLOCK(m->object->handle, 0, curproc); 1747 goto again1; 1748 } else if (m->object->type == OBJT_SWAP || 1749 m->object->type == OBJT_DEFAULT) { 1750 vm_pageout_flush(&m, 1, 0); 1751 goto again1; 1752 } 1753 } 1754 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 1755 vm_page_cache(m); 1756 } 1757 1758 for (m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); 1759 m != NULL; 1760 m = next) { 1761 1762 KASSERT(m->queue == PQ_ACTIVE, 1763 ("contigmalloc1: page %p is not PQ_ACTIVE", m)); 1764 1765 next = TAILQ_NEXT(m, pageq); 1766 if (vm_page_sleep_busy(m, TRUE, "vpctw1")) 1767 goto again1; 1768 vm_page_test_dirty(m); 1769 if (m->dirty) { 1770 if (m->object->type == OBJT_VNODE) { 1771 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1772 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC); 1773 VOP_UNLOCK(m->object->handle, 0, curproc); 1774 goto again1; 1775 } else if (m->object->type == OBJT_SWAP || 1776 m->object->type == OBJT_DEFAULT) { 1777 vm_pageout_flush(&m, 1, 0); 1778 goto again1; 1779 } 1780 } 1781 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 1782 vm_page_cache(m); 1783 } 1784 1785 splx(s); 1786 continue; 1787 } 1788 start = i; 1789 1790 
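                /*
                 * Editor's note: pga[start] is now a candidate first page
                 * that is free or cached, within [low, high), aligned, and
                 * does not straddle the requested boundary; the loop below
                 * verifies that the remaining pages of the run are
                 * physically contiguous and likewise free or cached,
                 * otherwise the search resumes one page further on.
                 */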
/* 1791 * Check successive pages for contiguous and free. 1792 */ 1793 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { 1794 int pqtype; 1795 pqtype = pga[i].queue - pga[i].pc; 1796 if ((VM_PAGE_TO_PHYS(&pga[i]) != 1797 (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || 1798 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) { 1799 start++; 1800 goto again; 1801 } 1802 } 1803 1804 for (i = start; i < (start + size / PAGE_SIZE); i++) { 1805 int pqtype; 1806 vm_page_t m = &pga[i]; 1807 1808 pqtype = m->queue - m->pc; 1809 if (pqtype == PQ_CACHE) { 1810 vm_page_busy(m); 1811 vm_page_free(m); 1812 } 1813 1814 TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq); 1815 vm_page_queues[m->queue].lcnt--; 1816 cnt.v_free_count--; 1817 m->valid = VM_PAGE_BITS_ALL; 1818 m->flags = 0; 1819 KASSERT(m->dirty == 0, ("contigmalloc1: page %p was dirty", m)); 1820 m->wire_count = 0; 1821 m->busy = 0; 1822 m->queue = PQ_NONE; 1823 m->object = NULL; 1824 vm_page_wire(m); 1825 } 1826 1827 /* 1828 * We've found a contiguous chunk that meets are requirements. 1829 * Allocate kernel VM, unfree and assign the physical pages to it and 1830 * return kernel VM pointer. 1831 */ 1832 tmp_addr = addr = kmem_alloc_pageable(map, size); 1833 if (addr == 0) { 1834 /* 1835 * XXX We almost never run out of kernel virtual 1836 * space, so we don't make the allocated memory 1837 * above available. 1838 */ 1839 splx(s); 1840 return (NULL); 1841 } 1842 1843 for (i = start; i < (start + size / PAGE_SIZE); i++) { 1844 vm_page_t m = &pga[i]; 1845 vm_page_insert(m, kernel_object, 1846 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); 1847 pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); 1848 tmp_addr += PAGE_SIZE; 1849 } 1850 1851 splx(s); 1852 return ((void *)addr); 1853 } 1854 return NULL; 1855 } 1856 1857 void * 1858 contigmalloc(size, type, flags, low, high, alignment, boundary) 1859 unsigned long size; /* should be size_t here and for malloc() */ 1860 struct malloc_type *type; 1861 int flags; 1862 unsigned long low; 1863 unsigned long high; 1864 unsigned long alignment; 1865 unsigned long boundary; 1866 { 1867 return contigmalloc1(size, type, flags, low, high, alignment, boundary, 1868 kernel_map); 1869 } 1870 1871 void 1872 contigfree(addr, size, type) 1873 void *addr; 1874 unsigned long size; 1875 struct malloc_type *type; 1876 { 1877 kmem_free(kernel_map, (vm_offset_t)addr, size); 1878 } 1879 1880 vm_offset_t 1881 vm_page_alloc_contig(size, low, high, alignment) 1882 vm_offset_t size; 1883 vm_offset_t low; 1884 vm_offset_t high; 1885 vm_offset_t alignment; 1886 { 1887 return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high, 1888 alignment, 0ul, kernel_map)); 1889 } 1890 1891 #include "opt_ddb.h" 1892 #ifdef DDB 1893 #include <sys/kernel.h> 1894 1895 #include <ddb/ddb.h> 1896 1897 DB_SHOW_COMMAND(page, vm_page_print_page_info) 1898 { 1899 db_printf("cnt.v_free_count: %d\n", cnt.v_free_count); 1900 db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count); 1901 db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count); 1902 db_printf("cnt.v_active_count: %d\n", cnt.v_active_count); 1903 db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count); 1904 db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved); 1905 db_printf("cnt.v_free_min: %d\n", cnt.v_free_min); 1906 db_printf("cnt.v_free_target: %d\n", cnt.v_free_target); 1907 db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min); 1908 db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target); 1909 } 1910 1911 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 1912 { 
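        /*
         * Editor's note: dumps the local (lcnt) length of every PQ_FREE and
         * PQ_CACHE color sub-queue, followed by the active/inactive counts.
         */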
1913 int i; 1914 db_printf("PQ_FREE:"); 1915 for(i=0;i<PQ_L2_SIZE;i++) { 1916 db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt); 1917 } 1918 db_printf("\n"); 1919 1920 db_printf("PQ_CACHE:"); 1921 for(i=0;i<PQ_L2_SIZE;i++) { 1922 db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt); 1923 } 1924 db_printf("\n"); 1925 1926 db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", 1927 vm_page_queues[PQ_ACTIVE].lcnt, 1928 vm_page_queues[PQ_INACTIVE].lcnt); 1929 } 1930 #endif /* DDB */ 1931
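
/*
 * Editor's sketch (not part of the original file, kept under #if 0 like
 * the other disabled code above): a minimal illustration of how a consumer
 * of this module typically drives the allocation interfaces.  The function
 * name example_page_cycle() and its arguments are hypothetical; the
 * vm_page_* and VM_WAIT calls are the ones implemented or referenced in
 * this file, but the exact protocol a real caller must follow depends on
 * the object lock and spl rules described in the comments above.
 */
#if 0
static void
example_page_cycle(vm_object_t object, vm_pindex_t pindex)
{
        vm_page_t m;

        /*
         * Allocate a page for (object, pindex).  It comes back PG_BUSY and
         * fully invalid; sleep and retry if memory is tight.
         */
        while ((m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL)) == NULL)
                VM_WAIT;

        /* ... copy data into the page ... */
        m->valid = VM_PAGE_BITS_ALL;
        vm_page_wakeup(m);              /* clear PG_BUSY, wake any waiters */

        /*
         * Much later, to throw the page away: it must be busied again
         * before being freed, the same pattern vm_page_alloc() and
         * contigmalloc1() use above.
         */
        vm_page_busy(m);
        vm_page_free(m);
}
#endif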