1 /* 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * The Mach Operating System project at Carnegie-Mellon University. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91 37 * $FreeBSD$ 38 */ 39 40 /* 41 * Copyright (c) 1987, 1990 Carnegie-Mellon University. 42 * All rights reserved. 43 * 44 * Authors: Avadis Tevanian, Jr., Michael Wayne Young 45 * 46 * Permission to use, copy, modify and distribute this software and 47 * its documentation is hereby granted, provided that both the copyright 48 * notice and this permission notice appear in all copies of the 49 * software, derivative works or modified versions, and any portions 50 * thereof, and that both notices appear in supporting documentation. 51 * 52 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 53 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND 54 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 55 * 56 * Carnegie Mellon requests users of this software to return to 57 * 58 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 59 * School of Computer Science 60 * Carnegie Mellon University 61 * Pittsburgh PA 15213-3890 62 * 63 * any improvements or extensions that they make and grant Carnegie the 64 * rights to redistribute these changes. 65 */ 66 67 /* 68 * Resident memory management module. 
69 */ 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/malloc.h> 74 #include <sys/proc.h> 75 #include <sys/vmmeter.h> 76 #include <sys/vnode.h> 77 78 #include <vm/vm.h> 79 #include <vm/vm_param.h> 80 #include <sys/lock.h> 81 #include <vm/vm_kern.h> 82 #include <vm/vm_object.h> 83 #include <vm/vm_page.h> 84 #include <vm/vm_pageout.h> 85 #include <vm/vm_pager.h> 86 #include <vm/vm_extern.h> 87 88 static void vm_page_queue_init __P((void)); 89 static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t)); 90 91 /* 92 * Associated with page of user-allocatable memory is a 93 * page structure. 94 */ 95 96 static struct vm_page **vm_page_buckets; /* Array of buckets */ 97 static int vm_page_bucket_count; /* How big is array? */ 98 static int vm_page_hash_mask; /* Mask for hash function */ 99 static volatile int vm_page_bucket_generation; 100 101 struct vpgqueues vm_page_queues[PQ_COUNT]; 102 103 static void 104 vm_page_queue_init(void) { 105 int i; 106 107 for(i=0;i<PQ_L2_SIZE;i++) { 108 vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count; 109 } 110 vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count; 111 112 vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count; 113 for(i=0;i<PQ_L2_SIZE;i++) { 114 vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count; 115 } 116 for(i=0;i<PQ_COUNT;i++) { 117 TAILQ_INIT(&vm_page_queues[i].pl); 118 } 119 } 120 121 vm_page_t vm_page_array = 0; 122 static int vm_page_array_size = 0; 123 long first_page = 0; 124 int vm_page_zero_count = 0; 125 126 static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex)); 127 static void vm_page_free_wakeup __P((void)); 128 129 /* 130 * vm_set_page_size: 131 * 132 * Sets the page size, perhaps based upon the memory 133 * size. Must be called before any use of page-size 134 * dependent functions. 135 */ 136 void 137 vm_set_page_size() 138 { 139 if (cnt.v_page_size == 0) 140 cnt.v_page_size = PAGE_SIZE; 141 if (((cnt.v_page_size - 1) & cnt.v_page_size) != 0) 142 panic("vm_set_page_size: page size not a power of two"); 143 } 144 145 /* 146 * vm_page_startup: 147 * 148 * Initializes the resident memory module. 149 * 150 * Allocates memory for the page cells, and 151 * for the object/offset-to-page hash table headers. 152 * Each page cell is initialized and placed on the free list. 153 */ 154 155 vm_offset_t 156 vm_page_startup(starta, enda, vaddr) 157 register vm_offset_t starta; 158 vm_offset_t enda; 159 register vm_offset_t vaddr; 160 { 161 register vm_offset_t mapped; 162 register vm_page_t m; 163 register struct vm_page **bucket; 164 vm_size_t npages, page_range; 165 register vm_offset_t new_start; 166 int i; 167 vm_offset_t pa; 168 int nblocks; 169 vm_offset_t first_managed_page; 170 171 /* the biggest memory array is the second group of pages */ 172 vm_offset_t start; 173 vm_offset_t biggestone, biggestsize; 174 175 vm_offset_t total; 176 177 total = 0; 178 biggestsize = 0; 179 biggestone = 0; 180 nblocks = 0; 181 vaddr = round_page(vaddr); 182 183 for (i = 0; phys_avail[i + 1]; i += 2) { 184 phys_avail[i] = round_page(phys_avail[i]); 185 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]); 186 } 187 188 for (i = 0; phys_avail[i + 1]; i += 2) { 189 int size = phys_avail[i + 1] - phys_avail[i]; 190 191 if (size > biggestsize) { 192 biggestone = i; 193 biggestsize = size; 194 } 195 ++nblocks; 196 total += size; 197 } 198 199 start = phys_avail[biggestone]; 200 201 /* 202 * Initialize the queue headers for the free queue, the active queue 203 * and the inactive queue. 
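	 *
	 * PQ_FREE and PQ_CACHE are really arrays of PQ_L2_SIZE queues,
	 * one per page color; vm_page_queue_init() points each of them
	 * at the shared v_free_count and v_cache_count counters, while
	 * PQ_ACTIVE and PQ_INACTIVE are single queues.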
204 */ 205 206 vm_page_queue_init(); 207 208 /* 209 * Allocate (and initialize) the hash table buckets. 210 * 211 * The number of buckets MUST BE a power of 2, and the actual value is 212 * the next power of 2 greater than the number of physical pages in 213 * the system. 214 * 215 * We make the hash table approximately 2x the number of pages to 216 * reduce the chain length. This is about the same size using the 217 * singly-linked list as the 1x hash table we were using before 218 * using TAILQ but the chain length will be smaller. 219 * 220 * Note: This computation can be tweaked if desired. 221 */ 222 vm_page_buckets = (struct vm_page **)vaddr; 223 bucket = vm_page_buckets; 224 if (vm_page_bucket_count == 0) { 225 vm_page_bucket_count = 1; 226 while (vm_page_bucket_count < atop(total)) 227 vm_page_bucket_count <<= 1; 228 } 229 vm_page_bucket_count <<= 1; 230 vm_page_hash_mask = vm_page_bucket_count - 1; 231 232 /* 233 * Validate these addresses. 234 */ 235 236 new_start = start + vm_page_bucket_count * sizeof(struct vm_page *); 237 new_start = round_page(new_start); 238 mapped = round_page(vaddr); 239 vaddr = pmap_map(mapped, start, new_start, 240 VM_PROT_READ | VM_PROT_WRITE); 241 start = new_start; 242 vaddr = round_page(vaddr); 243 bzero((caddr_t) mapped, vaddr - mapped); 244 245 for (i = 0; i < vm_page_bucket_count; i++) { 246 *bucket = NULL; 247 bucket++; 248 } 249 250 /* 251 * Compute the number of pages of memory that will be available for 252 * use (taking into account the overhead of a page structure per 253 * page). 254 */ 255 256 first_page = phys_avail[0] / PAGE_SIZE; 257 258 page_range = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE - first_page; 259 npages = (total - (page_range * sizeof(struct vm_page)) - 260 (start - phys_avail[biggestone])) / PAGE_SIZE; 261 262 /* 263 * Initialize the mem entry structures now, and put them in the free 264 * queue. 265 */ 266 vm_page_array = (vm_page_t) vaddr; 267 mapped = vaddr; 268 269 /* 270 * Validate these addresses. 271 */ 272 new_start = round_page(start + page_range * sizeof(struct vm_page)); 273 mapped = pmap_map(mapped, start, new_start, 274 VM_PROT_READ | VM_PROT_WRITE); 275 start = new_start; 276 277 first_managed_page = start / PAGE_SIZE; 278 279 /* 280 * Clear all of the page structures 281 */ 282 bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); 283 vm_page_array_size = page_range; 284 285 /* 286 * Construct the free queue(s) in descending order (by physical 287 * address) so that the first 16MB of physical memory is allocated 288 * last rather than first. On large-memory machines, this avoids 289 * the exhaustion of low physical memory before isa_dmainit has run. 290 */ 291 cnt.v_page_count = 0; 292 cnt.v_free_count = 0; 293 for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) { 294 if (i == biggestone) 295 pa = ptoa(first_managed_page); 296 else 297 pa = phys_avail[i]; 298 while (pa < phys_avail[i + 1] && npages-- > 0) { 299 ++cnt.v_page_count; 300 ++cnt.v_free_count; 301 m = PHYS_TO_VM_PAGE(pa); 302 m->phys_addr = pa; 303 m->flags = 0; 304 m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK; 305 m->queue = m->pc + PQ_FREE; 306 TAILQ_INSERT_HEAD(&vm_page_queues[m->queue].pl, m, pageq); 307 vm_page_queues[m->queue].lcnt++; 308 pa += PAGE_SIZE; 309 } 310 } 311 return (mapped); 312 } 313 314 /* 315 * vm_page_hash: 316 * 317 * Distributes the object/offset key pair among hash buckets. 318 * 319 * NOTE: This macro depends on vm_page_bucket_count being a power of 2. 320 * This routine may not block. 
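 *
 * The bucket index is computed by folding the object pointer and the
 * page index together and XORing in the object's hash_rand value,
 * i.e. roughly:
 *
 *	i = ((uintptr_t)object + pindex) ^ object->hash_rand;
 *	bucket = i & vm_page_hash_mask;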
321 * 322 * We try to randomize the hash based on the object to spread the pages 323 * out in the hash table without it costing us too much. 324 */ 325 static __inline int 326 vm_page_hash(object, pindex) 327 vm_object_t object; 328 vm_pindex_t pindex; 329 { 330 int i = ((uintptr_t)object + pindex) ^ object->hash_rand; 331 332 return(i & vm_page_hash_mask); 333 } 334 335 /* 336 * vm_page_insert: [ internal use only ] 337 * 338 * Inserts the given mem entry into the object and object list. 339 * 340 * The pagetables are not updated but will presumably fault the page 341 * in if necessary, or if a kernel page the caller will at some point 342 * enter the page into the kernel's pmap. We are not allowed to block 343 * here so we *can't* do this anyway. 344 * 345 * The object and page must be locked, and must be splhigh. 346 * This routine may not block. 347 */ 348 349 void 350 vm_page_insert(m, object, pindex) 351 register vm_page_t m; 352 register vm_object_t object; 353 register vm_pindex_t pindex; 354 { 355 register struct vm_page **bucket; 356 357 if (m->object != NULL) 358 panic("vm_page_insert: already inserted"); 359 360 /* 361 * Record the object/offset pair in this page 362 */ 363 364 m->object = object; 365 m->pindex = pindex; 366 367 /* 368 * Insert it into the object_object/offset hash table 369 */ 370 371 bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; 372 m->hnext = *bucket; 373 *bucket = m; 374 vm_page_bucket_generation++; 375 376 /* 377 * Now link into the object's list of backed pages. 378 */ 379 380 TAILQ_INSERT_TAIL(&object->memq, m, listq); 381 object->generation++; 382 383 /* 384 * show that the object has one more resident page. 385 */ 386 387 object->resident_page_count++; 388 389 /* 390 * Since we are inserting a new and possibly dirty page, 391 * update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags. 392 */ 393 if (m->flags & PG_WRITEABLE) 394 vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY); 395 } 396 397 /* 398 * vm_page_remove: 399 * NOTE: used by device pager as well -wfj 400 * 401 * Removes the given mem entry from the object/offset-page 402 * table and the object page list, but do not invalidate/terminate 403 * the backing store. 404 * 405 * The object and page must be locked, and at splhigh. 406 * The underlying pmap entry (if any) is NOT removed here. 407 * This routine may not block. 408 */ 409 410 void 411 vm_page_remove(m) 412 vm_page_t m; 413 { 414 vm_object_t object; 415 416 if (m->object == NULL) 417 return; 418 419 #if !defined(MAX_PERF) 420 if ((m->flags & PG_BUSY) == 0) { 421 panic("vm_page_remove: page not busy"); 422 } 423 #endif 424 425 /* 426 * Basically destroy the page. 427 */ 428 429 vm_page_wakeup(m); 430 431 object = m->object; 432 433 /* 434 * Remove from the object_object/offset hash table. The object 435 * must be on the hash queue, we will panic if it isn't 436 * 437 * Note: we must NULL-out m->hnext to prevent loops in detached 438 * buffers with vm_page_lookup(). 439 */ 440 441 { 442 struct vm_page **bucket; 443 444 bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)]; 445 while (*bucket != m) { 446 #if !defined(MAX_PERF) 447 if (*bucket == NULL) 448 panic("vm_page_remove(): page not found in hash"); 449 #endif 450 bucket = &(*bucket)->hnext; 451 } 452 *bucket = m->hnext; 453 m->hnext = NULL; 454 vm_page_bucket_generation++; 455 } 456 457 /* 458 * Now remove from the object's list of backed pages. 
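	 * The object's generation count is bumped below so that code
	 * sleeping on a busy page (vm_page_grab(), for example) can
	 * notice that the object changed while it slept.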
459 */ 460 461 TAILQ_REMOVE(&object->memq, m, listq); 462 463 /* 464 * And show that the object has one fewer resident page. 465 */ 466 467 object->resident_page_count--; 468 object->generation++; 469 470 m->object = NULL; 471 } 472 473 /* 474 * vm_page_lookup: 475 * 476 * Returns the page associated with the object/offset 477 * pair specified; if none is found, NULL is returned. 478 * 479 * NOTE: the code below does not lock. It will operate properly if 480 * an interrupt makes a change, but the generation algorithm will not 481 * operate properly in an SMP environment where both cpu's are able to run 482 * kernel code simultaniously. 483 * 484 * The object must be locked. No side effects. 485 * This routine may not block. 486 * This is a critical path routine 487 */ 488 489 vm_page_t 490 vm_page_lookup(object, pindex) 491 register vm_object_t object; 492 register vm_pindex_t pindex; 493 { 494 register vm_page_t m; 495 register struct vm_page **bucket; 496 int generation; 497 498 /* 499 * Search the hash table for this object/offset pair 500 */ 501 502 retry: 503 generation = vm_page_bucket_generation; 504 bucket = &vm_page_buckets[vm_page_hash(object, pindex)]; 505 for (m = *bucket; m != NULL; m = m->hnext) { 506 if ((m->object == object) && (m->pindex == pindex)) { 507 if (vm_page_bucket_generation != generation) 508 goto retry; 509 return (m); 510 } 511 } 512 if (vm_page_bucket_generation != generation) 513 goto retry; 514 return (NULL); 515 } 516 517 /* 518 * vm_page_rename: 519 * 520 * Move the given memory entry from its 521 * current object to the specified target object/offset. 522 * 523 * The object must be locked. 524 * This routine may not block. 525 * 526 * Note: this routine will raise itself to splvm(), the caller need not. 527 * 528 * Note: swap associated with the page must be invalidated by the move. We 529 * have to do this for several reasons: (1) we aren't freeing the 530 * page, (2) we are dirtying the page, (3) the VM system is probably 531 * moving the page from object A to B, and will then later move 532 * the backing store from A to B and we can't have a conflict. 533 * 534 * Note: we *always* dirty the page. It is necessary both for the 535 * fact that we moved it, and because we may be invalidating 536 * swap. If the page is on the cache, we have to deactivate it 537 * or vm_page_dirty() will panic. Dirty pages are not allowed 538 * on the cache. 539 */ 540 541 void 542 vm_page_rename(m, new_object, new_pindex) 543 register vm_page_t m; 544 register vm_object_t new_object; 545 vm_pindex_t new_pindex; 546 { 547 int s; 548 549 s = splvm(); 550 vm_page_remove(m); 551 vm_page_insert(m, new_object, new_pindex); 552 if (m->queue - m->pc == PQ_CACHE) 553 vm_page_deactivate(m); 554 vm_page_dirty(m); 555 splx(s); 556 } 557 558 /* 559 * vm_page_unqueue_nowakeup: 560 * 561 * vm_page_unqueue() without any wakeup 562 * 563 * This routine must be called at splhigh(). 564 * This routine may not block. 565 */ 566 567 void 568 vm_page_unqueue_nowakeup(m) 569 vm_page_t m; 570 { 571 int queue = m->queue; 572 struct vpgqueues *pq; 573 if (queue != PQ_NONE) { 574 pq = &vm_page_queues[queue]; 575 m->queue = PQ_NONE; 576 TAILQ_REMOVE(&pq->pl, m, pageq); 577 (*pq->cnt)--; 578 pq->lcnt--; 579 } 580 } 581 582 /* 583 * vm_page_unqueue: 584 * 585 * Remove a page from its queue. 586 * 587 * This routine must be called at splhigh(). 588 * This routine may not block. 
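 *
 * Unlike vm_page_unqueue_nowakeup(), removing a page from one of the
 * PQ_CACHE queues here wakes the pagedaemon when vm_paging_needed()
 * reports a shortage.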
589 */ 590 591 void 592 vm_page_unqueue(m) 593 vm_page_t m; 594 { 595 int queue = m->queue; 596 struct vpgqueues *pq; 597 if (queue != PQ_NONE) { 598 m->queue = PQ_NONE; 599 pq = &vm_page_queues[queue]; 600 TAILQ_REMOVE(&pq->pl, m, pageq); 601 (*pq->cnt)--; 602 pq->lcnt--; 603 if ((queue - m->pc) == PQ_CACHE) { 604 if (vm_paging_needed()) 605 pagedaemon_wakeup(); 606 } 607 } 608 } 609 610 #if PQ_L2_SIZE > 1 611 612 /* 613 * vm_page_list_find: 614 * 615 * Find a page on the specified queue with color optimization. 616 * 617 * The page coloring optimization attempts to locate a page 618 * that does not overload other nearby pages in the object in 619 * the cpu's L1 or L2 caches. We need this optmization because 620 * cpu caches tend to be physical caches, while object spaces tend 621 * to be virtual. 622 * 623 * This routine must be called at splvm(). 624 * This routine may not block. 625 * 626 * This routine may only be called from the vm_page_list_find() macro 627 * in vm_page.h 628 */ 629 vm_page_t 630 _vm_page_list_find(basequeue, index) 631 int basequeue, index; 632 { 633 int i; 634 vm_page_t m = NULL; 635 struct vpgqueues *pq; 636 637 pq = &vm_page_queues[basequeue]; 638 639 /* 640 * Note that for the first loop, index+i and index-i wind up at the 641 * same place. Even though this is not totally optimal, we've already 642 * blown it by missing the cache case so we do not care. 643 */ 644 645 for(i = PQ_L2_SIZE / 2; i > 0; --i) { 646 if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl)) != NULL) 647 break; 648 649 if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl)) != NULL) 650 break; 651 } 652 return(m); 653 } 654 655 #endif 656 657 /* 658 * vm_page_select_cache: 659 * 660 * Find a page on the cache queue with color optimization. As pages 661 * might be found, but not applicable, they are deactivated. This 662 * keeps us from using potentially busy cached pages. 663 * 664 * This routine must be called at splvm(). 665 * This routine may not block. 666 */ 667 vm_page_t 668 vm_page_select_cache(object, pindex) 669 vm_object_t object; 670 vm_pindex_t pindex; 671 { 672 vm_page_t m; 673 674 while (TRUE) { 675 m = vm_page_list_find( 676 PQ_CACHE, 677 (pindex + object->pg_color) & PQ_L2_MASK, 678 FALSE 679 ); 680 if (m && ((m->flags & PG_BUSY) || m->busy || 681 m->hold_count || m->wire_count)) { 682 vm_page_deactivate(m); 683 continue; 684 } 685 return m; 686 } 687 } 688 689 /* 690 * vm_page_select_free: 691 * 692 * Find a free or zero page, with specified preference. We attempt to 693 * inline the nominal case and fall back to _vm_page_select_free() 694 * otherwise. 695 * 696 * This routine must be called at splvm(). 697 * This routine may not block. 698 */ 699 700 static __inline vm_page_t 701 vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zero) 702 { 703 vm_page_t m; 704 705 m = vm_page_list_find( 706 PQ_FREE, 707 (pindex + object->pg_color) & PQ_L2_MASK, 708 prefer_zero 709 ); 710 return(m); 711 } 712 713 /* 714 * vm_page_alloc: 715 * 716 * Allocate and return a memory cell associated 717 * with this VM object/offset pair. 718 * 719 * page_req classes: 720 * VM_ALLOC_NORMAL normal process request 721 * VM_ALLOC_SYSTEM system *really* needs a page 722 * VM_ALLOC_INTERRUPT interrupt time request 723 * VM_ALLOC_ZERO zero page 724 * 725 * Object must be locked. 726 * This routine may not block. 727 * 728 * Additional special handling is required when called from an 729 * interrupt (VM_ALLOC_INTERRUPT). 
We are not allowed to mess with 730 * the page cache in this case. 731 */ 732 733 vm_page_t 734 vm_page_alloc(object, pindex, page_req) 735 vm_object_t object; 736 vm_pindex_t pindex; 737 int page_req; 738 { 739 register vm_page_t m = NULL; 740 int s; 741 742 KASSERT(!vm_page_lookup(object, pindex), 743 ("vm_page_alloc: page already allocated")); 744 745 /* 746 * The pager is allowed to eat deeper into the free page list. 747 */ 748 749 if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) { 750 page_req = VM_ALLOC_SYSTEM; 751 }; 752 753 s = splvm(); 754 755 loop: 756 if (cnt.v_free_count > cnt.v_free_reserved) { 757 /* 758 * Allocate from the free queue if there are plenty of pages 759 * in it. 760 */ 761 if (page_req == VM_ALLOC_ZERO) 762 m = vm_page_select_free(object, pindex, TRUE); 763 else 764 m = vm_page_select_free(object, pindex, FALSE); 765 } else if ( 766 (page_req == VM_ALLOC_SYSTEM && 767 cnt.v_cache_count == 0 && 768 cnt.v_free_count > cnt.v_interrupt_free_min) || 769 (page_req == VM_ALLOC_INTERRUPT && cnt.v_free_count > 0) 770 ) { 771 /* 772 * Interrupt or system, dig deeper into the free list. 773 */ 774 m = vm_page_select_free(object, pindex, FALSE); 775 } else if (page_req != VM_ALLOC_INTERRUPT) { 776 /* 777 * Allocateable from cache (non-interrupt only). On success, 778 * we must free the page and try again, thus ensuring that 779 * cnt.v_*_free_min counters are replenished. 780 */ 781 m = vm_page_select_cache(object, pindex); 782 if (m == NULL) { 783 splx(s); 784 #if defined(DIAGNOSTIC) 785 if (cnt.v_cache_count > 0) 786 printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count); 787 #endif 788 vm_pageout_deficit++; 789 pagedaemon_wakeup(); 790 return (NULL); 791 } 792 KASSERT(m->dirty == 0, ("Found dirty cache page %p", m)); 793 vm_page_busy(m); 794 vm_page_protect(m, VM_PROT_NONE); 795 vm_page_free(m); 796 goto loop; 797 } else { 798 /* 799 * Not allocateable from cache from interrupt, give up. 800 */ 801 splx(s); 802 vm_pageout_deficit++; 803 pagedaemon_wakeup(); 804 return (NULL); 805 } 806 807 /* 808 * At this point we had better have found a good page. 809 */ 810 811 KASSERT( 812 m != NULL, 813 ("vm_page_alloc(): missing page on free queue\n") 814 ); 815 816 /* 817 * Remove from free queue 818 */ 819 820 { 821 struct vpgqueues *pq = &vm_page_queues[m->queue]; 822 823 TAILQ_REMOVE(&pq->pl, m, pageq); 824 (*pq->cnt)--; 825 pq->lcnt--; 826 } 827 828 /* 829 * Initialize structure. Only the PG_ZERO flag is inherited. 830 */ 831 832 if (m->flags & PG_ZERO) { 833 vm_page_zero_count--; 834 m->flags = PG_ZERO | PG_BUSY; 835 } else { 836 m->flags = PG_BUSY; 837 } 838 m->wire_count = 0; 839 m->hold_count = 0; 840 m->act_count = 0; 841 m->busy = 0; 842 m->valid = 0; 843 KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m)); 844 m->queue = PQ_NONE; 845 846 /* 847 * vm_page_insert() is safe prior to the splx(). Note also that 848 * inserting a page here does not insert it into the pmap (which 849 * could cause us to block allocating memory). We cannot block 850 * anywhere. 851 */ 852 853 vm_page_insert(m, object, pindex); 854 855 /* 856 * Don't wakeup too often - wakeup the pageout daemon when 857 * we would be nearly out of memory. 
858 */ 859 if (vm_paging_needed() || cnt.v_free_count < cnt.v_pageout_free_min) 860 pagedaemon_wakeup(); 861 862 splx(s); 863 864 return (m); 865 } 866 867 /* 868 * vm_wait: (also see VM_WAIT macro) 869 * 870 * Block until free pages are available for allocation 871 */ 872 873 void 874 vm_wait() 875 { 876 int s; 877 878 s = splvm(); 879 if (curproc == pageproc) { 880 vm_pageout_pages_needed = 1; 881 tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0); 882 } else { 883 if (!vm_pages_needed) { 884 vm_pages_needed++; 885 wakeup(&vm_pages_needed); 886 } 887 tsleep(&cnt.v_free_count, PVM, "vmwait", 0); 888 } 889 splx(s); 890 } 891 892 /* 893 * vm_await: (also see VM_AWAIT macro) 894 * 895 * asleep on an event that will signal when free pages are available 896 * for allocation. 897 */ 898 899 void 900 vm_await() 901 { 902 int s; 903 904 s = splvm(); 905 if (curproc == pageproc) { 906 vm_pageout_pages_needed = 1; 907 asleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0); 908 } else { 909 if (!vm_pages_needed) { 910 vm_pages_needed++; 911 wakeup(&vm_pages_needed); 912 } 913 asleep(&cnt.v_free_count, PVM, "vmwait", 0); 914 } 915 splx(s); 916 } 917 918 #if 0 919 /* 920 * vm_page_sleep: 921 * 922 * Block until page is no longer busy. 923 */ 924 925 int 926 vm_page_sleep(vm_page_t m, char *msg, char *busy) { 927 int slept = 0; 928 if ((busy && *busy) || (m->flags & PG_BUSY)) { 929 int s; 930 s = splvm(); 931 if ((busy && *busy) || (m->flags & PG_BUSY)) { 932 vm_page_flag_set(m, PG_WANTED); 933 tsleep(m, PVM, msg, 0); 934 slept = 1; 935 } 936 splx(s); 937 } 938 return slept; 939 } 940 941 #endif 942 943 #if 0 944 945 /* 946 * vm_page_asleep: 947 * 948 * Similar to vm_page_sleep(), but does not block. Returns 0 if 949 * the page is not busy, or 1 if the page is busy. 950 * 951 * This routine has the side effect of calling asleep() if the page 952 * was busy (1 returned). 953 */ 954 955 int 956 vm_page_asleep(vm_page_t m, char *msg, char *busy) { 957 int slept = 0; 958 if ((busy && *busy) || (m->flags & PG_BUSY)) { 959 int s; 960 s = splvm(); 961 if ((busy && *busy) || (m->flags & PG_BUSY)) { 962 vm_page_flag_set(m, PG_WANTED); 963 asleep(m, PVM, msg, 0); 964 slept = 1; 965 } 966 splx(s); 967 } 968 return slept; 969 } 970 971 #endif 972 973 /* 974 * vm_page_activate: 975 * 976 * Put the specified page on the active list (if appropriate). 977 * Ensure that act_count is at least ACT_INIT but do not otherwise 978 * mess with it. 979 * 980 * The page queues must be locked. 981 * This routine may not block. 982 */ 983 void 984 vm_page_activate(m) 985 register vm_page_t m; 986 { 987 int s; 988 989 s = splvm(); 990 if (m->queue != PQ_ACTIVE) { 991 if ((m->queue - m->pc) == PQ_CACHE) 992 cnt.v_reactivated++; 993 994 vm_page_unqueue(m); 995 996 if (m->wire_count == 0) { 997 m->queue = PQ_ACTIVE; 998 vm_page_queues[PQ_ACTIVE].lcnt++; 999 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq); 1000 if (m->act_count < ACT_INIT) 1001 m->act_count = ACT_INIT; 1002 cnt.v_active_count++; 1003 } 1004 } else { 1005 if (m->act_count < ACT_INIT) 1006 m->act_count = ACT_INIT; 1007 } 1008 1009 splx(s); 1010 } 1011 1012 /* 1013 * vm_page_free_wakeup: 1014 * 1015 * Helper routine for vm_page_free_toq() and vm_page_cache(). This 1016 * routine is called when a page has been added to the cache or free 1017 * queues. 1018 * 1019 * This routine may not block. 
1020 * This routine must be called at splvm() 1021 */ 1022 static __inline void 1023 vm_page_free_wakeup() 1024 { 1025 /* 1026 * if pageout daemon needs pages, then tell it that there are 1027 * some free. 1028 */ 1029 if (vm_pageout_pages_needed) { 1030 wakeup(&vm_pageout_pages_needed); 1031 vm_pageout_pages_needed = 0; 1032 } 1033 /* 1034 * wakeup processes that are waiting on memory if we hit a 1035 * high water mark. And wakeup scheduler process if we have 1036 * lots of memory. this process will swapin processes. 1037 */ 1038 if (vm_pages_needed && vm_page_count_min()) { 1039 wakeup(&cnt.v_free_count); 1040 vm_pages_needed = 0; 1041 } 1042 } 1043 1044 /* 1045 * vm_page_free_toq: 1046 * 1047 * Returns the given page to the PQ_FREE list, 1048 * disassociating it with any VM object. 1049 * 1050 * Object and page must be locked prior to entry. 1051 * This routine may not block. 1052 */ 1053 1054 void 1055 vm_page_free_toq(vm_page_t m) 1056 { 1057 int s; 1058 struct vpgqueues *pq; 1059 vm_object_t object = m->object; 1060 1061 s = splvm(); 1062 1063 cnt.v_tfree++; 1064 1065 #if !defined(MAX_PERF) 1066 if (m->busy || ((m->queue - m->pc) == PQ_FREE) || 1067 (m->hold_count != 0)) { 1068 printf( 1069 "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n", 1070 (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0, 1071 m->hold_count); 1072 if ((m->queue - m->pc) == PQ_FREE) 1073 panic("vm_page_free: freeing free page"); 1074 else 1075 panic("vm_page_free: freeing busy page"); 1076 } 1077 #endif 1078 1079 /* 1080 * unqueue, then remove page. Note that we cannot destroy 1081 * the page here because we do not want to call the pager's 1082 * callback routine until after we've put the page on the 1083 * appropriate free queue. 1084 */ 1085 1086 vm_page_unqueue_nowakeup(m); 1087 vm_page_remove(m); 1088 1089 /* 1090 * If fictitious remove object association and 1091 * return, otherwise delay object association removal. 1092 */ 1093 1094 if ((m->flags & PG_FICTITIOUS) != 0) { 1095 splx(s); 1096 return; 1097 } 1098 1099 m->valid = 0; 1100 vm_page_undirty(m); 1101 1102 if (m->wire_count != 0) { 1103 #if !defined(MAX_PERF) 1104 if (m->wire_count > 1) { 1105 panic("vm_page_free: invalid wire count (%d), pindex: 0x%lx", 1106 m->wire_count, (long)m->pindex); 1107 } 1108 #endif 1109 printf("vm_page_free: freeing wired page\n"); 1110 m->wire_count = 0; 1111 cnt.v_wire_count--; 1112 } 1113 1114 /* 1115 * If we've exhausted the object's resident pages we want to free 1116 * it up. 1117 */ 1118 1119 if (object && 1120 (object->type == OBJT_VNODE) && 1121 ((object->flags & OBJ_DEAD) == 0) 1122 ) { 1123 struct vnode *vp = (struct vnode *)object->handle; 1124 1125 if (vp && VSHOULDFREE(vp)) { 1126 if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) { 1127 TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist); 1128 vp->v_flag |= VTBFREE; 1129 } 1130 } 1131 } 1132 1133 #ifdef __alpha__ 1134 pmap_page_is_free(m); 1135 #endif 1136 1137 m->queue = PQ_FREE + m->pc; 1138 pq = &vm_page_queues[m->queue]; 1139 pq->lcnt++; 1140 ++(*pq->cnt); 1141 1142 /* 1143 * Put zero'd pages on the end ( where we look for zero'd pages 1144 * first ) and non-zerod pages at the head. 
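	 * vm_page_zero_count tracks how many PG_ZERO pages are on the
	 * free queues; allocations that prefer a pre-zeroed page look at
	 * the tail end first, so grouping PG_ZERO pages there lets them
	 * be found without scanning past non-zeroed pages.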
1145 */ 1146 1147 if (m->flags & PG_ZERO) { 1148 TAILQ_INSERT_TAIL(&pq->pl, m, pageq); 1149 ++vm_page_zero_count; 1150 } else { 1151 TAILQ_INSERT_HEAD(&pq->pl, m, pageq); 1152 } 1153 1154 vm_page_free_wakeup(); 1155 1156 splx(s); 1157 } 1158 1159 /* 1160 * vm_page_wire: 1161 * 1162 * Mark this page as wired down by yet 1163 * another map, removing it from paging queues 1164 * as necessary. 1165 * 1166 * The page queues must be locked. 1167 * This routine may not block. 1168 */ 1169 void 1170 vm_page_wire(m) 1171 register vm_page_t m; 1172 { 1173 int s; 1174 1175 s = splvm(); 1176 if (m->wire_count == 0) { 1177 vm_page_unqueue(m); 1178 cnt.v_wire_count++; 1179 } 1180 m->wire_count++; 1181 splx(s); 1182 vm_page_flag_set(m, PG_MAPPED); 1183 } 1184 1185 /* 1186 * vm_page_unwire: 1187 * 1188 * Release one wiring of this page, potentially 1189 * enabling it to be paged again. 1190 * 1191 * Many pages placed on the inactive queue should actually go 1192 * into the cache, but it is difficult to figure out which. What 1193 * we do instead, if the inactive target is well met, is to put 1194 * clean pages at the head of the inactive queue instead of the tail. 1195 * This will cause them to be moved to the cache more quickly and 1196 * if not actively re-referenced, freed more quickly. If we just 1197 * stick these pages at the end of the inactive queue, heavy filesystem 1198 * meta-data accesses can cause an unnecessary paging load on memory bound 1199 * processes. This optimization causes one-time-use metadata to be 1200 * reused more quickly. 1201 * 1202 * A number of routines use vm_page_unwire() to guarentee that the page 1203 * will go into either the inactive or active queues, and will NEVER 1204 * be placed in the cache - for example, just after dirtying a page. 1205 * dirty pages in the cache are not allowed. 1206 * 1207 * The page queues must be locked. 1208 * This routine may not block. 1209 */ 1210 void 1211 vm_page_unwire(m, activate) 1212 register vm_page_t m; 1213 int activate; 1214 { 1215 int s; 1216 1217 s = splvm(); 1218 1219 if (m->wire_count > 0) { 1220 m->wire_count--; 1221 if (m->wire_count == 0) { 1222 cnt.v_wire_count--; 1223 if (activate) { 1224 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq); 1225 m->queue = PQ_ACTIVE; 1226 vm_page_queues[PQ_ACTIVE].lcnt++; 1227 cnt.v_active_count++; 1228 } else { 1229 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); 1230 m->queue = PQ_INACTIVE; 1231 vm_page_queues[PQ_INACTIVE].lcnt++; 1232 cnt.v_inactive_count++; 1233 } 1234 } 1235 } else { 1236 #if !defined(MAX_PERF) 1237 panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count); 1238 #endif 1239 } 1240 splx(s); 1241 } 1242 1243 1244 /* 1245 * Move the specified page to the inactive queue. If the page has 1246 * any associated swap, the swap is deallocated. 1247 * 1248 * Normally athead is 0 resulting in LRU operation. athead is set 1249 * to 1 if we want this page to be 'as if it were placed in the cache', 1250 * except without unmapping it from the process address space. 1251 * 1252 * This routine may not block. 1253 */ 1254 static __inline void 1255 _vm_page_deactivate(vm_page_t m, int athead) 1256 { 1257 int s; 1258 1259 /* 1260 * Ignore if already inactive. 
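	 * Wired pages are likewise left where they are (checked below),
	 * and a page coming off one of the PQ_CACHE queues is counted as
	 * a reactivation.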
1261 */ 1262 if (m->queue == PQ_INACTIVE) 1263 return; 1264 1265 s = splvm(); 1266 if (m->wire_count == 0) { 1267 if ((m->queue - m->pc) == PQ_CACHE) 1268 cnt.v_reactivated++; 1269 vm_page_unqueue(m); 1270 if (athead) 1271 TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); 1272 else 1273 TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq); 1274 m->queue = PQ_INACTIVE; 1275 vm_page_queues[PQ_INACTIVE].lcnt++; 1276 cnt.v_inactive_count++; 1277 } 1278 splx(s); 1279 } 1280 1281 void 1282 vm_page_deactivate(vm_page_t m) 1283 { 1284 _vm_page_deactivate(m, 0); 1285 } 1286 1287 /* 1288 * vm_page_cache 1289 * 1290 * Put the specified page onto the page cache queue (if appropriate). 1291 * 1292 * This routine may not block. 1293 */ 1294 void 1295 vm_page_cache(m) 1296 register vm_page_t m; 1297 { 1298 int s; 1299 1300 #if !defined(MAX_PERF) 1301 if ((m->flags & PG_BUSY) || m->busy || m->wire_count) { 1302 printf("vm_page_cache: attempting to cache busy page\n"); 1303 return; 1304 } 1305 #endif 1306 if ((m->queue - m->pc) == PQ_CACHE) 1307 return; 1308 1309 /* 1310 * Remove all pmaps and indicate that the page is not 1311 * writeable or mapped. 1312 */ 1313 1314 vm_page_protect(m, VM_PROT_NONE); 1315 #if !defined(MAX_PERF) 1316 if (m->dirty != 0) { 1317 panic("vm_page_cache: caching a dirty page, pindex: %ld", 1318 (long)m->pindex); 1319 } 1320 #endif 1321 s = splvm(); 1322 vm_page_unqueue_nowakeup(m); 1323 m->queue = PQ_CACHE + m->pc; 1324 vm_page_queues[m->queue].lcnt++; 1325 TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq); 1326 cnt.v_cache_count++; 1327 vm_page_free_wakeup(); 1328 splx(s); 1329 } 1330 1331 /* 1332 * vm_page_dontneed 1333 * 1334 * Cache, deactivate, or do nothing as appropriate. This routine 1335 * is typically used by madvise() MADV_DONTNEED. 1336 * 1337 * Generally speaking we want to move the page into the cache so 1338 * it gets reused quickly. However, this can result in a silly syndrome 1339 * due to the page recycling too quickly. Small objects will not be 1340 * fully cached. On the otherhand, if we move the page to the inactive 1341 * queue we wind up with a problem whereby very large objects 1342 * unnecessarily blow away our inactive and cache queues. 1343 * 1344 * The solution is to move the pages based on a fixed weighting. We 1345 * either leave them alone, deactivate them, or move them to the cache, 1346 * where moving them to the cache has the highest weighting. 1347 * By forcing some pages into other queues we eventually force the 1348 * system to balance the queues, potentially recovering other unrelated 1349 * space from active. The idea is to not force this to happen too 1350 * often. 1351 */ 1352 1353 void 1354 vm_page_dontneed(m) 1355 vm_page_t m; 1356 { 1357 static int dnweight; 1358 int dnw; 1359 int head; 1360 1361 dnw = ++dnweight; 1362 1363 /* 1364 * occassionally leave the page alone 1365 */ 1366 1367 if ((dnw & 0x01F0) == 0 || 1368 m->queue == PQ_INACTIVE || 1369 m->queue - m->pc == PQ_CACHE 1370 ) { 1371 if (m->act_count >= ACT_INIT) 1372 --m->act_count; 1373 return; 1374 } 1375 1376 if (m->dirty == 0) 1377 vm_page_test_dirty(m); 1378 1379 if (m->dirty || (dnw & 0x0070) == 0) { 1380 /* 1381 * Deactivate the page 3 times out of 32. 1382 */ 1383 head = 0; 1384 } else { 1385 /* 1386 * Cache the page 28 times out of every 32. Note that 1387 * the page is deactivated instead of cached, but placed 1388 * at the head of the queue instead of the tail. 
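		 *
		 * The weighting falls out of the bit tests on dnw above:
		 * (dnw & 0x01F0) == 0 leaves the page alone 1 time in 32,
		 * (dnw & 0x0070) == 0 deactivates it in 3 more of the 32
		 * cases (as does a dirty page), and the remaining 28 cases
		 * fall through to here.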
1389 */ 1390 head = 1; 1391 } 1392 _vm_page_deactivate(m, head); 1393 } 1394 1395 /* 1396 * Grab a page, waiting until we are waken up due to the page 1397 * changing state. We keep on waiting, if the page continues 1398 * to be in the object. If the page doesn't exist, allocate it. 1399 * 1400 * This routine may block. 1401 */ 1402 vm_page_t 1403 vm_page_grab(object, pindex, allocflags) 1404 vm_object_t object; 1405 vm_pindex_t pindex; 1406 int allocflags; 1407 { 1408 1409 vm_page_t m; 1410 int s, generation; 1411 1412 retrylookup: 1413 if ((m = vm_page_lookup(object, pindex)) != NULL) { 1414 if (m->busy || (m->flags & PG_BUSY)) { 1415 generation = object->generation; 1416 1417 s = splvm(); 1418 while ((object->generation == generation) && 1419 (m->busy || (m->flags & PG_BUSY))) { 1420 vm_page_flag_set(m, PG_WANTED | PG_REFERENCED); 1421 tsleep(m, PVM, "pgrbwt", 0); 1422 if ((allocflags & VM_ALLOC_RETRY) == 0) { 1423 splx(s); 1424 return NULL; 1425 } 1426 } 1427 splx(s); 1428 goto retrylookup; 1429 } else { 1430 vm_page_busy(m); 1431 return m; 1432 } 1433 } 1434 1435 m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY); 1436 if (m == NULL) { 1437 VM_WAIT; 1438 if ((allocflags & VM_ALLOC_RETRY) == 0) 1439 return NULL; 1440 goto retrylookup; 1441 } 1442 1443 return m; 1444 } 1445 1446 /* 1447 * Mapping function for valid bits or for dirty bits in 1448 * a page. May not block. 1449 * 1450 * Inputs are required to range within a page. 1451 */ 1452 1453 __inline int 1454 vm_page_bits(int base, int size) 1455 { 1456 int first_bit; 1457 int last_bit; 1458 1459 KASSERT( 1460 base + size <= PAGE_SIZE, 1461 ("vm_page_bits: illegal base/size %d/%d", base, size) 1462 ); 1463 1464 if (size == 0) /* handle degenerate case */ 1465 return(0); 1466 1467 first_bit = base >> DEV_BSHIFT; 1468 last_bit = (base + size - 1) >> DEV_BSHIFT; 1469 1470 return ((2 << last_bit) - (1 << first_bit)); 1471 } 1472 1473 /* 1474 * vm_page_set_validclean: 1475 * 1476 * Sets portions of a page valid and clean. The arguments are expected 1477 * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive 1478 * of any partial chunks touched by the range. The invalid portion of 1479 * such chunks will be zero'd. 1480 * 1481 * This routine may not block. 1482 * 1483 * (base + size) must be less then or equal to PAGE_SIZE. 1484 */ 1485 void 1486 vm_page_set_validclean(m, base, size) 1487 vm_page_t m; 1488 int base; 1489 int size; 1490 { 1491 int pagebits; 1492 int frag; 1493 int endoff; 1494 1495 if (size == 0) /* handle degenerate case */ 1496 return; 1497 1498 /* 1499 * If the base is not DEV_BSIZE aligned and the valid 1500 * bit is clear, we have to zero out a portion of the 1501 * first block. 1502 */ 1503 1504 if ((frag = base & ~(DEV_BSIZE - 1)) != base && 1505 (m->valid & (1 << (base >> DEV_BSHIFT))) == 0 1506 ) { 1507 pmap_zero_page_area( 1508 VM_PAGE_TO_PHYS(m), 1509 frag, 1510 base - frag 1511 ); 1512 } 1513 1514 /* 1515 * If the ending offset is not DEV_BSIZE aligned and the 1516 * valid bit is clear, we have to zero out a portion of 1517 * the last block. 1518 */ 1519 1520 endoff = base + size; 1521 1522 if ((frag = endoff & ~(DEV_BSIZE - 1)) != endoff && 1523 (m->valid & (1 << (endoff >> DEV_BSHIFT))) == 0 1524 ) { 1525 pmap_zero_page_area( 1526 VM_PAGE_TO_PHYS(m), 1527 endoff, 1528 DEV_BSIZE - (endoff & (DEV_BSIZE - 1)) 1529 ); 1530 } 1531 1532 /* 1533 * Set valid, clear dirty bits. If validating the entire 1534 * page we can safely clear the pmap modify bit. 
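	 *
	 * vm_page_bits() converts the byte range into a mask of
	 * DEV_BSIZE chunks; e.g. with 4K pages and a DEV_BSIZE of 512,
	 * base 0 and size PAGE_SIZE yields 0xff, covering all eight
	 * chunks, which is the case where pmap_clear_modify() is safe.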
1535 */ 1536 1537 pagebits = vm_page_bits(base, size); 1538 m->valid |= pagebits; 1539 m->dirty &= ~pagebits; 1540 1541 if (base == 0 && size == PAGE_SIZE) 1542 pmap_clear_modify(VM_PAGE_TO_PHYS(m)); 1543 } 1544 1545 #if 0 1546 1547 void 1548 vm_page_set_dirty(m, base, size) 1549 vm_page_t m; 1550 int base; 1551 int size; 1552 { 1553 m->dirty |= vm_page_bits(base, size); 1554 } 1555 1556 #endif 1557 1558 void 1559 vm_page_clear_dirty(m, base, size) 1560 vm_page_t m; 1561 int base; 1562 int size; 1563 { 1564 m->dirty &= ~vm_page_bits(base, size); 1565 } 1566 1567 /* 1568 * vm_page_set_invalid: 1569 * 1570 * Invalidates DEV_BSIZE'd chunks within a page. Both the 1571 * valid and dirty bits for the effected areas are cleared. 1572 * 1573 * May not block. 1574 */ 1575 void 1576 vm_page_set_invalid(m, base, size) 1577 vm_page_t m; 1578 int base; 1579 int size; 1580 { 1581 int bits; 1582 1583 bits = vm_page_bits(base, size); 1584 m->valid &= ~bits; 1585 m->dirty &= ~bits; 1586 m->object->generation++; 1587 } 1588 1589 /* 1590 * vm_page_zero_invalid() 1591 * 1592 * The kernel assumes that the invalid portions of a page contain 1593 * garbage, but such pages can be mapped into memory by user code. 1594 * When this occurs, we must zero out the non-valid portions of the 1595 * page so user code sees what it expects. 1596 * 1597 * Pages are most often semi-valid when the end of a file is mapped 1598 * into memory and the file's size is not page aligned. 1599 */ 1600 1601 void 1602 vm_page_zero_invalid(vm_page_t m, boolean_t setvalid) 1603 { 1604 int b; 1605 int i; 1606 1607 /* 1608 * Scan the valid bits looking for invalid sections that 1609 * must be zerod. Invalid sub-DEV_BSIZE'd areas ( where the 1610 * valid bit may be set ) have already been zerod by 1611 * vm_page_set_validclean(). 1612 */ 1613 1614 for (b = i = 0; i <= PAGE_SIZE / DEV_BSIZE; ++i) { 1615 if (i == (PAGE_SIZE / DEV_BSIZE) || 1616 (m->valid & (1 << i)) 1617 ) { 1618 if (i > b) { 1619 pmap_zero_page_area( 1620 VM_PAGE_TO_PHYS(m), 1621 b << DEV_BSHIFT, 1622 (i - b) << DEV_BSHIFT 1623 ); 1624 } 1625 b = i + 1; 1626 } 1627 } 1628 1629 /* 1630 * setvalid is TRUE when we can safely set the zero'd areas 1631 * as being valid. We can do this if there are no cache consistancy 1632 * issues. e.g. it is ok to do with UFS, but not ok to do with NFS. 1633 */ 1634 1635 if (setvalid) 1636 m->valid = VM_PAGE_BITS_ALL; 1637 } 1638 1639 /* 1640 * vm_page_is_valid: 1641 * 1642 * Is (partial) page valid? Note that the case where size == 0 1643 * will return FALSE in the degenerate case where the page is 1644 * entirely invalid, and TRUE otherwise. 1645 * 1646 * May not block. 1647 */ 1648 1649 int 1650 vm_page_is_valid(m, base, size) 1651 vm_page_t m; 1652 int base; 1653 int size; 1654 { 1655 int bits = vm_page_bits(base, size); 1656 1657 if (m->valid && ((m->valid & bits) == bits)) 1658 return 1; 1659 else 1660 return 0; 1661 } 1662 1663 /* 1664 * update dirty bits from pmap/mmu. May not block. 1665 */ 1666 1667 void 1668 vm_page_test_dirty(m) 1669 vm_page_t m; 1670 { 1671 if ((m->dirty != VM_PAGE_BITS_ALL) && 1672 pmap_is_modified(VM_PAGE_TO_PHYS(m))) { 1673 vm_page_dirty(m); 1674 } 1675 } 1676 1677 /* 1678 * This interface is for merging with malloc() someday. 1679 * Even if we never implement compaction so that contiguous allocation 1680 * works after initialization time, malloc()'s data structures are good 1681 * for statistics and for allocations of less than a page. 
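 *
 * contigmalloc1() below scans vm_page_array for a physically
 * contiguous run of free or cached pages that satisfies the caller's
 * low/high/alignment/boundary constraints.  If no run is found it
 * tries to launder and cache pages from the inactive and active
 * queues and retries; on success the run is wired and mapped into
 * the supplied kernel map.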
1682 */ 1683 void * 1684 contigmalloc1(size, type, flags, low, high, alignment, boundary, map) 1685 unsigned long size; /* should be size_t here and for malloc() */ 1686 struct malloc_type *type; 1687 int flags; 1688 unsigned long low; 1689 unsigned long high; 1690 unsigned long alignment; 1691 unsigned long boundary; 1692 vm_map_t map; 1693 { 1694 int i, s, start; 1695 vm_offset_t addr, phys, tmp_addr; 1696 int pass; 1697 vm_page_t pga = vm_page_array; 1698 1699 size = round_page(size); 1700 #if !defined(MAX_PERF) 1701 if (size == 0) 1702 panic("contigmalloc1: size must not be 0"); 1703 if ((alignment & (alignment - 1)) != 0) 1704 panic("contigmalloc1: alignment must be a power of 2"); 1705 if ((boundary & (boundary - 1)) != 0) 1706 panic("contigmalloc1: boundary must be a power of 2"); 1707 #endif 1708 1709 start = 0; 1710 for (pass = 0; pass <= 1; pass++) { 1711 s = splvm(); 1712 again: 1713 /* 1714 * Find first page in array that is free, within range, aligned, and 1715 * such that the boundary won't be crossed. 1716 */ 1717 for (i = start; i < cnt.v_page_count; i++) { 1718 int pqtype; 1719 phys = VM_PAGE_TO_PHYS(&pga[i]); 1720 pqtype = pga[i].queue - pga[i].pc; 1721 if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) && 1722 (phys >= low) && (phys < high) && 1723 ((phys & (alignment - 1)) == 0) && 1724 (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)) 1725 break; 1726 } 1727 1728 /* 1729 * If the above failed or we will exceed the upper bound, fail. 1730 */ 1731 if ((i == cnt.v_page_count) || 1732 ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) { 1733 vm_page_t m, next; 1734 1735 again1: 1736 for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl); 1737 m != NULL; 1738 m = next) { 1739 1740 KASSERT(m->queue == PQ_INACTIVE, 1741 ("contigmalloc1: page %p is not PQ_INACTIVE", m)); 1742 1743 next = TAILQ_NEXT(m, pageq); 1744 if (vm_page_sleep_busy(m, TRUE, "vpctw0")) 1745 goto again1; 1746 vm_page_test_dirty(m); 1747 if (m->dirty) { 1748 if (m->object->type == OBJT_VNODE) { 1749 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1750 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC); 1751 VOP_UNLOCK(m->object->handle, 0, curproc); 1752 goto again1; 1753 } else if (m->object->type == OBJT_SWAP || 1754 m->object->type == OBJT_DEFAULT) { 1755 vm_pageout_flush(&m, 1, 0); 1756 goto again1; 1757 } 1758 } 1759 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 1760 vm_page_cache(m); 1761 } 1762 1763 for (m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl); 1764 m != NULL; 1765 m = next) { 1766 1767 KASSERT(m->queue == PQ_ACTIVE, 1768 ("contigmalloc1: page %p is not PQ_ACTIVE", m)); 1769 1770 next = TAILQ_NEXT(m, pageq); 1771 if (vm_page_sleep_busy(m, TRUE, "vpctw1")) 1772 goto again1; 1773 vm_page_test_dirty(m); 1774 if (m->dirty) { 1775 if (m->object->type == OBJT_VNODE) { 1776 vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc); 1777 vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC); 1778 VOP_UNLOCK(m->object->handle, 0, curproc); 1779 goto again1; 1780 } else if (m->object->type == OBJT_SWAP || 1781 m->object->type == OBJT_DEFAULT) { 1782 vm_pageout_flush(&m, 1, 0); 1783 goto again1; 1784 } 1785 } 1786 if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0)) 1787 vm_page_cache(m); 1788 } 1789 1790 splx(s); 1791 continue; 1792 } 1793 start = i; 1794 1795 /* 1796 * Check successive pages for contiguous and free. 
1797 */ 1798 for (i = start + 1; i < (start + size / PAGE_SIZE); i++) { 1799 int pqtype; 1800 pqtype = pga[i].queue - pga[i].pc; 1801 if ((VM_PAGE_TO_PHYS(&pga[i]) != 1802 (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) || 1803 ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) { 1804 start++; 1805 goto again; 1806 } 1807 } 1808 1809 for (i = start; i < (start + size / PAGE_SIZE); i++) { 1810 int pqtype; 1811 vm_page_t m = &pga[i]; 1812 1813 pqtype = m->queue - m->pc; 1814 if (pqtype == PQ_CACHE) { 1815 vm_page_busy(m); 1816 vm_page_free(m); 1817 } 1818 1819 TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq); 1820 vm_page_queues[m->queue].lcnt--; 1821 cnt.v_free_count--; 1822 m->valid = VM_PAGE_BITS_ALL; 1823 m->flags = 0; 1824 KASSERT(m->dirty == 0, ("contigmalloc1: page %p was dirty", m)); 1825 m->wire_count = 0; 1826 m->busy = 0; 1827 m->queue = PQ_NONE; 1828 m->object = NULL; 1829 vm_page_wire(m); 1830 } 1831 1832 /* 1833 * We've found a contiguous chunk that meets are requirements. 1834 * Allocate kernel VM, unfree and assign the physical pages to it and 1835 * return kernel VM pointer. 1836 */ 1837 tmp_addr = addr = kmem_alloc_pageable(map, size); 1838 if (addr == 0) { 1839 /* 1840 * XXX We almost never run out of kernel virtual 1841 * space, so we don't make the allocated memory 1842 * above available. 1843 */ 1844 splx(s); 1845 return (NULL); 1846 } 1847 1848 for (i = start; i < (start + size / PAGE_SIZE); i++) { 1849 vm_page_t m = &pga[i]; 1850 vm_page_insert(m, kernel_object, 1851 OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS)); 1852 pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m)); 1853 tmp_addr += PAGE_SIZE; 1854 } 1855 1856 splx(s); 1857 return ((void *)addr); 1858 } 1859 return NULL; 1860 } 1861 1862 void * 1863 contigmalloc(size, type, flags, low, high, alignment, boundary) 1864 unsigned long size; /* should be size_t here and for malloc() */ 1865 struct malloc_type *type; 1866 int flags; 1867 unsigned long low; 1868 unsigned long high; 1869 unsigned long alignment; 1870 unsigned long boundary; 1871 { 1872 return contigmalloc1(size, type, flags, low, high, alignment, boundary, 1873 kernel_map); 1874 } 1875 1876 void 1877 contigfree(addr, size, type) 1878 void *addr; 1879 unsigned long size; 1880 struct malloc_type *type; 1881 { 1882 kmem_free(kernel_map, (vm_offset_t)addr, size); 1883 } 1884 1885 vm_offset_t 1886 vm_page_alloc_contig(size, low, high, alignment) 1887 vm_offset_t size; 1888 vm_offset_t low; 1889 vm_offset_t high; 1890 vm_offset_t alignment; 1891 { 1892 return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high, 1893 alignment, 0ul, kernel_map)); 1894 } 1895 1896 #include "opt_ddb.h" 1897 #ifdef DDB 1898 #include <sys/kernel.h> 1899 1900 #include <ddb/ddb.h> 1901 1902 DB_SHOW_COMMAND(page, vm_page_print_page_info) 1903 { 1904 db_printf("cnt.v_free_count: %d\n", cnt.v_free_count); 1905 db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count); 1906 db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count); 1907 db_printf("cnt.v_active_count: %d\n", cnt.v_active_count); 1908 db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count); 1909 db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved); 1910 db_printf("cnt.v_free_min: %d\n", cnt.v_free_min); 1911 db_printf("cnt.v_free_target: %d\n", cnt.v_free_target); 1912 db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min); 1913 db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target); 1914 } 1915 1916 DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info) 1917 { 1918 int i; 1919 db_printf("PQ_FREE:"); 1920 
for(i=0;i<PQ_L2_SIZE;i++) { 1921 db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt); 1922 } 1923 db_printf("\n"); 1924 1925 db_printf("PQ_CACHE:"); 1926 for(i=0;i<PQ_L2_SIZE;i++) { 1927 db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt); 1928 } 1929 db_printf("\n"); 1930 1931 db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n", 1932 vm_page_queues[PQ_ACTIVE].lcnt, 1933 vm_page_queues[PQ_INACTIVE].lcnt); 1934 } 1935 #endif /* DDB */ 1936
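
/*
 * Example usage (illustrative only, not compiled): a typical caller
 * grabs a busied page for an object/offset pair, fills it if it is
 * not fully valid, and then wakes any waiters:
 *
 *	m = vm_page_grab(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 *	if (m->valid != VM_PAGE_BITS_ALL) {
 *		... have the backing pager fill in the page ...
 *	}
 *	vm_page_wakeup(m);
 *
 * The exact flags and error handling vary by caller; see the
 * individual pagers and the file system code for real examples.
 */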