/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.115 1999/01/08 17:31:27 eivind Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 *	Resident memory management module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

static void vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
			vm_pindex_t pindex, int prefqueue));
static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));

/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue=0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

static void
vm_page_queue_init(void) {
	int i;

	vm_page_queues[PQ_NONE].pl = NULL;
	vm_page_queues[PQ_NONE].cnt = &no_queue;
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
	}
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
	}
	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
	}
	for(i=0;i<PQ_COUNT;i++) {
		if (vm_page_queues[i].pl) {
			TAILQ_INIT(vm_page_queues[i].pl);
		} else if (i != 0) {
			panic("vm_page_queue_init: queue %d is null", i);
		}
		vm_page_queues[i].lcnt = &pqcnt[i];
	}
}

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}

/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	vm_page_queue_init();

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = round_page(vaddr);
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	vaddr = round_page(vaddr);
	bzero((caddr_t) mapped, vaddr - mapped);

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */
	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */
	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->phys_addr = pa;
			m->flags = 0;
			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
			m->queue = m->pc + PQ_FREE;
			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
			++(*vm_page_queues[m->queue].lcnt);
			pa += PAGE_SIZE;
		}
	}
	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 *	This routine may not block.
 */
static __inline int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((((uintptr_t) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
}

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object and object list.
 *
 *	The pagetables are not updated but will presumably fault the page
 *	in if necessary, or if a kernel page the caller will at some point
 *	enter the page into the kernel's pmap.  We are not allowed to block
 *	here so we *can't* do this anyway.
 *
 *	The object and page must be locked, and must be splhigh.
 *	This routine may not block.
 */

void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

	if (m->object != NULL)
		panic("vm_page_insert: already inserted");

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->object->page_hint = m;
	m->object->generation++;

	if (m->wire_count)
		object->wire_count++;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count++;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 *	This routine may not block.
 *
 *	I do not think the underlying pmap entry (if any) is removed here.
 */

void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;
	vm_object_t object;

	if (m->object == NULL)
		return;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) == 0) {
		panic("vm_page_remove: page not busy");
	}
#endif

	vm_page_flag_clear(m, PG_BUSY);
	if (m->flags & PG_WANTED) {
		vm_page_flag_clear(m, PG_WANTED);
		wakeup(m);
	}

	object = m->object;
	if (object->page_hint == m)
		object->page_hint = NULL;

	if (m->wire_count)
		object->wire_count--;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count--;

	/*
	 * Remove from the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	object->resident_page_count--;
	object->generation++;

	m->object = NULL;
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 *	This routine may not block.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int generation;

	/*
	 * Search the hash table for this object/offset pair
	 */

	if (object->page_hint && (object->page_hint->pindex == pindex) &&
	    (object->page_hint->object == object))
		return object->page_hint;

retry:
	generation = vm_page_bucket_generation;
	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			if (vm_page_bucket_generation != generation)
				goto retry;
			m->object->page_hint = m;
			return (m);
		}
	}
	if (vm_page_bucket_generation != generation)
		goto retry;
	return (NULL);
}

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 *	This routine may not block.
 *
 *	Note: this routine will raise itself to splvm(), the caller need not.
 */

void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue_nowakeup:
 *
 * 	vm_page_unqueue() without any wakeup
 *
 *	This routine must be called at splhigh().
 *	This routine may not block.
 */

void
vm_page_unqueue_nowakeup(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		m->queue = PQ_NONE;
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * vm_page_unqueue:
 *
 *	Remove a page from its queue.
 *
 *	This routine must be called at splhigh().
 *	This routine may not block.
 */

void
vm_page_unqueue(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		m->queue = PQ_NONE;
		pq = &vm_page_queues[queue];
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if ((cnt.v_cache_count + cnt.v_free_count) <
			    (cnt.v_free_reserved + cnt.v_cache_min))
				pagedaemon_wakeup();
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 *	vm_page_list_find:
 *
 *	Find a page on the specified queue with color optimization.
 *
 *	This routine must be called at splvm().
 *	This routine may not block.
 */
vm_page_t
vm_page_list_find(basequeue, index)
	int basequeue, index;
{
#if PQ_L2_SIZE > 1

	int i,j;
	vm_page_t m;
	int hindex;
	struct vpgqueues *pq;

	pq = &vm_page_queues[basequeue];

	m = TAILQ_FIRST(pq[index].pl);
	if (m)
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) > 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	m = TAILQ_FIRST(pq[hindex].pl);
	if (m)
		return m;

	return NULL;
#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 *	vm_page_select:
 *
 *	Find a page on the specified queue with color optimization.
 *
 *	This routine must be called at splvm().
 *	This routine may not block.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
	vm_object_t object;
	vm_pindex_t pindex;
	int basequeue;
{

#if PQ_L2_SIZE > 1
	int index;
	index = (pindex + object->pg_color) & PQ_L2_MASK;
	return vm_page_list_find(basequeue, index);

#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 *	vm_page_select_cache:
 *
 *	Find a page on the cache queue with color optimization.  As pages
 *	might be found, but not applicable, they are deactivated.  This
 *	keeps us from using potentially busy cached pages.
 *
 *	This routine must be called at splvm().
 *	This routine may not block.
 */
vm_page_t
vm_page_select_cache(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	vm_page_t m;

	while (TRUE) {
#if PQ_L2_SIZE > 1
		int index;
		index = (pindex + object->pg_color) & PQ_L2_MASK;
		m = vm_page_list_find(PQ_CACHE, index);

#else
		m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
#endif
		if (m && ((m->flags & PG_BUSY) || m->busy ||
		    m->hold_count || m->wire_count)) {
			vm_page_deactivate(m);
			continue;
		}
		return m;
	}
}

/*
 *	vm_page_select_free:
 *
 *	Find a free or zero page, with specified preference.
 *
 *	This routine must be called at splvm().
 *	This routine may not block.
 */

static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
	vm_object_t object;
	vm_pindex_t pindex;
	int prefqueue;
{
#if PQ_L2_SIZE > 1
	int i,j;
	int index, hindex;
#endif
	vm_page_t m, mh;
	int oqueuediff;
	struct vpgqueues *pq;

	if (prefqueue == PQ_ZERO)
		oqueuediff = PQ_FREE - PQ_ZERO;
	else
		oqueuediff = PQ_ZERO - PQ_FREE;

	if (mh = object->page_hint) {
		if (mh->pindex == (pindex - 1)) {
			if ((mh->flags & PG_FICTITIOUS) == 0) {
				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
				    (mh >= &vm_page_array[0])) {
					int queue;
					m = mh + 1;
					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
						queue = m->queue - m->pc;
						if (queue == PQ_FREE || queue == PQ_ZERO) {
							return m;
						}
					}
				}
			}
		}
	}

	pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

	index = (pindex + object->pg_color) & PQ_L2_MASK;

	if (m = TAILQ_FIRST(pq[index].pl))
		return m;
	if (m = TAILQ_FIRST(pq[index + oqueuediff].pl))
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) >= 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	if (m = TAILQ_FIRST(pq[hindex].pl))
		return m;
	if (m = TAILQ_FIRST(pq[hindex+oqueuediff].pl))
		return m;

#else
	if (m = TAILQ_FIRST(pq[0].pl))
		return m;
	else
		return TAILQ_FIRST(pq[oqueuediff].pl);
#endif

	return NULL;
}

/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 *	This routine may not block.
 *
 *	Additional special handling is required when called from an
 *	interrupt (VM_ALLOC_INTERRUPT).  We are not allowed to mess with
 *	the page cache in this case.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	struct vpgqueues *pq;
	vm_object_t oldobject;
	int queue, qtype;
	int s;

	KASSERT(!vm_page_lookup(object, pindex),
	    ("vm_page_alloc: page already allocated"));

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	};

	s = splvm();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
			KASSERT(m != NULL, ("vm_page_alloc(NORMAL): missing page on free queue\n"));
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_ZERO);
			KASSERT(m != NULL, ("vm_page_alloc(ZERO): missing page on free queue\n"));
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
			KASSERT(m != NULL, ("vm_page_alloc(SYSTEM): missing page on free queue\n"));
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
			KASSERT(m != NULL, ("vm_page_alloc(INTERRUPT): missing page on free queue\n"));
		} else {
			splx(s);
			vm_pageout_deficit++;
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		m = NULL;
#if !defined(MAX_PERF)
		panic("vm_page_alloc: invalid allocation class");
#endif
	}

	queue = m->queue;
	qtype = queue - m->pc;
	if (qtype == PQ_ZERO)
		vm_page_zero_count--;
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(pq->pl, m, pageq);
	(*pq->cnt)--;
	(*pq->lcnt)--;
	oldobject = NULL;
	if (qtype == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (qtype == PQ_CACHE) {
		oldobject = m->object;
		vm_page_busy(m);
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/*
	 * vm_page_insert() is safe prior to the splx().  Note also that
	 * inserting a page here does not insert it into the pmap (which
	 * could cause us to block allocating memory).  We cannot block
	 * anywhere.
	 */

	vm_page_insert(m, object, pindex);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_free_reserved + cnt.v_cache_min)) ||
	    (cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	if ((qtype == PQ_CACHE) &&
	    ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
	    oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}
	splx(s);

	return (m);
}

/*
 *	vm_wait:	(also see VM_WAIT macro)
 *
 *	Block until free pages are available for allocation
 */

void
vm_wait()
{
	int s;

	s = splvm();
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed++;
			wakeup(&vm_pages_needed);
		}
		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
	}
	splx(s);
}

/*
 *	vm_page_sleep:
 *
 *	Block until page is no longer busy.
 */

int
vm_page_sleep(vm_page_t m, char *msg, char *busy) {
	int slept = 0;
	if ((busy && *busy) || (m->flags & PG_BUSY)) {
		int s;
		s = splvm();
		if ((busy && *busy) || (m->flags & PG_BUSY)) {
			vm_page_flag_set(m, PG_WANTED);
			tsleep(m, PVM, msg, 0);
			slept = 1;
		}
		splx(s);
	}
	return slept;
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 *	This routine may not block.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;

	s = splvm();
	if (m->queue != PQ_ACTIVE) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;

		vm_page_unqueue(m);

		if (m->wire_count == 0) {
			m->queue = PQ_ACTIVE;
			++(*vm_page_queues[PQ_ACTIVE].lcnt);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			cnt.v_active_count++;
		}
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}

	splx(s);
}

/*
 * helper routine for vm_page_free and vm_page_free_zero.
 *
 * This routine may not block.
 */
static int
vm_page_freechk_and_unqueue(m)
	vm_page_t m;
{
	vm_object_t oldobject;

	oldobject = m->object;

#if !defined(MAX_PERF)
	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
	    (m->hold_count != 0)) {
		printf(
		"vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n",
		    (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0,
		    m->hold_count);
		if ((m->queue - m->pc) == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}
#endif

	vm_page_unqueue_nowakeup(m);
	vm_page_remove(m);

	if ((m->flags & PG_FICTITIOUS) != 0) {
		return 0;
	}

	m->valid = 0;

	if (m->wire_count != 0) {
#if !defined(MAX_PERF)
		if (m->wire_count > 1) {
			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
			    m->wire_count, m->pindex);
		}
#endif
		printf("vm_page_free: freeing wired page\n");
		m->wire_count = 0;
		if (m->object)
			m->object->wire_count--;
		cnt.v_wire_count--;
	}

	if (oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}

#ifdef __alpha__
	pmap_page_is_free(m);
#endif

	return 1;
}

/*
 * helper routine for vm_page_free and vm_page_free_zero.
 *
 * This routine may not block.
 */
static __inline void
vm_page_free_wakeup()
{

	/*
	 * if pageout daemon needs pages, then tell it that there are
	 * some free.
	 */
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * wakeup processes that are waiting on memory if we hit a
	 * high water mark. And wakeup scheduler process if we have
	 * lots of memory. this process will swapin processes.
	 */
	if (vm_pages_needed &&
	    ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
		wakeup(&cnt.v_free_count);
		vm_pages_needed = 0;
	}
}

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page must be locked prior to entry.
 *	This routine may not block.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_FREE + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);
	/*
	 * If the pageout process is grabbing the page, it is likely
	 * that the page is NOT in the cache.  It is more likely that
	 * the page will be partially in the cache if it is being
	 * explicitly freed.
	 */
	if (curproc == pageproc) {
		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
	} else {
		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	}

	vm_page_free_wakeup();
	splx(s);
}

void
vm_page_free_zero(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_ZERO + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);

	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	++vm_page_zero_count;
	vm_page_free_wakeup();
	splx(s);
}

/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 *	This routine may not block.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();
	if (m->wire_count == 0) {
		vm_page_unqueue(m);
		cnt.v_wire_count++;
		if (m->object)
			m->object->wire_count++;
	}
	m->wire_count++;
	splx(s);
	(*vm_page_queues[PQ_NONE].lcnt)++;
	vm_page_flag_set(m, PG_MAPPED);
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 *	This routine may not block.
 */
void
vm_page_unwire(m, activate)
	register vm_page_t m;
	int activate;
{
	int s;

	s = splvm();

	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			if (m->object)
				m->object->wire_count--;
			cnt.v_wire_count--;
			if (activate) {
				TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
				m->queue = PQ_ACTIVE;
				(*vm_page_queues[PQ_ACTIVE].lcnt)++;
				cnt.v_active_count++;
			} else {
				TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
				m->queue = PQ_INACTIVE;
				(*vm_page_queues[PQ_INACTIVE].lcnt)++;
				cnt.v_inactive_count++;
			}
		}
	} else {
#if !defined(MAX_PERF)
		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
	}
	splx(s);
}


/*
 * Move the specified page to the inactive queue.
 *
 * This routine may not block.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int s;

	/*
	 * Ignore if already inactive.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	s = splvm();
	if (m->wire_count == 0) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		++(*vm_page_queues[PQ_INACTIVE].lcnt);
		cnt.v_inactive_count++;
	}
	splx(s);
}

/*
 * vm_page_cache
 *
 *	Put the specified page onto the page cache queue (if appropriate).
 *	This routine may not block.
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
#endif
	if ((m->queue - m->pc) == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
#endif
	s = splvm();
	vm_page_unqueue_nowakeup(m);
	m->queue = PQ_CACHE + m->pc;
	(*vm_page_queues[m->queue].lcnt)++;
	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
	cnt.v_cache_count++;
	m->object->cache_count++;
	vm_page_free_wakeup();
	splx(s);
}

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting, if the page continues
 * to be in the object.  If the page doesn't exist, allocate it.
 *
 * This routine may block.
 */
vm_page_t
vm_page_grab(object, pindex, allocflags)
	vm_object_t object;
	vm_pindex_t pindex;
	int allocflags;
{

	vm_page_t m;
	int s, generation;

retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if (m->busy || (m->flags & PG_BUSY)) {
			generation = object->generation;

			s = splvm();
			while ((object->generation == generation) &&
			    (m->busy || (m->flags & PG_BUSY))) {
				vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
				tsleep(m, PVM, "pgrbwt", 0);
				if ((allocflags & VM_ALLOC_RETRY) == 0) {
					splx(s);
					return NULL;
				}
			}
			splx(s);
			goto retrylookup;
		} else {
			vm_page_busy(m);
			return m;
		}
	}

	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
	if (m == NULL) {
		VM_WAIT;
		if ((allocflags & VM_ALLOC_RETRY) == 0)
			return NULL;
		goto retrylookup;
	}

	return m;
}

/*
 * mapping function for valid bits or for dirty bits in
 * a page.  May not block.
 */
__inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;

	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base &= PAGE_MASK;
	if (size > PAGE_SIZE - base) {
		size = PAGE_SIZE - base;
	}

	base = base / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}

/*
 * set a page valid and clean.  May not block.
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if( base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid.  May not block.
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
	m->object->generation++;
}

/*
 * is (partial) page valid?  May not block.
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

/*
 * update dirty bits from pmap/mmu.  May not block.
 */

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
	vm_map_t map;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	int pass;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
#if !defined(MAX_PERF)
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
#endif

	start = 0;
	for (pass = 0; pass <= 1; pass++) {
		s = splvm();
again:
		/*
		 * Find first page in array that is free, within range, aligned, and
		 * such that the boundary won't be crossed.
		 */
		for (i = start; i < cnt.v_page_count; i++) {
			int pqtype;
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			vm_page_t m, next;

again1:
			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
				m != NULL;
				m = next) {

				if (m->queue != PQ_INACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw0", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
						m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			for (m = TAILQ_FIRST(&vm_page_queue_active);
				m != NULL;
				m = next) {

				if (m->queue != PQ_ACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw1", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
						m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			splx(s);
			continue;
		}
		start = i;

		/*
		 * Check successive pages for contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			vm_page_t m = &pga[i];

			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				vm_page_busy(m);
				vm_page_free(m);
			}

			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			(*vm_page_queues[m->queue].lcnt)--;
			cnt.v_free_count--;
			m->valid = VM_PAGE_BITS_ALL;
			m->flags = 0;
			m->dirty = 0;
			m->wire_count = 0;
			m->busy = 0;
			m->queue = PQ_NONE;
			m->object = NULL;
			vm_page_wire(m);
		}

		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to it and
		 * return kernel VM pointer.
		 */
		tmp_addr = addr = kmem_alloc_pageable(map, size);
		if (addr == 0) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			splx(s);
			return (NULL);
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
			    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
			tmp_addr += PAGE_SIZE;
		}

		splx(s);
		return ((void *)addr);
	}
	return NULL;
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
	    kernel_map);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
	    alignment, 0ul, kernel_map));
}

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
	int i;
	db_printf("PQ_FREE:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_CACHE:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ZERO:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
	    *vm_page_queues[PQ_ACTIVE].lcnt,
	    *vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */
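
/*
 * Illustrative usage sketch, not part of the original file and kept under
 * "#if 0" so it is never compiled: it shows how a caller might obtain a
 * page for an object with vm_page_alloc(), wait via VM_WAIT when memory is
 * short, and return the page with vm_page_free(), following the locking and
 * busy-page comments above.  The function name and the "obj"/"idx"
 * parameters are placeholders for this sketch, not kernel interfaces.
 */
#if 0
static void
example_page_cycle(vm_object_t obj, vm_pindex_t idx)
{
	vm_page_t m;

	/* Retry until the allocator can hand us a page for (obj, idx). */
	while ((m = vm_page_alloc(obj, idx, VM_ALLOC_NORMAL)) == NULL)
		VM_WAIT;

	/*
	 * The page comes back busy (PG_BUSY) and invalid (valid == 0);
	 * the caller would typically fill it via VM_PAGE_TO_PHYS(m).
	 */

	/*
	 * Freeing expects the page to still be busy: vm_page_free()
	 * clears PG_BUSY, unqueues the page and returns it to PQ_FREE.
	 */
	vm_page_free(m);
}
#endif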