/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.96 1998/03/08 06:27:30 dyson Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Resident memory management module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

static void	vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
			vm_pindex_t pindex, int prefqueue));
static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue=0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

static void
vm_page_queue_init(void) {
	int i;

	vm_page_queues[PQ_NONE].pl = NULL;
	vm_page_queues[PQ_NONE].cnt = &no_queue;
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
	}
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
	}
	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	for(i=0;i<PQ_L2_SIZE;i++) {
		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
	}
	for(i=0;i<PQ_COUNT;i++) {
		if (vm_page_queues[i].pl) {
			TAILQ_INIT(vm_page_queues[i].pl);
		} else if (i != 0) {
			panic("vm_page_queue_init: queue %d is null", i);
		}
		vm_page_queues[i].lcnt = &pqcnt[i];
	}
}
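
/*
 * A note on the layout set up above: PQ_FREE, PQ_ZERO and PQ_CACHE are
 * each the base of PQ_L2_SIZE per-color queues, while PQ_ACTIVE and
 * PQ_INACTIVE are single queues.  A page's color is computed from its
 * physical address when it is first queued in vm_page_startup(), e.g.
 *
 *	m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
 *	m->queue = PQ_FREE + m->pc;
 *
 * so (m->queue - m->pc) recovers the base queue, which is how the rest
 * of this file tests what kind of queue a page is currently on.
 */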

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));

/*
 * vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}

/*
 * vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	vm_page_queue_init();

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the next power of 2 greater than the number of physical pages in
	 * the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = vaddr;
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	bzero((caddr_t) mapped, vaddr - mapped);
	mapped = vaddr;

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * Validate these zone addresses.
	 */

	new_start = start + (vaddr - mapped);
	pmap_map(mapped, start, new_start, VM_PROT_READ | VM_PROT_WRITE);
	bzero((caddr_t) mapped, (vaddr - mapped));
	start = round_page(new_start);

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */

	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */

	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->phys_addr = pa;
			m->flags = 0;
			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
			m->queue = PQ_FREE + m->pc;
			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
			++(*vm_page_queues[m->queue].lcnt);
			pa += PAGE_SIZE;
		}
	}
	return (mapped);
}

/*
 * vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 */
static inline int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((((unsigned) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
}

/*
 * vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be splhigh.
 */

void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

#if !defined(MAX_PERF)
	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");
#endif

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	m->flags |= PG_TABLED;
	m->object->page_hint = m;
	m->object->generation++;

	if (m->wire_count)
		object->wire_count++;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count++;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 * vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;
	vm_object_t object;

	if (!(m->flags & PG_TABLED))
		return;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) == 0) {
		panic("vm_page_remove: page not busy");
	}
#endif

	m->flags &= ~PG_BUSY;
	if (m->flags & PG_WANTED) {
		m->flags &= ~PG_WANTED;
		wakeup(m);
	}

	object = m->object;
	if (object->page_hint == m)
		object->page_hint = NULL;

	if (m->wire_count)
		object->wire_count--;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count--;

	/*
	 * Remove from the object_object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	object->resident_page_count--;
	object->generation++;
	m->object = NULL;

	m->flags &= ~PG_TABLED;
}

/*
 * vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int generation;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	if (object->page_hint && (object->page_hint->pindex == pindex) &&
	    (object->page_hint->object == object))
		return object->page_hint;

retry:
	generation = vm_page_bucket_generation;
	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m,hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			if (vm_page_bucket_generation != generation)
				goto retry;
			m->object->page_hint = m;
			return (m);
		}
	}
	if (vm_page_bucket_generation != generation)
		goto retry;
	return (NULL);
}
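
/*
 * A note on the lookup above: vm_page_insert() and vm_page_remove() bump
 * vm_page_bucket_generation each time they modify a hash bucket, and
 * vm_page_lookup() samples the counter before walking a bucket and checks
 * it again before trusting the result.  The retry idiom is essentially:
 *
 *	do {
 *		generation = vm_page_bucket_generation;
 *		(walk the bucket looking for the object/pindex pair)
 *	} while (generation != vm_page_bucket_generation);
 *
 * so a lookup that races with an insert or remove simply restarts rather
 * than returning a page found on a half-updated list.
 */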

/*
 * vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue without any wakeup
 */
void
vm_page_unqueue_nowakeup(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		m->queue = PQ_NONE;
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * vm_page_unqueue must be called at splhigh();
 */
void
vm_page_unqueue(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;
	if (queue != PQ_NONE) {
		m->queue = PQ_NONE;
		pq = &vm_page_queues[queue];
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if ((cnt.v_cache_count + cnt.v_free_count) <
			    (cnt.v_free_reserved + cnt.v_cache_min))
				pagedaemon_wakeup();
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_list_find(basequeue, index)
	int basequeue, index;
{
#if PQ_L2_SIZE > 1

	int i,j;
	vm_page_t m;
	int hindex;
	struct vpgqueues *pq;

	pq = &vm_page_queues[basequeue];

	m = TAILQ_FIRST(pq[index].pl);
	if (m)
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) > 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	m = TAILQ_FIRST(pq[hindex].pl);
	if (m)
		return m;

	return NULL;
#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
	vm_object_t object;
	vm_pindex_t pindex;
	int basequeue;
{

#if PQ_L2_SIZE > 1
	int index;
	index = (pindex + object->pg_color) & PQ_L2_MASK;
	return vm_page_list_find(basequeue, index);

#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}
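
/*
 * The probe order used by vm_page_list_find() above, roughly: the exact
 * color "index" is tried first, then pairs of colors symmetric around it
 * (index + ij and index - ij, wrapping modulo PQ_L2_SIZE).  Because the
 * outer loop walks j over the L1 colors, distances that are a multiple of
 * PQ_L1_SIZE (and therefore preserve the L1 cache color) are probed
 * before the remaining distances, and the color half way around,
 * index + PQ_L2_SIZE / 2, is tried last.  vm_page_select_free() below
 * performs the same walk but also checks the companion free/zero queue
 * at each step.
 */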

/*
 * Find a page on the cache queue with color optimization.  Pages that
 * are found but are not usable are deactivated.  This keeps us from
 * using potentially busy cached pages.
 */
vm_page_t
vm_page_select_cache(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	vm_page_t m;

	while (TRUE) {
#if PQ_L2_SIZE > 1
		int index;
		index = (pindex + object->pg_color) & PQ_L2_MASK;
		m = vm_page_list_find(PQ_CACHE, index);

#else
		m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
#endif
		if (m && ((m->flags & PG_BUSY) || m->busy ||
		    m->hold_count || m->wire_count)) {
			vm_page_deactivate(m);
			continue;
		}
		return m;
	}
}

/*
 * Find a free or zero page, with specified preference.
 */
static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
	vm_object_t object;
	vm_pindex_t pindex;
	int prefqueue;
{
#if PQ_L2_SIZE > 1
	int i,j;
	int index, hindex;
#endif
	vm_page_t m, mh;
	int oqueuediff;
	struct vpgqueues *pq;

	if (prefqueue == PQ_ZERO)
		oqueuediff = PQ_FREE - PQ_ZERO;
	else
		oqueuediff = PQ_ZERO - PQ_FREE;

	if (mh = object->page_hint) {
		if (mh->pindex == (pindex - 1)) {
			if ((mh->flags & PG_FICTITIOUS) == 0) {
				if ((mh < &vm_page_array[cnt.v_page_count-1]) &&
				    (mh >= &vm_page_array[0])) {
					int queue;
					m = mh + 1;
					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
						queue = m->queue - m->pc;
						if (queue == PQ_FREE || queue == PQ_ZERO) {
							return m;
						}
					}
				}
			}
		}
	}

	pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

	index = (pindex + object->pg_color) & PQ_L2_MASK;

	if (m = TAILQ_FIRST(pq[index].pl))
		return m;
	if (m = TAILQ_FIRST(pq[index + oqueuediff].pl))
		return m;

	for(j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for(i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) >= 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if (m = TAILQ_FIRST(pq[hindex].pl))
				return m;
			if (m = TAILQ_FIRST(pq[hindex + oqueuediff].pl))
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	if (m = TAILQ_FIRST(pq[hindex].pl))
		return m;
	if (m = TAILQ_FIRST(pq[hindex+oqueuediff].pl))
		return m;

#else
	if (m = TAILQ_FIRST(pq[0].pl))
		return m;
	else
		return TAILQ_FIRST(pq[oqueuediff].pl);
#endif

	return NULL;
}

/*
 * vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	struct vpgqueues *pq;
	vm_object_t oldobject;
	int queue, qtype;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	};

	s = splvm();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_ZERO);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(ZERO): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
#endif
		} else {
			splx(s);
			vm_pageout_deficit++;
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		m = NULL;
#if !defined(MAX_PERF)
		panic("vm_page_alloc: invalid allocation class");
#endif
	}

	queue = m->queue;
	qtype = queue - m->pc;
	if (qtype == PQ_ZERO)
		vm_page_zero_count--;
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(pq->pl, m, pageq);
	(*pq->cnt)--;
	(*pq->lcnt)--;
	oldobject = NULL;
	if (qtype == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (qtype == PQ_CACHE) {
		oldobject = m->object;
		m->flags |= PG_BUSY;
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;

	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	/*
	 * Don't wakeup too often - wakeup the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_free_reserved + cnt.v_cache_min)) ||
	    (cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	if ((qtype == PQ_CACHE) &&
	    ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
	    oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VFREE|VTBFREE|VDOOMED)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}
	splx(s);

	return (m);
}

void
vm_wait()
{
	int s;

	s = splvm();
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed++;
			wakeup(&vm_pages_needed);
		}
		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
	}
	splx(s);
}

int
vm_page_sleep(vm_page_t m, char *msg, char *busy) {
	vm_object_t object = m->object;
	int slept = 0;
	if ((busy && *busy) || (m->flags & PG_BUSY)) {
		int s;
		s = splvm();
		if ((busy && *busy) || (m->flags & PG_BUSY)) {
			m->flags |= PG_WANTED;
			tsleep(m, PVM, msg, 0);
			slept = 1;
		}
		splx(s);
	}
	return slept;
}
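
/*
 * A typical caller of the allocation and wait primitives above, when it
 * is allowed to sleep, loops in the following way (illustrative only;
 * vm_page_grab() later in this file implements the same pattern together
 * with busy-page handling):
 *
 *	while ((m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL)) == NULL)
 *		VM_WAIT;
 *
 * VM_WAIT, as used by vm_page_grab() below, blocks until the pageout
 * daemon has made memory available; callers that cannot sleep use
 * VM_ALLOC_INTERRUPT instead and must be prepared for a NULL return.
 */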

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static int
vm_page_freechk_and_unqueue(m)
	vm_page_t m;
{
	vm_object_t oldobject;

	oldobject = m->object;

#if !defined(MAX_PERF)
	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
	    (m->hold_count != 0)) {
		printf("vm_page_free: pindex(%ld), busy(%d), PG_BUSY(%d), hold(%d)\n",
		    m->pindex, m->busy,
		    (m->flags & PG_BUSY) ? 1 : 0, m->hold_count);
		if ((m->queue - m->pc) == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}
#endif

	vm_page_unqueue_nowakeup(m);
	vm_page_remove(m);

	if ((m->flags & PG_FICTITIOUS) != 0) {
		return 0;
	}

	m->valid = 0;

	if (m->wire_count != 0) {
#if !defined(MAX_PERF)
		if (m->wire_count > 1) {
			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
			    m->wire_count, m->pindex);
		}
#endif
		m->wire_count = 0;
		if (m->object)
			m->object->wire_count--;
		cnt.v_wire_count--;
	}

	if (oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VTBFREE|VDOOMED|VFREE)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}

	return 1;
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static __inline void
vm_page_free_wakeup()
{

	/*
	 * if the pageout daemon needs pages, then tell it that there are
	 * some free.
	 */
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * wakeup processes that are waiting on memory if we hit a
	 * high water mark.  And wakeup the scheduler process if we have
	 * lots of memory; this process will swap in processes.
	 */
	if (vm_pages_needed &&
	    ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
		wakeup(&cnt.v_free_count);
		vm_pages_needed = 0;
	}
}

/*
 * vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_FREE + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);
	/*
	 * If the pageout process is grabbing the page, it is likely
	 * that the page is NOT in the cache.  It is more likely that
	 * the page will be partially in the cache if it is being
	 * explicitly freed.
	 */
	if (curproc == pageproc) {
		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
	} else {
		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	}

	vm_page_free_wakeup();
	splx(s);
}

void
vm_page_free_zero(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_ZERO + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);

	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	++vm_page_zero_count;
	vm_page_free_wakeup();
	splx(s);
}

/*
 * vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splvm();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
		if (m->object)
			m->object->wire_count++;
	}
	(*vm_page_queues[PQ_NONE].lcnt)++;
	m->wire_count++;
	m->flags |= PG_MAPPED;
}

/*
 * vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();

	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			if (m->object)
				m->object->wire_count--;
			cnt.v_wire_count--;
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m->queue = PQ_ACTIVE;
			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
			cnt.v_active_count++;
		}
	} else {
#if !defined(MAX_PERF)
		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
	}
	splx(s);
}


/*
 * vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int s;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them.  Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	s = splvm();
	if (m->wire_count == 0) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		++(*vm_page_queues[PQ_INACTIVE].lcnt);
		cnt.v_inactive_count++;
	}
	splx(s);
}

/*
 * vm_page_cache
 *
 *	Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
#endif
	if ((m->queue - m->pc) == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
#endif
	s = splvm();
	vm_page_unqueue_nowakeup(m);
	m->queue = PQ_CACHE + m->pc;
	(*vm_page_queues[m->queue].lcnt)++;
	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
	cnt.v_cache_count++;
	m->object->cache_count++;
	vm_page_free_wakeup();
	splx(s);
}

/*
 * Grab a page, waiting until we are woken up due to the page
 * changing state.  We keep on waiting as long as the page continues
 * to be in the object.  If the page doesn't exist, allocate it.
 */
vm_page_t
vm_page_grab(object, pindex, allocflags)
	vm_object_t object;
	vm_pindex_t pindex;
	int allocflags;
{

	vm_page_t m;
	int s, generation;

retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if (m->busy || (m->flags & PG_BUSY)) {
			generation = object->generation;

			s = splvm();
			while ((object->generation == generation) &&
			    (m->busy || (m->flags & PG_BUSY))) {
				m->flags |= PG_WANTED | PG_REFERENCED;
				tsleep(m, PVM, "pgrbwt", 0);
				if ((allocflags & VM_ALLOC_RETRY) == 0) {
					splx(s);
					return NULL;
				}
			}
			splx(s);
			goto retrylookup;
		} else {
			m->flags |= PG_BUSY;
			return m;
		}
	}

	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
	if (m == NULL) {
		VM_WAIT;
		if ((allocflags & VM_ALLOC_RETRY) == 0)
			return NULL;
		goto retrylookup;
	}

	return m;
}

/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;

	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	if (size > PAGE_SIZE - base) {
		size = PAGE_SIZE - base;
	}

	base = (base % PAGE_SIZE) / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}

/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);
	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
	m->object->generation++;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}
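
/*
 * A worked example of the valid/dirty bit mapping above, assuming the
 * common values DEV_BSIZE == 512 and PAGE_SIZE == 4096 (so a page holds
 * eight DEV_BSIZE chunks):
 *
 *	vm_page_bits(0, PAGE_SIZE) == VM_PAGE_BITS_ALL
 *	vm_page_bits(0, 1024)      == 0x03	(chunks 0 and 1)
 *	vm_page_bits(512, 1024)    == 0x06	(chunks 1 and 2)
 *	vm_page_bits(512, 4096)    == 0xfe	(size is clipped to the
 *						 remainder of the page)
 *
 * The size is rounded up to a DEV_BSIZE multiple, the chunk mask is taken
 * from vm_page_dev_bsize_chunks[], and the mask is shifted to the chunk
 * at which "base" starts.
 */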

/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
	vm_map_t map;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	int pass;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
#if !defined(MAX_PERF)
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
#endif

	start = 0;
	for (pass = 0; pass <= 1; pass++) {
		s = splvm();
again:
		/*
		 * Find first page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
		for (i = start; i < cnt.v_page_count; i++) {
			int pqtype;
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			vm_page_t m, next;

again1:
			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
			    m != NULL;
			    m = next) {

				if (m->queue != PQ_INACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw0", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
					    m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			for (m = TAILQ_FIRST(&vm_page_queue_active);
			    m != NULL;
			    m = next) {

				if (m->queue != PQ_ACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw1", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
					    m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			splx(s);
			continue;
		}
		start = i;

		/*
		 * Check that successive pages are contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			vm_page_t m = &pga[i];

			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				m->flags |= PG_BUSY;
				vm_page_free(m);
			}

			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			(*vm_page_queues[m->queue].lcnt)--;
			cnt.v_free_count--;
			m->valid = VM_PAGE_BITS_ALL;
			m->flags = 0;
			m->dirty = 0;
			m->wire_count = 0;
			m->busy = 0;
			m->queue = PQ_NONE;
			m->object = NULL;
			vm_page_wire(m);
		}

		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to
		 * it and return kernel VM pointer.
		 */
		tmp_addr = addr = kmem_alloc_pageable(map, size);
		if (addr == 0) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			splx(s);
			return (NULL);
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
			    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
			tmp_addr += PAGE_SIZE;
		}

		splx(s);
		return ((void *)addr);
	}
	return NULL;
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
	    kernel_map);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
	    alignment, 0ul, kernel_map));
}
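
/*
 * Example of how a driver might call contigmalloc() above; the numbers
 * are only illustrative (a buffer for an ISA DMA device, which has to
 * lie below 16MB of physical memory and must not cross a 64K boundary):
 *
 *	buf = contigmalloc(8192, M_DEVBUF, M_NOWAIT,
 *	    0ul, 16 * 1024 * 1024, PAGE_SIZE, 64 * 1024);
 *
 * The arguments after the malloc-style triple are the low and high
 * physical address bounds, the required alignment, and the boundary the
 * allocation may not cross.  contigmalloc() simply forwards to
 * contigmalloc1() with kernel_map; vm_page_alloc_contig() is the variant
 * that returns the kernel virtual address as a vm_offset_t.
 */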

#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
	int i;
	db_printf("PQ_FREE:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_CACHE:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ZERO:");
	for(i=0;i<PQ_L2_SIZE;i++) {
		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
	    *vm_page_queues[PQ_ACTIVE].lcnt,
	    *vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */