/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
 *	$Id: vm_page.c,v 1.106 1998/08/24 08:39:38 dfr Exp $
 */

/*
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Authors: Avadis Tevanian, Jr., Michael Wayne Young
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * Resident memory management module.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_prot.h>
#include <sys/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_extern.h>

static void	vm_page_queue_init __P((void));
static vm_page_t vm_page_select_free __P((vm_object_t object,
			vm_pindex_t pindex, int prefqueue));
static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));

/*
 * Associated with each page of user-allocatable memory is a
 * page structure.
 */

static struct pglist *vm_page_buckets;	/* Array of buckets */
static int vm_page_bucket_count;	/* How big is array? */
static int vm_page_hash_mask;		/* Mask for hash function */
static volatile int vm_page_bucket_generation;

struct pglist vm_page_queue_free[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_zero[PQ_L2_SIZE] = {0};
struct pglist vm_page_queue_active = {0};
struct pglist vm_page_queue_inactive = {0};
struct pglist vm_page_queue_cache[PQ_L2_SIZE] = {0};

static int no_queue = 0;

struct vpgqueues vm_page_queues[PQ_COUNT] = {0};
static int pqcnt[PQ_COUNT] = {0};

static void
vm_page_queue_init(void) {
	int i;

	vm_page_queues[PQ_NONE].pl = NULL;
	vm_page_queues[PQ_NONE].cnt = &no_queue;
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_FREE+i].pl = &vm_page_queue_free[i];
		vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
	}
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_ZERO+i].pl = &vm_page_queue_zero[i];
		vm_page_queues[PQ_ZERO+i].cnt = &cnt.v_free_count;
	}
	vm_page_queues[PQ_INACTIVE].pl = &vm_page_queue_inactive;
	vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;

	vm_page_queues[PQ_ACTIVE].pl = &vm_page_queue_active;
	vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
	for (i = 0; i < PQ_L2_SIZE; i++) {
		vm_page_queues[PQ_CACHE+i].pl = &vm_page_queue_cache[i];
		vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
	}
	for (i = 0; i < PQ_COUNT; i++) {
		if (vm_page_queues[i].pl) {
			TAILQ_INIT(vm_page_queues[i].pl);
		} else if (i != 0) {
			panic("vm_page_queue_init: queue %d is null", i);
		}
		vm_page_queues[i].lcnt = &pqcnt[i];
	}
}

vm_page_t vm_page_array = 0;
static int vm_page_array_size = 0;
long first_page = 0;
static long last_page;
static vm_size_t page_mask;
static int page_shift;
int vm_page_zero_count = 0;

/*
 * map of contiguous valid DEV_BSIZE chunks in a page
 * (this list is valid for page sizes up to 16*DEV_BSIZE)
 */
static u_short vm_page_dev_bsize_chunks[] = {
	0x0, 0x1, 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff,
	0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff
};

static __inline int vm_page_hash __P((vm_object_t object, vm_pindex_t pindex));
static int vm_page_freechk_and_unqueue __P((vm_page_t m));
static void vm_page_free_wakeup __P((void));
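/*
 * Illustrative note (not in the original source): entry n of the
 * vm_page_dev_bsize_chunks table above is simply a mask with the low
 * n bits set, i.e. vm_page_dev_bsize_chunks[n] == (1 << n) - 1.
 * vm_page_bits() further below shifts one of these masks into position
 * to describe which DEV_BSIZE-sized chunks of a page a byte range
 * covers; for instance vm_page_dev_bsize_chunks[3] == 0x7 stands for
 * three contiguous valid (or dirty) chunks.
 */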
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from cnt.v_page_size.
 */
void
vm_set_page_size()
{

	if (cnt.v_page_size == 0)
		cnt.v_page_size = DEFAULT_PAGE_SIZE;
	page_mask = cnt.v_page_size - 1;
	if ((page_mask & cnt.v_page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");
	for (page_shift = 0;; page_shift++)
		if ((1 << page_shift) == cnt.v_page_size)
			break;
}
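/*
 * Worked example (illustrative, not in the original source): with the
 * usual cnt.v_page_size of 4096, the loop above leaves page_mask at
 * 0xfff and page_shift at 12, since (1 << 12) == 4096.  A size that is
 * not a power of two trips the panic, because at least one set bit of
 * cnt.v_page_size would then also appear in page_mask.
 */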
/*
 *	vm_page_startup:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 */

vm_offset_t
vm_page_startup(starta, enda, vaddr)
	register vm_offset_t starta;
	vm_offset_t enda;
	register vm_offset_t vaddr;
{
	register vm_offset_t mapped;
	register vm_page_t m;
	register struct pglist *bucket;
	vm_size_t npages, page_range;
	register vm_offset_t new_start;
	int i;
	vm_offset_t pa;
	int nblocks;
	vm_offset_t first_managed_page;

	/* the biggest memory array is the second group of pages */
	vm_offset_t start;
	vm_offset_t biggestone, biggestsize;

	vm_offset_t total;

	total = 0;
	biggestsize = 0;
	biggestone = 0;
	nblocks = 0;
	vaddr = round_page(vaddr);

	for (i = 0; phys_avail[i + 1]; i += 2) {
		phys_avail[i] = round_page(phys_avail[i]);
		phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
	}

	for (i = 0; phys_avail[i + 1]; i += 2) {
		int size = phys_avail[i + 1] - phys_avail[i];

		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
		++nblocks;
		total += size;
	}

	start = phys_avail[biggestone];

	/*
	 * Initialize the queue headers for the free queue, the active queue
	 * and the inactive queue.
	 */

	vm_page_queue_init();

	/*
	 * Allocate (and initialize) the hash table buckets.
	 *
	 * The number of buckets MUST BE a power of 2, and the actual value is
	 * the smallest power of 2 greater than or equal to the number of
	 * physical pages in the system.
	 *
	 * Note: This computation can be tweaked if desired.
	 */
	vm_page_buckets = (struct pglist *) vaddr;
	bucket = vm_page_buckets;
	if (vm_page_bucket_count == 0) {
		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < atop(total))
			vm_page_bucket_count <<= 1;
	}
	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 * Validate these addresses.
	 */

	new_start = start + vm_page_bucket_count * sizeof(struct pglist);
	new_start = round_page(new_start);
	mapped = round_page(vaddr);
	vaddr = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;
	vaddr = round_page(vaddr);
	bzero((caddr_t) mapped, vaddr - mapped);

	for (i = 0; i < vm_page_bucket_count; i++) {
		TAILQ_INIT(bucket);
		bucket++;
	}

	/*
	 * Compute the number of pages of memory that will be available for
	 * use (taking into account the overhead of a page structure per
	 * page).
	 */

	first_page = phys_avail[0] / PAGE_SIZE;
	last_page = phys_avail[(nblocks - 1) * 2 + 1] / PAGE_SIZE;

	page_range = last_page - (phys_avail[0] / PAGE_SIZE);
	npages = (total - (page_range * sizeof(struct vm_page)) -
	    (start - phys_avail[biggestone])) / PAGE_SIZE;

	/*
	 * Initialize the mem entry structures now, and put them in the free
	 * queue.
	 */
	vm_page_array = (vm_page_t) vaddr;
	mapped = vaddr;

	/*
	 * Validate these addresses.
	 */
	new_start = round_page(start + page_range * sizeof(struct vm_page));
	mapped = pmap_map(mapped, start, new_start,
	    VM_PROT_READ | VM_PROT_WRITE);
	start = new_start;

	first_managed_page = start / PAGE_SIZE;

	/*
	 * Clear all of the page structures
	 */
	bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
	vm_page_array_size = page_range;

	cnt.v_page_count = 0;
	cnt.v_free_count = 0;
	for (i = 0; phys_avail[i + 1] && npages > 0; i += 2) {
		if (i == biggestone)
			pa = ptoa(first_managed_page);
		else
			pa = phys_avail[i];
		while (pa < phys_avail[i + 1] && npages-- > 0) {
			++cnt.v_page_count;
			++cnt.v_free_count;
			m = PHYS_TO_VM_PAGE(pa);
			m->phys_addr = pa;
			m->flags = 0;
			m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
			m->queue = m->pc + PQ_FREE;
			TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
			++(*vm_page_queues[m->queue].lcnt);
			pa += PAGE_SIZE;
		}
	}
	return (mapped);
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:  This macro depends on vm_page_bucket_count being a power of 2.
 */
static __inline int
vm_page_hash(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	return ((((uintptr_t) object) >> 5) + (pindex >> 1)) & vm_page_hash_mask;
}
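/*
 * Worked example (illustrative, not in the original source): with, say,
 * 8192 hash buckets, vm_page_hash_mask is 0x1fff, so for an object at
 * a hypothetical address 0xc0a12340 and pindex 7 the bucket index is
 * ((0xc0a12340 >> 5) + (7 >> 1)) & 0x1fff.  Shifting the object pointer
 * right by five discards low bits that carry little information for
 * aligned vm_object allocations, and folding in pindex >> 1 spreads the
 * pages of a single large object across many buckets.
 */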
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/offset-page
 *	table and object list.
 *
 *	The object and page must be locked, and must be splhigh.
 */

void
vm_page_insert(m, object, pindex)
	register vm_page_t m;
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register struct pglist *bucket;

#if !defined(MAX_PERF)
	if (m->flags & PG_TABLED)
		panic("vm_page_insert: already inserted");
#endif

	/*
	 * Record the object/offset pair in this page
	 */

	m->object = object;
	m->pindex = pindex;

	/*
	 * Insert it into the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	TAILQ_INSERT_TAIL(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now link into the object's list of backed pages.
	 */

	TAILQ_INSERT_TAIL(&object->memq, m, listq);
	vm_page_flag_set(m, PG_TABLED);
	m->object->page_hint = m;
	m->object->generation++;

	if (m->wire_count)
		object->wire_count++;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count++;

	/*
	 * And show that the object has one more resident page.
	 */

	object->resident_page_count++;
}

/*
 *	vm_page_remove:		[ internal use only ]
 *				NOTE: used by device pager as well -wfj
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object and page must be locked, and at splhigh.
 */

void
vm_page_remove(m)
	register vm_page_t m;
{
	register struct pglist *bucket;
	vm_object_t object;

	if (!(m->flags & PG_TABLED))
		return;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) == 0) {
		panic("vm_page_remove: page not busy");
	}
#endif

	vm_page_flag_clear(m, PG_BUSY);
	if (m->flags & PG_WANTED) {
		vm_page_flag_clear(m, PG_WANTED);
		wakeup(m);
	}

	object = m->object;
	if (object->page_hint == m)
		object->page_hint = NULL;

	if (m->wire_count)
		object->wire_count--;

	if ((m->queue - m->pc) == PQ_CACHE)
		object->cache_count--;

	/*
	 * Remove from the object/offset hash table
	 */

	bucket = &vm_page_buckets[vm_page_hash(m->object, m->pindex)];
	TAILQ_REMOVE(bucket, m, hashq);
	vm_page_bucket_generation++;

	/*
	 * Now remove from the object's list of backed pages.
	 */

	TAILQ_REMOVE(&object->memq, m, listq);

	/*
	 * And show that the object has one fewer resident page.
	 */

	object->resident_page_count--;
	object->generation++;
	m->object = NULL;

	vm_page_flag_clear(m, PG_TABLED);
}

/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

vm_page_t
vm_page_lookup(object, pindex)
	register vm_object_t object;
	register vm_pindex_t pindex;
{
	register vm_page_t m;
	register struct pglist *bucket;
	int generation;
	int s;

	/*
	 * Search the hash table for this object/offset pair
	 */

	if (object->page_hint && (object->page_hint->pindex == pindex) &&
	    (object->page_hint->object == object))
		return object->page_hint;

retry:
	generation = vm_page_bucket_generation;
	bucket = &vm_page_buckets[vm_page_hash(object, pindex)];
	for (m = TAILQ_FIRST(bucket); m != NULL; m = TAILQ_NEXT(m, hashq)) {
		if ((m->object == object) && (m->pindex == pindex)) {
			if (vm_page_bucket_generation != generation)
				goto retry;
			m->object->page_hint = m;
			return (m);
		}
	}
	if (vm_page_bucket_generation != generation)
		goto retry;
	return (NULL);
}
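/*
 * Illustrative note (not in the original source): vm_page_lookup()
 * never blocks, so instead of locking the hash chain it snapshots
 * vm_page_bucket_generation and restarts the scan whenever
 * vm_page_insert() or vm_page_remove() has bumped the generation while
 * the bucket was being walked.  The object's page_hint, maintained by
 * vm_page_insert() and by successful lookups, short-circuits the common
 * case of repeated lookups of the same page.
 */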
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(m, new_object, new_pindex)
	register vm_page_t m;
	register vm_object_t new_object;
	vm_pindex_t new_pindex;
{
	int s;

	s = splvm();
	vm_page_remove(m);
	vm_page_insert(m, new_object, new_pindex);
	splx(s);
}

/*
 * vm_page_unqueue without any wakeup
 */
void
vm_page_unqueue_nowakeup(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;

	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		m->queue = PQ_NONE;
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * vm_page_unqueue must be called at splhigh();
 */
void
vm_page_unqueue(m)
	vm_page_t m;
{
	int queue = m->queue;
	struct vpgqueues *pq;

	if (queue != PQ_NONE) {
		m->queue = PQ_NONE;
		pq = &vm_page_queues[queue];
		TAILQ_REMOVE(pq->pl, m, pageq);
		(*pq->cnt)--;
		(*pq->lcnt)--;
		if ((queue - m->pc) == PQ_CACHE) {
			if ((cnt.v_cache_count + cnt.v_free_count) <
			    (cnt.v_free_reserved + cnt.v_cache_min))
				pagedaemon_wakeup();
			if (m->object)
				m->object->cache_count--;
		}
	}
}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_list_find(basequeue, index)
	int basequeue, index;
{
#if PQ_L2_SIZE > 1

	int i, j;
	vm_page_t m;
	int hindex;
	struct vpgqueues *pq;

	pq = &vm_page_queues[basequeue];

	m = TAILQ_FIRST(pq[index].pl);
	if (m)
		return m;

	for (j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for (i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) > 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	m = TAILQ_FIRST(pq[hindex].pl);
	if (m)
		return m;

	return NULL;
#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}

/*
 * Find a page on the specified queue with color optimization.
 */
vm_page_t
vm_page_select(object, pindex, basequeue)
	vm_object_t object;
	vm_pindex_t pindex;
	int basequeue;
{

#if PQ_L2_SIZE > 1
	int index;

	index = (pindex + object->pg_color) & PQ_L2_MASK;
	return vm_page_list_find(basequeue, index);

#else
	return TAILQ_FIRST(vm_page_queues[basequeue].pl);
#endif

}
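/*
 * Illustrative note (not in the original source): the search in
 * vm_page_list_find() above starts at the preferred color and then
 * probes the other colors in a fixed order.  If, for example,
 * PQ_L2_SIZE were 16 and PQ_L1_SIZE were 2 (the real values come from
 * the cache geometry in vm_page.h), a request starting at color c
 * would probe, modulo 16:
 *
 *	c, c+6, c-6, c+4, c-4, c+2, c-2,
 *	c+7, c-7, c+5, c-5, c+3, c-3, c+1, c-1, c+8
 *
 * The even offsets come first; they are multiples of PQ_L1_SIZE and so
 * leave the low-order (L1) part of the color unchanged, which should
 * preserve the requested L1 cache color whenever possible.
 */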
/*
 * Find a page on the cache queue with color optimization.  Pages that
 * are found but not usable (busy, held, or wired) are deactivated, which
 * keeps us from repeatedly selecting potentially busy cached pages.
 */
vm_page_t
vm_page_select_cache(object, pindex)
	vm_object_t object;
	vm_pindex_t pindex;
{
	vm_page_t m;

	while (TRUE) {
#if PQ_L2_SIZE > 1
		int index;

		index = (pindex + object->pg_color) & PQ_L2_MASK;
		m = vm_page_list_find(PQ_CACHE, index);

#else
		m = TAILQ_FIRST(vm_page_queues[PQ_CACHE].pl);
#endif
		if (m && ((m->flags & PG_BUSY) || m->busy ||
		    m->hold_count || m->wire_count)) {
			vm_page_deactivate(m);
			continue;
		}
		return m;
	}
}

/*
 * Find a free or zero page, with specified preference.
 */
static vm_page_t
vm_page_select_free(object, pindex, prefqueue)
	vm_object_t object;
	vm_pindex_t pindex;
	int prefqueue;
{
#if PQ_L2_SIZE > 1
	int i, j;
	int index, hindex;
#endif
	vm_page_t m, mh;
	int oqueuediff;
	struct vpgqueues *pq;

	if (prefqueue == PQ_ZERO)
		oqueuediff = PQ_FREE - PQ_ZERO;
	else
		oqueuediff = PQ_ZERO - PQ_FREE;

	if ((mh = object->page_hint) != NULL) {
		if (mh->pindex == (pindex - 1)) {
			if ((mh->flags & PG_FICTITIOUS) == 0) {
				if ((mh < &vm_page_array[cnt.v_page_count - 1]) &&
				    (mh >= &vm_page_array[0])) {
					int queue;
					m = mh + 1;
					if (VM_PAGE_TO_PHYS(m) == (VM_PAGE_TO_PHYS(mh) + PAGE_SIZE)) {
						queue = m->queue - m->pc;
						if (queue == PQ_FREE || queue == PQ_ZERO) {
							return m;
						}
					}
				}
			}
		}
	}

	pq = &vm_page_queues[prefqueue];

#if PQ_L2_SIZE > 1

	index = (pindex + object->pg_color) & PQ_L2_MASK;

	if ((m = TAILQ_FIRST(pq[index].pl)) != NULL)
		return m;
	if ((m = TAILQ_FIRST(pq[index + oqueuediff].pl)) != NULL)
		return m;

	for (j = 0; j < PQ_L1_SIZE; j++) {
		int ij;
		for (i = (PQ_L2_SIZE / 2) - PQ_L1_SIZE;
		    (ij = i + j) >= 0;
		    i -= PQ_L1_SIZE) {

			hindex = index + ij;
			if (hindex >= PQ_L2_SIZE)
				hindex -= PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;
			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
				return m;

			hindex = index - ij;
			if (hindex < 0)
				hindex += PQ_L2_SIZE;
			if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
				return m;
			if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
				return m;
		}
	}

	hindex = index + PQ_L2_SIZE / 2;
	if (hindex >= PQ_L2_SIZE)
		hindex -= PQ_L2_SIZE;
	if ((m = TAILQ_FIRST(pq[hindex].pl)) != NULL)
		return m;
	if ((m = TAILQ_FIRST(pq[hindex + oqueuediff].pl)) != NULL)
		return m;

#else
	if ((m = TAILQ_FIRST(pq[0].pl)) != NULL)
		return m;
	else
		return TAILQ_FIRST(pq[oqueuediff].pl);
#endif

	return NULL;
}
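/*
 * Illustrative note (not in the original source): the page_hint check
 * at the top of vm_page_select_free() handles the common case of an
 * object being populated sequentially.  If the hint is the page at
 * pindex - 1 and the vm_page physically following it is sitting on a
 * free or zero queue, that page is returned directly, so consecutive
 * pages of the object also tend to be consecutive in physical memory.
 */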
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	page_req classes:
 *	VM_ALLOC_NORMAL		normal process request
 *	VM_ALLOC_SYSTEM		system *really* needs a page
 *	VM_ALLOC_INTERRUPT	interrupt time request
 *	VM_ALLOC_ZERO		zero page
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(object, pindex, page_req)
	vm_object_t object;
	vm_pindex_t pindex;
	int page_req;
{
	register vm_page_t m;
	struct vpgqueues *pq;
	vm_object_t oldobject;
	int queue, qtype;
	int s;

#ifdef DIAGNOSTIC
	m = vm_page_lookup(object, pindex);
	if (m)
		panic("vm_page_alloc: page already allocated");
#endif

	if ((curproc == pageproc) && (page_req != VM_ALLOC_INTERRUPT)) {
		page_req = VM_ALLOC_SYSTEM;
	}

	s = splvm();

	switch (page_req) {

	case VM_ALLOC_NORMAL:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(NORMAL): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(NORMAL): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_ZERO:
		if (cnt.v_free_count >= cnt.v_free_reserved) {
			m = vm_page_select_free(object, pindex, PQ_ZERO);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(ZERO): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(ZERO): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_SYSTEM:
		if ((cnt.v_free_count >= cnt.v_free_reserved) ||
		    ((cnt.v_cache_count == 0) &&
		    (cnt.v_free_count >= cnt.v_interrupt_free_min))) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(SYSTEM): missing page on free queue\n");
#endif
		} else {
			m = vm_page_select_cache(object, pindex);
			if (m == NULL) {
				splx(s);
#if defined(DIAGNOSTIC)
				if (cnt.v_cache_count > 0)
					printf("vm_page_alloc(SYSTEM): missing pages on cache queue: %d\n", cnt.v_cache_count);
#endif
				vm_pageout_deficit++;
				pagedaemon_wakeup();
				return (NULL);
			}
		}
		break;

	case VM_ALLOC_INTERRUPT:
		if (cnt.v_free_count > 0) {
			m = vm_page_select_free(object, pindex, PQ_FREE);
#if defined(DIAGNOSTIC)
			if (m == NULL)
				panic("vm_page_alloc(INTERRUPT): missing page on free queue\n");
#endif
		} else {
			splx(s);
			vm_pageout_deficit++;
			pagedaemon_wakeup();
			return (NULL);
		}
		break;

	default:
		m = NULL;
#if !defined(MAX_PERF)
		panic("vm_page_alloc: invalid allocation class");
#endif
	}

	queue = m->queue;
	qtype = queue - m->pc;
	if (qtype == PQ_ZERO)
		vm_page_zero_count--;
	pq = &vm_page_queues[queue];
	TAILQ_REMOVE(pq->pl, m, pageq);
	(*pq->cnt)--;
	(*pq->lcnt)--;
	oldobject = NULL;
	if (qtype == PQ_ZERO) {
		m->flags = PG_ZERO | PG_BUSY;
	} else if (qtype == PQ_CACHE) {
		oldobject = m->object;
		vm_page_busy(m);
		vm_page_remove(m);
		m->flags = PG_BUSY;
	} else {
		m->flags = PG_BUSY;
	}
	m->wire_count = 0;
	m->hold_count = 0;
	m->act_count = 0;
	m->busy = 0;
	m->valid = 0;
	m->dirty = 0;
	m->queue = PQ_NONE;
	/* XXX before splx until vm_page_insert is safe */
	vm_page_insert(m, object, pindex);

	/*
	 * Don't wake up too often - wake the pageout daemon when
	 * we would be nearly out of memory.
	 */
	if (((cnt.v_free_count + cnt.v_cache_count) <
	    (cnt.v_free_reserved + cnt.v_cache_min)) ||
	    (cnt.v_free_count < cnt.v_pageout_free_min))
		pagedaemon_wakeup();

	if ((qtype == PQ_CACHE) &&
	    ((page_req == VM_ALLOC_NORMAL) || (page_req == VM_ALLOC_ZERO)) &&
	    oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VFREE | VTBFREE | VDOOMED)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}
	splx(s);

	return (m);
}
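/*
 * Illustrative sketch (not in the original source): vm_page_alloc()
 * returns NULL rather than sleeping when memory is tight, so callers
 * that may block typically retry after waiting for the pageout daemon
 * to free something, along the lines of:
 *
 *	while ((m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL)) == NULL)
 *		VM_WAIT;
 *
 * vm_page_grab() below implements this loop, with the additional twist
 * of honoring VM_ALLOC_RETRY.
 */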
void
vm_wait()
{
	int s;

	s = splvm();
	if (curproc == pageproc) {
		vm_pageout_pages_needed = 1;
		tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
	} else {
		if (!vm_pages_needed) {
			vm_pages_needed++;
			wakeup(&vm_pages_needed);
		}
		tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
	}
	splx(s);
}

int
vm_page_sleep(vm_page_t m, char *msg, char *busy) {
	vm_object_t object = m->object;
	int slept = 0;
	if ((busy && *busy) || (m->flags & PG_BUSY)) {
		int s;
		s = splvm();
		if ((busy && *busy) || (m->flags & PG_BUSY)) {
			vm_page_flag_set(m, PG_WANTED);
			tsleep(m, PVM, msg, 0);
			slept = 1;
		}
		splx(s);
	}
	return slept;
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
void
vm_page_activate(m)
	register vm_page_t m;
{
	int s;
	vm_page_t np;
	vm_object_t object;

	s = splvm();
	if (m->queue != PQ_ACTIVE) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;

		vm_page_unqueue(m);

		if (m->wire_count == 0) {
			m->queue = PQ_ACTIVE;
			++(*vm_page_queues[PQ_ACTIVE].lcnt);
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			if (m->act_count < ACT_INIT)
				m->act_count = ACT_INIT;
			cnt.v_active_count++;
		}
	} else {
		if (m->act_count < ACT_INIT)
			m->act_count = ACT_INIT;
	}

	splx(s);
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static int
vm_page_freechk_and_unqueue(m)
	vm_page_t m;
{
	vm_object_t oldobject;

	oldobject = m->object;

#if !defined(MAX_PERF)
	if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
	    (m->hold_count != 0)) {
		printf(
		    "vm_page_free: pindex(%lu), busy(%d), PG_BUSY(%d), hold(%d)\n",
		    (u_long)m->pindex, m->busy, (m->flags & PG_BUSY) ? 1 : 0,
		    m->hold_count);
		if ((m->queue - m->pc) == PQ_FREE)
			panic("vm_page_free: freeing free page");
		else
			panic("vm_page_free: freeing busy page");
	}
#endif

	vm_page_unqueue_nowakeup(m);
	vm_page_remove(m);

	if ((m->flags & PG_FICTITIOUS) != 0) {
		return 0;
	}

	m->valid = 0;

	if (m->wire_count != 0) {
#if !defined(MAX_PERF)
		if (m->wire_count > 1) {
			panic("vm_page_free: invalid wire count (%d), pindex: 0x%x",
			    m->wire_count, m->pindex);
		}
#endif
		m->wire_count = 0;
		if (m->object)
			m->object->wire_count--;
		cnt.v_wire_count--;
	}

	if (oldobject && (oldobject->type == OBJT_VNODE) &&
	    ((oldobject->flags & OBJ_DEAD) == 0)) {
		struct vnode *vp;
		vp = (struct vnode *) oldobject->handle;
		if (vp && VSHOULDFREE(vp)) {
			if ((vp->v_flag & (VTBFREE | VDOOMED | VFREE)) == 0) {
				TAILQ_INSERT_TAIL(&vnode_tobefree_list, vp, v_freelist);
				vp->v_flag |= VTBFREE;
			}
		}
	}

#ifdef __alpha__
	pmap_page_is_free(m);
#endif

	return 1;
}

/*
 * helper routine for vm_page_free and vm_page_free_zero
 */
static __inline void
vm_page_free_wakeup()
{

	/*
	 * If the pageout daemon needs pages, then tell it that there
	 * are some free.
	 */
	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * Wake up processes that are waiting on memory if we hit a
	 * high water mark.  Also wake up the scheduler process if we
	 * have lots of memory; that process will swap in processes.
	 */
	if (vm_pages_needed &&
	    ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
		wakeup(&cnt.v_free_count);
		vm_pages_needed = 0;
	}
}

/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it from any VM object.
 *
 *	Object and page must be locked prior to entry.
 */
void
vm_page_free(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_FREE + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);
	/*
	 * If the pageout process is grabbing the page, it is likely
	 * that the page is NOT in the cache.  It is more likely that
	 * the page will be partially in the cache if it is being
	 * explicitly freed.
	 */
	if (curproc == pageproc) {
		TAILQ_INSERT_TAIL(pq->pl, m, pageq);
	} else {
		TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	}

	vm_page_free_wakeup();
	splx(s);
}

void
vm_page_free_zero(m)
	register vm_page_t m;
{
	int s;
	struct vpgqueues *pq;

	s = splvm();

	cnt.v_tfree++;

	if (!vm_page_freechk_and_unqueue(m)) {
		splx(s);
		return;
	}

	m->queue = PQ_ZERO + m->pc;
	pq = &vm_page_queues[m->queue];
	++(*pq->lcnt);
	++(*pq->cnt);

	TAILQ_INSERT_HEAD(pq->pl, m, pageq);
	++vm_page_zero_count;
	vm_page_free_wakeup();
	splx(s);
}
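/*
 * Illustrative usage note (not in the original source): a page must be
 * busied before it is freed; vm_page_remove(), called from the
 * vm_page_freechk_and_unqueue() helper above, panics on a page that is
 * not PG_BUSY (unless MAX_PERF is defined).  The PQ_CACHE case in
 * contigmalloc1() below shows the expected sequence:
 *
 *	vm_page_busy(m);
 *	vm_page_free(m);
 */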
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page queues must be locked.
 */
void
vm_page_wire(m)
	register vm_page_t m;
{
	int s;

	if (m->wire_count == 0) {
		s = splvm();
		vm_page_unqueue(m);
		splx(s);
		cnt.v_wire_count++;
		if (m->object)
			m->object->wire_count++;
	}
	(*vm_page_queues[PQ_NONE].lcnt)++;
	m->wire_count++;
	vm_page_flag_set(m, PG_MAPPED);
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page queues must be locked.
 */
void
vm_page_unwire(m)
	register vm_page_t m;
{
	int s;

	s = splvm();

	if (m->wire_count > 0) {
		m->wire_count--;
		if (m->wire_count == 0) {
			if (m->object)
				m->object->wire_count--;
			cnt.v_wire_count--;
			TAILQ_INSERT_TAIL(&vm_page_queue_active, m, pageq);
			m->queue = PQ_ACTIVE;
			(*vm_page_queues[PQ_ACTIVE].lcnt)++;
			cnt.v_active_count++;
		}
	} else {
#if !defined(MAX_PERF)
		panic("vm_page_unwire: invalid wire count: %d\n", m->wire_count);
#endif
	}
	splx(s);
}


/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(m)
	register vm_page_t m;
{
	int s;

	/*
	 * Only move active pages -- ignore locked or already inactive ones.
	 *
	 * XXX: sometimes we get pages which aren't wired down or on any queue -
	 * we need to put them on the inactive queue also, otherwise we lose
	 * track of them.  Paul Mackerras (paulus@cs.anu.edu.au) 9-Jan-93.
	 */
	if (m->queue == PQ_INACTIVE)
		return;

	s = splvm();
	if (m->wire_count == 0) {
		if ((m->queue - m->pc) == PQ_CACHE)
			cnt.v_reactivated++;
		vm_page_unqueue(m);
		TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
		m->queue = PQ_INACTIVE;
		++(*vm_page_queues[PQ_INACTIVE].lcnt);
		cnt.v_inactive_count++;
	}
	splx(s);
}

/*
 * vm_page_cache
 *
 * Put the specified page onto the page cache queue (if appropriate).
 */
void
vm_page_cache(m)
	register vm_page_t m;
{
	int s;

#if !defined(MAX_PERF)
	if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
		printf("vm_page_cache: attempting to cache busy page\n");
		return;
	}
#endif
	if ((m->queue - m->pc) == PQ_CACHE)
		return;

	vm_page_protect(m, VM_PROT_NONE);
#if !defined(MAX_PERF)
	if (m->dirty != 0) {
		panic("vm_page_cache: caching a dirty page, pindex: %d", m->pindex);
	}
#endif
	s = splvm();
	vm_page_unqueue_nowakeup(m);
	m->queue = PQ_CACHE + m->pc;
	(*vm_page_queues[m->queue].lcnt)++;
	TAILQ_INSERT_TAIL(vm_page_queues[m->queue].pl, m, pageq);
	cnt.v_cache_count++;
	m->object->cache_count++;
	vm_page_free_wakeup();
	splx(s);
}
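/*
 * Illustrative sketch (not in the original source): code that needs a
 * page to stay resident across an operation typically brackets the
 * work with a wire/unwire pair, e.g.
 *
 *	vm_page_wire(m);
 *	... operate on the page; it will not be paged out or cached ...
 *	vm_page_unwire(m);
 *
 * The final unwire places the page on the active queue, so the pageout
 * daemon can manage it again.
 */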
/*
 * Grab a page, waiting until we are woken up due to the page changing
 * state.  We keep waiting as long as the page remains busy in the
 * object.  If the page doesn't exist, allocate it.
 */
vm_page_t
vm_page_grab(object, pindex, allocflags)
	vm_object_t object;
	vm_pindex_t pindex;
	int allocflags;
{

	vm_page_t m;
	int s, generation;

retrylookup:
	if ((m = vm_page_lookup(object, pindex)) != NULL) {
		if (m->busy || (m->flags & PG_BUSY)) {
			generation = object->generation;

			s = splvm();
			while ((object->generation == generation) &&
			    (m->busy || (m->flags & PG_BUSY))) {
				vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
				tsleep(m, PVM, "pgrbwt", 0);
				if ((allocflags & VM_ALLOC_RETRY) == 0) {
					splx(s);
					return NULL;
				}
			}
			splx(s);
			goto retrylookup;
		} else {
			vm_page_busy(m);
			return m;
		}
	}

	m = vm_page_alloc(object, pindex, allocflags & ~VM_ALLOC_RETRY);
	if (m == NULL) {
		VM_WAIT;
		if ((allocflags & VM_ALLOC_RETRY) == 0)
			return NULL;
		goto retrylookup;
	}

	return m;
}

/*
 * mapping function for valid bits or for dirty bits in
 * a page
 */
__inline int
vm_page_bits(int base, int size)
{
	u_short chunk;

	if ((base == 0) && (size >= PAGE_SIZE))
		return VM_PAGE_BITS_ALL;

	size = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
	base &= PAGE_MASK;
	if (size > PAGE_SIZE - base) {
		size = PAGE_SIZE - base;
	}

	base = base / DEV_BSIZE;
	chunk = vm_page_dev_bsize_chunks[size / DEV_BSIZE];
	return (chunk << base) & VM_PAGE_BITS_ALL;
}

/*
 * set a page valid and clean
 */
void
vm_page_set_validclean(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int pagebits = vm_page_bits(base, size);

	m->valid |= pagebits;
	m->dirty &= ~pagebits;
	if (base == 0 && size == PAGE_SIZE)
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}

/*
 * set a page (partially) invalid
 */
void
vm_page_set_invalid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits;

	m->valid &= ~(bits = vm_page_bits(base, size));
	if (m->valid == 0)
		m->dirty &= ~bits;
	m->object->generation++;
}

/*
 * is (partial) page valid?
 */
int
vm_page_is_valid(m, base, size)
	vm_page_t m;
	int base;
	int size;
{
	int bits = vm_page_bits(base, size);

	if (m->valid && ((m->valid & bits) == bits))
		return 1;
	else
		return 0;
}

void
vm_page_test_dirty(m)
	vm_page_t m;
{
	if ((m->dirty != VM_PAGE_BITS_ALL) &&
	    pmap_is_modified(VM_PAGE_TO_PHYS(m))) {
		m->dirty = VM_PAGE_BITS_ALL;
	}
}
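/*
 * Worked example for vm_page_bits() above (illustrative, not in the
 * original source), assuming the usual PAGE_SIZE of 4096 and DEV_BSIZE
 * of 512, so that a page holds eight chunks and VM_PAGE_BITS_ALL is
 * 0xff: vm_page_bits(1024, 1024) rounds size up to a DEV_BSIZE
 * multiple (already 1024), computes base = 1024 / 512 = 2 and
 * chunk = vm_page_dev_bsize_chunks[1024 / 512] = 0x3, and returns
 * 0x3 << 2 == 0x0c, i.e. the third and fourth chunks of the page.
 */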
/*
 * This interface is for merging with malloc() someday.
 * Even if we never implement compaction so that contiguous allocation
 * works after initialization time, malloc()'s data structures are good
 * for statistics and for allocations of less than a page.
 */
void *
contigmalloc1(size, type, flags, low, high, alignment, boundary, map)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
	vm_map_t map;
{
	int i, s, start;
	vm_offset_t addr, phys, tmp_addr;
	int pass;
	vm_page_t pga = vm_page_array;

	size = round_page(size);
#if !defined(MAX_PERF)
	if (size == 0)
		panic("contigmalloc1: size must not be 0");
	if ((alignment & (alignment - 1)) != 0)
		panic("contigmalloc1: alignment must be a power of 2");
	if ((boundary & (boundary - 1)) != 0)
		panic("contigmalloc1: boundary must be a power of 2");
#endif

	start = 0;
	for (pass = 0; pass <= 1; pass++) {
		s = splvm();
again:
		/*
		 * Find first page in array that is free, within range,
		 * aligned, and such that the boundary won't be crossed.
		 */
		for (i = start; i < cnt.v_page_count; i++) {
			int pqtype;
			phys = VM_PAGE_TO_PHYS(&pga[i]);
			pqtype = pga[i].queue - pga[i].pc;
			if (((pqtype == PQ_ZERO) || (pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
			    (phys >= low) && (phys < high) &&
			    ((phys & (alignment - 1)) == 0) &&
			    (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
				break;
		}

		/*
		 * If the above failed or we will exceed the upper bound, fail.
		 */
		if ((i == cnt.v_page_count) ||
		    ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
			vm_page_t m, next;

again1:
			for (m = TAILQ_FIRST(&vm_page_queue_inactive);
			    m != NULL;
			    m = next) {

				if (m->queue != PQ_INACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw0", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
					    m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			for (m = TAILQ_FIRST(&vm_page_queue_active);
			    m != NULL;
			    m = next) {

				if (m->queue != PQ_ACTIVE) {
					break;
				}

				next = TAILQ_NEXT(m, pageq);
				if (vm_page_sleep(m, "vpctw1", &m->busy))
					goto again1;
				vm_page_test_dirty(m);
				if (m->dirty) {
					if (m->object->type == OBJT_VNODE) {
						vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
						vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
						VOP_UNLOCK(m->object->handle, 0, curproc);
						goto again1;
					} else if (m->object->type == OBJT_SWAP ||
					    m->object->type == OBJT_DEFAULT) {
						vm_pageout_flush(&m, 1, 0);
						goto again1;
					}
				}
				if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
					vm_page_cache(m);
			}

			splx(s);
			continue;
		}
		start = i;
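		/*
		 * Worked example for the alignment/boundary test above
		 * (illustrative, not in the original source): the
		 * expression ((phys ^ (phys + size - 1)) & ~(boundary - 1))
		 * is zero exactly when the first and last byte of the
		 * candidate range fall in the same boundary-sized window.
		 * With boundary = 0x10000 (64K), a range at phys = 0x12000
		 * of size 0x4000 ends at 0x15fff; the XOR is 0x7fff, which
		 * has no bits above the 64K mask, so the range qualifies.
		 * A range at phys = 0x1c000 of size 0x8000 ends at 0x23fff;
		 * the XOR is 0x3ffff, leaving 0x30000 after masking, so it
		 * crosses a 64K boundary and is rejected.
		 */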
		/*
		 * Check that the successive pages are contiguous and free.
		 */
		for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			pqtype = pga[i].queue - pga[i].pc;
			if ((VM_PAGE_TO_PHYS(&pga[i]) !=
			    (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
			    ((pqtype != PQ_ZERO) && (pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
				start++;
				goto again;
			}
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			int pqtype;
			vm_page_t m = &pga[i];

			pqtype = m->queue - m->pc;
			if (pqtype == PQ_CACHE) {
				vm_page_busy(m);
				vm_page_free(m);
			}

			TAILQ_REMOVE(vm_page_queues[m->queue].pl, m, pageq);
			(*vm_page_queues[m->queue].lcnt)--;
			cnt.v_free_count--;
			m->valid = VM_PAGE_BITS_ALL;
			m->flags = 0;
			m->dirty = 0;
			m->wire_count = 0;
			m->busy = 0;
			m->queue = PQ_NONE;
			m->object = NULL;
			vm_page_wire(m);
		}

		/*
		 * We've found a contiguous chunk that meets our requirements.
		 * Allocate kernel VM, unfree and assign the physical pages to
		 * it and return kernel VM pointer.
		 */
		tmp_addr = addr = kmem_alloc_pageable(map, size);
		if (addr == 0) {
			/*
			 * XXX We almost never run out of kernel virtual
			 * space, so we don't make the allocated memory
			 * above available.
			 */
			splx(s);
			return (NULL);
		}

		for (i = start; i < (start + size / PAGE_SIZE); i++) {
			vm_page_t m = &pga[i];
			vm_page_insert(m, kernel_object,
			    OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
			pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
			tmp_addr += PAGE_SIZE;
		}

		splx(s);
		return ((void *)addr);
	}
	return NULL;
}

void *
contigmalloc(size, type, flags, low, high, alignment, boundary)
	unsigned long size;	/* should be size_t here and for malloc() */
	struct malloc_type *type;
	int flags;
	unsigned long low;
	unsigned long high;
	unsigned long alignment;
	unsigned long boundary;
{
	return contigmalloc1(size, type, flags, low, high, alignment, boundary,
	    kernel_map);
}

vm_offset_t
vm_page_alloc_contig(size, low, high, alignment)
	vm_offset_t size;
	vm_offset_t low;
	vm_offset_t high;
	vm_offset_t alignment;
{
	return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
	    alignment, 0ul, kernel_map));
}
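/*
 * Illustrative usage sketch (not in the original source): a driver that
 * needs a physically contiguous, 64K-aligned buffer somewhere in the
 * low 16MB for ISA DMA might call contigmalloc() roughly as follows
 * (the buffer size and limits here are made up):
 *
 *	void *buf;
 *
 *	buf = contigmalloc(32 * 1024, M_DEVBUF, M_NOWAIT,
 *	    0ul, 0xfffffful, 64 * 1024, 0ul);
 *	if (buf == NULL)
 *		return (ENOMEM);
 *
 * A boundary argument of 0 imposes no boundary restriction; a nonzero
 * value keeps the range from straddling a multiple of that boundary.
 */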
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>

#include <ddb/ddb.h>

DB_SHOW_COMMAND(page, vm_page_print_page_info)
{
	db_printf("cnt.v_free_count: %d\n", cnt.v_free_count);
	db_printf("cnt.v_cache_count: %d\n", cnt.v_cache_count);
	db_printf("cnt.v_inactive_count: %d\n", cnt.v_inactive_count);
	db_printf("cnt.v_active_count: %d\n", cnt.v_active_count);
	db_printf("cnt.v_wire_count: %d\n", cnt.v_wire_count);
	db_printf("cnt.v_free_reserved: %d\n", cnt.v_free_reserved);
	db_printf("cnt.v_free_min: %d\n", cnt.v_free_min);
	db_printf("cnt.v_free_target: %d\n", cnt.v_free_target);
	db_printf("cnt.v_cache_min: %d\n", cnt.v_cache_min);
	db_printf("cnt.v_inactive_target: %d\n", cnt.v_inactive_target);
}

DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
	int i;

	db_printf("PQ_FREE:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_FREE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_CACHE:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_CACHE + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ZERO:");
	for (i = 0; i < PQ_L2_SIZE; i++) {
		db_printf(" %d", *vm_page_queues[PQ_ZERO + i].lcnt);
	}
	db_printf("\n");

	db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
	    *vm_page_queues[PQ_ACTIVE].lcnt,
	    *vm_page_queues[PQ_INACTIVE].lcnt);
}
#endif /* DDB */