1 /*- 2 * Copyright (c) 2002-2006 Rice University 3 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Alan L. Cox, 7 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 28 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_ddb.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/lock.h> 40 #include <sys/kernel.h> 41 #include <sys/malloc.h> 42 #include <sys/mutex.h> 43 #include <sys/queue.h> 44 #include <sys/sbuf.h> 45 #include <sys/sysctl.h> 46 #include <sys/vmmeter.h> 47 #include <sys/vnode.h> 48 49 #include <ddb/ddb.h> 50 51 #include <vm/vm.h> 52 #include <vm/vm_param.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_object.h> 55 #include <vm/vm_page.h> 56 #include <vm/vm_phys.h> 57 #include <vm/vm_reserv.h> 58 59 struct vm_freelist { 60 struct pglist pl; 61 int lcnt; 62 }; 63 64 struct vm_phys_seg { 65 vm_paddr_t start; 66 vm_paddr_t end; 67 vm_page_t first_page; 68 struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER]; 69 }; 70 71 static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX]; 72 73 static int vm_phys_nsegs; 74 75 static struct vm_freelist 76 vm_phys_free_queues[VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER]; 77 78 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1; 79 80 static int cnt_prezero; 81 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD, 82 &cnt_prezero, 0, "The number of physical pages prezeroed at idle time"); 83 84 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 85 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 86 NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 87 88 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 89 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 90 NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 91 92 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind); 93 static int vm_phys_paddr_to_segind(vm_paddr_t pa); 94 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 95 int order); 96 97 /* 98 * Outputs the state of the physical memory allocator, specifically, 99 * the amount of physical memory in each free list. 100 */ 101 static int 102 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 103 { 104 struct sbuf sbuf; 105 struct vm_freelist *fl; 106 char *cbuf; 107 const int cbufsize = vm_nfreelists*(VM_NFREEORDER + 1)*81; 108 int error, flind, oind, pind; 109 110 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 111 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 112 for (flind = 0; flind < vm_nfreelists; flind++) { 113 sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 114 "\n ORDER (SIZE) | NUMBER" 115 "\n ", flind); 116 for (pind = 0; pind < VM_NFREEPOOL; pind++) 117 sbuf_printf(&sbuf, " | POOL %d", pind); 118 sbuf_printf(&sbuf, "\n-- "); 119 for (pind = 0; pind < VM_NFREEPOOL; pind++) 120 sbuf_printf(&sbuf, "-- -- "); 121 sbuf_printf(&sbuf, "--\n"); 122 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 123 sbuf_printf(&sbuf, " %2.2d (%6.6dK)", oind, 124 1 << (PAGE_SHIFT - 10 + oind)); 125 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 126 fl = vm_phys_free_queues[flind][pind]; 127 sbuf_printf(&sbuf, " | %6.6d", fl[oind].lcnt); 128 } 129 sbuf_printf(&sbuf, "\n"); 130 } 131 } 132 sbuf_finish(&sbuf); 133 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 134 sbuf_delete(&sbuf); 135 free(cbuf, M_TEMP); 136 return (error); 137 } 138 139 /* 140 * Outputs the set of physical memory segments. 141 */ 142 static int 143 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 144 { 145 struct sbuf sbuf; 146 struct vm_phys_seg *seg; 147 char *cbuf; 148 const int cbufsize = VM_PHYSSEG_MAX*(VM_NFREEORDER + 1)*81; 149 int error, segind; 150 151 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 152 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 153 for (segind = 0; segind < vm_phys_nsegs; segind++) { 154 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 155 seg = &vm_phys_segs[segind]; 156 sbuf_printf(&sbuf, "start: %#jx\n", 157 (uintmax_t)seg->start); 158 sbuf_printf(&sbuf, "end: %#jx\n", 159 (uintmax_t)seg->end); 160 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 161 } 162 sbuf_finish(&sbuf); 163 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 164 sbuf_delete(&sbuf); 165 free(cbuf, M_TEMP); 166 return (error); 167 } 168 169 /* 170 * Create a physical memory segment. 171 */ 172 static void 173 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind) 174 { 175 struct vm_phys_seg *seg; 176 #ifdef VM_PHYSSEG_SPARSE 177 long pages; 178 int segind; 179 180 pages = 0; 181 for (segind = 0; segind < vm_phys_nsegs; segind++) { 182 seg = &vm_phys_segs[segind]; 183 pages += atop(seg->end - seg->start); 184 } 185 #endif 186 KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 187 ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 188 seg = &vm_phys_segs[vm_phys_nsegs++]; 189 seg->start = start; 190 seg->end = end; 191 #ifdef VM_PHYSSEG_SPARSE 192 seg->first_page = &vm_page_array[pages]; 193 #else 194 seg->first_page = PHYS_TO_VM_PAGE(start); 195 #endif 196 seg->free_queues = &vm_phys_free_queues[flind]; 197 } 198 199 /* 200 * Initialize the physical memory allocator. 201 */ 202 void 203 vm_phys_init(void) 204 { 205 struct vm_freelist *fl; 206 int flind, i, oind, pind; 207 208 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 209 #ifdef VM_FREELIST_ISADMA 210 if (phys_avail[i] < 16777216) { 211 if (phys_avail[i + 1] > 16777216) { 212 vm_phys_create_seg(phys_avail[i], 16777216, 213 VM_FREELIST_ISADMA); 214 vm_phys_create_seg(16777216, phys_avail[i + 1], 215 VM_FREELIST_DEFAULT); 216 } else { 217 vm_phys_create_seg(phys_avail[i], 218 phys_avail[i + 1], VM_FREELIST_ISADMA); 219 } 220 if (VM_FREELIST_ISADMA >= vm_nfreelists) 221 vm_nfreelists = VM_FREELIST_ISADMA + 1; 222 } else 223 #endif 224 #ifdef VM_FREELIST_HIGHMEM 225 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) { 226 if (phys_avail[i] < VM_HIGHMEM_ADDRESS) { 227 vm_phys_create_seg(phys_avail[i], 228 VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT); 229 vm_phys_create_seg(VM_HIGHMEM_ADDRESS, 230 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 231 } else { 232 vm_phys_create_seg(phys_avail[i], 233 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 234 } 235 if (VM_FREELIST_HIGHMEM >= vm_nfreelists) 236 vm_nfreelists = VM_FREELIST_HIGHMEM + 1; 237 } else 238 #endif 239 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1], 240 VM_FREELIST_DEFAULT); 241 } 242 for (flind = 0; flind < vm_nfreelists; flind++) { 243 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 244 fl = vm_phys_free_queues[flind][pind]; 245 for (oind = 0; oind < VM_NFREEORDER; oind++) 246 TAILQ_INIT(&fl[oind].pl); 247 } 248 } 249 } 250 251 /* 252 * Split a contiguous, power of two-sized set of physical pages. 253 */ 254 static __inline void 255 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) 256 { 257 vm_page_t m_buddy; 258 259 while (oind > order) { 260 oind--; 261 m_buddy = &m[1 << oind]; 262 KASSERT(m_buddy->order == VM_NFREEORDER, 263 ("vm_phys_split_pages: page %p has unexpected order %d", 264 m_buddy, m_buddy->order)); 265 m_buddy->order = oind; 266 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq); 267 fl[oind].lcnt++; 268 } 269 } 270 271 /* 272 * Initialize a physical page and add it to the free lists. 273 */ 274 void 275 vm_phys_add_page(vm_paddr_t pa) 276 { 277 vm_page_t m; 278 279 cnt.v_page_count++; 280 m = vm_phys_paddr_to_vm_page(pa); 281 m->phys_addr = pa; 282 m->segind = vm_phys_paddr_to_segind(pa); 283 m->flags = PG_FREE; 284 KASSERT(m->order == VM_NFREEORDER, 285 ("vm_phys_add_page: page %p has unexpected order %d", 286 m, m->order)); 287 m->pool = VM_FREEPOOL_DEFAULT; 288 pmap_page_init(m); 289 mtx_lock(&vm_page_queue_free_mtx); 290 cnt.v_free_count++; 291 vm_phys_free_pages(m, 0); 292 mtx_unlock(&vm_page_queue_free_mtx); 293 } 294 295 /* 296 * Allocate a contiguous, power of two-sized set of physical pages 297 * from the free lists. 298 * 299 * The free page queues must be locked. 300 */ 301 vm_page_t 302 vm_phys_alloc_pages(int pool, int order) 303 { 304 struct vm_freelist *fl; 305 struct vm_freelist *alt; 306 int flind, oind, pind; 307 vm_page_t m; 308 309 KASSERT(pool < VM_NFREEPOOL, 310 ("vm_phys_alloc_pages: pool %d is out of range", pool)); 311 KASSERT(order < VM_NFREEORDER, 312 ("vm_phys_alloc_pages: order %d is out of range", order)); 313 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 314 for (flind = 0; flind < vm_nfreelists; flind++) { 315 fl = vm_phys_free_queues[flind][pool]; 316 for (oind = order; oind < VM_NFREEORDER; oind++) { 317 m = TAILQ_FIRST(&fl[oind].pl); 318 if (m != NULL) { 319 TAILQ_REMOVE(&fl[oind].pl, m, pageq); 320 fl[oind].lcnt--; 321 m->order = VM_NFREEORDER; 322 vm_phys_split_pages(m, oind, fl, order); 323 return (m); 324 } 325 } 326 327 /* 328 * The given pool was empty. Find the largest 329 * contiguous, power-of-two-sized set of pages in any 330 * pool. Transfer these pages to the given pool, and 331 * use them to satisfy the allocation. 332 */ 333 for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 334 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 335 alt = vm_phys_free_queues[flind][pind]; 336 m = TAILQ_FIRST(&alt[oind].pl); 337 if (m != NULL) { 338 TAILQ_REMOVE(&alt[oind].pl, m, pageq); 339 alt[oind].lcnt--; 340 m->order = VM_NFREEORDER; 341 vm_phys_set_pool(pool, m, oind); 342 vm_phys_split_pages(m, oind, fl, order); 343 return (m); 344 } 345 } 346 } 347 } 348 return (NULL); 349 } 350 351 /* 352 * Allocate physical memory from phys_avail[]. 353 */ 354 vm_paddr_t 355 vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment) 356 { 357 vm_paddr_t pa; 358 int i; 359 360 size = round_page(size); 361 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 362 if (phys_avail[i + 1] - phys_avail[i] < size) 363 continue; 364 pa = phys_avail[i]; 365 phys_avail[i] += size; 366 return (pa); 367 } 368 panic("vm_phys_bootstrap_alloc"); 369 } 370 371 /* 372 * Find the vm_page corresponding to the given physical address. 373 */ 374 vm_page_t 375 vm_phys_paddr_to_vm_page(vm_paddr_t pa) 376 { 377 struct vm_phys_seg *seg; 378 int segind; 379 380 for (segind = 0; segind < vm_phys_nsegs; segind++) { 381 seg = &vm_phys_segs[segind]; 382 if (pa >= seg->start && pa < seg->end) 383 return (&seg->first_page[atop(pa - seg->start)]); 384 } 385 return (NULL); 386 } 387 388 /* 389 * Find the segment containing the given physical address. 390 */ 391 static int 392 vm_phys_paddr_to_segind(vm_paddr_t pa) 393 { 394 struct vm_phys_seg *seg; 395 int segind; 396 397 for (segind = 0; segind < vm_phys_nsegs; segind++) { 398 seg = &vm_phys_segs[segind]; 399 if (pa >= seg->start && pa < seg->end) 400 return (segind); 401 } 402 panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , 403 (uintmax_t)pa); 404 } 405 406 /* 407 * Free a contiguous, power of two-sized set of physical pages. 408 * 409 * The free page queues must be locked. 410 */ 411 void 412 vm_phys_free_pages(vm_page_t m, int order) 413 { 414 struct vm_freelist *fl; 415 struct vm_phys_seg *seg; 416 vm_paddr_t pa, pa_buddy; 417 vm_page_t m_buddy; 418 419 KASSERT(m->order == VM_NFREEORDER, 420 ("vm_phys_free_pages: page %p has unexpected order %d", 421 m, m->order)); 422 KASSERT(m->pool < VM_NFREEPOOL, 423 ("vm_phys_free_pages: page %p has unexpected pool %d", 424 m, m->pool)); 425 KASSERT(order < VM_NFREEORDER, 426 ("vm_phys_free_pages: order %d is out of range", order)); 427 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 428 pa = VM_PAGE_TO_PHYS(m); 429 seg = &vm_phys_segs[m->segind]; 430 while (order < VM_NFREEORDER - 1) { 431 pa_buddy = pa ^ (1 << (PAGE_SHIFT + order)); 432 if (pa_buddy < seg->start || 433 pa_buddy >= seg->end) 434 break; 435 m_buddy = &seg->first_page[atop(pa_buddy - seg->start)]; 436 if (m_buddy->order != order) 437 break; 438 fl = (*seg->free_queues)[m_buddy->pool]; 439 TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq); 440 fl[m_buddy->order].lcnt--; 441 m_buddy->order = VM_NFREEORDER; 442 if (m_buddy->pool != m->pool) 443 vm_phys_set_pool(m->pool, m_buddy, order); 444 order++; 445 pa &= ~((1 << (PAGE_SHIFT + order)) - 1); 446 m = &seg->first_page[atop(pa - seg->start)]; 447 } 448 m->order = order; 449 fl = (*seg->free_queues)[m->pool]; 450 TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq); 451 fl[order].lcnt++; 452 } 453 454 /* 455 * Set the pool for a contiguous, power of two-sized set of physical pages. 456 */ 457 void 458 vm_phys_set_pool(int pool, vm_page_t m, int order) 459 { 460 vm_page_t m_tmp; 461 462 for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 463 m_tmp->pool = pool; 464 } 465 466 /* 467 * Search for the given physical page "m" in the free lists. If the search 468 * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 469 * FALSE, indicating that "m" is not in the free lists. 470 * 471 * The free page queues must be locked. 472 */ 473 boolean_t 474 vm_phys_unfree_page(vm_page_t m) 475 { 476 struct vm_freelist *fl; 477 struct vm_phys_seg *seg; 478 vm_paddr_t pa, pa_half; 479 vm_page_t m_set, m_tmp; 480 int order; 481 482 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 483 484 /* 485 * First, find the contiguous, power of two-sized set of free 486 * physical pages containing the given physical page "m" and 487 * assign it to "m_set". 488 */ 489 seg = &vm_phys_segs[m->segind]; 490 for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 491 order < VM_NFREEORDER - 1; ) { 492 order++; 493 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 494 if (pa >= seg->start) 495 m_set = &seg->first_page[atop(pa - seg->start)]; 496 else 497 return (FALSE); 498 } 499 if (m_set->order < order) 500 return (FALSE); 501 if (m_set->order == VM_NFREEORDER) 502 return (FALSE); 503 KASSERT(m_set->order < VM_NFREEORDER, 504 ("vm_phys_unfree_page: page %p has unexpected order %d", 505 m_set, m_set->order)); 506 507 /* 508 * Next, remove "m_set" from the free lists. Finally, extract 509 * "m" from "m_set" using an iterative algorithm: While "m_set" 510 * is larger than a page, shrink "m_set" by returning the half 511 * of "m_set" that does not contain "m" to the free lists. 512 */ 513 fl = (*seg->free_queues)[m_set->pool]; 514 order = m_set->order; 515 TAILQ_REMOVE(&fl[order].pl, m_set, pageq); 516 fl[order].lcnt--; 517 m_set->order = VM_NFREEORDER; 518 while (order > 0) { 519 order--; 520 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 521 if (m->phys_addr < pa_half) 522 m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 523 else { 524 m_tmp = m_set; 525 m_set = &seg->first_page[atop(pa_half - seg->start)]; 526 } 527 m_tmp->order = order; 528 TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq); 529 fl[order].lcnt++; 530 } 531 KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 532 return (TRUE); 533 } 534 535 /* 536 * Try to zero one physical page. Used by an idle priority thread. 537 */ 538 boolean_t 539 vm_phys_zero_pages_idle(void) 540 { 541 static struct vm_freelist *fl = vm_phys_free_queues[0][0]; 542 static int flind, oind, pind; 543 vm_page_t m, m_tmp; 544 545 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 546 for (;;) { 547 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) { 548 for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) { 549 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) { 550 vm_phys_unfree_page(m_tmp); 551 cnt.v_free_count--; 552 mtx_unlock(&vm_page_queue_free_mtx); 553 pmap_zero_page_idle(m_tmp); 554 m_tmp->flags |= PG_ZERO; 555 mtx_lock(&vm_page_queue_free_mtx); 556 cnt.v_free_count++; 557 vm_phys_free_pages(m_tmp, 0); 558 vm_page_zero_count++; 559 cnt_prezero++; 560 return (TRUE); 561 } 562 } 563 } 564 oind++; 565 if (oind == VM_NFREEORDER) { 566 oind = 0; 567 pind++; 568 if (pind == VM_NFREEPOOL) { 569 pind = 0; 570 flind++; 571 if (flind == vm_nfreelists) 572 flind = 0; 573 } 574 fl = vm_phys_free_queues[flind][pind]; 575 } 576 } 577 } 578 579 /* 580 * Allocate a contiguous set of physical pages of the given size 581 * "npages" from the free lists. All of the physical pages must be at 582 * or above the given physical address "low" and below the given 583 * physical address "high". The given value "alignment" determines the 584 * alignment of the first physical page in the set. If the given value 585 * "boundary" is non-zero, then the set of physical pages cannot cross 586 * any physical address boundary that is a multiple of that value. Both 587 * "alignment" and "boundary" must be a power of two. 588 */ 589 vm_page_t 590 vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high, 591 unsigned long alignment, unsigned long boundary) 592 { 593 struct vm_freelist *fl; 594 struct vm_phys_seg *seg; 595 vm_object_t m_object; 596 vm_paddr_t pa, pa_last, size; 597 vm_page_t deferred_vdrop_list, m, m_ret; 598 int flind, i, oind, order, pind; 599 600 size = npages << PAGE_SHIFT; 601 KASSERT(size != 0, 602 ("vm_phys_alloc_contig: size must not be 0")); 603 KASSERT((alignment & (alignment - 1)) == 0, 604 ("vm_phys_alloc_contig: alignment must be a power of 2")); 605 KASSERT((boundary & (boundary - 1)) == 0, 606 ("vm_phys_alloc_contig: boundary must be a power of 2")); 607 deferred_vdrop_list = NULL; 608 /* Compute the queue that is the best fit for npages. */ 609 for (order = 0; (1 << order) < npages; order++); 610 mtx_lock(&vm_page_queue_free_mtx); 611 #if VM_NRESERVLEVEL > 0 612 retry: 613 #endif 614 for (flind = 0; flind < vm_nfreelists; flind++) { 615 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { 616 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 617 fl = vm_phys_free_queues[flind][pind]; 618 TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) { 619 /* 620 * A free list may contain physical pages 621 * from one or more segments. 622 */ 623 seg = &vm_phys_segs[m_ret->segind]; 624 if (seg->start > high || 625 low >= seg->end) 626 continue; 627 628 /* 629 * Is the size of this allocation request 630 * larger than the largest block size? 631 */ 632 if (order >= VM_NFREEORDER) { 633 /* 634 * Determine if a sufficient number 635 * of subsequent blocks to satisfy 636 * the allocation request are free. 637 */ 638 pa = VM_PAGE_TO_PHYS(m_ret); 639 pa_last = pa + size; 640 for (;;) { 641 pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); 642 if (pa >= pa_last) 643 break; 644 if (pa < seg->start || 645 pa >= seg->end) 646 break; 647 m = &seg->first_page[atop(pa - seg->start)]; 648 if (m->order != VM_NFREEORDER - 1) 649 break; 650 } 651 /* If not, continue to the next block. */ 652 if (pa < pa_last) 653 continue; 654 } 655 656 /* 657 * Determine if the blocks are within the given range, 658 * satisfy the given alignment, and do not cross the 659 * given boundary. 660 */ 661 pa = VM_PAGE_TO_PHYS(m_ret); 662 if (pa >= low && 663 pa + size <= high && 664 (pa & (alignment - 1)) == 0 && 665 ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) 666 goto done; 667 } 668 } 669 } 670 } 671 #if VM_NRESERVLEVEL > 0 672 if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary)) 673 goto retry; 674 #endif 675 mtx_unlock(&vm_page_queue_free_mtx); 676 return (NULL); 677 done: 678 for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 679 fl = (*seg->free_queues)[m->pool]; 680 TAILQ_REMOVE(&fl[m->order].pl, m, pageq); 681 fl[m->order].lcnt--; 682 m->order = VM_NFREEORDER; 683 } 684 if (m_ret->pool != VM_FREEPOOL_DEFAULT) 685 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); 686 fl = (*seg->free_queues)[m_ret->pool]; 687 vm_phys_split_pages(m_ret, oind, fl, order); 688 for (i = 0; i < npages; i++) { 689 m = &m_ret[i]; 690 KASSERT(m->queue == PQ_NONE, 691 ("vm_phys_alloc_contig: page %p has unexpected queue %d", 692 m, m->queue)); 693 KASSERT(m->wire_count == 0, 694 ("vm_phys_alloc_contig: page %p is wired", m)); 695 KASSERT(m->hold_count == 0, 696 ("vm_phys_alloc_contig: page %p is held", m)); 697 KASSERT(m->busy == 0, 698 ("vm_phys_alloc_contig: page %p is busy", m)); 699 KASSERT(m->dirty == 0, 700 ("vm_phys_alloc_contig: page %p is dirty", m)); 701 KASSERT(pmap_page_get_memattr(m) == VM_MEMATTR_DEFAULT, 702 ("vm_phys_alloc_contig: page %p has unexpected memattr %d", 703 m, pmap_page_get_memattr(m))); 704 if ((m->flags & PG_CACHED) != 0) { 705 m->valid = 0; 706 m_object = m->object; 707 vm_page_cache_remove(m); 708 if (m_object->type == OBJT_VNODE && 709 m_object->cache == NULL) { 710 /* 711 * Enqueue the vnode for deferred vdrop(). 712 * 713 * Unmanaged pages don't use "pageq", so it 714 * can be safely abused to construct a short- 715 * lived queue of vnodes. 716 */ 717 m->pageq.tqe_prev = m_object->handle; 718 m->pageq.tqe_next = deferred_vdrop_list; 719 deferred_vdrop_list = m; 720 } 721 } else { 722 KASSERT(VM_PAGE_IS_FREE(m), 723 ("vm_phys_alloc_contig: page %p is not free", m)); 724 KASSERT(m->valid == 0, 725 ("vm_phys_alloc_contig: free page %p is valid", m)); 726 cnt.v_free_count--; 727 } 728 if (m->flags & PG_ZERO) 729 vm_page_zero_count--; 730 /* Don't clear the PG_ZERO flag; we'll need it later. */ 731 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO); 732 m->oflags = 0; 733 /* Unmanaged pages don't use "act_count". */ 734 } 735 for (; i < roundup2(npages, 1 << imin(oind, order)); i++) { 736 m = &m_ret[i]; 737 KASSERT(m->order == VM_NFREEORDER, 738 ("vm_phys_alloc_contig: page %p has unexpected order %d", 739 m, m->order)); 740 vm_phys_free_pages(m, 0); 741 } 742 mtx_unlock(&vm_page_queue_free_mtx); 743 while (deferred_vdrop_list != NULL) { 744 vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev); 745 deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next; 746 } 747 return (m_ret); 748 } 749 750 #ifdef DDB 751 /* 752 * Show the number of physical pages in each of the free lists. 753 */ 754 DB_SHOW_COMMAND(freepages, db_show_freepages) 755 { 756 struct vm_freelist *fl; 757 int flind, oind, pind; 758 759 for (flind = 0; flind < vm_nfreelists; flind++) { 760 db_printf("FREE LIST %d:\n" 761 "\n ORDER (SIZE) | NUMBER" 762 "\n ", flind); 763 for (pind = 0; pind < VM_NFREEPOOL; pind++) 764 db_printf(" | POOL %d", pind); 765 db_printf("\n-- "); 766 for (pind = 0; pind < VM_NFREEPOOL; pind++) 767 db_printf("-- -- "); 768 db_printf("--\n"); 769 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 770 db_printf(" %2.2d (%6.6dK)", oind, 771 1 << (PAGE_SHIFT - 10 + oind)); 772 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 773 fl = vm_phys_free_queues[flind][pind]; 774 db_printf(" | %6.6d", fl[oind].lcnt); 775 } 776 db_printf("\n"); 777 } 778 db_printf("\n"); 779 } 780 } 781 #endif 782