1 /*- 2 * Copyright (c) 2002-2006 Rice University 3 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Alan L. Cox, 7 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 28 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_ddb.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/lock.h> 40 #include <sys/kernel.h> 41 #include <sys/malloc.h> 42 #include <sys/mutex.h> 43 #include <sys/queue.h> 44 #include <sys/sbuf.h> 45 #include <sys/sysctl.h> 46 #include <sys/vmmeter.h> 47 #include <sys/vnode.h> 48 49 #include <ddb/ddb.h> 50 51 #include <vm/vm.h> 52 #include <vm/vm_param.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_object.h> 55 #include <vm/vm_page.h> 56 #include <vm/vm_phys.h> 57 #include <vm/vm_reserv.h> 58 59 struct vm_freelist { 60 struct pglist pl; 61 int lcnt; 62 }; 63 64 struct vm_phys_seg { 65 vm_paddr_t start; 66 vm_paddr_t end; 67 vm_page_t first_page; 68 struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER]; 69 }; 70 71 static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX]; 72 73 static int vm_phys_nsegs; 74 75 static struct vm_freelist 76 vm_phys_free_queues[VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER]; 77 78 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1; 79 80 static int cnt_prezero; 81 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD, 82 &cnt_prezero, 0, "The number of physical pages prezeroed at idle time"); 83 84 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 85 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 86 NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 87 88 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 89 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 90 NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 91 92 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind); 93 static int vm_phys_paddr_to_segind(vm_paddr_t pa); 94 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 95 int order); 96 97 /* 98 * Outputs the state of the physical memory allocator, specifically, 99 * the amount of physical memory in each free list. 100 */ 101 static int 102 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 103 { 104 struct sbuf sbuf; 105 struct vm_freelist *fl; 106 char *cbuf; 107 const int cbufsize = vm_nfreelists*(VM_NFREEORDER + 1)*81; 108 int error, flind, oind, pind; 109 110 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 111 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 112 for (flind = 0; flind < vm_nfreelists; flind++) { 113 sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 114 "\n ORDER (SIZE) | NUMBER" 115 "\n ", flind); 116 for (pind = 0; pind < VM_NFREEPOOL; pind++) 117 sbuf_printf(&sbuf, " | POOL %d", pind); 118 sbuf_printf(&sbuf, "\n-- "); 119 for (pind = 0; pind < VM_NFREEPOOL; pind++) 120 sbuf_printf(&sbuf, "-- -- "); 121 sbuf_printf(&sbuf, "--\n"); 122 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 123 sbuf_printf(&sbuf, " %2.2d (%6.6dK)", oind, 124 1 << (PAGE_SHIFT - 10 + oind)); 125 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 126 fl = vm_phys_free_queues[flind][pind]; 127 sbuf_printf(&sbuf, " | %6.6d", fl[oind].lcnt); 128 } 129 sbuf_printf(&sbuf, "\n"); 130 } 131 } 132 sbuf_finish(&sbuf); 133 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 134 sbuf_delete(&sbuf); 135 free(cbuf, M_TEMP); 136 return (error); 137 } 138 139 /* 140 * Outputs the set of physical memory segments. 141 */ 142 static int 143 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 144 { 145 struct sbuf sbuf; 146 struct vm_phys_seg *seg; 147 char *cbuf; 148 const int cbufsize = VM_PHYSSEG_MAX*(VM_NFREEORDER + 1)*81; 149 int error, segind; 150 151 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 152 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 153 for (segind = 0; segind < vm_phys_nsegs; segind++) { 154 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 155 seg = &vm_phys_segs[segind]; 156 sbuf_printf(&sbuf, "start: %#jx\n", 157 (uintmax_t)seg->start); 158 sbuf_printf(&sbuf, "end: %#jx\n", 159 (uintmax_t)seg->end); 160 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 161 } 162 sbuf_finish(&sbuf); 163 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 164 sbuf_delete(&sbuf); 165 free(cbuf, M_TEMP); 166 return (error); 167 } 168 169 /* 170 * Create a physical memory segment. 171 */ 172 static void 173 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind) 174 { 175 struct vm_phys_seg *seg; 176 #ifdef VM_PHYSSEG_SPARSE 177 long pages; 178 int segind; 179 180 pages = 0; 181 for (segind = 0; segind < vm_phys_nsegs; segind++) { 182 seg = &vm_phys_segs[segind]; 183 pages += atop(seg->end - seg->start); 184 } 185 #endif 186 KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 187 ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 188 seg = &vm_phys_segs[vm_phys_nsegs++]; 189 seg->start = start; 190 seg->end = end; 191 #ifdef VM_PHYSSEG_SPARSE 192 seg->first_page = &vm_page_array[pages]; 193 #else 194 seg->first_page = PHYS_TO_VM_PAGE(start); 195 #endif 196 seg->free_queues = &vm_phys_free_queues[flind]; 197 } 198 199 /* 200 * Initialize the physical memory allocator. 201 */ 202 void 203 vm_phys_init(void) 204 { 205 struct vm_freelist *fl; 206 int flind, i, oind, pind; 207 208 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 209 #ifdef VM_FREELIST_ISADMA 210 if (phys_avail[i] < 16777216) { 211 if (phys_avail[i + 1] > 16777216) { 212 vm_phys_create_seg(phys_avail[i], 16777216, 213 VM_FREELIST_ISADMA); 214 vm_phys_create_seg(16777216, phys_avail[i + 1], 215 VM_FREELIST_DEFAULT); 216 } else { 217 vm_phys_create_seg(phys_avail[i], 218 phys_avail[i + 1], VM_FREELIST_ISADMA); 219 } 220 if (VM_FREELIST_ISADMA >= vm_nfreelists) 221 vm_nfreelists = VM_FREELIST_ISADMA + 1; 222 } else 223 #endif 224 #ifdef VM_FREELIST_HIGHMEM 225 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) { 226 if (phys_avail[i] < VM_HIGHMEM_ADDRESS) { 227 vm_phys_create_seg(phys_avail[i], 228 VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT); 229 vm_phys_create_seg(VM_HIGHMEM_ADDRESS, 230 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 231 } else { 232 vm_phys_create_seg(phys_avail[i], 233 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 234 } 235 if (VM_FREELIST_HIGHMEM >= vm_nfreelists) 236 vm_nfreelists = VM_FREELIST_HIGHMEM + 1; 237 } else 238 #endif 239 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1], 240 VM_FREELIST_DEFAULT); 241 } 242 for (flind = 0; flind < vm_nfreelists; flind++) { 243 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 244 fl = vm_phys_free_queues[flind][pind]; 245 for (oind = 0; oind < VM_NFREEORDER; oind++) 246 TAILQ_INIT(&fl[oind].pl); 247 } 248 } 249 } 250 251 /* 252 * Split a contiguous, power of two-sized set of physical pages. 253 */ 254 static __inline void 255 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) 256 { 257 vm_page_t m_buddy; 258 259 while (oind > order) { 260 oind--; 261 m_buddy = &m[1 << oind]; 262 KASSERT(m_buddy->order == VM_NFREEORDER, 263 ("vm_phys_split_pages: page %p has unexpected order %d", 264 m_buddy, m_buddy->order)); 265 m_buddy->order = oind; 266 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq); 267 fl[oind].lcnt++; 268 } 269 } 270 271 /* 272 * Initialize a physical page and add it to the free lists. 273 */ 274 void 275 vm_phys_add_page(vm_paddr_t pa) 276 { 277 vm_page_t m; 278 279 cnt.v_page_count++; 280 m = vm_phys_paddr_to_vm_page(pa); 281 m->phys_addr = pa; 282 m->segind = vm_phys_paddr_to_segind(pa); 283 m->flags = PG_FREE; 284 KASSERT(m->order == VM_NFREEORDER, 285 ("vm_phys_add_page: page %p has unexpected order %d", 286 m, m->order)); 287 m->pool = VM_FREEPOOL_DEFAULT; 288 pmap_page_init(m); 289 mtx_lock(&vm_page_queue_free_mtx); 290 cnt.v_free_count++; 291 vm_phys_free_pages(m, 0); 292 mtx_unlock(&vm_page_queue_free_mtx); 293 } 294 295 /* 296 * Allocate a contiguous, power of two-sized set of physical pages 297 * from the free lists. 298 * 299 * The free page queues must be locked. 300 */ 301 vm_page_t 302 vm_phys_alloc_pages(int pool, int order) 303 { 304 struct vm_freelist *fl; 305 struct vm_freelist *alt; 306 int flind, oind, pind; 307 vm_page_t m; 308 309 KASSERT(pool < VM_NFREEPOOL, 310 ("vm_phys_alloc_pages: pool %d is out of range", pool)); 311 KASSERT(order < VM_NFREEORDER, 312 ("vm_phys_alloc_pages: order %d is out of range", order)); 313 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 314 for (flind = 0; flind < vm_nfreelists; flind++) { 315 fl = vm_phys_free_queues[flind][pool]; 316 for (oind = order; oind < VM_NFREEORDER; oind++) { 317 m = TAILQ_FIRST(&fl[oind].pl); 318 if (m != NULL) { 319 TAILQ_REMOVE(&fl[oind].pl, m, pageq); 320 fl[oind].lcnt--; 321 m->order = VM_NFREEORDER; 322 vm_phys_split_pages(m, oind, fl, order); 323 return (m); 324 } 325 } 326 327 /* 328 * The given pool was empty. Find the largest 329 * contiguous, power-of-two-sized set of pages in any 330 * pool. Transfer these pages to the given pool, and 331 * use them to satisfy the allocation. 332 */ 333 for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 334 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 335 alt = vm_phys_free_queues[flind][pind]; 336 m = TAILQ_FIRST(&alt[oind].pl); 337 if (m != NULL) { 338 TAILQ_REMOVE(&alt[oind].pl, m, pageq); 339 alt[oind].lcnt--; 340 m->order = VM_NFREEORDER; 341 vm_phys_set_pool(pool, m, oind); 342 vm_phys_split_pages(m, oind, fl, order); 343 return (m); 344 } 345 } 346 } 347 } 348 return (NULL); 349 } 350 351 /* 352 * Allocate physical memory from phys_avail[]. 353 */ 354 vm_paddr_t 355 vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment) 356 { 357 vm_paddr_t pa; 358 int i; 359 360 size = round_page(size); 361 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 362 if (phys_avail[i + 1] - phys_avail[i] < size) 363 continue; 364 pa = phys_avail[i]; 365 phys_avail[i] += size; 366 return (pa); 367 } 368 panic("vm_phys_bootstrap_alloc"); 369 } 370 371 /* 372 * Find the vm_page corresponding to the given physical address. 373 */ 374 vm_page_t 375 vm_phys_paddr_to_vm_page(vm_paddr_t pa) 376 { 377 struct vm_phys_seg *seg; 378 int segind; 379 380 for (segind = 0; segind < vm_phys_nsegs; segind++) { 381 seg = &vm_phys_segs[segind]; 382 if (pa >= seg->start && pa < seg->end) 383 return (&seg->first_page[atop(pa - seg->start)]); 384 } 385 panic("vm_phys_paddr_to_vm_page: paddr %#jx is not in any segment", 386 (uintmax_t)pa); 387 } 388 389 /* 390 * Find the segment containing the given physical address. 391 */ 392 static int 393 vm_phys_paddr_to_segind(vm_paddr_t pa) 394 { 395 struct vm_phys_seg *seg; 396 int segind; 397 398 for (segind = 0; segind < vm_phys_nsegs; segind++) { 399 seg = &vm_phys_segs[segind]; 400 if (pa >= seg->start && pa < seg->end) 401 return (segind); 402 } 403 panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , 404 (uintmax_t)pa); 405 } 406 407 /* 408 * Free a contiguous, power of two-sized set of physical pages. 409 * 410 * The free page queues must be locked. 411 */ 412 void 413 vm_phys_free_pages(vm_page_t m, int order) 414 { 415 struct vm_freelist *fl; 416 struct vm_phys_seg *seg; 417 vm_paddr_t pa, pa_buddy; 418 vm_page_t m_buddy; 419 420 KASSERT(m->order == VM_NFREEORDER, 421 ("vm_phys_free_pages: page %p has unexpected order %d", 422 m, m->order)); 423 KASSERT(m->pool < VM_NFREEPOOL, 424 ("vm_phys_free_pages: page %p has unexpected pool %d", 425 m, m->pool)); 426 KASSERT(order < VM_NFREEORDER, 427 ("vm_phys_free_pages: order %d is out of range", order)); 428 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 429 pa = VM_PAGE_TO_PHYS(m); 430 seg = &vm_phys_segs[m->segind]; 431 while (order < VM_NFREEORDER - 1) { 432 pa_buddy = pa ^ (1 << (PAGE_SHIFT + order)); 433 if (pa_buddy < seg->start || 434 pa_buddy >= seg->end) 435 break; 436 m_buddy = &seg->first_page[atop(pa_buddy - seg->start)]; 437 if (m_buddy->order != order) 438 break; 439 fl = (*seg->free_queues)[m_buddy->pool]; 440 TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq); 441 fl[m_buddy->order].lcnt--; 442 m_buddy->order = VM_NFREEORDER; 443 if (m_buddy->pool != m->pool) 444 vm_phys_set_pool(m->pool, m_buddy, order); 445 order++; 446 pa &= ~((1 << (PAGE_SHIFT + order)) - 1); 447 m = &seg->first_page[atop(pa - seg->start)]; 448 } 449 m->order = order; 450 fl = (*seg->free_queues)[m->pool]; 451 TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq); 452 fl[order].lcnt++; 453 } 454 455 /* 456 * Set the pool for a contiguous, power of two-sized set of physical pages. 457 */ 458 void 459 vm_phys_set_pool(int pool, vm_page_t m, int order) 460 { 461 vm_page_t m_tmp; 462 463 for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 464 m_tmp->pool = pool; 465 } 466 467 /* 468 * Search for the given physical page "m" in the free lists. If the search 469 * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 470 * FALSE, indicating that "m" is not in the free lists. 471 * 472 * The free page queues must be locked. 473 */ 474 boolean_t 475 vm_phys_unfree_page(vm_page_t m) 476 { 477 struct vm_freelist *fl; 478 struct vm_phys_seg *seg; 479 vm_paddr_t pa, pa_half; 480 vm_page_t m_set, m_tmp; 481 int order; 482 483 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 484 485 /* 486 * First, find the contiguous, power of two-sized set of free 487 * physical pages containing the given physical page "m" and 488 * assign it to "m_set". 489 */ 490 seg = &vm_phys_segs[m->segind]; 491 for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 492 order < VM_NFREEORDER - 1; ) { 493 order++; 494 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 495 if (pa >= seg->start) 496 m_set = &seg->first_page[atop(pa - seg->start)]; 497 else 498 return (FALSE); 499 } 500 if (m_set->order < order) 501 return (FALSE); 502 if (m_set->order == VM_NFREEORDER) 503 return (FALSE); 504 KASSERT(m_set->order < VM_NFREEORDER, 505 ("vm_phys_unfree_page: page %p has unexpected order %d", 506 m_set, m_set->order)); 507 508 /* 509 * Next, remove "m_set" from the free lists. Finally, extract 510 * "m" from "m_set" using an iterative algorithm: While "m_set" 511 * is larger than a page, shrink "m_set" by returning the half 512 * of "m_set" that does not contain "m" to the free lists. 513 */ 514 fl = (*seg->free_queues)[m_set->pool]; 515 order = m_set->order; 516 TAILQ_REMOVE(&fl[order].pl, m_set, pageq); 517 fl[order].lcnt--; 518 m_set->order = VM_NFREEORDER; 519 while (order > 0) { 520 order--; 521 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 522 if (m->phys_addr < pa_half) 523 m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 524 else { 525 m_tmp = m_set; 526 m_set = &seg->first_page[atop(pa_half - seg->start)]; 527 } 528 m_tmp->order = order; 529 TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq); 530 fl[order].lcnt++; 531 } 532 KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 533 return (TRUE); 534 } 535 536 /* 537 * Try to zero one physical page. Used by an idle priority thread. 538 */ 539 boolean_t 540 vm_phys_zero_pages_idle(void) 541 { 542 static struct vm_freelist *fl = vm_phys_free_queues[0][0]; 543 static int flind, oind, pind; 544 vm_page_t m, m_tmp; 545 546 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 547 for (;;) { 548 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) { 549 for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) { 550 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) { 551 vm_phys_unfree_page(m_tmp); 552 cnt.v_free_count--; 553 mtx_unlock(&vm_page_queue_free_mtx); 554 pmap_zero_page_idle(m_tmp); 555 m_tmp->flags |= PG_ZERO; 556 mtx_lock(&vm_page_queue_free_mtx); 557 cnt.v_free_count++; 558 vm_phys_free_pages(m_tmp, 0); 559 vm_page_zero_count++; 560 cnt_prezero++; 561 return (TRUE); 562 } 563 } 564 } 565 oind++; 566 if (oind == VM_NFREEORDER) { 567 oind = 0; 568 pind++; 569 if (pind == VM_NFREEPOOL) { 570 pind = 0; 571 flind++; 572 if (flind == vm_nfreelists) 573 flind = 0; 574 } 575 fl = vm_phys_free_queues[flind][pind]; 576 } 577 } 578 } 579 580 /* 581 * Allocate a contiguous set of physical pages of the given size 582 * "npages" from the free lists. All of the physical pages must be at 583 * or above the given physical address "low" and below the given 584 * physical address "high". The given value "alignment" determines the 585 * alignment of the first physical page in the set. If the given value 586 * "boundary" is non-zero, then the set of physical pages cannot cross 587 * any physical address boundary that is a multiple of that value. Both 588 * "alignment" and "boundary" must be a power of two. 589 */ 590 vm_page_t 591 vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high, 592 unsigned long alignment, unsigned long boundary) 593 { 594 struct vm_freelist *fl; 595 struct vm_phys_seg *seg; 596 vm_object_t m_object; 597 vm_paddr_t pa, pa_last, size; 598 vm_page_t m, m_ret; 599 int flind, i, oind, order, pind; 600 601 size = npages << PAGE_SHIFT; 602 KASSERT(size != 0, 603 ("vm_phys_alloc_contig: size must not be 0")); 604 KASSERT((alignment & (alignment - 1)) == 0, 605 ("vm_phys_alloc_contig: alignment must be a power of 2")); 606 KASSERT((boundary & (boundary - 1)) == 0, 607 ("vm_phys_alloc_contig: boundary must be a power of 2")); 608 /* Compute the queue that is the best fit for npages. */ 609 for (order = 0; (1 << order) < npages; order++); 610 mtx_lock(&vm_page_queue_free_mtx); 611 #if VM_NRESERVLEVEL > 0 612 retry: 613 #endif 614 for (flind = 0; flind < vm_nfreelists; flind++) { 615 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { 616 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 617 fl = vm_phys_free_queues[flind][pind]; 618 TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) { 619 /* 620 * A free list may contain physical pages 621 * from one or more segments. 622 */ 623 seg = &vm_phys_segs[m_ret->segind]; 624 if (seg->start > high || 625 low >= seg->end) 626 continue; 627 628 /* 629 * Is the size of this allocation request 630 * larger than the largest block size? 631 */ 632 if (order >= VM_NFREEORDER) { 633 /* 634 * Determine if a sufficient number 635 * of subsequent blocks to satisfy 636 * the allocation request are free. 637 */ 638 pa = VM_PAGE_TO_PHYS(m_ret); 639 pa_last = pa + size; 640 for (;;) { 641 pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); 642 if (pa >= pa_last) 643 break; 644 if (pa < seg->start || 645 pa >= seg->end) 646 break; 647 m = &seg->first_page[atop(pa - seg->start)]; 648 if (m->order != VM_NFREEORDER - 1) 649 break; 650 } 651 /* If not, continue to the next block. */ 652 if (pa < pa_last) 653 continue; 654 } 655 656 /* 657 * Determine if the blocks are within the given range, 658 * satisfy the given alignment, and do not cross the 659 * given boundary. 660 */ 661 pa = VM_PAGE_TO_PHYS(m_ret); 662 if (pa >= low && 663 pa + size <= high && 664 (pa & (alignment - 1)) == 0 && 665 ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) 666 goto done; 667 } 668 } 669 } 670 } 671 #if VM_NRESERVLEVEL > 0 672 if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary)) 673 goto retry; 674 #endif 675 mtx_unlock(&vm_page_queue_free_mtx); 676 return (NULL); 677 done: 678 for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 679 fl = (*seg->free_queues)[m->pool]; 680 TAILQ_REMOVE(&fl[m->order].pl, m, pageq); 681 fl[m->order].lcnt--; 682 m->order = VM_NFREEORDER; 683 } 684 if (m_ret->pool != VM_FREEPOOL_DEFAULT) 685 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); 686 fl = (*seg->free_queues)[m_ret->pool]; 687 vm_phys_split_pages(m_ret, oind, fl, order); 688 for (i = 0; i < npages; i++) { 689 m = &m_ret[i]; 690 KASSERT(m->queue == PQ_NONE, 691 ("vm_phys_alloc_contig: page %p has unexpected queue %d", 692 m, m->queue)); 693 m_object = m->object; 694 if ((m->flags & PG_CACHED) != 0) 695 vm_page_cache_remove(m); 696 else { 697 KASSERT(VM_PAGE_IS_FREE(m), 698 ("vm_phys_alloc_contig: page %p is not free", m)); 699 cnt.v_free_count--; 700 } 701 m->valid = VM_PAGE_BITS_ALL; 702 if (m->flags & PG_ZERO) 703 vm_page_zero_count--; 704 /* Don't clear the PG_ZERO flag; we'll need it later. */ 705 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO); 706 m->oflags = 0; 707 KASSERT(m->dirty == 0, 708 ("vm_phys_alloc_contig: page %p was dirty", m)); 709 m->wire_count = 0; 710 m->busy = 0; 711 if (m_object != NULL && 712 m_object->type == OBJT_VNODE && 713 m_object->cache == NULL) { 714 mtx_unlock(&vm_page_queue_free_mtx); 715 vdrop(m_object->handle); 716 mtx_lock(&vm_page_queue_free_mtx); 717 } 718 } 719 for (; i < roundup2(npages, 1 << imin(oind, order)); i++) { 720 m = &m_ret[i]; 721 KASSERT(m->order == VM_NFREEORDER, 722 ("vm_phys_alloc_contig: page %p has unexpected order %d", 723 m, m->order)); 724 vm_phys_free_pages(m, 0); 725 } 726 mtx_unlock(&vm_page_queue_free_mtx); 727 return (m_ret); 728 } 729 730 #ifdef DDB 731 /* 732 * Show the number of physical pages in each of the free lists. 733 */ 734 DB_SHOW_COMMAND(freepages, db_show_freepages) 735 { 736 struct vm_freelist *fl; 737 int flind, oind, pind; 738 739 for (flind = 0; flind < vm_nfreelists; flind++) { 740 db_printf("FREE LIST %d:\n" 741 "\n ORDER (SIZE) | NUMBER" 742 "\n ", flind); 743 for (pind = 0; pind < VM_NFREEPOOL; pind++) 744 db_printf(" | POOL %d", pind); 745 db_printf("\n-- "); 746 for (pind = 0; pind < VM_NFREEPOOL; pind++) 747 db_printf("-- -- "); 748 db_printf("--\n"); 749 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 750 db_printf(" %2.2d (%6.6dK)", oind, 751 1 << (PAGE_SHIFT - 10 + oind)); 752 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 753 fl = vm_phys_free_queues[flind][pind]; 754 db_printf(" | %6.6d", fl[oind].lcnt); 755 } 756 db_printf("\n"); 757 } 758 db_printf("\n"); 759 } 760 } 761 #endif 762