1 /*- 2 * Copyright (c) 2002-2006 Rice University 3 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Alan L. Cox, 7 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 28 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_ddb.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/lock.h> 40 #include <sys/kernel.h> 41 #include <sys/malloc.h> 42 #include <sys/mutex.h> 43 #include <sys/queue.h> 44 #include <sys/sbuf.h> 45 #include <sys/sysctl.h> 46 #include <sys/vmmeter.h> 47 #include <sys/vnode.h> 48 49 #include <ddb/ddb.h> 50 51 #include <vm/vm.h> 52 #include <vm/vm_param.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_object.h> 55 #include <vm/vm_page.h> 56 #include <vm/vm_phys.h> 57 58 struct vm_freelist { 59 struct pglist pl; 60 int lcnt; 61 }; 62 63 struct vm_phys_seg { 64 vm_paddr_t start; 65 vm_paddr_t end; 66 vm_page_t first_page; 67 struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER]; 68 }; 69 70 static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX]; 71 72 static int vm_phys_nsegs; 73 74 static struct vm_freelist 75 vm_phys_free_queues[VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER]; 76 77 static int vm_nfreelists = VM_FREELIST_DEFAULT + 1; 78 79 static int cnt_prezero; 80 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD, 81 &cnt_prezero, 0, "The number of physical pages prezeroed at idle time"); 82 83 static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS); 84 SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD, 85 NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info"); 86 87 static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS); 88 SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD, 89 NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info"); 90 91 static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind); 92 static int vm_phys_paddr_to_segind(vm_paddr_t pa); 93 static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, 94 int order); 95 96 /* 97 * Outputs the state of the physical memory allocator, specifically, 98 * the amount of physical memory in each free list. 99 */ 100 static int 101 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 102 { 103 struct sbuf sbuf; 104 struct vm_freelist *fl; 105 char *cbuf; 106 const int cbufsize = vm_nfreelists*(VM_NFREEORDER + 1)*81; 107 int error, flind, oind, pind; 108 109 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 110 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 111 for (flind = 0; flind < vm_nfreelists; flind++) { 112 sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 113 "\n ORDER (SIZE) | NUMBER" 114 "\n ", flind); 115 for (pind = 0; pind < VM_NFREEPOOL; pind++) 116 sbuf_printf(&sbuf, " | POOL %d", pind); 117 sbuf_printf(&sbuf, "\n-- "); 118 for (pind = 0; pind < VM_NFREEPOOL; pind++) 119 sbuf_printf(&sbuf, "-- -- "); 120 sbuf_printf(&sbuf, "--\n"); 121 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 122 sbuf_printf(&sbuf, " %2.2d (%6.6dK)", oind, 123 1 << (PAGE_SHIFT - 10 + oind)); 124 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 125 fl = vm_phys_free_queues[flind][pind]; 126 sbuf_printf(&sbuf, " | %6.6d", fl[oind].lcnt); 127 } 128 sbuf_printf(&sbuf, "\n"); 129 } 130 } 131 sbuf_finish(&sbuf); 132 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 133 sbuf_delete(&sbuf); 134 free(cbuf, M_TEMP); 135 return (error); 136 } 137 138 /* 139 * Outputs the set of physical memory segments. 140 */ 141 static int 142 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 143 { 144 struct sbuf sbuf; 145 struct vm_phys_seg *seg; 146 char *cbuf; 147 const int cbufsize = VM_PHYSSEG_MAX*(VM_NFREEORDER + 1)*81; 148 int error, segind; 149 150 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 151 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 152 for (segind = 0; segind < vm_phys_nsegs; segind++) { 153 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 154 seg = &vm_phys_segs[segind]; 155 sbuf_printf(&sbuf, "start: %#jx\n", 156 (uintmax_t)seg->start); 157 sbuf_printf(&sbuf, "end: %#jx\n", 158 (uintmax_t)seg->end); 159 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 160 } 161 sbuf_finish(&sbuf); 162 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 163 sbuf_delete(&sbuf); 164 free(cbuf, M_TEMP); 165 return (error); 166 } 167 168 /* 169 * Create a physical memory segment. 170 */ 171 static void 172 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind) 173 { 174 struct vm_phys_seg *seg; 175 #ifdef VM_PHYSSEG_SPARSE 176 long pages; 177 int segind; 178 179 pages = 0; 180 for (segind = 0; segind < vm_phys_nsegs; segind++) { 181 seg = &vm_phys_segs[segind]; 182 pages += atop(seg->end - seg->start); 183 } 184 #endif 185 KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 186 ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 187 seg = &vm_phys_segs[vm_phys_nsegs++]; 188 seg->start = start; 189 seg->end = end; 190 #ifdef VM_PHYSSEG_SPARSE 191 seg->first_page = &vm_page_array[pages]; 192 #else 193 seg->first_page = PHYS_TO_VM_PAGE(start); 194 #endif 195 seg->free_queues = &vm_phys_free_queues[flind]; 196 } 197 198 /* 199 * Initialize the physical memory allocator. 200 */ 201 void 202 vm_phys_init(void) 203 { 204 struct vm_freelist *fl; 205 int flind, i, oind, pind; 206 207 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 208 #ifdef VM_FREELIST_ISADMA 209 if (phys_avail[i] < 16777216) { 210 if (phys_avail[i + 1] > 16777216) { 211 vm_phys_create_seg(phys_avail[i], 16777216, 212 VM_FREELIST_ISADMA); 213 vm_phys_create_seg(16777216, phys_avail[i + 1], 214 VM_FREELIST_DEFAULT); 215 } else { 216 vm_phys_create_seg(phys_avail[i], 217 phys_avail[i + 1], VM_FREELIST_ISADMA); 218 } 219 if (VM_FREELIST_ISADMA >= vm_nfreelists) 220 vm_nfreelists = VM_FREELIST_ISADMA + 1; 221 } else 222 #endif 223 #ifdef VM_FREELIST_HIGHMEM 224 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) { 225 if (phys_avail[i] < VM_HIGHMEM_ADDRESS) { 226 vm_phys_create_seg(phys_avail[i], 227 VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT); 228 vm_phys_create_seg(VM_HIGHMEM_ADDRESS, 229 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 230 } else { 231 vm_phys_create_seg(phys_avail[i], 232 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 233 } 234 if (VM_FREELIST_HIGHMEM >= vm_nfreelists) 235 vm_nfreelists = VM_FREELIST_HIGHMEM + 1; 236 } else 237 #endif 238 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1], 239 VM_FREELIST_DEFAULT); 240 } 241 for (flind = 0; flind < vm_nfreelists; flind++) { 242 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 243 fl = vm_phys_free_queues[flind][pind]; 244 for (oind = 0; oind < VM_NFREEORDER; oind++) 245 TAILQ_INIT(&fl[oind].pl); 246 } 247 } 248 } 249 250 /* 251 * Split a contiguous, power of two-sized set of physical pages. 252 */ 253 static __inline void 254 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) 255 { 256 vm_page_t m_buddy; 257 258 while (oind > order) { 259 oind--; 260 m_buddy = &m[1 << oind]; 261 KASSERT(m_buddy->order == VM_NFREEORDER, 262 ("vm_phys_split_pages: page %p has unexpected order %d", 263 m_buddy, m_buddy->order)); 264 m_buddy->order = oind; 265 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq); 266 fl[oind].lcnt++; 267 } 268 } 269 270 /* 271 * Initialize a physical page and add it to the free lists. 272 */ 273 void 274 vm_phys_add_page(vm_paddr_t pa) 275 { 276 vm_page_t m; 277 278 cnt.v_page_count++; 279 m = vm_phys_paddr_to_vm_page(pa); 280 m->phys_addr = pa; 281 m->segind = vm_phys_paddr_to_segind(pa); 282 m->flags = PG_FREE; 283 KASSERT(m->order == VM_NFREEORDER, 284 ("vm_phys_add_page: page %p has unexpected order %d", 285 m, m->order)); 286 m->pool = VM_FREEPOOL_DEFAULT; 287 pmap_page_init(m); 288 mtx_lock(&vm_page_queue_free_mtx); 289 cnt.v_free_count++; 290 vm_phys_free_pages(m, 0); 291 mtx_unlock(&vm_page_queue_free_mtx); 292 } 293 294 /* 295 * Allocate a contiguous, power of two-sized set of physical pages 296 * from the free lists. 297 * 298 * The free page queues must be locked. 299 */ 300 vm_page_t 301 vm_phys_alloc_pages(int pool, int order) 302 { 303 struct vm_freelist *fl; 304 struct vm_freelist *alt; 305 int flind, oind, pind; 306 vm_page_t m; 307 308 KASSERT(pool < VM_NFREEPOOL, 309 ("vm_phys_alloc_pages: pool %d is out of range", pool)); 310 KASSERT(order < VM_NFREEORDER, 311 ("vm_phys_alloc_pages: order %d is out of range", order)); 312 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 313 for (flind = 0; flind < vm_nfreelists; flind++) { 314 fl = vm_phys_free_queues[flind][pool]; 315 for (oind = order; oind < VM_NFREEORDER; oind++) { 316 m = TAILQ_FIRST(&fl[oind].pl); 317 if (m != NULL) { 318 TAILQ_REMOVE(&fl[oind].pl, m, pageq); 319 fl[oind].lcnt--; 320 m->order = VM_NFREEORDER; 321 vm_phys_split_pages(m, oind, fl, order); 322 return (m); 323 } 324 } 325 326 /* 327 * The given pool was empty. Find the largest 328 * contiguous, power-of-two-sized set of pages in any 329 * pool. Transfer these pages to the given pool, and 330 * use them to satisfy the allocation. 331 */ 332 for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 333 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 334 alt = vm_phys_free_queues[flind][pind]; 335 m = TAILQ_FIRST(&alt[oind].pl); 336 if (m != NULL) { 337 TAILQ_REMOVE(&alt[oind].pl, m, pageq); 338 alt[oind].lcnt--; 339 m->order = VM_NFREEORDER; 340 vm_phys_set_pool(pool, m, oind); 341 vm_phys_split_pages(m, oind, fl, order); 342 return (m); 343 } 344 } 345 } 346 } 347 return (NULL); 348 } 349 350 /* 351 * Allocate physical memory from phys_avail[]. 352 */ 353 vm_paddr_t 354 vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment) 355 { 356 vm_paddr_t pa; 357 int i; 358 359 size = round_page(size); 360 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 361 if (phys_avail[i + 1] - phys_avail[i] < size) 362 continue; 363 pa = phys_avail[i]; 364 phys_avail[i] += size; 365 return (pa); 366 } 367 panic("vm_phys_bootstrap_alloc"); 368 } 369 370 /* 371 * Find the vm_page corresponding to the given physical address. 372 */ 373 vm_page_t 374 vm_phys_paddr_to_vm_page(vm_paddr_t pa) 375 { 376 struct vm_phys_seg *seg; 377 int segind; 378 379 for (segind = 0; segind < vm_phys_nsegs; segind++) { 380 seg = &vm_phys_segs[segind]; 381 if (pa >= seg->start && pa < seg->end) 382 return (&seg->first_page[atop(pa - seg->start)]); 383 } 384 panic("vm_phys_paddr_to_vm_page: paddr %#jx is not in any segment", 385 (uintmax_t)pa); 386 } 387 388 /* 389 * Find the segment containing the given physical address. 390 */ 391 static int 392 vm_phys_paddr_to_segind(vm_paddr_t pa) 393 { 394 struct vm_phys_seg *seg; 395 int segind; 396 397 for (segind = 0; segind < vm_phys_nsegs; segind++) { 398 seg = &vm_phys_segs[segind]; 399 if (pa >= seg->start && pa < seg->end) 400 return (segind); 401 } 402 panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , 403 (uintmax_t)pa); 404 } 405 406 /* 407 * Free a contiguous, power of two-sized set of physical pages. 408 * 409 * The free page queues must be locked. 410 */ 411 void 412 vm_phys_free_pages(vm_page_t m, int order) 413 { 414 struct vm_freelist *fl; 415 struct vm_phys_seg *seg; 416 vm_paddr_t pa, pa_buddy; 417 vm_page_t m_buddy; 418 419 KASSERT(m->order == VM_NFREEORDER, 420 ("vm_phys_free_pages: page %p has unexpected order %d", 421 m, m->order)); 422 KASSERT(m->pool < VM_NFREEPOOL, 423 ("vm_phys_free_pages: page %p has unexpected pool %d", 424 m, m->pool)); 425 KASSERT(order < VM_NFREEORDER, 426 ("vm_phys_free_pages: order %d is out of range", order)); 427 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 428 pa = VM_PAGE_TO_PHYS(m); 429 seg = &vm_phys_segs[m->segind]; 430 while (order < VM_NFREEORDER - 1) { 431 pa_buddy = pa ^ (1 << (PAGE_SHIFT + order)); 432 if (pa_buddy < seg->start || 433 pa_buddy >= seg->end) 434 break; 435 m_buddy = &seg->first_page[atop(pa_buddy - seg->start)]; 436 if (m_buddy->order != order) 437 break; 438 fl = (*seg->free_queues)[m_buddy->pool]; 439 TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq); 440 fl[m_buddy->order].lcnt--; 441 m_buddy->order = VM_NFREEORDER; 442 if (m_buddy->pool != m->pool) 443 vm_phys_set_pool(m->pool, m_buddy, order); 444 order++; 445 pa &= ~((1 << (PAGE_SHIFT + order)) - 1); 446 m = &seg->first_page[atop(pa - seg->start)]; 447 } 448 m->order = order; 449 fl = (*seg->free_queues)[m->pool]; 450 TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq); 451 fl[order].lcnt++; 452 } 453 454 /* 455 * Set the pool for a contiguous, power of two-sized set of physical pages. 456 */ 457 void 458 vm_phys_set_pool(int pool, vm_page_t m, int order) 459 { 460 vm_page_t m_tmp; 461 462 for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 463 m_tmp->pool = pool; 464 } 465 466 /* 467 * Search for the given physical page "m" in the free lists. If the search 468 * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 469 * FALSE, indicating that "m" is not in the free lists. 470 * 471 * The free page queues must be locked. 472 */ 473 boolean_t 474 vm_phys_unfree_page(vm_page_t m) 475 { 476 struct vm_freelist *fl; 477 struct vm_phys_seg *seg; 478 vm_paddr_t pa, pa_half; 479 vm_page_t m_set, m_tmp; 480 int order; 481 482 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 483 484 /* 485 * First, find the contiguous, power of two-sized set of free 486 * physical pages containing the given physical page "m" and 487 * assign it to "m_set". 488 */ 489 seg = &vm_phys_segs[m->segind]; 490 for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 491 order < VM_NFREEORDER - 1; ) { 492 order++; 493 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 494 if (pa >= seg->start && pa < seg->end) 495 m_set = &seg->first_page[atop(pa - seg->start)]; 496 else 497 return (FALSE); 498 } 499 if (m_set->order < order) 500 return (FALSE); 501 if (m_set->order == VM_NFREEORDER) 502 return (FALSE); 503 KASSERT(m_set->order < VM_NFREEORDER, 504 ("vm_phys_unfree_page: page %p has unexpected order %d", 505 m_set, m_set->order)); 506 507 /* 508 * Next, remove "m_set" from the free lists. Finally, extract 509 * "m" from "m_set" using an iterative algorithm: While "m_set" 510 * is larger than a page, shrink "m_set" by returning the half 511 * of "m_set" that does not contain "m" to the free lists. 512 */ 513 fl = (*seg->free_queues)[m_set->pool]; 514 order = m_set->order; 515 TAILQ_REMOVE(&fl[order].pl, m_set, pageq); 516 fl[order].lcnt--; 517 m_set->order = VM_NFREEORDER; 518 while (order > 0) { 519 order--; 520 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 521 if (m->phys_addr < pa_half) 522 m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 523 else { 524 m_tmp = m_set; 525 m_set = &seg->first_page[atop(pa_half - seg->start)]; 526 } 527 m_tmp->order = order; 528 TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq); 529 fl[order].lcnt++; 530 } 531 KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 532 return (TRUE); 533 } 534 535 /* 536 * Try to zero one physical page. Used by an idle priority thread. 537 */ 538 boolean_t 539 vm_phys_zero_pages_idle(void) 540 { 541 static struct vm_freelist *fl = vm_phys_free_queues[0][0]; 542 static int flind, oind, pind; 543 vm_page_t m, m_tmp; 544 545 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 546 for (;;) { 547 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) { 548 for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) { 549 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) { 550 vm_phys_unfree_page(m_tmp); 551 cnt.v_free_count--; 552 mtx_unlock(&vm_page_queue_free_mtx); 553 pmap_zero_page_idle(m_tmp); 554 m_tmp->flags |= PG_ZERO; 555 mtx_lock(&vm_page_queue_free_mtx); 556 cnt.v_free_count++; 557 vm_phys_free_pages(m_tmp, 0); 558 vm_page_zero_count++; 559 cnt_prezero++; 560 return (TRUE); 561 } 562 } 563 } 564 oind++; 565 if (oind == VM_NFREEORDER) { 566 oind = 0; 567 pind++; 568 if (pind == VM_NFREEPOOL) { 569 pind = 0; 570 flind++; 571 if (flind == vm_nfreelists) 572 flind = 0; 573 } 574 fl = vm_phys_free_queues[flind][pind]; 575 } 576 } 577 } 578 579 /* 580 * Allocate a contiguous set of physical pages of the given size 581 * "npages" from the free lists. All of the physical pages must be at 582 * or above the given physical address "low" and below the given 583 * physical address "high". The given value "alignment" determines the 584 * alignment of the first physical page in the set. If the given value 585 * "boundary" is non-zero, then the set of physical pages cannot cross 586 * any physical address boundary that is a multiple of that value. Both 587 * "alignment" and "boundary" must be a power of two. 588 */ 589 vm_page_t 590 vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high, 591 unsigned long alignment, unsigned long boundary) 592 { 593 struct vm_freelist *fl; 594 struct vm_phys_seg *seg; 595 vm_object_t m_object; 596 vm_paddr_t pa, pa_last, size; 597 vm_page_t m, m_ret; 598 int flind, i, oind, order, pind; 599 600 size = npages << PAGE_SHIFT; 601 KASSERT(size != 0, 602 ("vm_phys_alloc_contig: size must not be 0")); 603 KASSERT((alignment & (alignment - 1)) == 0, 604 ("vm_phys_alloc_contig: alignment must be a power of 2")); 605 KASSERT((boundary & (boundary - 1)) == 0, 606 ("vm_phys_alloc_contig: boundary must be a power of 2")); 607 /* Compute the queue that is the best fit for npages. */ 608 for (order = 0; (1 << order) < npages; order++); 609 mtx_lock(&vm_page_queue_free_mtx); 610 for (flind = 0; flind < vm_nfreelists; flind++) { 611 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { 612 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 613 fl = vm_phys_free_queues[flind][pind]; 614 TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) { 615 /* 616 * A free list may contain physical pages 617 * from one or more segments. 618 */ 619 seg = &vm_phys_segs[m_ret->segind]; 620 if (seg->start > high || 621 low >= seg->end) 622 continue; 623 624 /* 625 * Is the size of this allocation request 626 * larger than the largest block size? 627 */ 628 if (order >= VM_NFREEORDER) { 629 /* 630 * Determine if a sufficient number 631 * of subsequent blocks to satisfy 632 * the allocation request are free. 633 */ 634 pa = VM_PAGE_TO_PHYS(m_ret); 635 pa_last = pa + size; 636 for (;;) { 637 pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); 638 if (pa >= pa_last) 639 break; 640 if (pa < seg->start || 641 pa >= seg->end) 642 break; 643 m = &seg->first_page[atop(pa - seg->start)]; 644 if (m->order != VM_NFREEORDER - 1) 645 break; 646 } 647 /* If not, continue to the next block. */ 648 if (pa < pa_last) 649 continue; 650 } 651 652 /* 653 * Determine if the blocks are within the given range, 654 * satisfy the given alignment, and do not cross the 655 * given boundary. 656 */ 657 pa = VM_PAGE_TO_PHYS(m_ret); 658 if (pa >= low && 659 pa + size <= high && 660 (pa & (alignment - 1)) == 0 && 661 ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) 662 goto done; 663 } 664 } 665 } 666 } 667 mtx_unlock(&vm_page_queue_free_mtx); 668 return (NULL); 669 done: 670 for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 671 fl = (*seg->free_queues)[m->pool]; 672 TAILQ_REMOVE(&fl[m->order].pl, m, pageq); 673 fl[m->order].lcnt--; 674 m->order = VM_NFREEORDER; 675 } 676 if (m_ret->pool != VM_FREEPOOL_DEFAULT) 677 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); 678 fl = (*seg->free_queues)[m_ret->pool]; 679 vm_phys_split_pages(m_ret, oind, fl, order); 680 for (i = 0; i < npages; i++) { 681 m = &m_ret[i]; 682 KASSERT(m->queue == PQ_NONE, 683 ("vm_phys_alloc_contig: page %p has unexpected queue %d", 684 m, m->queue)); 685 m_object = m->object; 686 if ((m->flags & PG_CACHED) != 0) 687 vm_page_cache_remove(m); 688 else { 689 KASSERT(VM_PAGE_IS_FREE(m), 690 ("vm_phys_alloc_contig: page %p is not free", m)); 691 cnt.v_free_count--; 692 } 693 m->valid = VM_PAGE_BITS_ALL; 694 if (m->flags & PG_ZERO) 695 vm_page_zero_count--; 696 /* Don't clear the PG_ZERO flag; we'll need it later. */ 697 m->flags = PG_UNMANAGED | (m->flags & PG_ZERO); 698 m->oflags = 0; 699 KASSERT(m->dirty == 0, 700 ("vm_phys_alloc_contig: page %p was dirty", m)); 701 m->wire_count = 0; 702 m->busy = 0; 703 if (m_object != NULL && 704 m_object->type == OBJT_VNODE && 705 m_object->cache == NULL) { 706 mtx_unlock(&vm_page_queue_free_mtx); 707 vdrop(m_object->handle); 708 mtx_lock(&vm_page_queue_free_mtx); 709 } 710 } 711 for (; i < roundup2(npages, 1 << imin(oind, order)); i++) { 712 m = &m_ret[i]; 713 KASSERT(m->order == VM_NFREEORDER, 714 ("vm_phys_alloc_contig: page %p has unexpected order %d", 715 m, m->order)); 716 vm_phys_free_pages(m, 0); 717 } 718 mtx_unlock(&vm_page_queue_free_mtx); 719 return (m_ret); 720 } 721 722 #ifdef DDB 723 /* 724 * Show the number of physical pages in each of the free lists. 725 */ 726 DB_SHOW_COMMAND(freepages, db_show_freepages) 727 { 728 struct vm_freelist *fl; 729 int flind, oind, pind; 730 731 for (flind = 0; flind < vm_nfreelists; flind++) { 732 db_printf("FREE LIST %d:\n" 733 "\n ORDER (SIZE) | NUMBER" 734 "\n ", flind); 735 for (pind = 0; pind < VM_NFREEPOOL; pind++) 736 db_printf(" | POOL %d", pind); 737 db_printf("\n-- "); 738 for (pind = 0; pind < VM_NFREEPOOL; pind++) 739 db_printf("-- -- "); 740 db_printf("--\n"); 741 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 742 db_printf(" %2.2d (%6.6dK)", oind, 743 1 << (PAGE_SHIFT - 10 + oind)); 744 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 745 fl = vm_phys_free_queues[flind][pind]; 746 db_printf(" | %6.6d", fl[oind].lcnt); 747 } 748 db_printf("\n"); 749 } 750 db_printf("\n"); 751 } 752 } 753 #endif 754