1 /*- 2 * Copyright (c) 2002-2006 Rice University 3 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu> 4 * All rights reserved. 5 * 6 * This software was developed for the FreeBSD Project by Alan L. Cox, 7 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 25 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 28 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * One free queue: the pages queued at a single (free list, pool, order)
 * position together with a running count of the queue's entries.
 */
struct vm_freelist {
	struct pglist pl;	/* queue of free page sets */
	int lcnt;		/* adjusted on every insert into/removal from "pl" */
};

/*
 * A physical memory segment.  A physical address "pa" belongs to the
 * segment when start <= pa < end.
 */
struct vm_phys_seg {
	vm_paddr_t start;	/* first physical address in the segment */
	vm_paddr_t end;		/* first physical address past the segment */
	vm_page_t first_page;	/* vm_page describing "start" */
	/* The [pool][order] queues of the free list assigned to the segment. */
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

/* Segment table; entries are appended by vm_phys_create_seg(). */
static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

/* The number of entries of vm_phys_segs[] that are in use. */
static int vm_phys_nsegs;

/* The free queues, indexed by [free list][pool][order]. */
static struct vm_freelist
    vm_phys_free_queues[VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

/* The number of free lists in use; vm_phys_init() may raise it. */
static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

/* Incremented by vm_phys_zero_pages_idle() for each page it zeroes. */
static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

/* Forward declarations of the file-local helpers. */
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
100 */ 101 static int 102 sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS) 103 { 104 struct sbuf sbuf; 105 struct vm_freelist *fl; 106 char *cbuf; 107 const int cbufsize = vm_nfreelists*(VM_NFREEORDER + 1)*81; 108 int error, flind, oind, pind; 109 110 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 111 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 112 for (flind = 0; flind < vm_nfreelists; flind++) { 113 sbuf_printf(&sbuf, "\nFREE LIST %d:\n" 114 "\n ORDER (SIZE) | NUMBER" 115 "\n ", flind); 116 for (pind = 0; pind < VM_NFREEPOOL; pind++) 117 sbuf_printf(&sbuf, " | POOL %d", pind); 118 sbuf_printf(&sbuf, "\n-- "); 119 for (pind = 0; pind < VM_NFREEPOOL; pind++) 120 sbuf_printf(&sbuf, "-- -- "); 121 sbuf_printf(&sbuf, "--\n"); 122 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 123 sbuf_printf(&sbuf, " %2.2d (%6.6dK)", oind, 124 1 << (PAGE_SHIFT - 10 + oind)); 125 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 126 fl = vm_phys_free_queues[flind][pind]; 127 sbuf_printf(&sbuf, " | %6.6d", fl[oind].lcnt); 128 } 129 sbuf_printf(&sbuf, "\n"); 130 } 131 } 132 sbuf_finish(&sbuf); 133 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 134 sbuf_delete(&sbuf); 135 free(cbuf, M_TEMP); 136 return (error); 137 } 138 139 /* 140 * Outputs the set of physical memory segments. 
141 */ 142 static int 143 sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS) 144 { 145 struct sbuf sbuf; 146 struct vm_phys_seg *seg; 147 char *cbuf; 148 const int cbufsize = VM_PHYSSEG_MAX*(VM_NFREEORDER + 1)*81; 149 int error, segind; 150 151 cbuf = malloc(cbufsize, M_TEMP, M_WAITOK | M_ZERO); 152 sbuf_new(&sbuf, cbuf, cbufsize, SBUF_FIXEDLEN); 153 for (segind = 0; segind < vm_phys_nsegs; segind++) { 154 sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind); 155 seg = &vm_phys_segs[segind]; 156 sbuf_printf(&sbuf, "start: %#jx\n", 157 (uintmax_t)seg->start); 158 sbuf_printf(&sbuf, "end: %#jx\n", 159 (uintmax_t)seg->end); 160 sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues); 161 } 162 sbuf_finish(&sbuf); 163 error = SYSCTL_OUT(req, sbuf_data(&sbuf), sbuf_len(&sbuf)); 164 sbuf_delete(&sbuf); 165 free(cbuf, M_TEMP); 166 return (error); 167 } 168 169 /* 170 * Create a physical memory segment. 171 */ 172 static void 173 vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind) 174 { 175 struct vm_phys_seg *seg; 176 #ifdef VM_PHYSSEG_SPARSE 177 long pages; 178 int segind; 179 180 pages = 0; 181 for (segind = 0; segind < vm_phys_nsegs; segind++) { 182 seg = &vm_phys_segs[segind]; 183 pages += atop(seg->end - seg->start); 184 } 185 #endif 186 KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX, 187 ("vm_phys_create_seg: increase VM_PHYSSEG_MAX")); 188 seg = &vm_phys_segs[vm_phys_nsegs++]; 189 seg->start = start; 190 seg->end = end; 191 #ifdef VM_PHYSSEG_SPARSE 192 seg->first_page = &vm_page_array[pages]; 193 #else 194 seg->first_page = PHYS_TO_VM_PAGE(start); 195 #endif 196 seg->free_queues = &vm_phys_free_queues[flind]; 197 } 198 199 /* 200 * Initialize the physical memory allocator. 
201 */ 202 void 203 vm_phys_init(void) 204 { 205 struct vm_freelist *fl; 206 int flind, i, oind, pind; 207 208 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 209 #ifdef VM_FREELIST_ISADMA 210 if (phys_avail[i] < 16777216) { 211 if (phys_avail[i + 1] > 16777216) { 212 vm_phys_create_seg(phys_avail[i], 16777216, 213 VM_FREELIST_ISADMA); 214 vm_phys_create_seg(16777216, phys_avail[i + 1], 215 VM_FREELIST_DEFAULT); 216 } else { 217 vm_phys_create_seg(phys_avail[i], 218 phys_avail[i + 1], VM_FREELIST_ISADMA); 219 } 220 if (VM_FREELIST_ISADMA >= vm_nfreelists) 221 vm_nfreelists = VM_FREELIST_ISADMA + 1; 222 } else 223 #endif 224 #ifdef VM_FREELIST_HIGHMEM 225 if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) { 226 if (phys_avail[i] < VM_HIGHMEM_ADDRESS) { 227 vm_phys_create_seg(phys_avail[i], 228 VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT); 229 vm_phys_create_seg(VM_HIGHMEM_ADDRESS, 230 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 231 } else { 232 vm_phys_create_seg(phys_avail[i], 233 phys_avail[i + 1], VM_FREELIST_HIGHMEM); 234 } 235 if (VM_FREELIST_HIGHMEM >= vm_nfreelists) 236 vm_nfreelists = VM_FREELIST_HIGHMEM + 1; 237 } else 238 #endif 239 vm_phys_create_seg(phys_avail[i], phys_avail[i + 1], 240 VM_FREELIST_DEFAULT); 241 } 242 for (flind = 0; flind < vm_nfreelists; flind++) { 243 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 244 fl = vm_phys_free_queues[flind][pind]; 245 for (oind = 0; oind < VM_NFREEORDER; oind++) 246 TAILQ_INIT(&fl[oind].pl); 247 } 248 } 249 } 250 251 /* 252 * Split a contiguous, power of two-sized set of physical pages. 
253 */ 254 static __inline void 255 vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) 256 { 257 vm_page_t m_buddy; 258 259 while (oind > order) { 260 oind--; 261 m_buddy = &m[1 << oind]; 262 KASSERT(m_buddy->order == VM_NFREEORDER, 263 ("vm_phys_split_pages: page %p has unexpected order %d", 264 m_buddy, m_buddy->order)); 265 m_buddy->order = oind; 266 TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq); 267 fl[oind].lcnt++; 268 } 269 } 270 271 /* 272 * Initialize a physical page and add it to the free lists. 273 */ 274 void 275 vm_phys_add_page(vm_paddr_t pa) 276 { 277 vm_page_t m; 278 279 cnt.v_page_count++; 280 m = vm_phys_paddr_to_vm_page(pa); 281 m->phys_addr = pa; 282 m->segind = vm_phys_paddr_to_segind(pa); 283 m->flags = PG_FREE; 284 KASSERT(m->order == VM_NFREEORDER, 285 ("vm_phys_add_page: page %p has unexpected order %d", 286 m, m->order)); 287 m->pool = VM_FREEPOOL_DEFAULT; 288 pmap_page_init(m); 289 mtx_lock(&vm_page_queue_free_mtx); 290 cnt.v_free_count++; 291 vm_phys_free_pages(m, 0); 292 mtx_unlock(&vm_page_queue_free_mtx); 293 } 294 295 /* 296 * Allocate a contiguous, power of two-sized set of physical pages 297 * from the free lists. 298 * 299 * The free page queues must be locked. 
300 */ 301 vm_page_t 302 vm_phys_alloc_pages(int pool, int order) 303 { 304 vm_page_t m; 305 int flind; 306 307 for (flind = 0; flind < vm_nfreelists; flind++) { 308 m = vm_phys_alloc_freelist_pages(flind, pool, order); 309 if (m != NULL) 310 return (m); 311 } 312 return (NULL); 313 } 314 315 /* 316 * Find and dequeue a free page on the given free list, with the 317 * specified pool and order 318 */ 319 vm_page_t 320 vm_phys_alloc_freelist_pages(int flind, int pool, int order) 321 { 322 struct vm_freelist *fl; 323 struct vm_freelist *alt; 324 int oind, pind; 325 vm_page_t m; 326 327 KASSERT(flind < VM_NFREELIST, 328 ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind)); 329 KASSERT(pool < VM_NFREEPOOL, 330 ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool)); 331 KASSERT(order < VM_NFREEORDER, 332 ("vm_phys_alloc_freelist_pages: order %d is out of range", order)); 333 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 334 fl = vm_phys_free_queues[flind][pool]; 335 for (oind = order; oind < VM_NFREEORDER; oind++) { 336 m = TAILQ_FIRST(&fl[oind].pl); 337 if (m != NULL) { 338 TAILQ_REMOVE(&fl[oind].pl, m, pageq); 339 fl[oind].lcnt--; 340 m->order = VM_NFREEORDER; 341 vm_phys_split_pages(m, oind, fl, order); 342 return (m); 343 } 344 } 345 346 /* 347 * The given pool was empty. Find the largest 348 * contiguous, power-of-two-sized set of pages in any 349 * pool. Transfer these pages to the given pool, and 350 * use them to satisfy the allocation. 
351 */ 352 for (oind = VM_NFREEORDER - 1; oind >= order; oind--) { 353 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 354 alt = vm_phys_free_queues[flind][pind]; 355 m = TAILQ_FIRST(&alt[oind].pl); 356 if (m != NULL) { 357 TAILQ_REMOVE(&alt[oind].pl, m, pageq); 358 alt[oind].lcnt--; 359 m->order = VM_NFREEORDER; 360 vm_phys_set_pool(pool, m, oind); 361 vm_phys_split_pages(m, oind, fl, order); 362 return (m); 363 } 364 } 365 } 366 return (NULL); 367 } 368 369 /* 370 * Allocate physical memory from phys_avail[]. 371 */ 372 vm_paddr_t 373 vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment) 374 { 375 vm_paddr_t pa; 376 int i; 377 378 size = round_page(size); 379 for (i = 0; phys_avail[i + 1] != 0; i += 2) { 380 if (phys_avail[i + 1] - phys_avail[i] < size) 381 continue; 382 pa = phys_avail[i]; 383 phys_avail[i] += size; 384 return (pa); 385 } 386 panic("vm_phys_bootstrap_alloc"); 387 } 388 389 /* 390 * Find the vm_page corresponding to the given physical address. 391 */ 392 vm_page_t 393 vm_phys_paddr_to_vm_page(vm_paddr_t pa) 394 { 395 struct vm_phys_seg *seg; 396 int segind; 397 398 for (segind = 0; segind < vm_phys_nsegs; segind++) { 399 seg = &vm_phys_segs[segind]; 400 if (pa >= seg->start && pa < seg->end) 401 return (&seg->first_page[atop(pa - seg->start)]); 402 } 403 return (NULL); 404 } 405 406 /* 407 * Find the segment containing the given physical address. 408 */ 409 static int 410 vm_phys_paddr_to_segind(vm_paddr_t pa) 411 { 412 struct vm_phys_seg *seg; 413 int segind; 414 415 for (segind = 0; segind < vm_phys_nsegs; segind++) { 416 seg = &vm_phys_segs[segind]; 417 if (pa >= seg->start && pa < seg->end) 418 return (segind); 419 } 420 panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , 421 (uintmax_t)pa); 422 } 423 424 /* 425 * Free a contiguous, power of two-sized set of physical pages. 426 * 427 * The free page queues must be locked. 
428 */ 429 void 430 vm_phys_free_pages(vm_page_t m, int order) 431 { 432 struct vm_freelist *fl; 433 struct vm_phys_seg *seg; 434 vm_paddr_t pa, pa_buddy; 435 vm_page_t m_buddy; 436 437 KASSERT(m->order == VM_NFREEORDER, 438 ("vm_phys_free_pages: page %p has unexpected order %d", 439 m, m->order)); 440 KASSERT(m->pool < VM_NFREEPOOL, 441 ("vm_phys_free_pages: page %p has unexpected pool %d", 442 m, m->pool)); 443 KASSERT(order < VM_NFREEORDER, 444 ("vm_phys_free_pages: order %d is out of range", order)); 445 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 446 pa = VM_PAGE_TO_PHYS(m); 447 seg = &vm_phys_segs[m->segind]; 448 while (order < VM_NFREEORDER - 1) { 449 pa_buddy = pa ^ (1 << (PAGE_SHIFT + order)); 450 if (pa_buddy < seg->start || 451 pa_buddy >= seg->end) 452 break; 453 m_buddy = &seg->first_page[atop(pa_buddy - seg->start)]; 454 if (m_buddy->order != order) 455 break; 456 fl = (*seg->free_queues)[m_buddy->pool]; 457 TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq); 458 fl[m_buddy->order].lcnt--; 459 m_buddy->order = VM_NFREEORDER; 460 if (m_buddy->pool != m->pool) 461 vm_phys_set_pool(m->pool, m_buddy, order); 462 order++; 463 pa &= ~((1 << (PAGE_SHIFT + order)) - 1); 464 m = &seg->first_page[atop(pa - seg->start)]; 465 } 466 m->order = order; 467 fl = (*seg->free_queues)[m->pool]; 468 TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq); 469 fl[order].lcnt++; 470 } 471 472 /* 473 * Set the pool for a contiguous, power of two-sized set of physical pages. 474 */ 475 void 476 vm_phys_set_pool(int pool, vm_page_t m, int order) 477 { 478 vm_page_t m_tmp; 479 480 for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++) 481 m_tmp->pool = pool; 482 } 483 484 /* 485 * Search for the given physical page "m" in the free lists. If the search 486 * succeeds, remove "m" from the free lists and return TRUE. Otherwise, return 487 * FALSE, indicating that "m" is not in the free lists. 488 * 489 * The free page queues must be locked. 
490 */ 491 boolean_t 492 vm_phys_unfree_page(vm_page_t m) 493 { 494 struct vm_freelist *fl; 495 struct vm_phys_seg *seg; 496 vm_paddr_t pa, pa_half; 497 vm_page_t m_set, m_tmp; 498 int order; 499 500 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 501 502 /* 503 * First, find the contiguous, power of two-sized set of free 504 * physical pages containing the given physical page "m" and 505 * assign it to "m_set". 506 */ 507 seg = &vm_phys_segs[m->segind]; 508 for (m_set = m, order = 0; m_set->order == VM_NFREEORDER && 509 order < VM_NFREEORDER - 1; ) { 510 order++; 511 pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order)); 512 if (pa >= seg->start) 513 m_set = &seg->first_page[atop(pa - seg->start)]; 514 else 515 return (FALSE); 516 } 517 if (m_set->order < order) 518 return (FALSE); 519 if (m_set->order == VM_NFREEORDER) 520 return (FALSE); 521 KASSERT(m_set->order < VM_NFREEORDER, 522 ("vm_phys_unfree_page: page %p has unexpected order %d", 523 m_set, m_set->order)); 524 525 /* 526 * Next, remove "m_set" from the free lists. Finally, extract 527 * "m" from "m_set" using an iterative algorithm: While "m_set" 528 * is larger than a page, shrink "m_set" by returning the half 529 * of "m_set" that does not contain "m" to the free lists. 530 */ 531 fl = (*seg->free_queues)[m_set->pool]; 532 order = m_set->order; 533 TAILQ_REMOVE(&fl[order].pl, m_set, pageq); 534 fl[order].lcnt--; 535 m_set->order = VM_NFREEORDER; 536 while (order > 0) { 537 order--; 538 pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order)); 539 if (m->phys_addr < pa_half) 540 m_tmp = &seg->first_page[atop(pa_half - seg->start)]; 541 else { 542 m_tmp = m_set; 543 m_set = &seg->first_page[atop(pa_half - seg->start)]; 544 } 545 m_tmp->order = order; 546 TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq); 547 fl[order].lcnt++; 548 } 549 KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency")); 550 return (TRUE); 551 } 552 553 /* 554 * Try to zero one physical page. 
Used by an idle priority thread. 555 */ 556 boolean_t 557 vm_phys_zero_pages_idle(void) 558 { 559 static struct vm_freelist *fl = vm_phys_free_queues[0][0]; 560 static int flind, oind, pind; 561 vm_page_t m, m_tmp; 562 563 mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); 564 for (;;) { 565 TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) { 566 for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) { 567 if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) { 568 vm_phys_unfree_page(m_tmp); 569 cnt.v_free_count--; 570 mtx_unlock(&vm_page_queue_free_mtx); 571 pmap_zero_page_idle(m_tmp); 572 m_tmp->flags |= PG_ZERO; 573 mtx_lock(&vm_page_queue_free_mtx); 574 cnt.v_free_count++; 575 vm_phys_free_pages(m_tmp, 0); 576 vm_page_zero_count++; 577 cnt_prezero++; 578 return (TRUE); 579 } 580 } 581 } 582 oind++; 583 if (oind == VM_NFREEORDER) { 584 oind = 0; 585 pind++; 586 if (pind == VM_NFREEPOOL) { 587 pind = 0; 588 flind++; 589 if (flind == vm_nfreelists) 590 flind = 0; 591 } 592 fl = vm_phys_free_queues[flind][pind]; 593 } 594 } 595 } 596 597 /* 598 * Allocate a contiguous set of physical pages of the given size 599 * "npages" from the free lists. All of the physical pages must be at 600 * or above the given physical address "low" and below the given 601 * physical address "high". The given value "alignment" determines the 602 * alignment of the first physical page in the set. If the given value 603 * "boundary" is non-zero, then the set of physical pages cannot cross 604 * any physical address boundary that is a multiple of that value. Both 605 * "alignment" and "boundary" must be a power of two. 
606 */ 607 vm_page_t 608 vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high, 609 unsigned long alignment, unsigned long boundary) 610 { 611 struct vm_freelist *fl; 612 struct vm_phys_seg *seg; 613 struct vnode *vp; 614 vm_paddr_t pa, pa_last, size; 615 vm_page_t deferred_vdrop_list, m, m_ret; 616 int flind, i, oind, order, pind; 617 618 size = npages << PAGE_SHIFT; 619 KASSERT(size != 0, 620 ("vm_phys_alloc_contig: size must not be 0")); 621 KASSERT((alignment & (alignment - 1)) == 0, 622 ("vm_phys_alloc_contig: alignment must be a power of 2")); 623 KASSERT((boundary & (boundary - 1)) == 0, 624 ("vm_phys_alloc_contig: boundary must be a power of 2")); 625 deferred_vdrop_list = NULL; 626 /* Compute the queue that is the best fit for npages. */ 627 for (order = 0; (1 << order) < npages; order++); 628 mtx_lock(&vm_page_queue_free_mtx); 629 #if VM_NRESERVLEVEL > 0 630 retry: 631 #endif 632 for (flind = 0; flind < vm_nfreelists; flind++) { 633 for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) { 634 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 635 fl = vm_phys_free_queues[flind][pind]; 636 TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) { 637 /* 638 * A free list may contain physical pages 639 * from one or more segments. 640 */ 641 seg = &vm_phys_segs[m_ret->segind]; 642 if (seg->start > high || 643 low >= seg->end) 644 continue; 645 646 /* 647 * Is the size of this allocation request 648 * larger than the largest block size? 649 */ 650 if (order >= VM_NFREEORDER) { 651 /* 652 * Determine if a sufficient number 653 * of subsequent blocks to satisfy 654 * the allocation request are free. 
655 */ 656 pa = VM_PAGE_TO_PHYS(m_ret); 657 pa_last = pa + size; 658 for (;;) { 659 pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1); 660 if (pa >= pa_last) 661 break; 662 if (pa < seg->start || 663 pa >= seg->end) 664 break; 665 m = &seg->first_page[atop(pa - seg->start)]; 666 if (m->order != VM_NFREEORDER - 1) 667 break; 668 } 669 /* If not, continue to the next block. */ 670 if (pa < pa_last) 671 continue; 672 } 673 674 /* 675 * Determine if the blocks are within the given range, 676 * satisfy the given alignment, and do not cross the 677 * given boundary. 678 */ 679 pa = VM_PAGE_TO_PHYS(m_ret); 680 if (pa >= low && 681 pa + size <= high && 682 (pa & (alignment - 1)) == 0 && 683 ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0) 684 goto done; 685 } 686 } 687 } 688 } 689 #if VM_NRESERVLEVEL > 0 690 if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary)) 691 goto retry; 692 #endif 693 mtx_unlock(&vm_page_queue_free_mtx); 694 return (NULL); 695 done: 696 for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) { 697 fl = (*seg->free_queues)[m->pool]; 698 TAILQ_REMOVE(&fl[m->order].pl, m, pageq); 699 fl[m->order].lcnt--; 700 m->order = VM_NFREEORDER; 701 } 702 if (m_ret->pool != VM_FREEPOOL_DEFAULT) 703 vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind); 704 fl = (*seg->free_queues)[m_ret->pool]; 705 vm_phys_split_pages(m_ret, oind, fl, order); 706 for (i = 0; i < npages; i++) { 707 m = &m_ret[i]; 708 vp = vm_page_alloc_init(m); 709 if (vp != NULL) { 710 /* 711 * Enqueue the vnode for deferred vdrop(). 712 * 713 * Unmanaged pages don't use "pageq", so it 714 * can be safely abused to construct a short- 715 * lived queue of vnodes. 
716 */ 717 m->pageq.tqe_prev = (void *)vp; 718 m->pageq.tqe_next = deferred_vdrop_list; 719 deferred_vdrop_list = m; 720 } 721 } 722 for (; i < roundup2(npages, 1 << imin(oind, order)); i++) { 723 m = &m_ret[i]; 724 KASSERT(m->order == VM_NFREEORDER, 725 ("vm_phys_alloc_contig: page %p has unexpected order %d", 726 m, m->order)); 727 vm_phys_free_pages(m, 0); 728 } 729 mtx_unlock(&vm_page_queue_free_mtx); 730 while (deferred_vdrop_list != NULL) { 731 vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev); 732 deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next; 733 } 734 return (m_ret); 735 } 736 737 #ifdef DDB 738 /* 739 * Show the number of physical pages in each of the free lists. 740 */ 741 DB_SHOW_COMMAND(freepages, db_show_freepages) 742 { 743 struct vm_freelist *fl; 744 int flind, oind, pind; 745 746 for (flind = 0; flind < vm_nfreelists; flind++) { 747 db_printf("FREE LIST %d:\n" 748 "\n ORDER (SIZE) | NUMBER" 749 "\n ", flind); 750 for (pind = 0; pind < VM_NFREEPOOL; pind++) 751 db_printf(" | POOL %d", pind); 752 db_printf("\n-- "); 753 for (pind = 0; pind < VM_NFREEPOOL; pind++) 754 db_printf("-- -- "); 755 db_printf("--\n"); 756 for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) { 757 db_printf(" %2.2d (%6.6dK)", oind, 758 1 << (PAGE_SHIFT - 10 + oind)); 759 for (pind = 0; pind < VM_NFREEPOOL; pind++) { 760 fl = vm_phys_free_queues[flind][pind]; 761 db_printf(" | %6.6d", fl[oind].lcnt); 762 } 763 db_printf("\n"); 764 } 765 db_printf("\n"); 766 } 767 } 768 #endif 769