/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
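 * Domain 0 continues to use the VM_FREELIST_DEFAULT index; domains 1
 * through VM_NDOMAIN - 1 use the indices VM_NFREELIST through
 * VM_RAW_NFREELIST - 1.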
 */
#define	VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
	int		domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
    (*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
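 * Each segment's start and end addresses, domain, and free queues are
 * reported.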
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

/*
 * Initialize the physical memory allocator.
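 * Segments are created from phys_avail[], the free queues are
 * initialized, and the per-domain free list lookup lists are built.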
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}

/*
 * Initialize a physical page and add it to the free lists.
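 * The page is assigned to the default pool and freed to its segment's
 * free queues as an order 0 page.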
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Allocate physical memory from phys_avail[].
 */
vm_paddr_t
vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment)
{
	vm_paddr_t pa;
	int i;

	size = round_page(size);
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		if (phys_avail[i + 1] - phys_avail[i] < size)
			continue;
		pa = phys_avail[i];
		phys_avail[i] += size;
		return (pa);
	}
	panic("vm_phys_bootstrap_alloc");
}

/*
 * Find the vm_page corresponding to the given physical address.
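 * Returns NULL if the address does not fall within any segment.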
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_buddy;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	pa = VM_PAGE_TO_PHYS(m);
	seg = &vm_phys_segs[m->segind];
	while (order < VM_NFREEORDER - 1) {
		pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
		if (pa_buddy < seg->start ||
		    pa_buddy >= seg->end)
			break;
		m_buddy = &seg->first_page[atop(pa_buddy - seg->start)];
		if (m_buddy->order != order)
			break;
		fl = (*seg->free_queues)[m_buddy->pool];
		TAILQ_REMOVE(&fl[m_buddy->order].pl, m_buddy, pageq);
		fl[m_buddy->order].lcnt--;
		m_buddy->order = VM_NFREEORDER;
		if (m_buddy->pool != m->pool)
			vm_phys_set_pool(m->pool, m_buddy, order);
		order++;
		pa &= ~((1 << (PAGE_SHIFT + order)) - 1);
		m = &seg->first_page[atop(pa - seg->start)];
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
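	 * The search tries increasing orders, rounding "m"'s physical
	 * address down to each candidate order's boundary, until the
	 * head of a free block is found or the maximum order is reached.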
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
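 * On success, each of the returned pages has been initialized by
 * vm_page_alloc_init().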
 */
vm_page_t
vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
    unsigned long alignment, unsigned long boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
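			 *
			 * The queued vnodes are vdrop()ed below, after
			 * the free page queue lock has been released.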
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
		m = &m_ret[i];
		KASSERT(m->order == VM_NFREEORDER,
		    ("vm_phys_alloc_contig: page %p has unexpected order %d",
		    m, m->order));
		vm_phys_free_pages(m, 0);
	}
	mtx_unlock(&vm_page_queue_free_mtx);
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif