/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_reserv.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
 */
#define	VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)
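
/*
 * For example, with VM_NDOMAIN == 2, domain 0's default list stays at
 * index VM_FREELIST_DEFAULT while domain 1's copy is stored at index
 * VM_NFREELIST, so VM_RAW_NFREELIST evaluates to VM_NFREELIST + 1.
 */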

struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t start;
	vm_paddr_t end;
	vm_page_t first_page;
	int domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
(*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "  |  POOL %d", pind);
		sbuf_printf(&sbuf, "\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- --      ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, "  |  %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
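
/*
 * The table built above is exported as the string-valued sysctl
 * "vm.phys_free"; each row corresponds to one buddy order and each
 * column to one free pool within that free list.
 */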

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}
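
/*
 * Illustrative example: if mem_affinity[] assigns [0, 1 GB) to domain 0
 * and [1 GB, 4 GB) to domain 1, then a phys_avail range of
 * [768 MB, 1.25 GB) is split by vm_phys_create_seg() into a domain 0
 * segment ending at 1 GB and a domain 1 segment starting there, and
 * _vm_phys_create_seg() redirects the domain 1 piece of the default
 * free list to index VM_NFREELIST.
 */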

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}
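
/*
 * Illustrative example: splitting an order 3 block to satisfy an
 * order 1 request returns the upper order 2 buddy (pages 4-7) and then
 * the order 1 buddy (pages 2-3) to the free lists, leaving pages 0-1
 * for the caller.
 */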

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int flind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_freelist_pages(flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order.
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int domain, oind, pind;
	vm_page_t m;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}
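
/*
 * Illustrative example: for a physical address two pages past the start
 * of its segment, vm_phys_paddr_to_vm_page() returns
 * &seg->first_page[2], since each segment's vm_page structures are laid
 * out contiguously beginning at seg->first_page.
 */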

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
			fl[order].lcnt--;
			m_buddy->order = VM_NFREEORDER;
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}
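
/*
 * Illustrative example: a well-aligned run of 13 pages is freed by
 * vm_phys_free_contig() as an order 3 block (8 pages), an order 2 block
 * (4 pages), and an order 0 block (1 page).
 */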

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}
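
/*
 * Illustrative example: extracting page 5 from an order 3 free block
 * (pages 0-7) returns pages 0-3 as an order 2 block, pages 6-7 as an
 * order 1 block, and page 4 as an order 0 block to the free lists,
 * leaving only page 5 removed.
 */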

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	struct vnode *vp;
	vm_paddr_t pa, pa_last, size;
	vm_page_t deferred_vdrop_list, m, m_ret;
	u_long npages_end;
	int domain, flind, i, oind, order, pind;

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	deferred_vdrop_list = NULL;
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	mtx_lock(&vm_page_queue_free_mtx);
#if VM_NRESERVLEVEL > 0
retry:
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
#if VM_NRESERVLEVEL > 0
	if (vm_reserv_reclaim_contig(size, low, high, alignment, boundary))
		goto retry;
#endif
	mtx_unlock(&vm_page_queue_free_mtx);
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	for (i = 0; i < npages; i++) {
		m = &m_ret[i];
		vp = vm_page_alloc_init(m);
		if (vp != NULL) {
			/*
			 * Enqueue the vnode for deferred vdrop().
			 *
			 * Unmanaged pages don't use "pageq", so it
			 * can be safely abused to construct a short-
			 * lived queue of vnodes.
			 */
			m->pageq.tqe_prev = (void *)vp;
			m->pageq.tqe_next = deferred_vdrop_list;
			deferred_vdrop_list = m;
		}
	}
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	mtx_unlock(&vm_page_queue_free_mtx);
	while (deferred_vdrop_list != NULL) {
		vdrop((struct vnode *)deferred_vdrop_list->pageq.tqe_prev);
		deferred_vdrop_list = deferred_vdrop_list->pageq.tqe_next;
	}
	return (m_ret);
}
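
/*
 * Illustrative example of the boundary test in vm_phys_alloc_contig():
 * assuming 4 KB pages and a 64 KB "boundary", a candidate run starting
 * at pa 0x1f000 with size 0x2000 is rejected because
 * (pa ^ (pa + size - 1)) & ~(boundary - 1) is non-zero; the run would
 * span the 64 KB-aligned address 0x20000.
 */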

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n  ORDER (SIZE)  |  NUMBER"
		    "\n              ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("  |  POOL %d", pind);
		db_printf("\n--            ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- --      ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf("  %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf("  |  %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif