/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

/*
 * VM_FREELIST_DEFAULT is split into VM_NDOMAIN lists, one for each
 * domain.  These extra lists are stored at the end of the regular
 * free lists starting with VM_NFREELIST.
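 *
 * This provides VM_NDOMAIN - 1 additional free lists beyond VM_NFREELIST;
 * domain 0 continues to use VM_FREELIST_DEFAULT itself.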
 */
#define	VM_RAW_NFREELIST	(VM_NFREELIST + VM_NDOMAIN - 1)

struct vm_freelist {
	struct pglist pl;
	int lcnt;
};

struct vm_phys_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
	int		domain;
	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};

struct mem_affinity *mem_affinity;

static struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];

static int vm_phys_nsegs;

#define	VM_PHYS_FICTITIOUS_NSEGS	8
static struct vm_phys_fictitious_seg {
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
} vm_phys_fictitious_segs[VM_PHYS_FICTITIOUS_NSEGS];
static struct mtx vm_phys_fictitious_reg_mtx;
MALLOC_DEFINE(M_FICT_PAGES, "", "");

static struct vm_freelist
    vm_phys_free_queues[VM_RAW_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];
static struct vm_freelist
    (*vm_phys_lookup_lists[VM_NDOMAIN][VM_RAW_NFREELIST])[VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

#if VM_NDOMAIN > 1
static int sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_lookup_lists, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_lookup_lists, "A", "Phys Lookup Lists");
#endif

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
		    "\n ORDER (SIZE) | NUMBER"
		    "\n ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, " | POOL %d", pind);
		sbuf_printf(&sbuf, "\n-- ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			sbuf_printf(&sbuf, "-- -- ");
		sbuf_printf(&sbuf, "--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			sbuf_printf(&sbuf, " %2d (%6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				sbuf_printf(&sbuf, " | %6d", fl[oind].lcnt);
			}
			sbuf_printf(&sbuf, "\n");
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start: %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end: %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain: %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

#if VM_NDOMAIN > 1
/*
 * Outputs the set of free list lookup lists.
 */
static int
sysctl_vm_phys_lookup_lists(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	int domain, error, flind, ndomains;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (domain = 0; domain < ndomains; domain++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n\n", domain);
		for (flind = 0; flind < vm_nfreelists; flind++)
			sbuf_printf(&sbuf, "  [%d]:\t%p\n", flind,
			    vm_phys_lookup_lists[domain][flind]);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
#endif

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
	int segind;

	pages = 0;
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		pages += atop(seg->end - seg->start);
	}
#endif
	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
#ifdef VM_PHYSSEG_SPARSE
	seg->first_page = &vm_page_array[pages];
#else
	seg->first_page = PHYS_TO_VM_PAGE(start);
#endif
#if VM_NDOMAIN > 1
	if (flind == VM_FREELIST_DEFAULT && domain != 0) {
		flind = VM_NFREELIST + (domain - 1);
		if (flind >= vm_nfreelists)
			vm_nfreelists = flind + 1;
	}
#endif
	seg->free_queues = &vm_phys_free_queues[flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

/*
 * Initialize the physical memory allocator.
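 *
 * Creates the physical memory segments described by phys_avail[] and
 * initializes the free page queues and the free list lookup lists.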
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	int flind, i, oind, pind;
#if VM_NDOMAIN > 1
	int ndomains, j;
#endif

	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
#ifdef	VM_FREELIST_ISADMA
		if (phys_avail[i] < 16777216) {
			if (phys_avail[i + 1] > 16777216) {
				vm_phys_create_seg(phys_avail[i], 16777216,
				    VM_FREELIST_ISADMA);
				vm_phys_create_seg(16777216, phys_avail[i + 1],
				    VM_FREELIST_DEFAULT);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_ISADMA);
			}
			if (VM_FREELIST_ISADMA >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_ISADMA + 1;
		} else
#endif
#ifdef	VM_FREELIST_HIGHMEM
		if (phys_avail[i + 1] > VM_HIGHMEM_ADDRESS) {
			if (phys_avail[i] < VM_HIGHMEM_ADDRESS) {
				vm_phys_create_seg(phys_avail[i],
				    VM_HIGHMEM_ADDRESS, VM_FREELIST_DEFAULT);
				vm_phys_create_seg(VM_HIGHMEM_ADDRESS,
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			} else {
				vm_phys_create_seg(phys_avail[i],
				    phys_avail[i + 1], VM_FREELIST_HIGHMEM);
			}
			if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
				vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
		} else
#endif
		vm_phys_create_seg(phys_avail[i], phys_avail[i + 1],
		    VM_FREELIST_DEFAULT);
	}
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			fl = vm_phys_free_queues[flind][pind];
			for (oind = 0; oind < VM_NFREEORDER; oind++)
				TAILQ_INIT(&fl[oind].pl);
		}
	}
#if VM_NDOMAIN > 1
	/*
	 * Build a free list lookup list for each domain.  All of the
	 * memory domain lists are inserted at the VM_FREELIST_DEFAULT
	 * index in a round-robin order starting with the current
	 * domain.
	 */
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	for (flind = 0; flind < VM_FREELIST_DEFAULT; flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind] =
			    &vm_phys_free_queues[flind];
	for (i = 0; i < ndomains; i++)
		for (j = 0; j < ndomains; j++) {
			flind = (i + j) % ndomains;
			if (flind == 0)
				flind = VM_FREELIST_DEFAULT;
			else
				flind += VM_NFREELIST - 1;
			vm_phys_lookup_lists[i][VM_FREELIST_DEFAULT + j] =
			    &vm_phys_free_queues[flind];
		}
	for (flind = VM_FREELIST_DEFAULT + 1; flind < VM_NFREELIST;
	    flind++)
		for (i = 0; i < ndomains; i++)
			vm_phys_lookup_lists[i][flind + ndomains - 1] =
			    &vm_phys_free_queues[flind];
#else
	for (flind = 0; flind < vm_nfreelists; flind++)
		vm_phys_lookup_lists[0][flind] = &vm_phys_free_queues[flind];
#endif

	mtx_init(&vm_phys_fictitious_reg_mtx, "vmfctr", NULL, MTX_DEF);
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		m_buddy->order = oind;
		TAILQ_INSERT_HEAD(&fl[oind].pl, m_buddy, pageq);
		fl[oind].lcnt++;
	}
}

/*
 * Initialize a physical page and add it to the free lists.
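 *
 * The page is expected not to be in the free lists already, i.e., its
 * "order" field must be VM_NFREEORDER.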
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;

	cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	m->flags = PG_FREE;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	cnt.v_free_count++;
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int domain, flind;

	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	for (flind = 0; flind < vm_nfreelists; flind++) {
		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
#if VM_NDOMAIN > 1
	vm_page_t m;
	int i, ndomains;
#endif
	int domain;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

#if VM_NDOMAIN > 1
	/*
	 * This routine expects to be called with a VM_FREELIST_* constant.
	 * On a system with multiple domains we need to adjust the flind
	 * appropriately.  If it is for VM_FREELIST_DEFAULT we need to
	 * iterate over the per-domain lists.
	 */
	domain = PCPU_GET(domain);
	ndomains = vm_nfreelists - VM_NFREELIST + 1;
	if (flind == VM_FREELIST_DEFAULT) {
		m = NULL;
		for (i = 0; i < ndomains; i++, flind++) {
			m = vm_phys_alloc_domain_pages(domain, flind, pool,
			    order);
			if (m != NULL)
				break;
		}
		return (m);
	} else if (flind > VM_FREELIST_DEFAULT)
		flind += ndomains - 1;
#else
	domain = 0;
#endif
	return (vm_phys_alloc_domain_pages(domain, flind, pool, order));
}

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = (*vm_phys_lookup_lists[domain][flind])[pool];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			TAILQ_REMOVE(&fl[oind].pl, m, pageq);
			fl[oind].lcnt--;
			m->order = VM_NFREEORDER;
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = (*vm_phys_lookup_lists[domain][flind])[pind];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				TAILQ_REMOVE(&alt[oind].pl, m, pageq);
				alt[oind].lcnt--;
				m->order = VM_NFREEORDER;
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t m;
	int segind;

	m = NULL;
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (pa >= seg->start && pa < seg->end) {
			m = &seg->first_page[atop(pa - seg->start)];
			KASSERT((m->flags & PG_FICTITIOUS) != 0,
			    ("%p not fictitious", m));
			break;
		}
	}
	return (m);
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long i, page_count;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
	boolean_t malloced;
#endif

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	if (pi >= first_page && atop(end) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		malloced = FALSE;
	} else
#endif
	{
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
		malloced = TRUE;
#endif
	}
	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&fp[i], start + PAGE_SIZE * i, memattr);
		pmap_page_init(&fp[i]);
		fp[i].oflags &= ~(VPO_BUSY | VPO_UNMANAGED);
	}
	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == 0 && seg->end == 0) {
			seg->start = start;
			seg->end = end;
			seg->first_page = fp;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
			return (0);
		}
	}
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
	if (malloced)
#endif
		free(fp, M_FICT_PAGES);
	return (EBUSY);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	int segind;
#ifdef VM_PHYSSEG_DENSE
	long pi;
#endif

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
#endif

	mtx_lock(&vm_phys_fictitious_reg_mtx);
	for (segind = 0; segind < VM_PHYS_FICTITIOUS_NSEGS; segind++) {
		seg = &vm_phys_fictitious_segs[segind];
		if (seg->start == start && seg->end == end) {
			seg->start = seg->end = 0;
			fp = seg->first_page;
			seg->first_page = NULL;
			mtx_unlock(&vm_phys_fictitious_reg_mtx);
#ifdef VM_PHYSSEG_DENSE
			if (pi < first_page || atop(end) >= vm_page_array_size)
#endif
				free(fp, M_FICT_PAGES);
			return;
		}
	}
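	/*
	 * The specified range was not found among the registered fictitious
	 * segments.
	 */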
	mtx_unlock(&vm_phys_fictitious_reg_mtx);
	KASSERT(0, ("Unregistering not registered fictitious range"));
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			TAILQ_REMOVE(&fl[order].pl, m_buddy, pageq);
			fl[order].lcnt--;
			m_buddy->order = VM_NFREEORDER;
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	m->order = order;
	fl = (*seg->free_queues)[m->pool];
	TAILQ_INSERT_TAIL(&fl[order].pl, m, pageq);
	fl[order].lcnt++;
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
	fl[order].lcnt--;
	m_set->order = VM_NFREEORDER;
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		m_tmp->order = order;
		TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
		fl[order].lcnt++;
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
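 *
 * The free page queues must be locked.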
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl = vm_phys_free_queues[0][0];
	static int flind, oind, pind;
	vm_page_t m, m_tmp;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					cnt.v_free_count--;
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					cnt.v_free_count++;
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_last, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int domain, flind, oind, order, pind;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
#if VM_NDOMAIN > 1
	domain = PCPU_GET(domain);
#else
	domain = 0;
#endif
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = (*vm_phys_lookup_lists[domain][flind])
				    [pind];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, pageq) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
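						 * Each subsequent block must itself
						 * be a free block of the maximum
						 * order, VM_NFREEORDER - 1.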
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		TAILQ_REMOVE(&fl[m->order].pl, m, pageq);
		fl[m->order].lcnt--;
		m->order = VM_NFREEORDER;
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind;

	for (flind = 0; flind < vm_nfreelists; flind++) {
		db_printf("FREE LIST %d:\n"
		    "\n ORDER (SIZE) | NUMBER"
		    "\n ", flind);
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf(" | POOL %d", pind);
		db_printf("\n-- ");
		for (pind = 0; pind < VM_NFREEPOOL; pind++)
			db_printf("-- -- ");
		db_printf("--\n");
		for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
			db_printf(" %2.2d (%6.6dK)", oind,
			    1 << (PAGE_SHIFT - 10 + oind));
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[flind][pind];
				db_printf(" | %6.6d", fl[oind].lcnt);
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif