/*-
 * Copyright (c) 2002-2006 Rice University
 * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Alan L. Cox,
 * Olivier Crameri, Peter Druschel, Sitaram Iyer, and Juan Navarro.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 *	Physical memory system implementation
 *
 * Any external functions defined by this module are only to be used by the
 * virtual memory system.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#if MAXMEMDOM > 1
#include <sys/proc.h>
#endif
#include <sys/queue.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/tree.h>
#include <sys/vmmeter.h>

#include <ddb/ddb.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
    "Too many physsegs.");

struct mem_affinity *mem_affinity;

int vm_ndomains = 1;

struct vm_phys_seg vm_phys_segs[VM_PHYSSEG_MAX];
int vm_phys_nsegs;

struct vm_phys_fictitious_seg;
static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
    struct vm_phys_fictitious_seg *);

RB_HEAD(fict_tree, vm_phys_fictitious_seg) vm_phys_fictitious_tree =
    RB_INITIALIZER(_vm_phys_fictitious_tree);

struct vm_phys_fictitious_seg {
	RB_ENTRY(vm_phys_fictitious_seg) node;
	/* Memory region data */
	vm_paddr_t	start;
	vm_paddr_t	end;
	vm_page_t	first_page;
};

RB_GENERATE_STATIC(fict_tree, vm_phys_fictitious_seg, node,
    vm_phys_fictitious_cmp);

static struct rwlock vm_phys_fictitious_reg_lock;
MALLOC_DEFINE(M_FICT_PAGES, "vm_fictitious", "Fictitious VM pages");

static struct vm_freelist
    vm_phys_free_queues[MAXMEMDOM][VM_NFREELIST][VM_NFREEPOOL][VM_NFREEORDER];

static int vm_nfreelists = VM_FREELIST_DEFAULT + 1;

static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
    &cnt_prezero, 0, "The number of physical pages prezeroed at idle time");

static int sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_free, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_free, "A", "Phys Free Info");

static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
    NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");

SYSCTL_INT(_vm, OID_AUTO, ndomains, CTLFLAG_RD,
    &vm_ndomains, 0, "Number of physical memory domains available.");

static vm_page_t vm_phys_alloc_domain_pages(int domain, int flind, int pool,
    int order);
static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind,
    int domain);
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
    int order);

/*
 * Red-black tree helpers for vm fictitious range management.
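 *
 * Registered ranges are kept in vm_phys_fictitious_tree, keyed by their
 * physical address range.  Lookups use a sentinel key whose "end" is zero
 * (as in vm_phys_fictitious_to_vm_page()); the comparison routines below
 * treat such a key as a single-address search, while a key with a non-zero
 * "end" is treated as a range insertion, and overlapping insertions panic.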
 */
static inline int
vm_phys_fictitious_in_range(struct vm_phys_fictitious_seg *p,
    struct vm_phys_fictitious_seg *range)
{

	KASSERT(range->start != 0 && range->end != 0,
	    ("Invalid range passed on search for vm_fictitious page"));
	if (p->start >= range->end)
		return (1);
	if (p->start < range->start)
		return (-1);

	return (0);
}

static int
vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
    struct vm_phys_fictitious_seg *p2)
{

	/* Check if this is a search for a page */
	if (p1->end == 0)
		return (vm_phys_fictitious_in_range(p1, p2));

	KASSERT(p2->end != 0,
	    ("Invalid range passed as second parameter to vm fictitious comparison"));

	/* Searching to add a new range */
	if (p1->end <= p2->start)
		return (-1);
	if (p1->start >= p2->end)
		return (1);

	panic("Trying to add overlapping vm fictitious ranges:\n"
	    "[%#jx:%#jx] and [%#jx:%#jx]", (uintmax_t)p1->start,
	    (uintmax_t)p1->end, (uintmax_t)p2->start, (uintmax_t)p2->end);
}

static __inline int
vm_rr_selectdomain(void)
{
#if MAXMEMDOM > 1
	struct thread *td;

	td = curthread;

	td->td_dom_rr_idx++;
	td->td_dom_rr_idx %= vm_ndomains;
	return (td->td_dom_rr_idx);
#else
	return (0);
#endif
}

boolean_t
vm_phys_domain_intersects(long mask, vm_paddr_t low, vm_paddr_t high)
{
	struct vm_phys_seg *s;
	int idx;

	while ((idx = ffsl(mask)) != 0) {
		idx--;	/* ffsl counts from 1 */
		mask &= ~(1UL << idx);
		s = &vm_phys_segs[idx];
		if (low < s->end && high > s->start)
			return (TRUE);
	}
	return (FALSE);
}

/*
 * Outputs the state of the physical memory allocator, specifically,
 * the amount of physical memory in each free list.
 */
static int
sysctl_vm_phys_free(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_freelist *fl;
	int dom, error, flind, oind, pind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128 * vm_ndomains, req);
	for (dom = 0; dom < vm_ndomains; dom++) {
		sbuf_printf(&sbuf, "\nDOMAIN %d:\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			sbuf_printf(&sbuf, "\nFREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "  |  POOL %d", pind);
			sbuf_printf(&sbuf, "\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				sbuf_printf(&sbuf, "-- --      ");
			sbuf_printf(&sbuf, "--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				sbuf_printf(&sbuf, "  %2d (%6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					sbuf_printf(&sbuf, "  |  %6d",
					    fl[oind].lcnt);
				}
				sbuf_printf(&sbuf, "\n");
			}
		}
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

/*
 * Outputs the set of physical memory segments.
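 * The output is exported as the vm.phys_segs sysctl.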
 */
static int
sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sbuf;
	struct vm_phys_seg *seg;
	int error, segind;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		sbuf_printf(&sbuf, "\nSEGMENT %d:\n\n", segind);
		seg = &vm_phys_segs[segind];
		sbuf_printf(&sbuf, "start:     %#jx\n",
		    (uintmax_t)seg->start);
		sbuf_printf(&sbuf, "end:       %#jx\n",
		    (uintmax_t)seg->end);
		sbuf_printf(&sbuf, "domain:    %d\n", seg->domain);
		sbuf_printf(&sbuf, "free list: %p\n", seg->free_queues);
	}
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}

static void
vm_freelist_add(struct vm_freelist *fl, vm_page_t m, int order, int tail)
{

	m->order = order;
	if (tail)
		TAILQ_INSERT_TAIL(&fl[order].pl, m, plinks.q);
	else
		TAILQ_INSERT_HEAD(&fl[order].pl, m, plinks.q);
	fl[order].lcnt++;
}

static void
vm_freelist_rem(struct vm_freelist *fl, vm_page_t m, int order)
{

	TAILQ_REMOVE(&fl[order].pl, m, plinks.q);
	fl[order].lcnt--;
	m->order = VM_NFREEORDER;
}

/*
 * Create a physical memory segment.
 */
static void
_vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind, int domain)
{
	struct vm_phys_seg *seg;

	KASSERT(vm_phys_nsegs < VM_PHYSSEG_MAX,
	    ("vm_phys_create_seg: increase VM_PHYSSEG_MAX"));
	KASSERT(domain < vm_ndomains,
	    ("vm_phys_create_seg: invalid domain provided"));
	seg = &vm_phys_segs[vm_phys_nsegs++];
	while (seg > vm_phys_segs && (seg - 1)->start >= end) {
		*seg = *(seg - 1);
		seg--;
	}
	seg->start = start;
	seg->end = end;
	seg->domain = domain;
	seg->free_queues = &vm_phys_free_queues[domain][flind];
}

static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind)
{
	int i;

	if (mem_affinity == NULL) {
		_vm_phys_create_seg(start, end, flind, 0);
		return;
	}

	for (i = 0;; i++) {
		if (mem_affinity[i].end == 0)
			panic("Reached end of affinity info");
		if (mem_affinity[i].end <= start)
			continue;
		if (mem_affinity[i].start > start)
			panic("No affinity info for start %jx",
			    (uintmax_t)start);
		if (mem_affinity[i].end >= end) {
			_vm_phys_create_seg(start, end, flind,
			    mem_affinity[i].domain);
			break;
		}
		_vm_phys_create_seg(start, mem_affinity[i].end, flind,
		    mem_affinity[i].domain);
		start = mem_affinity[i].end;
	}
}

/*
 * Add a physical memory segment.
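 *
 * If a configured free list boundary (the 16MB ISA DMA limit when
 * VM_FREELIST_ISADMA is defined, or VM_HIGHMEM_ADDRESS when
 * VM_FREELIST_HIGHMEM is defined) falls inside [start, end), the range is
 * split so that each piece is created on the appropriate free list.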
 */
void
vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end)
{

	KASSERT((start & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: start is not page aligned"));
	KASSERT((end & PAGE_MASK) == 0,
	    ("vm_phys_add_seg: end is not page aligned"));
#ifdef VM_FREELIST_ISADMA
	if (start < 16777216) {
		if (end > 16777216) {
			vm_phys_create_seg(start, 16777216,
			    VM_FREELIST_ISADMA);
			vm_phys_create_seg(16777216, end, VM_FREELIST_DEFAULT);
		} else
			vm_phys_create_seg(start, end, VM_FREELIST_ISADMA);
		if (VM_FREELIST_ISADMA >= vm_nfreelists)
			vm_nfreelists = VM_FREELIST_ISADMA + 1;
	} else
#endif
#ifdef VM_FREELIST_HIGHMEM
	if (end > VM_HIGHMEM_ADDRESS) {
		if (start < VM_HIGHMEM_ADDRESS) {
			vm_phys_create_seg(start, VM_HIGHMEM_ADDRESS,
			    VM_FREELIST_DEFAULT);
			vm_phys_create_seg(VM_HIGHMEM_ADDRESS, end,
			    VM_FREELIST_HIGHMEM);
		} else
			vm_phys_create_seg(start, end, VM_FREELIST_HIGHMEM);
		if (VM_FREELIST_HIGHMEM >= vm_nfreelists)
			vm_nfreelists = VM_FREELIST_HIGHMEM + 1;
	} else
#endif
	vm_phys_create_seg(start, end, VM_FREELIST_DEFAULT);
}

/*
 * Initialize the physical memory allocator.
 */
void
vm_phys_init(void)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
#ifdef VM_PHYSSEG_SPARSE
	long pages;
#endif
	int dom, flind, oind, pind, segind;

#ifdef VM_PHYSSEG_SPARSE
	pages = 0;
#endif
	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
#ifdef VM_PHYSSEG_SPARSE
		seg->first_page = &vm_page_array[pages];
		pages += atop(seg->end - seg->start);
#else
		seg->first_page = PHYS_TO_VM_PAGE(seg->start);
#endif
	}
	for (dom = 0; dom < vm_ndomains; dom++) {
		for (flind = 0; flind < vm_nfreelists; flind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = vm_phys_free_queues[dom][flind][pind];
				for (oind = 0; oind < VM_NFREEORDER; oind++)
					TAILQ_INIT(&fl[oind].pl);
			}
		}
	}
	rw_init(&vm_phys_fictitious_reg_lock, "vmfctr");
}

/*
 * Split a contiguous, power of two-sized set of physical pages.
 */
static __inline void
vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order)
{
	vm_page_t m_buddy;

	while (oind > order) {
		oind--;
		m_buddy = &m[1 << oind];
		KASSERT(m_buddy->order == VM_NFREEORDER,
		    ("vm_phys_split_pages: page %p has unexpected order %d",
		    m_buddy, m_buddy->order));
		vm_freelist_add(fl, m_buddy, oind, 0);
	}
}

/*
 * Initialize a physical page and add it to the free lists.
 */
void
vm_phys_add_page(vm_paddr_t pa)
{
	vm_page_t m;
	struct vm_domain *vmd;

	vm_cnt.v_page_count++;
	m = vm_phys_paddr_to_vm_page(pa);
	m->phys_addr = pa;
	m->queue = PQ_NONE;
	m->segind = vm_phys_paddr_to_segind(pa);
	vmd = vm_phys_domain(m);
	vmd->vmd_page_count++;
	vmd->vmd_segs |= 1UL << m->segind;
	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_add_page: page %p has unexpected order %d",
	    m, m->order));
	m->pool = VM_FREEPOOL_DEFAULT;
	pmap_page_init(m);
	mtx_lock(&vm_page_queue_free_mtx);
	vm_phys_freecnt_adj(m, 1);
	vm_phys_free_pages(m, 0);
	mtx_unlock(&vm_page_queue_free_mtx);
}

/*
 * Allocate a contiguous, power of two-sized set of physical pages
 * from the free lists.
 *
 * The free page queues must be locked.
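 *
 * The memory domains are tried in round-robin order (see
 * vm_rr_selectdomain()); within a domain the free lists are scanned in
 * ascending order, and the first list able to satisfy the request supplies
 * the pages.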
 */
vm_page_t
vm_phys_alloc_pages(int pool, int order)
{
	vm_page_t m;
	int dom, domain, flind;

	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_pages: order %d is out of range", order));

	for (dom = 0; dom < vm_ndomains; dom++) {
		domain = vm_rr_selectdomain();
		for (flind = 0; flind < vm_nfreelists; flind++) {
			m = vm_phys_alloc_domain_pages(domain, flind, pool,
			    order);
			if (m != NULL)
				return (m);
		}
	}
	return (NULL);
}

/*
 * Find and dequeue a free page on the given free list, with the
 * specified pool and order
 */
vm_page_t
vm_phys_alloc_freelist_pages(int flind, int pool, int order)
{
	vm_page_t m;
	int dom, domain;

	KASSERT(flind < VM_NFREELIST,
	    ("vm_phys_alloc_freelist_pages: freelist %d is out of range", flind));
	KASSERT(pool < VM_NFREEPOOL,
	    ("vm_phys_alloc_freelist_pages: pool %d is out of range", pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_alloc_freelist_pages: order %d is out of range", order));

	for (dom = 0; dom < vm_ndomains; dom++) {
		domain = vm_rr_selectdomain();
		m = vm_phys_alloc_domain_pages(domain, flind, pool, order);
		if (m != NULL)
			return (m);
	}
	return (NULL);
}

static vm_page_t
vm_phys_alloc_domain_pages(int domain, int flind, int pool, int order)
{
	struct vm_freelist *fl;
	struct vm_freelist *alt;
	int oind, pind;
	vm_page_t m;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	fl = &vm_phys_free_queues[domain][flind][pool][0];
	for (oind = order; oind < VM_NFREEORDER; oind++) {
		m = TAILQ_FIRST(&fl[oind].pl);
		if (m != NULL) {
			vm_freelist_rem(fl, m, oind);
			vm_phys_split_pages(m, oind, fl, order);
			return (m);
		}
	}

	/*
	 * The given pool was empty.  Find the largest
	 * contiguous, power-of-two-sized set of pages in any
	 * pool.  Transfer these pages to the given pool, and
	 * use them to satisfy the allocation.
	 */
	for (oind = VM_NFREEORDER - 1; oind >= order; oind--) {
		for (pind = 0; pind < VM_NFREEPOOL; pind++) {
			alt = &vm_phys_free_queues[domain][flind][pind][0];
			m = TAILQ_FIRST(&alt[oind].pl);
			if (m != NULL) {
				vm_freelist_rem(alt, m, oind);
				vm_phys_set_pool(pool, m, oind);
				vm_phys_split_pages(m, oind, fl, order);
				return (m);
			}
		}
	}
	return (NULL);
}
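
/*
 * Example (illustrative only): a request such as
 *
 *	m = vm_phys_alloc_domain_pages(domain, flind, pool, 1);
 *
 * when "pool"'s queues are empty but another pool in the same free list
 * holds an order-3 (eight page) block, removes that block from the other
 * pool, retypes all eight pages to "pool" with vm_phys_set_pool(), and
 * calls vm_phys_split_pages(), which returns the upper order-2 buddy
 * (pages 4-7) and the adjacent order-1 buddy (pages 2-3) to "pool"'s free
 * queues.  Pages 0-1 of the original block are returned to the caller.
 */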

/*
 * Find the vm_page corresponding to the given physical address.
 */
vm_page_t
vm_phys_paddr_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (&seg->first_page[atop(pa - seg->start)]);
	}
	return (NULL);
}

vm_page_t
vm_phys_fictitious_to_vm_page(vm_paddr_t pa)
{
	struct vm_phys_fictitious_seg tmp, *seg;
	vm_page_t m;

	m = NULL;
	tmp.start = pa;
	tmp.end = 0;

	rw_rlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	rw_runlock(&vm_phys_fictitious_reg_lock);
	if (seg == NULL)
		return (NULL);

	m = &seg->first_page[atop(pa - seg->start)];
	KASSERT((m->flags & PG_FICTITIOUS) != 0, ("%p not fictitious", m));

	return (m);
}

static inline void
vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start,
    long page_count, vm_memattr_t memattr)
{
	long i;

	for (i = 0; i < page_count; i++) {
		vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr);
		range[i].oflags &= ~VPO_UNMANAGED;
		range[i].busy_lock = VPB_UNBUSIED;
	}
}

int
vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end,
    vm_memattr_t memattr)
{
	struct vm_phys_fictitious_seg *seg;
	vm_page_t fp;
	long page_count;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
	long dpage_count;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

	page_count = (end - start) / PAGE_SIZE;

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		fp = &vm_page_array[pi - first_page];
		if ((pe - first_page) > vm_page_array_size) {
			/*
			 * We have a segment that starts inside
			 * of vm_page_array, but ends outside of it.
			 *
			 * Use vm_page_array pages for those that are
			 * inside of the vm_page_array range, and
			 * allocate the remaining ones.
			 */
			dpage_count = vm_page_array_size - (pi - first_page);
			vm_phys_fictitious_init_range(fp, start, dpage_count,
			    memattr);
			page_count -= dpage_count;
			start += ptoa(dpage_count);
			goto alloc;
		}
		/*
		 * We can allocate the full range from vm_page_array,
		 * so there's no need to register the range in the tree.
		 */
		vm_phys_fictitious_init_range(fp, start, page_count, memattr);
		return (0);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		fp = &vm_page_array[0];
		dpage_count = pe - first_page;
		vm_phys_fictitious_init_range(fp, ptoa(first_page), dpage_count,
		    memattr);
		end -= ptoa(dpage_count);
		page_count -= dpage_count;
		goto alloc;
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/*
		 * Trying to register a fictitious range that expands before
		 * and after vm_page_array.
		 */
		return (EINVAL);
	} else {
alloc:
#endif
		fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES,
		    M_WAITOK | M_ZERO);
#ifdef VM_PHYSSEG_DENSE
	}
#endif
	vm_phys_fictitious_init_range(fp, start, page_count, memattr);

	seg = malloc(sizeof(*seg), M_FICT_PAGES, M_WAITOK | M_ZERO);
	seg->start = start;
	seg->end = end;
	seg->first_page = fp;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	RB_INSERT(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);

	return (0);
}

void
vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end)
{
	struct vm_phys_fictitious_seg *seg, tmp;
#ifdef VM_PHYSSEG_DENSE
	long pi, pe;
#endif

	KASSERT(start < end,
	    ("Start of segment isn't less than end (start: %jx end: %jx)",
	    (uintmax_t)start, (uintmax_t)end));

#ifdef VM_PHYSSEG_DENSE
	pi = atop(start);
	pe = atop(end);
	if (pi >= first_page && (pi - first_page) < vm_page_array_size) {
		if ((pe - first_page) <= vm_page_array_size) {
			/*
			 * This segment was allocated using vm_page_array
			 * only, there's nothing to do since those pages
			 * were never added to the tree.
			 */
			return;
		}
		/*
		 * We have a segment that starts inside
		 * of vm_page_array, but ends outside of it.
		 *
		 * Calculate how many pages were added to the
		 * tree and free them.
		 */
		start = ptoa(first_page + vm_page_array_size);
	} else if (pe > first_page && (pe - first_page) < vm_page_array_size) {
		/*
		 * We have a segment that ends inside of vm_page_array,
		 * but starts outside of it.
		 */
		end = ptoa(first_page);
	} else if (pi < first_page && pe > (first_page + vm_page_array_size)) {
		/* Since it's not possible to register such a range, panic. */
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
#endif
	tmp.start = start;
	tmp.end = 0;

	rw_wlock(&vm_phys_fictitious_reg_lock);
	seg = RB_FIND(fict_tree, &vm_phys_fictitious_tree, &tmp);
	if (seg == NULL || seg->start != start || seg->end != end) {
		rw_wunlock(&vm_phys_fictitious_reg_lock);
		panic(
		    "Unregistering not registered fictitious range [%#jx:%#jx]",
		    (uintmax_t)start, (uintmax_t)end);
	}
	RB_REMOVE(fict_tree, &vm_phys_fictitious_tree, seg);
	rw_wunlock(&vm_phys_fictitious_reg_lock);
	free(seg->first_page, M_FICT_PAGES);
	free(seg, M_FICT_PAGES);
}

/*
 * Find the segment containing the given physical address.
 */
static int
vm_phys_paddr_to_segind(vm_paddr_t pa)
{
	struct vm_phys_seg *seg;
	int segind;

	for (segind = 0; segind < vm_phys_nsegs; segind++) {
		seg = &vm_phys_segs[segind];
		if (pa >= seg->start && pa < seg->end)
			return (segind);
	}
	panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment",
	    (uintmax_t)pa);
}

/*
 * Free a contiguous, power of two-sized set of physical pages.
 *
 * The free page queues must be locked.
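 *
 * Freeing attempts buddy coalescing: the buddy of a 2^order page block at
 * physical address "pa" lies at pa ^ (2^order * PAGE_SIZE).  For example,
 * with 4KB pages, freeing a single page at 0x5000 first checks its order-0
 * buddy at 0x4000; if that page is also free at order 0, the two merge into
 * an order-1 block at 0x4000, whose order-1 buddy at 0x6000 is then checked,
 * and so on until a buddy is unavailable or VM_NFREEORDER - 1 is reached.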
 */
void
vm_phys_free_pages(vm_page_t m, int order)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa;
	vm_page_t m_buddy;

	KASSERT(m->order == VM_NFREEORDER,
	    ("vm_phys_free_pages: page %p has unexpected order %d",
	    m, m->order));
	KASSERT(m->pool < VM_NFREEPOOL,
	    ("vm_phys_free_pages: page %p has unexpected pool %d",
	    m, m->pool));
	KASSERT(order < VM_NFREEORDER,
	    ("vm_phys_free_pages: order %d is out of range", order));
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	seg = &vm_phys_segs[m->segind];
	if (order < VM_NFREEORDER - 1) {
		pa = VM_PAGE_TO_PHYS(m);
		do {
			pa ^= ((vm_paddr_t)1 << (PAGE_SHIFT + order));
			if (pa < seg->start || pa >= seg->end)
				break;
			m_buddy = &seg->first_page[atop(pa - seg->start)];
			if (m_buddy->order != order)
				break;
			fl = (*seg->free_queues)[m_buddy->pool];
			vm_freelist_rem(fl, m_buddy, order);
			if (m_buddy->pool != m->pool)
				vm_phys_set_pool(m->pool, m_buddy, order);
			order++;
			pa &= ~(((vm_paddr_t)1 << (PAGE_SHIFT + order)) - 1);
			m = &seg->first_page[atop(pa - seg->start)];
		} while (order < VM_NFREEORDER - 1);
	}
	fl = (*seg->free_queues)[m->pool];
	vm_freelist_add(fl, m, order, 1);
}

/*
 * Free a contiguous, arbitrarily sized set of physical pages.
 *
 * The free page queues must be locked.
 */
void
vm_phys_free_contig(vm_page_t m, u_long npages)
{
	u_int n;
	int order;

	/*
	 * Avoid unnecessary coalescing by freeing the pages in the largest
	 * possible power-of-two-sized subsets.
	 */
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;; npages -= n) {
		/*
		 * Unsigned "min" is used here so that "order" is assigned
		 * "VM_NFREEORDER - 1" when "m"'s physical address is zero
		 * or the low-order bits of its physical address are zero
		 * because the size of a physical address exceeds the size of
		 * a long.
		 */
		order = min(ffsl(VM_PAGE_TO_PHYS(m) >> PAGE_SHIFT) - 1,
		    VM_NFREEORDER - 1);
		n = 1 << order;
		if (npages < n)
			break;
		vm_phys_free_pages(m, order);
		m += n;
	}
	/* The residual "npages" is less than "1 << (VM_NFREEORDER - 1)". */
	for (; npages > 0; npages -= n) {
		order = flsl(npages) - 1;
		n = 1 << order;
		vm_phys_free_pages(m, order);
		m += n;
	}
}

/*
 * Set the pool for a contiguous, power of two-sized set of physical pages.
 */
void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
	vm_page_t m_tmp;

	for (m_tmp = m; m_tmp < &m[1 << order]; m_tmp++)
		m_tmp->pool = pool;
}

/*
 * Search for the given physical page "m" in the free lists.  If the search
 * succeeds, remove "m" from the free lists and return TRUE.  Otherwise, return
 * FALSE, indicating that "m" is not in the free lists.
 *
 * The free page queues must be locked.
 */
boolean_t
vm_phys_unfree_page(vm_page_t m)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_half;
	vm_page_t m_set, m_tmp;
	int order;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);

	/*
	 * First, find the contiguous, power of two-sized set of free
	 * physical pages containing the given physical page "m" and
	 * assign it to "m_set".
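	 *
	 * Only the first page of a free block has its "order" field set,
	 * so the loop probes successively coarser block-aligned addresses
	 * until it reaches a page with a valid order.  For example, if "m"
	 * is the last page of a free order-2 (four page) block, the probes
	 * at two-page and then four-page alignment end at the block's first
	 * page, which records order 2.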
	 */
	seg = &vm_phys_segs[m->segind];
	for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
	    order < VM_NFREEORDER - 1; ) {
		order++;
		pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
		if (pa >= seg->start)
			m_set = &seg->first_page[atop(pa - seg->start)];
		else
			return (FALSE);
	}
	if (m_set->order < order)
		return (FALSE);
	if (m_set->order == VM_NFREEORDER)
		return (FALSE);
	KASSERT(m_set->order < VM_NFREEORDER,
	    ("vm_phys_unfree_page: page %p has unexpected order %d",
	    m_set, m_set->order));

	/*
	 * Next, remove "m_set" from the free lists.  Finally, extract
	 * "m" from "m_set" using an iterative algorithm: While "m_set"
	 * is larger than a page, shrink "m_set" by returning the half
	 * of "m_set" that does not contain "m" to the free lists.
	 */
	fl = (*seg->free_queues)[m_set->pool];
	order = m_set->order;
	vm_freelist_rem(fl, m_set, order);
	while (order > 0) {
		order--;
		pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
		if (m->phys_addr < pa_half)
			m_tmp = &seg->first_page[atop(pa_half - seg->start)];
		else {
			m_tmp = m_set;
			m_set = &seg->first_page[atop(pa_half - seg->start)];
		}
		vm_freelist_add(fl, m_tmp, order, 0);
	}
	KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
	return (TRUE);
}

/*
 * Try to zero one physical page.  Used by an idle priority thread.
 */
boolean_t
vm_phys_zero_pages_idle(void)
{
	static struct vm_freelist *fl;
	static int flind, oind, pind;
	vm_page_t m, m_tmp;
	int domain;

	domain = vm_rr_selectdomain();
	fl = vm_phys_free_queues[domain][0][0];
	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	for (;;) {
		TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, plinks.q) {
			for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
				if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
					vm_phys_unfree_page(m_tmp);
					vm_phys_freecnt_adj(m, -1);
					mtx_unlock(&vm_page_queue_free_mtx);
					pmap_zero_page_idle(m_tmp);
					m_tmp->flags |= PG_ZERO;
					mtx_lock(&vm_page_queue_free_mtx);
					vm_phys_freecnt_adj(m, 1);
					vm_phys_free_pages(m_tmp, 0);
					vm_page_zero_count++;
					cnt_prezero++;
					return (TRUE);
				}
			}
		}
		oind++;
		if (oind == VM_NFREEORDER) {
			oind = 0;
			pind++;
			if (pind == VM_NFREEPOOL) {
				pind = 0;
				flind++;
				if (flind == vm_nfreelists)
					flind = 0;
			}
			fl = vm_phys_free_queues[domain][flind][pind];
		}
	}
}

/*
 * Allocate a contiguous set of physical pages of the given size
 * "npages" from the free lists.  All of the physical pages must be at
 * or above the given physical address "low" and below the given
 * physical address "high".  The given value "alignment" determines the
 * alignment of the first physical page in the set.  If the given value
 * "boundary" is non-zero, then the set of physical pages cannot cross
 * any physical address boundary that is a multiple of that value.  Both
 * "alignment" and "boundary" must be a power of two.
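 *
 * For example, npages = 16, low = 0, high = 1ULL << 32, alignment = 65536,
 * and boundary = 1024 * 1024 requests sixteen contiguous pages below 4GB
 * that start on a 64KB boundary and do not span a 1MB boundary (assuming
 * 4KB pages, a 64KB request).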
 */
vm_page_t
vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high,
    u_long alignment, vm_paddr_t boundary)
{
	struct vm_freelist *fl;
	struct vm_phys_seg *seg;
	vm_paddr_t pa, pa_last, size;
	vm_page_t m, m_ret;
	u_long npages_end;
	int dom, domain, flind, oind, order, pind;

	mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
	size = npages << PAGE_SHIFT;
	KASSERT(size != 0,
	    ("vm_phys_alloc_contig: size must not be 0"));
	KASSERT((alignment & (alignment - 1)) == 0,
	    ("vm_phys_alloc_contig: alignment must be a power of 2"));
	KASSERT((boundary & (boundary - 1)) == 0,
	    ("vm_phys_alloc_contig: boundary must be a power of 2"));
	/* Compute the queue that is the best fit for npages. */
	for (order = 0; (1 << order) < npages; order++);
	dom = 0;
restartdom:
	domain = vm_rr_selectdomain();
	for (flind = 0; flind < vm_nfreelists; flind++) {
		for (oind = min(order, VM_NFREEORDER - 1); oind < VM_NFREEORDER; oind++) {
			for (pind = 0; pind < VM_NFREEPOOL; pind++) {
				fl = &vm_phys_free_queues[domain][flind][pind][0];
				TAILQ_FOREACH(m_ret, &fl[oind].pl, plinks.q) {
					/*
					 * A free list may contain physical pages
					 * from one or more segments.
					 */
					seg = &vm_phys_segs[m_ret->segind];
					if (seg->start > high ||
					    low >= seg->end)
						continue;

					/*
					 * Is the size of this allocation request
					 * larger than the largest block size?
					 */
					if (order >= VM_NFREEORDER) {
						/*
						 * Determine if a sufficient number
						 * of subsequent blocks to satisfy
						 * the allocation request are free.
						 */
						pa = VM_PAGE_TO_PHYS(m_ret);
						pa_last = pa + size;
						for (;;) {
							pa += 1 << (PAGE_SHIFT + VM_NFREEORDER - 1);
							if (pa >= pa_last)
								break;
							if (pa < seg->start ||
							    pa >= seg->end)
								break;
							m = &seg->first_page[atop(pa - seg->start)];
							if (m->order != VM_NFREEORDER - 1)
								break;
						}
						/* If not, continue to the next block. */
						if (pa < pa_last)
							continue;
					}

					/*
					 * Determine if the blocks are within the given range,
					 * satisfy the given alignment, and do not cross the
					 * given boundary.
					 */
					pa = VM_PAGE_TO_PHYS(m_ret);
					if (pa >= low &&
					    pa + size <= high &&
					    (pa & (alignment - 1)) == 0 &&
					    ((pa ^ (pa + size - 1)) & ~(boundary - 1)) == 0)
						goto done;
				}
			}
		}
	}
	if (++dom < vm_ndomains)
		goto restartdom;
	return (NULL);
done:
	for (m = m_ret; m < &m_ret[npages]; m = &m[1 << oind]) {
		fl = (*seg->free_queues)[m->pool];
		vm_freelist_rem(fl, m, m->order);
	}
	if (m_ret->pool != VM_FREEPOOL_DEFAULT)
		vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
	fl = (*seg->free_queues)[m_ret->pool];
	vm_phys_split_pages(m_ret, oind, fl, order);
	/* Return excess pages to the free lists. */
	npages_end = roundup2(npages, 1 << imin(oind, order));
	if (npages < npages_end)
		vm_phys_free_contig(&m_ret[npages], npages_end - npages);
	return (m_ret);
}

#ifdef DDB
/*
 * Show the number of physical pages in each of the free lists.
 */
DB_SHOW_COMMAND(freepages, db_show_freepages)
{
	struct vm_freelist *fl;
	int flind, oind, pind, dom;

	for (dom = 0; dom < vm_ndomains; dom++) {
		db_printf("DOMAIN: %d\n", dom);
		for (flind = 0; flind < vm_nfreelists; flind++) {
			db_printf("FREE LIST %d:\n"
			    "\n  ORDER (SIZE)  |  NUMBER"
			    "\n              ", flind);
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("  |  POOL %d", pind);
			db_printf("\n--            ");
			for (pind = 0; pind < VM_NFREEPOOL; pind++)
				db_printf("-- --      ");
			db_printf("--\n");
			for (oind = VM_NFREEORDER - 1; oind >= 0; oind--) {
				db_printf("  %2.2d (%6.6dK)", oind,
				    1 << (PAGE_SHIFT - 10 + oind));
				for (pind = 0; pind < VM_NFREEPOOL; pind++) {
					fl = vm_phys_free_queues[dom][flind][pind];
					db_printf("  |  %6.6d", fl[oind].lcnt);
				}
				db_printf("\n");
			}
			db_printf("\n");
		}
		db_printf("\n");
	}
}
#endif