/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#ifdef _KERNEL
#include "opt_acpi.h"
#include "opt_ddb.h"
#endif

/*
 * Routines for describing and initializing anything related to physical
 * memory.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/physmem.h>

#ifdef _KERNEL
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#include <vm/vm_dumpset.h>

#include <machine/md_var.h>
#include <machine/resource.h>
#else
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#endif

/*
 * These structures are used internally to keep track of regions of physical
 * ram, and regions within the physical ram that need to be excluded.  An
 * exclusion region can be excluded from crash dumps, from the vm pool of pages
 * that can be allocated, or both, depending on the exclusion flags associated
 * with the region.
 */
#ifdef DEV_ACPI
#define	MAX_HWCNT	32	/* ACPI needs more regions */
#define	MAX_EXCNT	32
#else
#define	MAX_HWCNT	16
#define	MAX_EXCNT	16
#endif

#if defined(__arm__)
#define	MAX_PHYS_ADDR	0xFFFFFFFFull
#elif defined(__aarch64__) || defined(__amd64__) || defined(__riscv)
#define	MAX_PHYS_ADDR	0xFFFFFFFFFFFFFFFFull
#endif

struct region {
	vm_paddr_t	addr;
	vm_size_t	size;
	uint32_t	flags;
};

static struct region hwregions[MAX_HWCNT];
static struct region exregions[MAX_EXCNT];

static size_t hwcnt;
static size_t excnt;

/*
 * realmem is the total number of hardware pages, excluded or not.
 * Maxmem is one greater than the last physical page number.
 */
long realmem;
long Maxmem;

#ifndef _KERNEL
static void
panic(const char *fmt, ...)
{
	va_list va;

	va_start(va, fmt);
	vfprintf(stderr, fmt, va);
	fprintf(stderr, "\n");
	va_end(va);
	__builtin_trap();
}
#endif
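
/*
 * Typical boot-time call sequence, as a sketch only: the region addresses
 * and the kernstart/kernlen symbols below are illustrative, and the real
 * call sites live in platform-specific startup code.
 *
 *	physmem_hardware_region(0x80000000, 0x40000000);
 *	physmem_exclude_region(kernstart, kernlen, EXFLAG_NOALLOC);
 *	physmem_init_kernel_globals();
 */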

/*
 * Print the contents of the physical and excluded region tables using the
 * provided printf-like output function (which will be either printf or
 * db_printf).
 */
static void
physmem_dump_tables(int (*prfunc)(const char *, ...) __printflike(1, 2))
{
	size_t i;
	int flags;
	uintmax_t addr, size;
	const unsigned int mbyte = 1024 * 1024;

	prfunc("Physical memory chunk(s):\n");
	for (i = 0; i < hwcnt; ++i) {
		addr = hwregions[i].addr;
		size = hwregions[i].size;
		prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages)\n", addr,
		    addr + size - 1, size / mbyte, size / PAGE_SIZE);
	}

	prfunc("Excluded memory regions:\n");
	for (i = 0; i < excnt; ++i) {
		addr  = exregions[i].addr;
		size  = exregions[i].size;
		flags = exregions[i].flags;
		prfunc("  0x%08jx - 0x%08jx, %5ju MB (%7ju pages) %s %s\n",
		    addr, addr + size - 1, size / mbyte, size / PAGE_SIZE,
		    (flags & EXFLAG_NOALLOC) ? "NoAlloc" : "",
		    (flags & EXFLAG_NODUMP) ? "NoDump" : "");
	}

#ifdef DEBUG
	prfunc("Avail lists:\n");
	for (i = 0; phys_avail[i] != 0; ++i) {
		prfunc("  phys_avail[%zu] 0x%08jx\n", i,
		    (uintmax_t)phys_avail[i]);
	}
	for (i = 0; dump_avail[i] != 0; ++i) {
		prfunc("  dump_avail[%zu] 0x%08jx\n", i,
		    (uintmax_t)dump_avail[i]);
	}
#endif
}

/*
 * Print the contents of the physical memory tables; used when bootverbose
 * is set.
 */
void
physmem_print_tables(void)
{

	physmem_dump_tables(printf);
}
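
/*
 * The avail arrays built by regions_to_avail() below hold pairs of start and
 * end addresses of usable chunks, terminated by a zero entry; for example
 * (values illustrative only):
 *
 *	avail[0] = 0x00100000;	avail[1] = 0x20000000;	(first chunk)
 *	avail[2] = 0x40000000;	avail[3] = 0x80000000;	(second chunk)
 *	avail[4] = 0;					(terminator)
 */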

/*
 * Walk the list of hardware regions, processing it against the list of
 * exclusions that contain the given exflags, and generating an "avail list".
 *
 * If maxphyssz is not zero it sets an upper limit, in bytes, on the total
 * "avail list" size.  The walk stops once the limit is reached and the last
 * region is cut short if necessary.
 *
 * Updates the value at *pavail with the number of pages in the avail list,
 * and the value at *prealmem with the total number of pages in all hw
 * regions.
 *
 * Returns the number of entries in the avail list, which is twice the number
 * of returned regions.
 */
static size_t
regions_to_avail(vm_paddr_t *avail, uint32_t exflags, size_t maxavail,
    uint64_t maxphyssz, long *pavail, long *prealmem)
{
	size_t acnt, exi, hwi;
	uint64_t adj, end, start, xend, xstart;
	long availmem, totalmem;
	const struct region *exp, *hwp;
	uint64_t availsz;

	bzero(avail, maxavail * sizeof(vm_paddr_t));

	totalmem = 0;
	availmem = 0;
	availsz = 0;
	acnt = 0;
	for (hwi = 0, hwp = hwregions; hwi < hwcnt; ++hwi, ++hwp) {
		adj = round_page(hwp->addr) - hwp->addr;
		start = round_page(hwp->addr);
		end = trunc_page(hwp->size - adj) + start;
		totalmem += atop((vm_offset_t)(end - start));
		for (exi = 0, exp = exregions; exi < excnt; ++exi, ++exp) {
			/*
			 * If the excluded region does not match given flags,
			 * continue checking with the next excluded region.
			 */
			if ((exp->flags & exflags) == 0)
				continue;
			xstart = exp->addr;
			xend = exp->size + xstart;
			/*
			 * If the excluded region ends before this hw region,
			 * continue checking with the next excluded region.
			 */
			if (xend <= start)
				continue;
			/*
			 * If the excluded region begins after this hw region
			 * we're done because both lists are sorted.
			 */
			if (xstart >= end)
				break;
			/*
			 * If the excluded region completely covers this hw
			 * region, shrink this hw region to zero size.
			 */
			if ((start >= xstart) && (end <= xend)) {
				start = xend;
				end = xend;
				break;
			}
			/*
			 * If the excluded region falls wholly within this hw
			 * region without abutting or overlapping the beginning
			 * or end, create an available entry from the leading
			 * fragment, then adjust the start of this hw region to
			 * the end of the excluded region, and continue checking
			 * the next excluded region because another exclusion
			 * could affect the remainder of this hw region.
			 */
			if ((xstart > start) && (xend < end)) {
				if ((maxphyssz != 0) &&
				    (availsz + xstart - start > maxphyssz)) {
					xstart = maxphyssz + start - availsz;
				}
				if (xstart <= start)
					continue;
				if (acnt > 0 &&
				    avail[acnt - 1] == (vm_paddr_t)start) {
					avail[acnt - 1] = (vm_paddr_t)xstart;
				} else {
					avail[acnt++] = (vm_paddr_t)start;
					avail[acnt++] = (vm_paddr_t)xstart;
				}
				availsz += (xstart - start);
				availmem += atop((vm_offset_t)(xstart - start));
				start = xend;
				continue;
			}
			/*
			 * We know the excluded region overlaps either the start
			 * or end of this hardware region (but not both), trim
			 * the excluded portion off the appropriate end.
			 */
			if (xstart <= start)
				start = xend;
			else
				end = xstart;
		}
		/*
		 * If the trimming actions above left a non-zero size, create an
		 * available entry for it.
		 */
		if (end > start) {
			if ((maxphyssz != 0) &&
			    (availsz + end - start > maxphyssz)) {
				end = maxphyssz + start - availsz;
			}
			if (end <= start)
				break;

			if (acnt > 0 && avail[acnt - 1] == (vm_paddr_t)start) {
				avail[acnt - 1] = (vm_paddr_t)end;
			} else {
				avail[acnt++] = (vm_paddr_t)start;
				avail[acnt++] = (vm_paddr_t)end;
			}
			availsz += end - start;
			availmem += atop((vm_offset_t)(end - start));
		}
		if (acnt >= maxavail)
			panic("Not enough space in the dump/phys_avail arrays");
	}

	if (pavail != NULL)
		*pavail = availmem;
	if (prealmem != NULL)
		*prealmem = totalmem;
	return (acnt);
}

/*
 * Check if the region at idx can be merged with the region above it.
 */
static size_t
merge_upper_regions(struct region *regions, size_t rcnt, size_t idx)
{
	struct region *lower, *upper;
	vm_paddr_t lend, uend;
	size_t i, mergecnt, movecnt;

	lower = &regions[idx];
	lend = lower->addr + lower->size;

	/*
	 * Continue merging in upper entries as long as we have entries to
	 * merge; the new block could have spanned more than one, although one
	 * is likely the common case.
	 */
	for (i = idx + 1; i < rcnt; i++) {
		upper = &regions[i];
		if (lend < upper->addr || lower->flags != upper->flags)
			break;

		uend = upper->addr + upper->size;
		if (uend > lend) {
			lower->size += uend - lend;
			lend = lower->addr + lower->size;
		}

		if (uend >= lend) {
			/*
			 * If we didn't move past the end of the upper region,
			 * then we don't need to bother checking for another
			 * merge because it would have been done already.  Just
			 * increment i once more to maintain the invariant that
			 * i is one past the last entry merged.
			 */
			i++;
			break;
		}
	}

	/*
	 * We merged in the entries from [idx + 1, i); physically move the tail
	 * end at [i, rcnt) if we need to.
	 */
	mergecnt = i - (idx + 1);
	if (mergecnt > 0) {
		movecnt = rcnt - i;
		if (movecnt == 0) {
			/* Merged all the way to the end, just decrease rcnt. */
			rcnt = idx + 1;
		} else {
			memmove(&regions[idx + 1], &regions[idx + mergecnt + 1],
			    movecnt * sizeof(*regions));
			rcnt -= mergecnt;
		}
	}
	return (rcnt);
}
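
/*
 * Worked example (illustrative): with existing regions [0x1000-0x2000) and
 * [0x3000-0x4000), inserting [0x1800-0x3800) with the same flags first grows
 * the lower region past its old end, then merge_upper_regions() collapses
 * the now-overlapping upper region into it, leaving a single [0x1000-0x4000)
 * entry.
 */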

/*
 * Insertion-sort a new entry into a regions list; sorted by start address.
 */
static size_t
insert_region(struct region *regions, size_t rcnt, vm_paddr_t addr,
    vm_size_t size, uint32_t flags)
{
	size_t i;
	vm_paddr_t nend, rend;
	struct region *ep, *rp;

	nend = addr + size;
	ep = regions + rcnt;
	for (i = 0, rp = regions; i < rcnt; ++i, ++rp) {
		rend = rp->addr + rp->size;
		if (flags == rp->flags) {
			if (addr <= rp->addr && nend >= rp->addr) {
				/*
				 * New mapping overlaps at the beginning, shift
				 * for any difference in the beginning then
				 * shift if the new mapping extends past.
				 */
				rp->size += rp->addr - addr;
				rp->addr = addr;
				if (nend > rend) {
					rp->size += nend - rend;
					rcnt = merge_upper_regions(regions,
					    rcnt, i);
				}
				return (rcnt);
			} else if (addr <= rend && nend > rp->addr) {
				/*
				 * New mapping is either entirely contained
				 * within or it's overlapping at the end.
				 */
				if (nend > rend) {
					rp->size += nend - rend;
					rcnt = merge_upper_regions(regions,
					    rcnt, i);
				}
				return (rcnt);
			}
		} else if ((flags != 0) && (rp->flags != 0)) {
			/*
			 * If we're duplicating an entry that already exists
			 * exactly, just upgrade its flags as needed.  We could
			 * do more if we find that we have differently specified
			 * flags clipping existing excluding regions, but that's
			 * probably rare.
			 */
			if (addr == rp->addr && nend == rend) {
				rp->flags |= flags;
				return (rcnt);
			}
		}

		if (addr < rp->addr) {
			bcopy(rp, rp + 1, (ep - rp) * sizeof(*rp));
			break;
		}
	}
	rp->addr = addr;
	rp->size = size;
	rp->flags = flags;
	rcnt++;

	return (rcnt);
}

/*
 * Add a hardware memory region.
 */
void
physmem_hardware_region(uint64_t pa, uint64_t sz)
{
	/*
	 * Filter out the page at PA 0x00000000.  The VM can't handle it, as
	 * pmap_extract() == 0 means failure.
	 */
	if (pa == 0) {
		if (sz <= PAGE_SIZE)
			return;
		pa = PAGE_SIZE;
		sz -= PAGE_SIZE;
	} else if (pa > MAX_PHYS_ADDR) {
		/* This range is past usable memory, ignore it */
		return;
	}

	/*
	 * Also filter out the page at the end of the physical address space --
	 * if addr is non-zero and addr+size is zero we wrapped to the next byte
	 * beyond what vm_paddr_t can express.  That leads to a NULL pointer
	 * deref early in startup; work around it by leaving the last page out.
	 *
	 * XXX This just in:  subtract out a whole megabyte, not just 1 page.
	 * Reducing the size by anything less than 1MB results in the NULL
	 * pointer deref in _vm_map_lock_read().  Better to give up a megabyte
	 * than leave some folks with an unusable system while we investigate.
	 */
	if ((pa + sz) > (MAX_PHYS_ADDR - 1024 * 1024)) {
		sz = MAX_PHYS_ADDR - pa + 1;
		if (sz <= 1024 * 1024)
			return;
		sz -= 1024 * 1024;
	}

	if (sz > 0 && hwcnt < nitems(hwregions))
		hwcnt = insert_region(hwregions, hwcnt, pa, sz, 0);
}
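
/*
 * For example (illustrative): physmem_hardware_region(0, 0x20000000) on a
 * system with 4KB pages records [0x1000-0x20000000), dropping the page at
 * PA 0 so that a successful pmap_extract() can never return 0.
 */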

/*
 * Add an exclusion region.
 */
void
physmem_exclude_region(vm_paddr_t pa, vm_size_t sz, uint32_t exflags)
{
	vm_offset_t adj;

	/*
	 * Truncate the starting address down to a page boundary, and round the
	 * ending page up to a page boundary.
	 */
	adj = pa - trunc_page(pa);
	pa = trunc_page(pa);
	sz = round_page(sz + adj);

	if (excnt >= nitems(exregions))
		panic("failed to exclude region %#jx-%#jx", (uintmax_t)pa,
		    (uintmax_t)(pa + sz));
	excnt = insert_region(exregions, excnt, pa, sz, exflags);
}

size_t
physmem_avail(vm_paddr_t *avail, size_t maxavail)
{

	return (regions_to_avail(avail, EXFLAG_NOALLOC, maxavail, 0, NULL,
	    NULL));
}

bool
physmem_excluded(vm_paddr_t pa, vm_size_t sz)
{
	const struct region *exp;
	size_t exi;

	for (exi = 0, exp = exregions; exi < excnt; ++exi, ++exp) {
		if (pa < exp->addr || pa + sz > exp->addr + exp->size)
			continue;
		return (true);
	}
	return (false);
}

#ifdef _KERNEL
/*
 * Process all the regions added earlier into the global avail lists.
 *
 * Updates the kernel global 'physmem' with the number of physical pages
 * available for use (all pages not in any exclusion region).
 *
 * Updates the kernel global 'Maxmem' with the page number one greater than
 * the last page of physical memory in the system.
 */
void
physmem_init_kernel_globals(void)
{
	size_t nextidx;
	u_long hwphyssz;

	hwphyssz = 0;
	TUNABLE_ULONG_FETCH("hw.physmem", &hwphyssz);

	regions_to_avail(dump_avail, EXFLAG_NODUMP, PHYS_AVAIL_ENTRIES,
	    hwphyssz, NULL, NULL);
	nextidx = regions_to_avail(phys_avail, EXFLAG_NOALLOC,
	    PHYS_AVAIL_ENTRIES, hwphyssz, &physmem, &realmem);
	if (nextidx == 0)
		panic("No memory entries in phys_avail");
	Maxmem = atop(phys_avail[nextidx - 1]);
}
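
/*
 * The hw.physmem tunable caps the memory made available here; for example,
 * setting hw.physmem="1G" in loader.conf(5) makes regions_to_avail() stop
 * building the avail lists after one gigabyte, cutting the last region
 * short if necessary.
 */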

#ifdef DDB
#include <ddb/ddb.h>

DB_SHOW_COMMAND_FLAGS(physmem, db_show_physmem, DB_CMD_MEMSAFE)
{

	physmem_dump_tables(db_printf);
}

#endif /* DDB */

/*
 * ram pseudo-driver - this reserves I/O space resources corresponding to
 * physical memory regions.
 */

static void
ram_identify(driver_t *driver, device_t parent)
{

	if (resource_disabled("ram", 0))
		return;
	if (BUS_ADD_CHILD(parent, 0, "ram", 0) == NULL)
		panic("ram_identify");
}

static int
ram_probe(device_t dev)
{

	device_quiet(dev);
	device_set_desc(dev, "System RAM");
	return (BUS_PROBE_SPECIFIC);
}

static int
ram_attach(device_t dev)
{
	vm_paddr_t avail_list[PHYS_AVAIL_COUNT];
	rman_res_t start, end;
	int rid, i;

	rid = 0;

	/* Get the avail list. */
	regions_to_avail(avail_list, EXFLAG_NOALLOC | EXFLAG_NODUMP,
	    PHYS_AVAIL_COUNT, 0, NULL, NULL);

	/* Reserve all memory regions. */
	for (i = 0; avail_list[i + 1] != 0; i += 2) {
		start = avail_list[i];
		end = avail_list[i + 1];

		if (bootverbose)
			device_printf(dev,
			    "reserving memory region: %jx-%jx\n",
			    (uintmax_t)start, (uintmax_t)end);

		if (bus_alloc_resource(dev, SYS_RES_MEMORY, &rid, start, end,
		    end - start, 0) == NULL)
			panic("ram_attach: resource %d failed to attach", rid);
		rid++;
	}

	return (0);
}

static device_method_t ram_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	ram_identify),
	DEVMETHOD(device_probe,		ram_probe),
	DEVMETHOD(device_attach,	ram_attach),

	DEVMETHOD_END
};

DEFINE_CLASS_0(ram, ram_driver, ram_methods, /* no softc */ 1);
DRIVER_MODULE(ram, nexus, ram_driver, 0, 0);
#endif /* _KERNEL */
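
/*
 * When built without _KERNEL (the #else branch of the includes above), this
 * file can be exercised from a userland harness.  A minimal sketch, assuming
 * a separate test .c file built against the same headers (the harness itself
 * is illustrative, not an existing test):
 *
 *	int
 *	main(void)
 *	{
 *		physmem_hardware_region(2 * PAGE_SIZE, 64 * PAGE_SIZE);
 *		physmem_exclude_region(8 * PAGE_SIZE, 4 * PAGE_SIZE,
 *		    EXFLAG_NOALLOC);
 *		physmem_print_tables();
 *		return (0);
 *	}
 */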