1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2010 Hudson River Trading LLC 5 * Written by: John H. Baldwin <jhb@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_vm.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bus.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/smp.h> 42 #include <sys/vmmeter.h> 43 #include <vm/vm.h> 44 #include <vm/pmap.h> 45 #include <vm/vm_param.h> 46 #include <vm/vm_page.h> 47 #include <vm/vm_phys.h> 48 49 #include <contrib/dev/acpica/include/acpi.h> 50 #include <contrib/dev/acpica/include/aclocal.h> 51 #include <contrib/dev/acpica/include/actables.h> 52 53 #include <machine/md_var.h> 54 55 #include <dev/acpica/acpivar.h> 56 57 #if MAXMEMDOM > 1 58 static struct cpu_info { 59 bool enabled:1; 60 bool has_memory:1; 61 int domain; 62 int id; 63 } *cpus; 64 65 static int max_cpus; 66 static int last_cpu; 67 68 struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1]; 69 int num_mem; 70 71 static ACPI_TABLE_SRAT *srat; 72 static vm_paddr_t srat_physaddr; 73 74 static int domain_pxm[MAXMEMDOM]; 75 static int ndomain; 76 static vm_paddr_t maxphyaddr; 77 78 static ACPI_TABLE_SLIT *slit; 79 static vm_paddr_t slit_physaddr; 80 static int vm_locality_table[MAXMEMDOM * MAXMEMDOM]; 81 82 static void srat_walk_table(acpi_subtable_handler *handler, void *arg); 83 84 /* 85 * SLIT parsing. 86 */ 87 88 static void 89 slit_parse_table(ACPI_TABLE_SLIT *s) 90 { 91 int i, j; 92 int i_domain, j_domain; 93 int offset = 0; 94 uint8_t e; 95 96 /* 97 * This maps the SLIT data into the VM-domain centric view. 98 * There may be sparse entries in the PXM namespace, so 99 * remap them to a VM-domain ID and if it doesn't exist, 100 * skip it. 101 * 102 * It should result in a packed 2d array of VM-domain 103 * locality information entries. 104 */ 105 106 if (bootverbose) 107 printf("SLIT.Localities: %d\n", (int) s->LocalityCount); 108 for (i = 0; i < s->LocalityCount; i++) { 109 i_domain = acpi_map_pxm_to_vm_domainid(i); 110 if (i_domain < 0) 111 continue; 112 113 if (bootverbose) 114 printf("%d: ", i); 115 for (j = 0; j < s->LocalityCount; j++) { 116 j_domain = acpi_map_pxm_to_vm_domainid(j); 117 if (j_domain < 0) 118 continue; 119 e = s->Entry[i * s->LocalityCount + j]; 120 if (bootverbose) 121 printf("%d ", (int) e); 122 /* 255 == "no locality information" */ 123 if (e == 255) 124 vm_locality_table[offset] = -1; 125 else 126 vm_locality_table[offset] = e; 127 offset++; 128 } 129 if (bootverbose) 130 printf("\n"); 131 } 132 } 133 134 /* 135 * Look for an ACPI System Locality Distance Information Table ("SLIT") 136 */ 137 static int 138 parse_slit(void) 139 { 140 141 if (resource_disabled("slit", 0)) { 142 return (-1); 143 } 144 145 slit_physaddr = acpi_find_table(ACPI_SIG_SLIT); 146 if (slit_physaddr == 0) { 147 return (-1); 148 } 149 150 /* 151 * Make a pass over the table to populate the cpus[] and 152 * mem_info[] tables. 153 */ 154 slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT); 155 slit_parse_table(slit); 156 acpi_unmap_table(slit); 157 slit = NULL; 158 159 return (0); 160 } 161 162 /* 163 * SRAT parsing. 164 */ 165 166 /* 167 * Returns true if a memory range overlaps with at least one range in 168 * phys_avail[]. 169 */ 170 static int 171 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end) 172 { 173 int i; 174 175 for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) { 176 if (phys_avail[i + 1] <= start) 177 continue; 178 if (phys_avail[i] < end) 179 return (1); 180 break; 181 } 182 return (0); 183 } 184 185 /* 186 * On x86 we can use the cpuid to index the cpus array, but on arm64 187 * we have an ACPI Processor UID with a larger range. 188 * 189 * Use this variable to indicate if the cpus can be stored by index. 190 */ 191 #ifdef __aarch64__ 192 static const int cpus_use_indexing = 0; 193 #else 194 static const int cpus_use_indexing = 1; 195 #endif 196 197 /* 198 * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64) 199 */ 200 static struct cpu_info * 201 cpu_find(int cpuid) 202 { 203 int i; 204 205 if (cpus_use_indexing) { 206 if (cpuid <= last_cpu && cpus[cpuid].enabled) 207 return (&cpus[cpuid]); 208 } else { 209 for (i = 0; i <= last_cpu; i++) 210 if (cpus[i].id == cpuid) 211 return (&cpus[i]); 212 } 213 return (NULL); 214 } 215 216 /* 217 * Find CPU by pcpu pointer. 218 */ 219 static struct cpu_info * 220 cpu_get_info(struct pcpu *pc) 221 { 222 struct cpu_info *cpup; 223 int id; 224 225 #ifdef __aarch64__ 226 id = pc->pc_acpi_id; 227 #else 228 id = pc->pc_apic_id; 229 #endif 230 cpup = cpu_find(id); 231 if (cpup == NULL) 232 panic("SRAT: CPU with ID %u is not known", id); 233 return (cpup); 234 } 235 236 /* 237 * Add proximity information for a new CPU. 238 */ 239 static struct cpu_info * 240 cpu_add(int cpuid, int domain) 241 { 242 struct cpu_info *cpup; 243 244 if (cpus_use_indexing) { 245 if (cpuid >= max_cpus) 246 return (NULL); 247 last_cpu = imax(last_cpu, cpuid); 248 cpup = &cpus[cpuid]; 249 } else { 250 if (last_cpu >= max_cpus - 1) 251 return (NULL); 252 cpup = &cpus[++last_cpu]; 253 } 254 cpup->domain = domain; 255 cpup->id = cpuid; 256 cpup->enabled = 1; 257 return (cpup); 258 } 259 260 static void 261 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg) 262 { 263 ACPI_SRAT_CPU_AFFINITY *cpu; 264 ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; 265 ACPI_SRAT_MEM_AFFINITY *mem; 266 ACPI_SRAT_GICC_AFFINITY *gicc; 267 static struct cpu_info *cpup; 268 uint64_t base, length; 269 int domain, i, slot; 270 271 switch (entry->Type) { 272 case ACPI_SRAT_TYPE_CPU_AFFINITY: 273 cpu = (ACPI_SRAT_CPU_AFFINITY *)entry; 274 domain = cpu->ProximityDomainLo | 275 cpu->ProximityDomainHi[0] << 8 | 276 cpu->ProximityDomainHi[1] << 16 | 277 cpu->ProximityDomainHi[2] << 24; 278 if (bootverbose) 279 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 280 cpu->ApicId, domain, 281 (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ? 282 "enabled" : "disabled"); 283 if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED)) 284 break; 285 cpup = cpu_find(cpu->ApicId); 286 if (cpup != NULL) { 287 printf("SRAT: Duplicate local APIC ID %u\n", 288 cpu->ApicId); 289 *(int *)arg = ENXIO; 290 break; 291 } 292 cpup = cpu_add(cpu->ApicId, domain); 293 if (cpup == NULL) 294 printf("SRAT: Ignoring local APIC ID %u (too high)\n", 295 cpu->ApicId); 296 break; 297 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 298 x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry; 299 if (bootverbose) 300 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 301 x2apic->ApicId, x2apic->ProximityDomain, 302 (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ? 303 "enabled" : "disabled"); 304 if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED)) 305 break; 306 KASSERT(cpu_find(x2apic->ApicId) == NULL, 307 ("Duplicate local APIC ID %u", x2apic->ApicId)); 308 cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain); 309 if (cpup == NULL) 310 printf("SRAT: Ignoring local APIC ID %u (too high)\n", 311 x2apic->ApicId); 312 break; 313 case ACPI_SRAT_TYPE_GICC_AFFINITY: 314 gicc = (ACPI_SRAT_GICC_AFFINITY *)entry; 315 if (bootverbose) 316 printf("SRAT: Found CPU UID %u domain %d: %s\n", 317 gicc->AcpiProcessorUid, gicc->ProximityDomain, 318 (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ? 319 "enabled" : "disabled"); 320 if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED)) 321 break; 322 KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL, 323 ("Duplicate CPU UID %u", gicc->AcpiProcessorUid)); 324 cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain); 325 if (cpup == NULL) 326 printf("SRAT: Ignoring CPU UID %u (too high)\n", 327 gicc->AcpiProcessorUid); 328 break; 329 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 330 mem = (ACPI_SRAT_MEM_AFFINITY *)entry; 331 base = mem->BaseAddress; 332 length = mem->Length; 333 domain = mem->ProximityDomain; 334 335 if (bootverbose) 336 printf( 337 "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n", 338 domain, (uintmax_t)base, (uintmax_t)length, 339 (mem->Flags & ACPI_SRAT_MEM_ENABLED) ? 340 "enabled" : "disabled"); 341 if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED)) 342 break; 343 if (base >= maxphyaddr || 344 !overlaps_phys_avail(base, base + length)) { 345 printf("SRAT: Ignoring memory at addr 0x%jx\n", 346 (uintmax_t)base); 347 break; 348 } 349 if (num_mem == VM_PHYSSEG_MAX) { 350 printf("SRAT: Too many memory regions\n"); 351 *(int *)arg = ENXIO; 352 break; 353 } 354 slot = num_mem; 355 for (i = 0; i < num_mem; i++) { 356 if (mem_info[i].domain == domain) { 357 /* Try to extend an existing segment. */ 358 if (base == mem_info[i].end) { 359 mem_info[i].end += length; 360 return; 361 } 362 if (base + length == mem_info[i].start) { 363 mem_info[i].start -= length; 364 return; 365 } 366 } 367 if (mem_info[i].end <= base) 368 continue; 369 if (mem_info[i].start < base + length) { 370 printf("SRAT: Overlapping memory entries\n"); 371 *(int *)arg = ENXIO; 372 return; 373 } 374 slot = i; 375 } 376 for (i = num_mem; i > slot; i--) 377 mem_info[i] = mem_info[i - 1]; 378 mem_info[slot].start = base; 379 mem_info[slot].end = base + length; 380 mem_info[slot].domain = domain; 381 num_mem++; 382 break; 383 } 384 } 385 386 /* 387 * Ensure each memory domain has at least one CPU and that each CPU 388 * has at least one memory domain. 389 */ 390 static int 391 check_domains(void) 392 { 393 int found, i, j; 394 395 for (i = 0; i < num_mem; i++) { 396 found = 0; 397 for (j = 0; j <= last_cpu; j++) 398 if (cpus[j].enabled && 399 cpus[j].domain == mem_info[i].domain) { 400 cpus[j].has_memory = 1; 401 found++; 402 } 403 if (!found) { 404 printf("SRAT: No CPU found for memory domain %d\n", 405 mem_info[i].domain); 406 return (ENXIO); 407 } 408 } 409 for (i = 0; i <= last_cpu; i++) 410 if (cpus[i].enabled && !cpus[i].has_memory) { 411 found = 0; 412 for (j = 0; j < num_mem && !found; j++) { 413 if (mem_info[j].domain == cpus[i].domain) 414 found = 1; 415 } 416 if (!found) { 417 if (bootverbose) 418 printf("SRAT: mem dom %d is empty\n", 419 cpus[i].domain); 420 mem_info[num_mem].start = 0; 421 mem_info[num_mem].end = 0; 422 mem_info[num_mem].domain = cpus[i].domain; 423 num_mem++; 424 } 425 } 426 return (0); 427 } 428 429 /* 430 * Check that the SRAT memory regions cover all of the regions in 431 * phys_avail[]. 432 */ 433 static int 434 check_phys_avail(void) 435 { 436 vm_paddr_t address; 437 int i, j; 438 439 /* j is the current offset into phys_avail[]. */ 440 address = phys_avail[0]; 441 j = 0; 442 for (i = 0; i < num_mem; i++) { 443 /* 444 * Consume as many phys_avail[] entries as fit in this 445 * region. 446 */ 447 while (address >= mem_info[i].start && 448 address <= mem_info[i].end) { 449 /* 450 * If we cover the rest of this phys_avail[] entry, 451 * advance to the next entry. 452 */ 453 if (phys_avail[j + 1] <= mem_info[i].end) { 454 j += 2; 455 if (phys_avail[j] == 0 && 456 phys_avail[j + 1] == 0) { 457 return (0); 458 } 459 address = phys_avail[j]; 460 } else 461 address = mem_info[i].end + 1; 462 } 463 } 464 printf("SRAT: No memory region found for 0x%jx - 0x%jx\n", 465 (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]); 466 return (ENXIO); 467 } 468 469 /* 470 * Renumber the memory domains to be compact and zero-based if not 471 * already. Returns an error if there are too many domains. 472 */ 473 static int 474 renumber_domains(void) 475 { 476 int i, j, slot; 477 478 /* Enumerate all the domains. */ 479 ndomain = 0; 480 for (i = 0; i < num_mem; i++) { 481 /* See if this domain is already known. */ 482 for (j = 0; j < ndomain; j++) { 483 if (domain_pxm[j] >= mem_info[i].domain) 484 break; 485 } 486 if (j < ndomain && domain_pxm[j] == mem_info[i].domain) 487 continue; 488 489 if (ndomain >= MAXMEMDOM) { 490 ndomain = 1; 491 printf("SRAT: Too many memory domains\n"); 492 return (EFBIG); 493 } 494 495 /* Insert the new domain at slot 'j'. */ 496 slot = j; 497 for (j = ndomain; j > slot; j--) 498 domain_pxm[j] = domain_pxm[j - 1]; 499 domain_pxm[slot] = mem_info[i].domain; 500 ndomain++; 501 } 502 503 /* Renumber each domain to its index in the sorted 'domain_pxm' list. */ 504 for (i = 0; i < ndomain; i++) { 505 /* 506 * If the domain is already the right value, no need 507 * to renumber. 508 */ 509 if (domain_pxm[i] == i) 510 continue; 511 512 /* Walk the cpu[] and mem_info[] arrays to renumber. */ 513 for (j = 0; j < num_mem; j++) 514 if (mem_info[j].domain == domain_pxm[i]) 515 mem_info[j].domain = i; 516 for (j = 0; j <= last_cpu; j++) 517 if (cpus[j].enabled && cpus[j].domain == domain_pxm[i]) 518 cpus[j].domain = i; 519 } 520 521 return (0); 522 } 523 524 /* 525 * Look for an ACPI System Resource Affinity Table ("SRAT"), 526 * allocate space for cpu information, and initialize globals. 527 */ 528 int 529 acpi_pxm_init(int ncpus, vm_paddr_t maxphys) 530 { 531 unsigned int idx, size; 532 vm_paddr_t addr; 533 534 if (resource_disabled("srat", 0)) 535 return (-1); 536 537 max_cpus = ncpus; 538 last_cpu = -1; 539 maxphyaddr = maxphys; 540 srat_physaddr = acpi_find_table(ACPI_SIG_SRAT); 541 if (srat_physaddr == 0) 542 return (-1); 543 544 /* 545 * Allocate data structure: 546 * 547 * Find the last physical memory region and steal some memory from 548 * it. This is done because at this point in the boot process 549 * malloc is still not usable. 550 */ 551 for (idx = 0; phys_avail[idx + 1] != 0; idx += 2); 552 KASSERT(idx != 0, ("phys_avail is empty!")); 553 idx -= 2; 554 555 size = sizeof(*cpus) * max_cpus; 556 addr = trunc_page(phys_avail[idx + 1] - size); 557 KASSERT(addr >= phys_avail[idx], 558 ("Not enough memory for SRAT table items")); 559 phys_avail[idx + 1] = addr - 1; 560 561 /* 562 * We cannot rely on PHYS_TO_DMAP because this code is also used in 563 * i386, so use pmap_mapbios to map the memory, this will end up using 564 * the default memory attribute (WB), and the DMAP when available. 565 */ 566 cpus = (struct cpu_info *)pmap_mapbios(addr, size); 567 bzero(cpus, size); 568 return (0); 569 } 570 571 static int 572 parse_srat(void) 573 { 574 int error; 575 576 /* 577 * Make a pass over the table to populate the cpus[] and 578 * mem_info[] tables. 579 */ 580 srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT); 581 error = 0; 582 srat_walk_table(srat_parse_entry, &error); 583 acpi_unmap_table(srat); 584 srat = NULL; 585 if (error || check_domains() != 0 || check_phys_avail() != 0 || 586 renumber_domains() != 0) { 587 srat_physaddr = 0; 588 return (-1); 589 } 590 591 return (0); 592 } 593 594 static void 595 init_mem_locality(void) 596 { 597 int i; 598 599 /* 600 * For now, assume -1 == "no locality information for 601 * this pairing. 602 */ 603 for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++) 604 vm_locality_table[i] = -1; 605 } 606 607 /* 608 * Parse SRAT and SLIT to save proximity info. Don't do 609 * anything if SRAT is not available. 610 */ 611 void 612 acpi_pxm_parse_tables(void) 613 { 614 615 if (srat_physaddr == 0) 616 return; 617 if (parse_srat() < 0) 618 return; 619 init_mem_locality(); 620 (void)parse_slit(); 621 } 622 623 /* 624 * Use saved data from SRAT/SLIT to update memory locality. 625 */ 626 void 627 acpi_pxm_set_mem_locality(void) 628 { 629 630 if (srat_physaddr == 0) 631 return; 632 vm_phys_register_domains(ndomain, mem_info, vm_locality_table); 633 } 634 635 static void 636 srat_walk_table(acpi_subtable_handler *handler, void *arg) 637 { 638 639 acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length, 640 handler, arg); 641 } 642 643 /* 644 * Set up per-CPU domain IDs from information saved in 'cpus' and tear down data 645 * structures allocated by acpi_pxm_init(). 646 */ 647 void 648 acpi_pxm_set_cpu_locality(void) 649 { 650 struct cpu_info *cpu; 651 struct pcpu *pc; 652 u_int i; 653 654 if (srat_physaddr == 0) 655 return; 656 for (i = 0; i < MAXCPU; i++) { 657 if (CPU_ABSENT(i)) 658 continue; 659 pc = pcpu_find(i); 660 KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); 661 cpu = cpu_get_info(pc); 662 pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0; 663 CPU_SET(i, &cpuset_domain[pc->pc_domain]); 664 if (bootverbose) 665 printf("SRAT: CPU %u has memory domain %d\n", i, 666 pc->pc_domain); 667 } 668 /* XXXMJ the page is leaked. */ 669 pmap_unmapbios(cpus, sizeof(*cpus) * max_cpus); 670 srat_physaddr = 0; 671 cpus = NULL; 672 } 673 674 int 675 acpi_pxm_get_cpu_locality(int apic_id) 676 { 677 struct cpu_info *cpu; 678 679 cpu = cpu_find(apic_id); 680 if (cpu == NULL) 681 panic("SRAT: CPU with ID %u is not known", apic_id); 682 return (cpu->domain); 683 } 684 685 /* 686 * Map a _PXM value to a VM domain ID. 687 * 688 * Returns the domain ID, or -1 if no domain ID was found. 689 */ 690 int 691 acpi_map_pxm_to_vm_domainid(int pxm) 692 { 693 int i; 694 695 for (i = 0; i < ndomain; i++) { 696 if (domain_pxm[i] == pxm) 697 return (vm_ndomains > 1 ? i : 0); 698 } 699 700 return (-1); 701 } 702 703 #else /* MAXMEMDOM == 1 */ 704 705 int 706 acpi_map_pxm_to_vm_domainid(int pxm) 707 { 708 709 return (-1); 710 } 711 712 #endif /* MAXMEMDOM > 1 */ 713