1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2010 Hudson River Trading LLC 5 * Written by: John H. Baldwin <jhb@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 #include "opt_vm.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/bus.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/mutex.h> 39 #include <sys/smp.h> 40 #include <sys/vmmeter.h> 41 #include <vm/vm.h> 42 #include <vm/pmap.h> 43 #include <vm/vm_param.h> 44 #include <vm/vm_page.h> 45 #include <vm/vm_phys.h> 46 47 #include <contrib/dev/acpica/include/acpi.h> 48 #include <contrib/dev/acpica/include/aclocal.h> 49 #include <contrib/dev/acpica/include/actables.h> 50 51 #include <machine/md_var.h> 52 53 #include <dev/acpica/acpivar.h> 54 55 #if MAXMEMDOM > 1 56 static struct cpu_info { 57 bool enabled:1; 58 bool has_memory:1; 59 int domain; 60 int id; 61 } *cpus; 62 63 static int max_cpus; 64 static int last_cpu; 65 66 struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1]; 67 int num_mem; 68 69 static ACPI_TABLE_SRAT *srat; 70 static vm_paddr_t srat_physaddr; 71 72 static int domain_pxm[MAXMEMDOM]; 73 static int ndomain; 74 static vm_paddr_t maxphyaddr; 75 76 static ACPI_TABLE_SLIT *slit; 77 static vm_paddr_t slit_physaddr; 78 static int vm_locality_table[MAXMEMDOM * MAXMEMDOM]; 79 80 static void srat_walk_table(acpi_subtable_handler *handler, void *arg); 81 82 /* 83 * SLIT parsing. 84 */ 85 86 static void 87 slit_parse_table(ACPI_TABLE_SLIT *s) 88 { 89 int i, j; 90 int i_domain, j_domain; 91 int offset = 0; 92 uint8_t e; 93 94 /* 95 * This maps the SLIT data into the VM-domain centric view. 96 * There may be sparse entries in the PXM namespace, so 97 * remap them to a VM-domain ID and if it doesn't exist, 98 * skip it. 99 * 100 * It should result in a packed 2d array of VM-domain 101 * locality information entries. 102 */ 103 104 if (bootverbose) 105 printf("SLIT.Localities: %d\n", (int) s->LocalityCount); 106 for (i = 0; i < s->LocalityCount; i++) { 107 i_domain = acpi_map_pxm_to_vm_domainid(i); 108 if (i_domain < 0) 109 continue; 110 111 if (bootverbose) 112 printf("%d: ", i); 113 for (j = 0; j < s->LocalityCount; j++) { 114 j_domain = acpi_map_pxm_to_vm_domainid(j); 115 if (j_domain < 0) 116 continue; 117 e = s->Entry[i * s->LocalityCount + j]; 118 if (bootverbose) 119 printf("%d ", (int) e); 120 /* 255 == "no locality information" */ 121 if (e == 255) 122 vm_locality_table[offset] = -1; 123 else 124 vm_locality_table[offset] = e; 125 offset++; 126 } 127 if (bootverbose) 128 printf("\n"); 129 } 130 } 131 132 /* 133 * Look for an ACPI System Locality Distance Information Table ("SLIT") 134 */ 135 static int 136 parse_slit(void) 137 { 138 139 if (resource_disabled("slit", 0)) { 140 return (-1); 141 } 142 143 slit_physaddr = acpi_find_table(ACPI_SIG_SLIT); 144 if (slit_physaddr == 0) { 145 return (-1); 146 } 147 148 /* 149 * Make a pass over the table to populate the cpus[] and 150 * mem_info[] tables. 151 */ 152 slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT); 153 slit_parse_table(slit); 154 acpi_unmap_table(slit); 155 slit = NULL; 156 157 return (0); 158 } 159 160 /* 161 * SRAT parsing. 162 */ 163 164 /* 165 * Returns true if a memory range overlaps with at least one range in 166 * phys_avail[]. 167 */ 168 static int 169 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end) 170 { 171 int i; 172 173 for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) { 174 if (phys_avail[i + 1] <= start) 175 continue; 176 if (phys_avail[i] < end) 177 return (1); 178 break; 179 } 180 return (0); 181 } 182 183 /* 184 * On x86 we can use the cpuid to index the cpus array, but on arm64 185 * we have an ACPI Processor UID with a larger range. 186 * 187 * Use this variable to indicate if the cpus can be stored by index. 188 */ 189 #ifdef __aarch64__ 190 static const int cpus_use_indexing = 0; 191 #else 192 static const int cpus_use_indexing = 1; 193 #endif 194 195 /* 196 * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64) 197 */ 198 static struct cpu_info * 199 cpu_find(int cpuid) 200 { 201 int i; 202 203 if (cpus_use_indexing) { 204 if (cpuid <= last_cpu && cpus[cpuid].enabled) 205 return (&cpus[cpuid]); 206 } else { 207 for (i = 0; i <= last_cpu; i++) 208 if (cpus[i].id == cpuid) 209 return (&cpus[i]); 210 } 211 return (NULL); 212 } 213 214 /* 215 * Find CPU by pcpu pointer. 216 */ 217 static struct cpu_info * 218 cpu_get_info(struct pcpu *pc) 219 { 220 struct cpu_info *cpup; 221 int id; 222 223 #ifdef __aarch64__ 224 id = pc->pc_acpi_id; 225 #else 226 id = pc->pc_apic_id; 227 #endif 228 cpup = cpu_find(id); 229 if (cpup == NULL) 230 panic("SRAT: CPU with ID %u is not known", id); 231 return (cpup); 232 } 233 234 /* 235 * Add proximity information for a new CPU. 236 */ 237 static struct cpu_info * 238 cpu_add(int cpuid, int domain) 239 { 240 struct cpu_info *cpup; 241 242 if (cpus_use_indexing) { 243 if (cpuid >= max_cpus) 244 return (NULL); 245 last_cpu = imax(last_cpu, cpuid); 246 cpup = &cpus[cpuid]; 247 } else { 248 if (last_cpu >= max_cpus - 1) 249 return (NULL); 250 cpup = &cpus[++last_cpu]; 251 } 252 cpup->domain = domain; 253 cpup->id = cpuid; 254 cpup->enabled = 1; 255 return (cpup); 256 } 257 258 static void 259 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg) 260 { 261 ACPI_SRAT_CPU_AFFINITY *cpu; 262 ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; 263 ACPI_SRAT_MEM_AFFINITY *mem; 264 ACPI_SRAT_GICC_AFFINITY *gicc; 265 static struct cpu_info *cpup; 266 uint64_t base, length; 267 int domain, i, slot; 268 269 switch (entry->Type) { 270 case ACPI_SRAT_TYPE_CPU_AFFINITY: 271 cpu = (ACPI_SRAT_CPU_AFFINITY *)entry; 272 domain = cpu->ProximityDomainLo | 273 cpu->ProximityDomainHi[0] << 8 | 274 cpu->ProximityDomainHi[1] << 16 | 275 cpu->ProximityDomainHi[2] << 24; 276 if (bootverbose) 277 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 278 cpu->ApicId, domain, 279 (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ? 280 "enabled" : "disabled"); 281 if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED)) 282 break; 283 cpup = cpu_find(cpu->ApicId); 284 if (cpup != NULL) { 285 printf("SRAT: Duplicate local APIC ID %u\n", 286 cpu->ApicId); 287 *(int *)arg = ENXIO; 288 break; 289 } 290 cpup = cpu_add(cpu->ApicId, domain); 291 if (cpup == NULL) 292 printf("SRAT: Ignoring local APIC ID %u (too high)\n", 293 cpu->ApicId); 294 break; 295 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 296 x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry; 297 if (bootverbose) 298 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 299 x2apic->ApicId, x2apic->ProximityDomain, 300 (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ? 301 "enabled" : "disabled"); 302 if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED)) 303 break; 304 KASSERT(cpu_find(x2apic->ApicId) == NULL, 305 ("Duplicate local APIC ID %u", x2apic->ApicId)); 306 cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain); 307 if (cpup == NULL) 308 printf("SRAT: Ignoring local APIC ID %u (too high)\n", 309 x2apic->ApicId); 310 break; 311 case ACPI_SRAT_TYPE_GICC_AFFINITY: 312 gicc = (ACPI_SRAT_GICC_AFFINITY *)entry; 313 if (bootverbose) 314 printf("SRAT: Found CPU UID %u domain %d: %s\n", 315 gicc->AcpiProcessorUid, gicc->ProximityDomain, 316 (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ? 317 "enabled" : "disabled"); 318 if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED)) 319 break; 320 KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL, 321 ("Duplicate CPU UID %u", gicc->AcpiProcessorUid)); 322 cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain); 323 if (cpup == NULL) 324 printf("SRAT: Ignoring CPU UID %u (too high)\n", 325 gicc->AcpiProcessorUid); 326 break; 327 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 328 mem = (ACPI_SRAT_MEM_AFFINITY *)entry; 329 base = mem->BaseAddress; 330 length = mem->Length; 331 domain = mem->ProximityDomain; 332 333 if (bootverbose) 334 printf( 335 "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n", 336 domain, (uintmax_t)base, (uintmax_t)length, 337 (mem->Flags & ACPI_SRAT_MEM_ENABLED) ? 338 "enabled" : "disabled"); 339 if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED)) 340 break; 341 if (base >= maxphyaddr || 342 !overlaps_phys_avail(base, base + length)) { 343 printf("SRAT: Ignoring memory at addr 0x%jx\n", 344 (uintmax_t)base); 345 break; 346 } 347 if (num_mem == VM_PHYSSEG_MAX) { 348 printf("SRAT: Too many memory regions\n"); 349 *(int *)arg = ENXIO; 350 break; 351 } 352 slot = num_mem; 353 for (i = 0; i < num_mem; i++) { 354 if (mem_info[i].domain == domain) { 355 /* Try to extend an existing segment. */ 356 if (base == mem_info[i].end) { 357 mem_info[i].end += length; 358 return; 359 } 360 if (base + length == mem_info[i].start) { 361 mem_info[i].start -= length; 362 return; 363 } 364 } 365 if (mem_info[i].end <= base) 366 continue; 367 if (mem_info[i].start < base + length) { 368 printf("SRAT: Overlapping memory entries\n"); 369 *(int *)arg = ENXIO; 370 return; 371 } 372 slot = i; 373 } 374 for (i = num_mem; i > slot; i--) 375 mem_info[i] = mem_info[i - 1]; 376 mem_info[slot].start = base; 377 mem_info[slot].end = base + length; 378 mem_info[slot].domain = domain; 379 num_mem++; 380 break; 381 } 382 } 383 384 /* 385 * Ensure each memory domain has at least one CPU and that each CPU 386 * has at least one memory domain. 387 */ 388 static int 389 check_domains(void) 390 { 391 int found, i, j; 392 393 for (i = 0; i < num_mem; i++) { 394 found = 0; 395 for (j = 0; j <= last_cpu; j++) 396 if (cpus[j].enabled && 397 cpus[j].domain == mem_info[i].domain) { 398 cpus[j].has_memory = 1; 399 found++; 400 } 401 if (!found) { 402 printf("SRAT: No CPU found for memory domain %d\n", 403 mem_info[i].domain); 404 return (ENXIO); 405 } 406 } 407 for (i = 0; i <= last_cpu; i++) 408 if (cpus[i].enabled && !cpus[i].has_memory) { 409 found = 0; 410 for (j = 0; j < num_mem && !found; j++) { 411 if (mem_info[j].domain == cpus[i].domain) 412 found = 1; 413 } 414 if (!found) { 415 if (bootverbose) 416 printf("SRAT: mem dom %d is empty\n", 417 cpus[i].domain); 418 mem_info[num_mem].start = 0; 419 mem_info[num_mem].end = 0; 420 mem_info[num_mem].domain = cpus[i].domain; 421 num_mem++; 422 } 423 } 424 return (0); 425 } 426 427 /* 428 * Check that the SRAT memory regions cover all of the regions in 429 * phys_avail[]. 430 */ 431 static int 432 check_phys_avail(void) 433 { 434 vm_paddr_t address; 435 int i, j; 436 437 /* j is the current offset into phys_avail[]. */ 438 address = phys_avail[0]; 439 j = 0; 440 for (i = 0; i < num_mem; i++) { 441 /* 442 * Consume as many phys_avail[] entries as fit in this 443 * region. 444 */ 445 while (address >= mem_info[i].start && 446 address <= mem_info[i].end) { 447 /* 448 * If we cover the rest of this phys_avail[] entry, 449 * advance to the next entry. 450 */ 451 if (phys_avail[j + 1] <= mem_info[i].end) { 452 j += 2; 453 if (phys_avail[j] == 0 && 454 phys_avail[j + 1] == 0) { 455 return (0); 456 } 457 address = phys_avail[j]; 458 } else 459 address = mem_info[i].end + 1; 460 } 461 } 462 printf("SRAT: No memory region found for 0x%jx - 0x%jx\n", 463 (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]); 464 return (ENXIO); 465 } 466 467 /* 468 * Renumber the memory domains to be compact and zero-based if not 469 * already. Returns an error if there are too many domains. 470 */ 471 static int 472 renumber_domains(void) 473 { 474 int i, j, slot; 475 476 /* Enumerate all the domains. */ 477 ndomain = 0; 478 for (i = 0; i < num_mem; i++) { 479 /* See if this domain is already known. */ 480 for (j = 0; j < ndomain; j++) { 481 if (domain_pxm[j] >= mem_info[i].domain) 482 break; 483 } 484 if (j < ndomain && domain_pxm[j] == mem_info[i].domain) 485 continue; 486 487 if (ndomain >= MAXMEMDOM) { 488 ndomain = 1; 489 printf("SRAT: Too many memory domains\n"); 490 return (EFBIG); 491 } 492 493 /* Insert the new domain at slot 'j'. */ 494 slot = j; 495 for (j = ndomain; j > slot; j--) 496 domain_pxm[j] = domain_pxm[j - 1]; 497 domain_pxm[slot] = mem_info[i].domain; 498 ndomain++; 499 } 500 501 /* Renumber each domain to its index in the sorted 'domain_pxm' list. */ 502 for (i = 0; i < ndomain; i++) { 503 /* 504 * If the domain is already the right value, no need 505 * to renumber. 506 */ 507 if (domain_pxm[i] == i) 508 continue; 509 510 /* Walk the cpu[] and mem_info[] arrays to renumber. */ 511 for (j = 0; j < num_mem; j++) 512 if (mem_info[j].domain == domain_pxm[i]) 513 mem_info[j].domain = i; 514 for (j = 0; j <= last_cpu; j++) 515 if (cpus[j].enabled && cpus[j].domain == domain_pxm[i]) 516 cpus[j].domain = i; 517 } 518 519 return (0); 520 } 521 522 /* 523 * Look for an ACPI System Resource Affinity Table ("SRAT"), 524 * allocate space for cpu information, and initialize globals. 525 */ 526 int 527 acpi_pxm_init(int ncpus, vm_paddr_t maxphys) 528 { 529 unsigned int idx, size; 530 vm_paddr_t addr; 531 532 if (resource_disabled("srat", 0)) 533 return (-1); 534 535 max_cpus = ncpus; 536 last_cpu = -1; 537 maxphyaddr = maxphys; 538 srat_physaddr = acpi_find_table(ACPI_SIG_SRAT); 539 if (srat_physaddr == 0) 540 return (-1); 541 542 /* 543 * Allocate data structure: 544 * 545 * Find the last physical memory region and steal some memory from 546 * it. This is done because at this point in the boot process 547 * malloc is still not usable. 548 */ 549 for (idx = 0; phys_avail[idx + 1] != 0; idx += 2); 550 KASSERT(idx != 0, ("phys_avail is empty!")); 551 idx -= 2; 552 553 size = sizeof(*cpus) * max_cpus; 554 addr = trunc_page(phys_avail[idx + 1] - size); 555 KASSERT(addr >= phys_avail[idx], 556 ("Not enough memory for SRAT table items")); 557 phys_avail[idx + 1] = addr - 1; 558 559 /* 560 * We cannot rely on PHYS_TO_DMAP because this code is also used in 561 * i386, so use pmap_mapbios to map the memory, this will end up using 562 * the default memory attribute (WB), and the DMAP when available. 563 */ 564 cpus = (struct cpu_info *)pmap_mapbios(addr, size); 565 bzero(cpus, size); 566 return (0); 567 } 568 569 static int 570 parse_srat(void) 571 { 572 int error; 573 574 /* 575 * Make a pass over the table to populate the cpus[] and 576 * mem_info[] tables. 577 */ 578 srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT); 579 error = 0; 580 srat_walk_table(srat_parse_entry, &error); 581 acpi_unmap_table(srat); 582 srat = NULL; 583 if (error || check_domains() != 0 || check_phys_avail() != 0 || 584 renumber_domains() != 0) { 585 srat_physaddr = 0; 586 return (-1); 587 } 588 589 return (0); 590 } 591 592 static void 593 init_mem_locality(void) 594 { 595 int i; 596 597 /* 598 * For now, assume -1 == "no locality information for 599 * this pairing. 600 */ 601 for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++) 602 vm_locality_table[i] = -1; 603 } 604 605 /* 606 * Parse SRAT and SLIT to save proximity info. Don't do 607 * anything if SRAT is not available. 608 */ 609 void 610 acpi_pxm_parse_tables(void) 611 { 612 613 if (srat_physaddr == 0) 614 return; 615 if (parse_srat() < 0) 616 return; 617 init_mem_locality(); 618 (void)parse_slit(); 619 } 620 621 /* 622 * Use saved data from SRAT/SLIT to update memory locality. 623 */ 624 void 625 acpi_pxm_set_mem_locality(void) 626 { 627 628 if (srat_physaddr == 0) 629 return; 630 vm_phys_register_domains(ndomain, mem_info, vm_locality_table); 631 } 632 633 static void 634 srat_walk_table(acpi_subtable_handler *handler, void *arg) 635 { 636 637 acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length, 638 handler, arg); 639 } 640 641 /* 642 * Set up per-CPU domain IDs from information saved in 'cpus' and tear down data 643 * structures allocated by acpi_pxm_init(). 644 */ 645 void 646 acpi_pxm_set_cpu_locality(void) 647 { 648 struct cpu_info *cpu; 649 struct pcpu *pc; 650 u_int i; 651 652 if (srat_physaddr == 0) 653 return; 654 for (i = 0; i < MAXCPU; i++) { 655 if (CPU_ABSENT(i)) 656 continue; 657 pc = pcpu_find(i); 658 KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); 659 cpu = cpu_get_info(pc); 660 pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0; 661 CPU_SET(i, &cpuset_domain[pc->pc_domain]); 662 if (bootverbose) 663 printf("SRAT: CPU %u has memory domain %d\n", i, 664 pc->pc_domain); 665 } 666 /* XXXMJ the page is leaked. */ 667 pmap_unmapbios(cpus, sizeof(*cpus) * max_cpus); 668 srat_physaddr = 0; 669 cpus = NULL; 670 } 671 672 int 673 acpi_pxm_get_cpu_locality(int apic_id) 674 { 675 struct cpu_info *cpu; 676 677 cpu = cpu_find(apic_id); 678 if (cpu == NULL) 679 panic("SRAT: CPU with ID %u is not known", apic_id); 680 return (cpu->domain); 681 } 682 683 /* 684 * Map a _PXM value to a VM domain ID. 685 * 686 * Returns the domain ID, or -1 if no domain ID was found. 687 */ 688 int 689 acpi_map_pxm_to_vm_domainid(int pxm) 690 { 691 int i; 692 693 for (i = 0; i < ndomain; i++) { 694 if (domain_pxm[i] == pxm) 695 return (vm_ndomains > 1 ? i : 0); 696 } 697 698 return (-1); 699 } 700 701 #else /* MAXMEMDOM == 1 */ 702 703 int 704 acpi_map_pxm_to_vm_domainid(int pxm) 705 { 706 707 return (-1); 708 } 709 710 #endif /* MAXMEMDOM > 1 */ 711