1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2010 Hudson River Trading LLC 5 * Written by: John H. Baldwin <jhb@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_vm.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/bus.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/smp.h> 42 #include <sys/vmmeter.h> 43 #include <vm/vm.h> 44 #include <vm/pmap.h> 45 #include <vm/vm_param.h> 46 #include <vm/vm_page.h> 47 #include <vm/vm_phys.h> 48 49 #include <contrib/dev/acpica/include/acpi.h> 50 #include <contrib/dev/acpica/include/aclocal.h> 51 #include <contrib/dev/acpica/include/actables.h> 52 53 #include <machine/md_var.h> 54 55 #include <dev/acpica/acpivar.h> 56 57 #if MAXMEMDOM > 1 58 static struct cpu_info { 59 int enabled:1; 60 int has_memory:1; 61 int domain; 62 int id; 63 } *cpus; 64 65 static int max_cpus; 66 static int last_cpu; 67 68 struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1]; 69 int num_mem; 70 71 static ACPI_TABLE_SRAT *srat; 72 static vm_paddr_t srat_physaddr; 73 74 static int domain_pxm[MAXMEMDOM]; 75 static int ndomain; 76 static vm_paddr_t maxphyaddr; 77 78 static ACPI_TABLE_SLIT *slit; 79 static vm_paddr_t slit_physaddr; 80 static int vm_locality_table[MAXMEMDOM * MAXMEMDOM]; 81 82 static void srat_walk_table(acpi_subtable_handler *handler, void *arg); 83 84 /* 85 * SLIT parsing. 86 */ 87 88 static void 89 slit_parse_table(ACPI_TABLE_SLIT *s) 90 { 91 int i, j; 92 int i_domain, j_domain; 93 int offset = 0; 94 uint8_t e; 95 96 /* 97 * This maps the SLIT data into the VM-domain centric view. 98 * There may be sparse entries in the PXM namespace, so 99 * remap them to a VM-domain ID and if it doesn't exist, 100 * skip it. 101 * 102 * It should result in a packed 2d array of VM-domain 103 * locality information entries. 104 */ 105 106 if (bootverbose) 107 printf("SLIT.Localities: %d\n", (int) s->LocalityCount); 108 for (i = 0; i < s->LocalityCount; i++) { 109 i_domain = acpi_map_pxm_to_vm_domainid(i); 110 if (i_domain < 0) 111 continue; 112 113 if (bootverbose) 114 printf("%d: ", i); 115 for (j = 0; j < s->LocalityCount; j++) { 116 j_domain = acpi_map_pxm_to_vm_domainid(j); 117 if (j_domain < 0) 118 continue; 119 e = s->Entry[i * s->LocalityCount + j]; 120 if (bootverbose) 121 printf("%d ", (int) e); 122 /* 255 == "no locality information" */ 123 if (e == 255) 124 vm_locality_table[offset] = -1; 125 else 126 vm_locality_table[offset] = e; 127 offset++; 128 } 129 if (bootverbose) 130 printf("\n"); 131 } 132 } 133 134 /* 135 * Look for an ACPI System Locality Distance Information Table ("SLIT") 136 */ 137 static int 138 parse_slit(void) 139 { 140 141 if (resource_disabled("slit", 0)) { 142 return (-1); 143 } 144 145 slit_physaddr = acpi_find_table(ACPI_SIG_SLIT); 146 if (slit_physaddr == 0) { 147 return (-1); 148 } 149 150 /* 151 * Make a pass over the table to populate the cpus[] and 152 * mem_info[] tables. 153 */ 154 slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT); 155 slit_parse_table(slit); 156 acpi_unmap_table(slit); 157 slit = NULL; 158 159 return (0); 160 } 161 162 /* 163 * SRAT parsing. 164 */ 165 166 /* 167 * Returns true if a memory range overlaps with at least one range in 168 * phys_avail[]. 169 */ 170 static int 171 overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end) 172 { 173 int i; 174 175 for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) { 176 if (phys_avail[i + 1] <= start) 177 continue; 178 if (phys_avail[i] < end) 179 return (1); 180 break; 181 } 182 return (0); 183 } 184 185 /* 186 * On x86 we can use the cpuid to index the cpus array, but on arm64 187 * we have an ACPI Processor UID with a larger range. 188 * 189 * Use this variable to indicate if the cpus can be stored by index. 190 */ 191 #ifdef __aarch64__ 192 static const int cpus_use_indexing = 0; 193 #else 194 static const int cpus_use_indexing = 1; 195 #endif 196 197 /* 198 * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64) 199 */ 200 static struct cpu_info * 201 cpu_find(int cpuid) 202 { 203 int i; 204 205 if (cpus_use_indexing) { 206 if (cpuid <= last_cpu && cpus[cpuid].enabled) 207 return (&cpus[cpuid]); 208 } else { 209 for (i = 0; i <= last_cpu; i++) 210 if (cpus[i].id == cpuid) 211 return (&cpus[i]); 212 } 213 return (NULL); 214 } 215 216 /* 217 * Find CPU by pcpu pointer. 218 */ 219 static struct cpu_info * 220 cpu_get_info(struct pcpu *pc) 221 { 222 struct cpu_info *cpup; 223 int id; 224 225 #ifdef __aarch64__ 226 id = pc->pc_acpi_id; 227 #else 228 id = pc->pc_apic_id; 229 #endif 230 cpup = cpu_find(id); 231 if (cpup == NULL) 232 panic("SRAT: CPU with ID %u is not known", id); 233 return (cpup); 234 } 235 236 /* 237 * Add proximity information for a new CPU. 238 */ 239 static struct cpu_info * 240 cpu_add(int cpuid, int domain) 241 { 242 struct cpu_info *cpup; 243 244 if (cpus_use_indexing) { 245 if (cpuid >= max_cpus) 246 return (NULL); 247 last_cpu = imax(last_cpu, cpuid); 248 cpup = &cpus[cpuid]; 249 } else { 250 if (last_cpu >= max_cpus - 1) 251 return (NULL); 252 cpup = &cpus[++last_cpu]; 253 } 254 cpup->domain = domain; 255 cpup->id = cpuid; 256 cpup->enabled = 1; 257 return (cpup); 258 } 259 260 static void 261 srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg) 262 { 263 ACPI_SRAT_CPU_AFFINITY *cpu; 264 ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; 265 ACPI_SRAT_MEM_AFFINITY *mem; 266 ACPI_SRAT_GICC_AFFINITY *gicc; 267 static struct cpu_info *cpup; 268 int domain, i, slot; 269 270 switch (entry->Type) { 271 case ACPI_SRAT_TYPE_CPU_AFFINITY: 272 cpu = (ACPI_SRAT_CPU_AFFINITY *)entry; 273 domain = cpu->ProximityDomainLo | 274 cpu->ProximityDomainHi[0] << 8 | 275 cpu->ProximityDomainHi[1] << 16 | 276 cpu->ProximityDomainHi[2] << 24; 277 if (bootverbose) 278 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 279 cpu->ApicId, domain, 280 (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ? 281 "enabled" : "disabled"); 282 if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED)) 283 break; 284 cpup = cpu_find(cpu->ApicId); 285 if (cpup != NULL) { 286 printf("SRAT: Duplicate local APIC ID %u\n", 287 cpu->ApicId); 288 *(int *)arg = ENXIO; 289 break; 290 } 291 cpup = cpu_add(cpu->ApicId, domain); 292 if (cpup == NULL) 293 printf("SRAT: Ignoring local APIC ID %u (too high)\n", 294 cpu->ApicId); 295 break; 296 case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 297 x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry; 298 if (bootverbose) 299 printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 300 x2apic->ApicId, x2apic->ProximityDomain, 301 (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ? 302 "enabled" : "disabled"); 303 if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED)) 304 break; 305 KASSERT(cpu_find(x2apic->ApicId) == NULL, 306 ("Duplicate local APIC ID %u", x2apic->ApicId)); 307 cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain); 308 if (cpup == NULL) 309 printf("SRAT: Ignoring local APIC ID %u (too high)\n", 310 x2apic->ApicId); 311 break; 312 case ACPI_SRAT_TYPE_GICC_AFFINITY: 313 gicc = (ACPI_SRAT_GICC_AFFINITY *)entry; 314 if (bootverbose) 315 printf("SRAT: Found CPU UID %u domain %d: %s\n", 316 gicc->AcpiProcessorUid, gicc->ProximityDomain, 317 (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ? 318 "enabled" : "disabled"); 319 if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED)) 320 break; 321 KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL, 322 ("Duplicate CPU UID %u", gicc->AcpiProcessorUid)); 323 cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain); 324 if (cpup == NULL) 325 printf("SRAT: Ignoring CPU UID %u (too high)\n", 326 gicc->AcpiProcessorUid); 327 break; 328 case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 329 mem = (ACPI_SRAT_MEM_AFFINITY *)entry; 330 if (bootverbose) 331 printf( 332 "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n", 333 mem->ProximityDomain, (uintmax_t)mem->BaseAddress, 334 (uintmax_t)mem->Length, 335 (mem->Flags & ACPI_SRAT_MEM_ENABLED) ? 336 "enabled" : "disabled"); 337 if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED)) 338 break; 339 if (mem->BaseAddress >= maxphyaddr || 340 !overlaps_phys_avail(mem->BaseAddress, 341 mem->BaseAddress + mem->Length)) { 342 printf("SRAT: Ignoring memory at addr 0x%jx\n", 343 (uintmax_t)mem->BaseAddress); 344 break; 345 } 346 if (num_mem == VM_PHYSSEG_MAX) { 347 printf("SRAT: Too many memory regions\n"); 348 *(int *)arg = ENXIO; 349 break; 350 } 351 slot = num_mem; 352 for (i = 0; i < num_mem; i++) { 353 if (mem_info[i].end <= mem->BaseAddress) 354 continue; 355 if (mem_info[i].start < 356 (mem->BaseAddress + mem->Length)) { 357 printf("SRAT: Overlapping memory entries\n"); 358 *(int *)arg = ENXIO; 359 return; 360 } 361 slot = i; 362 } 363 for (i = num_mem; i > slot; i--) 364 mem_info[i] = mem_info[i - 1]; 365 mem_info[slot].start = mem->BaseAddress; 366 mem_info[slot].end = mem->BaseAddress + mem->Length; 367 mem_info[slot].domain = mem->ProximityDomain; 368 num_mem++; 369 break; 370 } 371 } 372 373 /* 374 * Ensure each memory domain has at least one CPU and that each CPU 375 * has at least one memory domain. 376 */ 377 static int 378 check_domains(void) 379 { 380 int found, i, j; 381 382 for (i = 0; i < num_mem; i++) { 383 found = 0; 384 for (j = 0; j <= last_cpu; j++) 385 if (cpus[j].enabled && 386 cpus[j].domain == mem_info[i].domain) { 387 cpus[j].has_memory = 1; 388 found++; 389 } 390 if (!found) { 391 printf("SRAT: No CPU found for memory domain %d\n", 392 mem_info[i].domain); 393 return (ENXIO); 394 } 395 } 396 for (i = 0; i <= last_cpu; i++) 397 if (cpus[i].enabled && !cpus[i].has_memory) { 398 found = 0; 399 for (j = 0; j < num_mem && !found; j++) { 400 if (mem_info[j].domain == cpus[i].domain) 401 found = 1; 402 } 403 if (!found) { 404 if (bootverbose) 405 printf("SRAT: mem dom %d is empty\n", 406 cpus[i].domain); 407 mem_info[num_mem].start = 0; 408 mem_info[num_mem].end = 0; 409 mem_info[num_mem].domain = cpus[i].domain; 410 num_mem++; 411 } 412 } 413 return (0); 414 } 415 416 /* 417 * Check that the SRAT memory regions cover all of the regions in 418 * phys_avail[]. 419 */ 420 static int 421 check_phys_avail(void) 422 { 423 vm_paddr_t address; 424 int i, j; 425 426 /* j is the current offset into phys_avail[]. */ 427 address = phys_avail[0]; 428 j = 0; 429 for (i = 0; i < num_mem; i++) { 430 /* 431 * Consume as many phys_avail[] entries as fit in this 432 * region. 433 */ 434 while (address >= mem_info[i].start && 435 address <= mem_info[i].end) { 436 /* 437 * If we cover the rest of this phys_avail[] entry, 438 * advance to the next entry. 439 */ 440 if (phys_avail[j + 1] <= mem_info[i].end) { 441 j += 2; 442 if (phys_avail[j] == 0 && 443 phys_avail[j + 1] == 0) { 444 return (0); 445 } 446 address = phys_avail[j]; 447 } else 448 address = mem_info[i].end + 1; 449 } 450 } 451 printf("SRAT: No memory region found for 0x%jx - 0x%jx\n", 452 (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]); 453 return (ENXIO); 454 } 455 456 /* 457 * Renumber the memory domains to be compact and zero-based if not 458 * already. Returns an error if there are too many domains. 459 */ 460 static int 461 renumber_domains(void) 462 { 463 int i, j, slot; 464 465 /* Enumerate all the domains. */ 466 ndomain = 0; 467 for (i = 0; i < num_mem; i++) { 468 /* See if this domain is already known. */ 469 for (j = 0; j < ndomain; j++) { 470 if (domain_pxm[j] >= mem_info[i].domain) 471 break; 472 } 473 if (j < ndomain && domain_pxm[j] == mem_info[i].domain) 474 continue; 475 476 if (ndomain >= MAXMEMDOM) { 477 ndomain = 1; 478 printf("SRAT: Too many memory domains\n"); 479 return (EFBIG); 480 } 481 482 /* Insert the new domain at slot 'j'. */ 483 slot = j; 484 for (j = ndomain; j > slot; j--) 485 domain_pxm[j] = domain_pxm[j - 1]; 486 domain_pxm[slot] = mem_info[i].domain; 487 ndomain++; 488 } 489 490 /* Renumber each domain to its index in the sorted 'domain_pxm' list. */ 491 for (i = 0; i < ndomain; i++) { 492 /* 493 * If the domain is already the right value, no need 494 * to renumber. 495 */ 496 if (domain_pxm[i] == i) 497 continue; 498 499 /* Walk the cpu[] and mem_info[] arrays to renumber. */ 500 for (j = 0; j < num_mem; j++) 501 if (mem_info[j].domain == domain_pxm[i]) 502 mem_info[j].domain = i; 503 for (j = 0; j <= last_cpu; j++) 504 if (cpus[j].enabled && cpus[j].domain == domain_pxm[i]) 505 cpus[j].domain = i; 506 } 507 508 return (0); 509 } 510 511 /* 512 * Look for an ACPI System Resource Affinity Table ("SRAT"), 513 * allocate space for cpu information, and initialize globals. 514 */ 515 int 516 acpi_pxm_init(int ncpus, vm_paddr_t maxphys) 517 { 518 unsigned int idx, size; 519 vm_paddr_t addr; 520 521 if (resource_disabled("srat", 0)) 522 return (-1); 523 524 max_cpus = ncpus; 525 last_cpu = -1; 526 maxphyaddr = maxphys; 527 srat_physaddr = acpi_find_table(ACPI_SIG_SRAT); 528 if (srat_physaddr == 0) 529 return (-1); 530 531 /* 532 * Allocate data structure: 533 * 534 * Find the last physical memory region and steal some memory from 535 * it. This is done because at this point in the boot process 536 * malloc is still not usable. 537 */ 538 for (idx = 0; phys_avail[idx + 1] != 0; idx += 2); 539 KASSERT(idx != 0, ("phys_avail is empty!")); 540 idx -= 2; 541 542 size = sizeof(*cpus) * max_cpus; 543 addr = trunc_page(phys_avail[idx + 1] - size); 544 KASSERT(addr >= phys_avail[idx], 545 ("Not enough memory for SRAT table items")); 546 phys_avail[idx + 1] = addr - 1; 547 548 /* 549 * We cannot rely on PHYS_TO_DMAP because this code is also used in 550 * i386, so use pmap_mapbios to map the memory, this will end up using 551 * the default memory attribute (WB), and the DMAP when available. 552 */ 553 cpus = (struct cpu_info *)pmap_mapbios(addr, size); 554 bzero(cpus, size); 555 return (0); 556 } 557 558 static int 559 parse_srat(void) 560 { 561 int error; 562 563 /* 564 * Make a pass over the table to populate the cpus[] and 565 * mem_info[] tables. 566 */ 567 srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT); 568 error = 0; 569 srat_walk_table(srat_parse_entry, &error); 570 acpi_unmap_table(srat); 571 srat = NULL; 572 if (error || check_domains() != 0 || check_phys_avail() != 0 || 573 renumber_domains() != 0) { 574 srat_physaddr = 0; 575 return (-1); 576 } 577 578 return (0); 579 } 580 581 static void 582 init_mem_locality(void) 583 { 584 int i; 585 586 /* 587 * For now, assume -1 == "no locality information for 588 * this pairing. 589 */ 590 for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++) 591 vm_locality_table[i] = -1; 592 } 593 594 /* 595 * Parse SRAT and SLIT to save proximity info. Don't do 596 * anything if SRAT is not available. 597 */ 598 void 599 acpi_pxm_parse_tables(void) 600 { 601 602 if (srat_physaddr == 0) 603 return; 604 if (parse_srat() < 0) 605 return; 606 init_mem_locality(); 607 (void)parse_slit(); 608 } 609 610 /* 611 * Use saved data from SRAT/SLIT to update memory locality. 612 */ 613 void 614 acpi_pxm_set_mem_locality(void) 615 { 616 617 if (srat_physaddr == 0) 618 return; 619 vm_phys_register_domains(ndomain, mem_info, vm_locality_table); 620 } 621 622 static void 623 srat_walk_table(acpi_subtable_handler *handler, void *arg) 624 { 625 626 acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length, 627 handler, arg); 628 } 629 630 /* 631 * Setup per-CPU domain IDs from information saved in 'cpus'. 632 */ 633 void 634 acpi_pxm_set_cpu_locality(void) 635 { 636 struct cpu_info *cpu; 637 struct pcpu *pc; 638 u_int i; 639 640 if (srat_physaddr == 0) 641 return; 642 for (i = 0; i < MAXCPU; i++) { 643 if (CPU_ABSENT(i)) 644 continue; 645 pc = pcpu_find(i); 646 KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); 647 cpu = cpu_get_info(pc); 648 pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0; 649 CPU_SET(i, &cpuset_domain[pc->pc_domain]); 650 if (bootverbose) 651 printf("SRAT: CPU %u has memory domain %d\n", i, 652 pc->pc_domain); 653 } 654 } 655 656 /* 657 * Free data structures allocated during acpi_pxm_init. 658 */ 659 void 660 acpi_pxm_free(void) 661 { 662 663 if (srat_physaddr == 0) 664 return; 665 pmap_unmapbios((vm_offset_t)cpus, sizeof(*cpus) * max_cpus); 666 srat_physaddr = 0; 667 cpus = NULL; 668 } 669 670 /* 671 * Map a _PXM value to a VM domain ID. 672 * 673 * Returns the domain ID, or -1 if no domain ID was found. 674 */ 675 int 676 acpi_map_pxm_to_vm_domainid(int pxm) 677 { 678 int i; 679 680 for (i = 0; i < ndomain; i++) { 681 if (domain_pxm[i] == pxm) 682 return (vm_ndomains > 1 ? i : 0); 683 } 684 685 return (-1); 686 } 687 688 #else /* MAXMEMDOM == 1 */ 689 690 int 691 acpi_map_pxm_to_vm_domainid(int pxm) 692 { 693 694 return (-1); 695 } 696 697 #endif /* MAXMEMDOM > 1 */ 698