19417fa9eSJayachandran C. /*- 29417fa9eSJayachandran C. * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 39417fa9eSJayachandran C. * 49417fa9eSJayachandran C. * Copyright (c) 2010 Hudson River Trading LLC 59417fa9eSJayachandran C. * Written by: John H. Baldwin <jhb@FreeBSD.org> 69417fa9eSJayachandran C. * All rights reserved. 79417fa9eSJayachandran C. * 89417fa9eSJayachandran C. * Redistribution and use in source and binary forms, with or without 99417fa9eSJayachandran C. * modification, are permitted provided that the following conditions 109417fa9eSJayachandran C. * are met: 119417fa9eSJayachandran C. * 1. Redistributions of source code must retain the above copyright 129417fa9eSJayachandran C. * notice, this list of conditions and the following disclaimer. 139417fa9eSJayachandran C. * 2. Redistributions in binary form must reproduce the above copyright 149417fa9eSJayachandran C. * notice, this list of conditions and the following disclaimer in the 159417fa9eSJayachandran C. * documentation and/or other materials provided with the distribution. 169417fa9eSJayachandran C. * 179417fa9eSJayachandran C. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 189417fa9eSJayachandran C. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 199417fa9eSJayachandran C. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 209417fa9eSJayachandran C. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 219417fa9eSJayachandran C. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 229417fa9eSJayachandran C. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 239417fa9eSJayachandran C. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 249417fa9eSJayachandran C. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 259417fa9eSJayachandran C. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 269417fa9eSJayachandran C. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 279417fa9eSJayachandran C. * SUCH DAMAGE. 289417fa9eSJayachandran C. */ 299417fa9eSJayachandran C. 309417fa9eSJayachandran C. #include <sys/cdefs.h> 319417fa9eSJayachandran C. __FBSDID("$FreeBSD$"); 329417fa9eSJayachandran C. 339417fa9eSJayachandran C. #include "opt_vm.h" 349417fa9eSJayachandran C. 359417fa9eSJayachandran C. #include <sys/param.h> 369417fa9eSJayachandran C. #include <sys/systm.h> 379417fa9eSJayachandran C. #include <sys/bus.h> 389417fa9eSJayachandran C. #include <sys/kernel.h> 399417fa9eSJayachandran C. #include <sys/lock.h> 409417fa9eSJayachandran C. #include <sys/mutex.h> 419417fa9eSJayachandran C. #include <sys/smp.h> 429417fa9eSJayachandran C. #include <sys/vmmeter.h> 439417fa9eSJayachandran C. #include <vm/vm.h> 449417fa9eSJayachandran C. #include <vm/pmap.h> 459417fa9eSJayachandran C. #include <vm/vm_param.h> 469417fa9eSJayachandran C. #include <vm/vm_page.h> 479417fa9eSJayachandran C. #include <vm/vm_phys.h> 489417fa9eSJayachandran C. 499417fa9eSJayachandran C. #include <contrib/dev/acpica/include/acpi.h> 509417fa9eSJayachandran C. #include <contrib/dev/acpica/include/aclocal.h> 519417fa9eSJayachandran C. #include <contrib/dev/acpica/include/actables.h> 529417fa9eSJayachandran C. 539417fa9eSJayachandran C. #include <machine/md_var.h> 549417fa9eSJayachandran C. 559417fa9eSJayachandran C. #include <dev/acpica/acpivar.h> 569417fa9eSJayachandran C. 579417fa9eSJayachandran C. #if MAXMEMDOM > 1 589417fa9eSJayachandran C. static struct cpu_info { 599417fa9eSJayachandran C. int enabled:1; 609417fa9eSJayachandran C. int has_memory:1; 619417fa9eSJayachandran C. int domain; 62*13aacaeeSJayachandran C. int id; 639417fa9eSJayachandran C. } *cpus; 649417fa9eSJayachandran C. 659417fa9eSJayachandran C. static int max_cpus; 669417fa9eSJayachandran C. static int last_cpu; 679417fa9eSJayachandran C. 689417fa9eSJayachandran C. struct mem_affinity mem_info[VM_PHYSSEG_MAX + 1]; 699417fa9eSJayachandran C. int num_mem; 709417fa9eSJayachandran C. 719417fa9eSJayachandran C. static ACPI_TABLE_SRAT *srat; 729417fa9eSJayachandran C. static vm_paddr_t srat_physaddr; 739417fa9eSJayachandran C. 749417fa9eSJayachandran C. static int domain_pxm[MAXMEMDOM]; 759417fa9eSJayachandran C. static int ndomain; 769417fa9eSJayachandran C. static vm_paddr_t maxphyaddr; 779417fa9eSJayachandran C. 789417fa9eSJayachandran C. static ACPI_TABLE_SLIT *slit; 799417fa9eSJayachandran C. static vm_paddr_t slit_physaddr; 809417fa9eSJayachandran C. static int vm_locality_table[MAXMEMDOM * MAXMEMDOM]; 819417fa9eSJayachandran C. 829417fa9eSJayachandran C. static void srat_walk_table(acpi_subtable_handler *handler, void *arg); 839417fa9eSJayachandran C. 849417fa9eSJayachandran C. /* 859417fa9eSJayachandran C. * SLIT parsing. 869417fa9eSJayachandran C. */ 879417fa9eSJayachandran C. 889417fa9eSJayachandran C. static void 899417fa9eSJayachandran C. slit_parse_table(ACPI_TABLE_SLIT *s) 909417fa9eSJayachandran C. { 919417fa9eSJayachandran C. int i, j; 929417fa9eSJayachandran C. int i_domain, j_domain; 939417fa9eSJayachandran C. int offset = 0; 949417fa9eSJayachandran C. uint8_t e; 959417fa9eSJayachandran C. 969417fa9eSJayachandran C. /* 979417fa9eSJayachandran C. * This maps the SLIT data into the VM-domain centric view. 989417fa9eSJayachandran C. * There may be sparse entries in the PXM namespace, so 999417fa9eSJayachandran C. * remap them to a VM-domain ID and if it doesn't exist, 1009417fa9eSJayachandran C. * skip it. 1019417fa9eSJayachandran C. * 1029417fa9eSJayachandran C. * It should result in a packed 2d array of VM-domain 1039417fa9eSJayachandran C. * locality information entries. 1049417fa9eSJayachandran C. */ 1059417fa9eSJayachandran C. 1069417fa9eSJayachandran C. if (bootverbose) 1079417fa9eSJayachandran C. printf("SLIT.Localities: %d\n", (int) s->LocalityCount); 1089417fa9eSJayachandran C. for (i = 0; i < s->LocalityCount; i++) { 1099417fa9eSJayachandran C. i_domain = acpi_map_pxm_to_vm_domainid(i); 1109417fa9eSJayachandran C. if (i_domain < 0) 1119417fa9eSJayachandran C. continue; 1129417fa9eSJayachandran C. 1139417fa9eSJayachandran C. if (bootverbose) 1149417fa9eSJayachandran C. printf("%d: ", i); 1159417fa9eSJayachandran C. for (j = 0; j < s->LocalityCount; j++) { 1169417fa9eSJayachandran C. j_domain = acpi_map_pxm_to_vm_domainid(j); 1179417fa9eSJayachandran C. if (j_domain < 0) 1189417fa9eSJayachandran C. continue; 1199417fa9eSJayachandran C. e = s->Entry[i * s->LocalityCount + j]; 1209417fa9eSJayachandran C. if (bootverbose) 1219417fa9eSJayachandran C. printf("%d ", (int) e); 1229417fa9eSJayachandran C. /* 255 == "no locality information" */ 1239417fa9eSJayachandran C. if (e == 255) 1249417fa9eSJayachandran C. vm_locality_table[offset] = -1; 1259417fa9eSJayachandran C. else 1269417fa9eSJayachandran C. vm_locality_table[offset] = e; 1279417fa9eSJayachandran C. offset++; 1289417fa9eSJayachandran C. } 1299417fa9eSJayachandran C. if (bootverbose) 1309417fa9eSJayachandran C. printf("\n"); 1319417fa9eSJayachandran C. } 1329417fa9eSJayachandran C. } 1339417fa9eSJayachandran C. 1349417fa9eSJayachandran C. /* 1359417fa9eSJayachandran C. * Look for an ACPI System Locality Distance Information Table ("SLIT") 1369417fa9eSJayachandran C. */ 1379417fa9eSJayachandran C. static int 1389417fa9eSJayachandran C. parse_slit(void) 1399417fa9eSJayachandran C. { 1409417fa9eSJayachandran C. 1419417fa9eSJayachandran C. if (resource_disabled("slit", 0)) { 1429417fa9eSJayachandran C. return (-1); 1439417fa9eSJayachandran C. } 1449417fa9eSJayachandran C. 1459417fa9eSJayachandran C. slit_physaddr = acpi_find_table(ACPI_SIG_SLIT); 1469417fa9eSJayachandran C. if (slit_physaddr == 0) { 1479417fa9eSJayachandran C. return (-1); 1489417fa9eSJayachandran C. } 1499417fa9eSJayachandran C. 1509417fa9eSJayachandran C. /* 1519417fa9eSJayachandran C. * Make a pass over the table to populate the cpus[] and 1529417fa9eSJayachandran C. * mem_info[] tables. 1539417fa9eSJayachandran C. */ 1549417fa9eSJayachandran C. slit = acpi_map_table(slit_physaddr, ACPI_SIG_SLIT); 1559417fa9eSJayachandran C. slit_parse_table(slit); 1569417fa9eSJayachandran C. acpi_unmap_table(slit); 1579417fa9eSJayachandran C. slit = NULL; 1589417fa9eSJayachandran C. 1599417fa9eSJayachandran C. return (0); 1609417fa9eSJayachandran C. } 1619417fa9eSJayachandran C. 1629417fa9eSJayachandran C. /* 1639417fa9eSJayachandran C. * SRAT parsing. 1649417fa9eSJayachandran C. */ 1659417fa9eSJayachandran C. 1669417fa9eSJayachandran C. /* 1679417fa9eSJayachandran C. * Returns true if a memory range overlaps with at least one range in 1689417fa9eSJayachandran C. * phys_avail[]. 1699417fa9eSJayachandran C. */ 1709417fa9eSJayachandran C. static int 1719417fa9eSJayachandran C. overlaps_phys_avail(vm_paddr_t start, vm_paddr_t end) 1729417fa9eSJayachandran C. { 1739417fa9eSJayachandran C. int i; 1749417fa9eSJayachandran C. 1759417fa9eSJayachandran C. for (i = 0; phys_avail[i] != 0 && phys_avail[i + 1] != 0; i += 2) { 1769417fa9eSJayachandran C. if (phys_avail[i + 1] <= start) 1779417fa9eSJayachandran C. continue; 1789417fa9eSJayachandran C. if (phys_avail[i] < end) 1799417fa9eSJayachandran C. return (1); 1809417fa9eSJayachandran C. break; 1819417fa9eSJayachandran C. } 1829417fa9eSJayachandran C. return (0); 1839417fa9eSJayachandran C. } 1849417fa9eSJayachandran C. 1859417fa9eSJayachandran C. /* 186*13aacaeeSJayachandran C. * On x86 we can use the cpuid to index the cpus array, but on arm64 187*13aacaeeSJayachandran C. * we have an ACPI Processor UID with a larger range. 188*13aacaeeSJayachandran C. * 189*13aacaeeSJayachandran C. * Use this variable to indicate if the cpus can be stored by index. 190*13aacaeeSJayachandran C. */ 191*13aacaeeSJayachandran C. #ifdef __aarch64__ 192*13aacaeeSJayachandran C. static const int cpus_use_indexing = 0; 193*13aacaeeSJayachandran C. #else 194*13aacaeeSJayachandran C. static const int cpus_use_indexing = 1; 195*13aacaeeSJayachandran C. #endif 196*13aacaeeSJayachandran C. 197*13aacaeeSJayachandran C. /* 198*13aacaeeSJayachandran C. * Find CPU by processor ID (APIC ID on x86, Processor UID on arm64) 1999417fa9eSJayachandran C. */ 2009417fa9eSJayachandran C. static struct cpu_info * 2019417fa9eSJayachandran C. cpu_find(int cpuid) 2029417fa9eSJayachandran C. { 203*13aacaeeSJayachandran C. int i; 2049417fa9eSJayachandran C. 205*13aacaeeSJayachandran C. if (cpus_use_indexing) { 2069417fa9eSJayachandran C. if (cpuid <= last_cpu && cpus[cpuid].enabled) 2079417fa9eSJayachandran C. return (&cpus[cpuid]); 208*13aacaeeSJayachandran C. } else { 209*13aacaeeSJayachandran C. for (i = 0; i <= last_cpu; i++) 210*13aacaeeSJayachandran C. if (cpus[i].id == cpuid) 211*13aacaeeSJayachandran C. return (&cpus[i]); 212*13aacaeeSJayachandran C. } 2139417fa9eSJayachandran C. return (NULL); 2149417fa9eSJayachandran C. } 2159417fa9eSJayachandran C. 2169417fa9eSJayachandran C. /* 2179417fa9eSJayachandran C. * Find CPU by pcpu pointer. 2189417fa9eSJayachandran C. */ 2199417fa9eSJayachandran C. static struct cpu_info * 2209417fa9eSJayachandran C. cpu_get_info(struct pcpu *pc) 2219417fa9eSJayachandran C. { 2229417fa9eSJayachandran C. struct cpu_info *cpup; 2239417fa9eSJayachandran C. int id; 2249417fa9eSJayachandran C. 225*13aacaeeSJayachandran C. #ifdef __aarch64__ 226*13aacaeeSJayachandran C. id = pc->pc_acpi_id; 227*13aacaeeSJayachandran C. #else 2289417fa9eSJayachandran C. id = pc->pc_apic_id; 229*13aacaeeSJayachandran C. #endif 2309417fa9eSJayachandran C. cpup = cpu_find(id); 2319417fa9eSJayachandran C. if (cpup == NULL) 232*13aacaeeSJayachandran C. panic("SRAT: CPU with ID %u is not known", id); 2339417fa9eSJayachandran C. return (cpup); 2349417fa9eSJayachandran C. } 2359417fa9eSJayachandran C. 2369417fa9eSJayachandran C. /* 2379417fa9eSJayachandran C. * Add proximity information for a new CPU. 2389417fa9eSJayachandran C. */ 2399417fa9eSJayachandran C. static struct cpu_info * 2409417fa9eSJayachandran C. cpu_add(int cpuid, int domain) 2419417fa9eSJayachandran C. { 2429417fa9eSJayachandran C. struct cpu_info *cpup; 2439417fa9eSJayachandran C. 244*13aacaeeSJayachandran C. if (cpus_use_indexing) { 2459417fa9eSJayachandran C. if (cpuid >= max_cpus) 2469417fa9eSJayachandran C. return (NULL); 2479417fa9eSJayachandran C. last_cpu = imax(last_cpu, cpuid); 2489417fa9eSJayachandran C. cpup = &cpus[cpuid]; 249*13aacaeeSJayachandran C. } else { 250*13aacaeeSJayachandran C. if (last_cpu >= max_cpus - 1) 251*13aacaeeSJayachandran C. return (NULL); 252*13aacaeeSJayachandran C. cpup = &cpus[++last_cpu]; 253*13aacaeeSJayachandran C. } 2549417fa9eSJayachandran C. cpup->domain = domain; 255*13aacaeeSJayachandran C. cpup->id = cpuid; 2569417fa9eSJayachandran C. cpup->enabled = 1; 2579417fa9eSJayachandran C. return (cpup); 2589417fa9eSJayachandran C. } 2599417fa9eSJayachandran C. 2609417fa9eSJayachandran C. static void 2619417fa9eSJayachandran C. srat_parse_entry(ACPI_SUBTABLE_HEADER *entry, void *arg) 2629417fa9eSJayachandran C. { 2639417fa9eSJayachandran C. ACPI_SRAT_CPU_AFFINITY *cpu; 2649417fa9eSJayachandran C. ACPI_SRAT_X2APIC_CPU_AFFINITY *x2apic; 2659417fa9eSJayachandran C. ACPI_SRAT_MEM_AFFINITY *mem; 266*13aacaeeSJayachandran C. ACPI_SRAT_GICC_AFFINITY *gicc; 2679417fa9eSJayachandran C. static struct cpu_info *cpup; 2689417fa9eSJayachandran C. int domain, i, slot; 2699417fa9eSJayachandran C. 2709417fa9eSJayachandran C. switch (entry->Type) { 2719417fa9eSJayachandran C. case ACPI_SRAT_TYPE_CPU_AFFINITY: 2729417fa9eSJayachandran C. cpu = (ACPI_SRAT_CPU_AFFINITY *)entry; 2739417fa9eSJayachandran C. domain = cpu->ProximityDomainLo | 2749417fa9eSJayachandran C. cpu->ProximityDomainHi[0] << 8 | 2759417fa9eSJayachandran C. cpu->ProximityDomainHi[1] << 16 | 2769417fa9eSJayachandran C. cpu->ProximityDomainHi[2] << 24; 2779417fa9eSJayachandran C. if (bootverbose) 2789417fa9eSJayachandran C. printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 2799417fa9eSJayachandran C. cpu->ApicId, domain, 2809417fa9eSJayachandran C. (cpu->Flags & ACPI_SRAT_CPU_ENABLED) ? 2819417fa9eSJayachandran C. "enabled" : "disabled"); 2829417fa9eSJayachandran C. if (!(cpu->Flags & ACPI_SRAT_CPU_ENABLED)) 2839417fa9eSJayachandran C. break; 2849417fa9eSJayachandran C. cpup = cpu_find(cpu->ApicId); 2859417fa9eSJayachandran C. if (cpup != NULL) { 2869417fa9eSJayachandran C. printf("SRAT: Duplicate local APIC ID %u\n", 2879417fa9eSJayachandran C. cpu->ApicId); 2889417fa9eSJayachandran C. *(int *)arg = ENXIO; 2899417fa9eSJayachandran C. break; 2909417fa9eSJayachandran C. } 2919417fa9eSJayachandran C. cpup = cpu_add(cpu->ApicId, domain); 2929417fa9eSJayachandran C. if (cpup == NULL) 2939417fa9eSJayachandran C. printf("SRAT: Ignoring local APIC ID %u (too high)\n", 2949417fa9eSJayachandran C. cpu->ApicId); 2959417fa9eSJayachandran C. break; 2969417fa9eSJayachandran C. case ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY: 2979417fa9eSJayachandran C. x2apic = (ACPI_SRAT_X2APIC_CPU_AFFINITY *)entry; 2989417fa9eSJayachandran C. if (bootverbose) 2999417fa9eSJayachandran C. printf("SRAT: Found CPU APIC ID %u domain %d: %s\n", 3009417fa9eSJayachandran C. x2apic->ApicId, x2apic->ProximityDomain, 3019417fa9eSJayachandran C. (x2apic->Flags & ACPI_SRAT_CPU_ENABLED) ? 3029417fa9eSJayachandran C. "enabled" : "disabled"); 3039417fa9eSJayachandran C. if (!(x2apic->Flags & ACPI_SRAT_CPU_ENABLED)) 3049417fa9eSJayachandran C. break; 3059417fa9eSJayachandran C. KASSERT(cpu_find(x2apic->ApicId) == NULL, 3069417fa9eSJayachandran C. ("Duplicate local APIC ID %u", x2apic->ApicId)); 3079417fa9eSJayachandran C. cpup = cpu_add(x2apic->ApicId, x2apic->ProximityDomain); 3089417fa9eSJayachandran C. if (cpup == NULL) 3099417fa9eSJayachandran C. printf("SRAT: Ignoring local APIC ID %u (too high)\n", 3109417fa9eSJayachandran C. x2apic->ApicId); 3119417fa9eSJayachandran C. break; 312*13aacaeeSJayachandran C. case ACPI_SRAT_TYPE_GICC_AFFINITY: 313*13aacaeeSJayachandran C. gicc = (ACPI_SRAT_GICC_AFFINITY *)entry; 314*13aacaeeSJayachandran C. if (bootverbose) 315*13aacaeeSJayachandran C. printf("SRAT: Found CPU UID %u domain %d: %s\n", 316*13aacaeeSJayachandran C. gicc->AcpiProcessorUid, gicc->ProximityDomain, 317*13aacaeeSJayachandran C. (gicc->Flags & ACPI_SRAT_GICC_ENABLED) ? 318*13aacaeeSJayachandran C. "enabled" : "disabled"); 319*13aacaeeSJayachandran C. if (!(gicc->Flags & ACPI_SRAT_GICC_ENABLED)) 320*13aacaeeSJayachandran C. break; 321*13aacaeeSJayachandran C. KASSERT(cpu_find(gicc->AcpiProcessorUid) == NULL, 322*13aacaeeSJayachandran C. ("Duplicate CPU UID %u", gicc->AcpiProcessorUid)); 323*13aacaeeSJayachandran C. cpup = cpu_add(gicc->AcpiProcessorUid, gicc->ProximityDomain); 324*13aacaeeSJayachandran C. if (cpup == NULL) 325*13aacaeeSJayachandran C. printf("SRAT: Ignoring CPU UID %u (too high)\n", 326*13aacaeeSJayachandran C. gicc->AcpiProcessorUid); 327*13aacaeeSJayachandran C. break; 3289417fa9eSJayachandran C. case ACPI_SRAT_TYPE_MEMORY_AFFINITY: 3299417fa9eSJayachandran C. mem = (ACPI_SRAT_MEM_AFFINITY *)entry; 3309417fa9eSJayachandran C. if (bootverbose) 3319417fa9eSJayachandran C. printf( 3329417fa9eSJayachandran C. "SRAT: Found memory domain %d addr 0x%jx len 0x%jx: %s\n", 3339417fa9eSJayachandran C. mem->ProximityDomain, (uintmax_t)mem->BaseAddress, 3349417fa9eSJayachandran C. (uintmax_t)mem->Length, 3359417fa9eSJayachandran C. (mem->Flags & ACPI_SRAT_MEM_ENABLED) ? 3369417fa9eSJayachandran C. "enabled" : "disabled"); 3379417fa9eSJayachandran C. if (!(mem->Flags & ACPI_SRAT_MEM_ENABLED)) 3389417fa9eSJayachandran C. break; 3399417fa9eSJayachandran C. if (mem->BaseAddress >= maxphyaddr || 3409417fa9eSJayachandran C. !overlaps_phys_avail(mem->BaseAddress, 3419417fa9eSJayachandran C. mem->BaseAddress + mem->Length)) { 3429417fa9eSJayachandran C. printf("SRAT: Ignoring memory at addr 0x%jx\n", 3439417fa9eSJayachandran C. (uintmax_t)mem->BaseAddress); 3449417fa9eSJayachandran C. break; 3459417fa9eSJayachandran C. } 3469417fa9eSJayachandran C. if (num_mem == VM_PHYSSEG_MAX) { 3479417fa9eSJayachandran C. printf("SRAT: Too many memory regions\n"); 3489417fa9eSJayachandran C. *(int *)arg = ENXIO; 3499417fa9eSJayachandran C. break; 3509417fa9eSJayachandran C. } 3519417fa9eSJayachandran C. slot = num_mem; 3529417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) { 3539417fa9eSJayachandran C. if (mem_info[i].end <= mem->BaseAddress) 3549417fa9eSJayachandran C. continue; 3559417fa9eSJayachandran C. if (mem_info[i].start < 3569417fa9eSJayachandran C. (mem->BaseAddress + mem->Length)) { 3579417fa9eSJayachandran C. printf("SRAT: Overlapping memory entries\n"); 3589417fa9eSJayachandran C. *(int *)arg = ENXIO; 3599417fa9eSJayachandran C. return; 3609417fa9eSJayachandran C. } 3619417fa9eSJayachandran C. slot = i; 3629417fa9eSJayachandran C. } 3639417fa9eSJayachandran C. for (i = num_mem; i > slot; i--) 3649417fa9eSJayachandran C. mem_info[i] = mem_info[i - 1]; 3659417fa9eSJayachandran C. mem_info[slot].start = mem->BaseAddress; 3669417fa9eSJayachandran C. mem_info[slot].end = mem->BaseAddress + mem->Length; 3679417fa9eSJayachandran C. mem_info[slot].domain = mem->ProximityDomain; 3689417fa9eSJayachandran C. num_mem++; 3699417fa9eSJayachandran C. break; 3709417fa9eSJayachandran C. } 3719417fa9eSJayachandran C. } 3729417fa9eSJayachandran C. 3739417fa9eSJayachandran C. /* 3749417fa9eSJayachandran C. * Ensure each memory domain has at least one CPU and that each CPU 3759417fa9eSJayachandran C. * has at least one memory domain. 3769417fa9eSJayachandran C. */ 3779417fa9eSJayachandran C. static int 3789417fa9eSJayachandran C. check_domains(void) 3799417fa9eSJayachandran C. { 3809417fa9eSJayachandran C. int found, i, j; 3819417fa9eSJayachandran C. 3829417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) { 3839417fa9eSJayachandran C. found = 0; 3849417fa9eSJayachandran C. for (j = 0; j <= last_cpu; j++) 3859417fa9eSJayachandran C. if (cpus[j].enabled && 3869417fa9eSJayachandran C. cpus[j].domain == mem_info[i].domain) { 3879417fa9eSJayachandran C. cpus[j].has_memory = 1; 3889417fa9eSJayachandran C. found++; 3899417fa9eSJayachandran C. } 3909417fa9eSJayachandran C. if (!found) { 3919417fa9eSJayachandran C. printf("SRAT: No CPU found for memory domain %d\n", 3929417fa9eSJayachandran C. mem_info[i].domain); 3939417fa9eSJayachandran C. return (ENXIO); 3949417fa9eSJayachandran C. } 3959417fa9eSJayachandran C. } 3969417fa9eSJayachandran C. for (i = 0; i <= last_cpu; i++) 3979417fa9eSJayachandran C. if (cpus[i].enabled && !cpus[i].has_memory) { 3989417fa9eSJayachandran C. found = 0; 3999417fa9eSJayachandran C. for (j = 0; j < num_mem && !found; j++) { 4009417fa9eSJayachandran C. if (mem_info[j].domain == cpus[i].domain) 4019417fa9eSJayachandran C. found = 1; 4029417fa9eSJayachandran C. } 4039417fa9eSJayachandran C. if (!found) { 4049417fa9eSJayachandran C. if (bootverbose) 4059417fa9eSJayachandran C. printf("SRAT: mem dom %d is empty\n", 4069417fa9eSJayachandran C. cpus[i].domain); 4079417fa9eSJayachandran C. mem_info[num_mem].start = 0; 4089417fa9eSJayachandran C. mem_info[num_mem].end = 0; 4099417fa9eSJayachandran C. mem_info[num_mem].domain = cpus[i].domain; 4109417fa9eSJayachandran C. num_mem++; 4119417fa9eSJayachandran C. } 4129417fa9eSJayachandran C. } 4139417fa9eSJayachandran C. return (0); 4149417fa9eSJayachandran C. } 4159417fa9eSJayachandran C. 4169417fa9eSJayachandran C. /* 4179417fa9eSJayachandran C. * Check that the SRAT memory regions cover all of the regions in 4189417fa9eSJayachandran C. * phys_avail[]. 4199417fa9eSJayachandran C. */ 4209417fa9eSJayachandran C. static int 4219417fa9eSJayachandran C. check_phys_avail(void) 4229417fa9eSJayachandran C. { 4239417fa9eSJayachandran C. vm_paddr_t address; 4249417fa9eSJayachandran C. int i, j; 4259417fa9eSJayachandran C. 4269417fa9eSJayachandran C. /* j is the current offset into phys_avail[]. */ 4279417fa9eSJayachandran C. address = phys_avail[0]; 4289417fa9eSJayachandran C. j = 0; 4299417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) { 4309417fa9eSJayachandran C. /* 4319417fa9eSJayachandran C. * Consume as many phys_avail[] entries as fit in this 4329417fa9eSJayachandran C. * region. 4339417fa9eSJayachandran C. */ 4349417fa9eSJayachandran C. while (address >= mem_info[i].start && 4359417fa9eSJayachandran C. address <= mem_info[i].end) { 4369417fa9eSJayachandran C. /* 4379417fa9eSJayachandran C. * If we cover the rest of this phys_avail[] entry, 4389417fa9eSJayachandran C. * advance to the next entry. 4399417fa9eSJayachandran C. */ 4409417fa9eSJayachandran C. if (phys_avail[j + 1] <= mem_info[i].end) { 4419417fa9eSJayachandran C. j += 2; 4429417fa9eSJayachandran C. if (phys_avail[j] == 0 && 4439417fa9eSJayachandran C. phys_avail[j + 1] == 0) { 4449417fa9eSJayachandran C. return (0); 4459417fa9eSJayachandran C. } 4469417fa9eSJayachandran C. address = phys_avail[j]; 4479417fa9eSJayachandran C. } else 4489417fa9eSJayachandran C. address = mem_info[i].end + 1; 4499417fa9eSJayachandran C. } 4509417fa9eSJayachandran C. } 4519417fa9eSJayachandran C. printf("SRAT: No memory region found for 0x%jx - 0x%jx\n", 4529417fa9eSJayachandran C. (uintmax_t)phys_avail[j], (uintmax_t)phys_avail[j + 1]); 4539417fa9eSJayachandran C. return (ENXIO); 4549417fa9eSJayachandran C. } 4559417fa9eSJayachandran C. 4569417fa9eSJayachandran C. /* 4579417fa9eSJayachandran C. * Renumber the memory domains to be compact and zero-based if not 4589417fa9eSJayachandran C. * already. Returns an error if there are too many domains. 4599417fa9eSJayachandran C. */ 4609417fa9eSJayachandran C. static int 4619417fa9eSJayachandran C. renumber_domains(void) 4629417fa9eSJayachandran C. { 4639417fa9eSJayachandran C. int i, j, slot; 4649417fa9eSJayachandran C. 4659417fa9eSJayachandran C. /* Enumerate all the domains. */ 4669417fa9eSJayachandran C. ndomain = 0; 4679417fa9eSJayachandran C. for (i = 0; i < num_mem; i++) { 4689417fa9eSJayachandran C. /* See if this domain is already known. */ 4699417fa9eSJayachandran C. for (j = 0; j < ndomain; j++) { 4709417fa9eSJayachandran C. if (domain_pxm[j] >= mem_info[i].domain) 4719417fa9eSJayachandran C. break; 4729417fa9eSJayachandran C. } 4739417fa9eSJayachandran C. if (j < ndomain && domain_pxm[j] == mem_info[i].domain) 4749417fa9eSJayachandran C. continue; 4759417fa9eSJayachandran C. 4769417fa9eSJayachandran C. if (ndomain >= MAXMEMDOM) { 4779417fa9eSJayachandran C. ndomain = 1; 4789417fa9eSJayachandran C. printf("SRAT: Too many memory domains\n"); 4799417fa9eSJayachandran C. return (EFBIG); 4809417fa9eSJayachandran C. } 4819417fa9eSJayachandran C. 4829417fa9eSJayachandran C. /* Insert the new domain at slot 'j'. */ 4839417fa9eSJayachandran C. slot = j; 4849417fa9eSJayachandran C. for (j = ndomain; j > slot; j--) 4859417fa9eSJayachandran C. domain_pxm[j] = domain_pxm[j - 1]; 4869417fa9eSJayachandran C. domain_pxm[slot] = mem_info[i].domain; 4879417fa9eSJayachandran C. ndomain++; 4889417fa9eSJayachandran C. } 4899417fa9eSJayachandran C. 4909417fa9eSJayachandran C. /* Renumber each domain to its index in the sorted 'domain_pxm' list. */ 4919417fa9eSJayachandran C. for (i = 0; i < ndomain; i++) { 4929417fa9eSJayachandran C. /* 4939417fa9eSJayachandran C. * If the domain is already the right value, no need 4949417fa9eSJayachandran C. * to renumber. 4959417fa9eSJayachandran C. */ 4969417fa9eSJayachandran C. if (domain_pxm[i] == i) 4979417fa9eSJayachandran C. continue; 4989417fa9eSJayachandran C. 4999417fa9eSJayachandran C. /* Walk the cpu[] and mem_info[] arrays to renumber. */ 5009417fa9eSJayachandran C. for (j = 0; j < num_mem; j++) 5019417fa9eSJayachandran C. if (mem_info[j].domain == domain_pxm[i]) 5029417fa9eSJayachandran C. mem_info[j].domain = i; 5039417fa9eSJayachandran C. for (j = 0; j <= last_cpu; j++) 5049417fa9eSJayachandran C. if (cpus[j].enabled && cpus[j].domain == domain_pxm[i]) 5059417fa9eSJayachandran C. cpus[j].domain = i; 5069417fa9eSJayachandran C. } 5079417fa9eSJayachandran C. 5089417fa9eSJayachandran C. return (0); 5099417fa9eSJayachandran C. } 5109417fa9eSJayachandran C. 5119417fa9eSJayachandran C. /* 5129417fa9eSJayachandran C. * Look for an ACPI System Resource Affinity Table ("SRAT"), 5139417fa9eSJayachandran C. * allocate space for cpu information, and initialize globals. 5149417fa9eSJayachandran C. */ 5159417fa9eSJayachandran C. int 5169417fa9eSJayachandran C. acpi_pxm_init(int ncpus, vm_paddr_t maxphys) 5179417fa9eSJayachandran C. { 5189417fa9eSJayachandran C. unsigned int idx, size; 5199417fa9eSJayachandran C. vm_paddr_t addr; 5209417fa9eSJayachandran C. 5219417fa9eSJayachandran C. if (resource_disabled("srat", 0)) 5229417fa9eSJayachandran C. return (-1); 5239417fa9eSJayachandran C. 5249417fa9eSJayachandran C. max_cpus = ncpus; 5259417fa9eSJayachandran C. last_cpu = -1; 5269417fa9eSJayachandran C. maxphyaddr = maxphys; 5279417fa9eSJayachandran C. srat_physaddr = acpi_find_table(ACPI_SIG_SRAT); 5289417fa9eSJayachandran C. if (srat_physaddr == 0) 5299417fa9eSJayachandran C. return (-1); 5309417fa9eSJayachandran C. 5319417fa9eSJayachandran C. /* 5329417fa9eSJayachandran C. * Allocate data structure: 5339417fa9eSJayachandran C. * 5349417fa9eSJayachandran C. * Find the last physical memory region and steal some memory from 5359417fa9eSJayachandran C. * it. This is done because at this point in the boot process 5369417fa9eSJayachandran C. * malloc is still not usable. 5379417fa9eSJayachandran C. */ 5389417fa9eSJayachandran C. for (idx = 0; phys_avail[idx + 1] != 0; idx += 2); 5399417fa9eSJayachandran C. KASSERT(idx != 0, ("phys_avail is empty!")); 5409417fa9eSJayachandran C. idx -= 2; 5419417fa9eSJayachandran C. 5429417fa9eSJayachandran C. size = sizeof(*cpus) * max_cpus; 5439417fa9eSJayachandran C. addr = trunc_page(phys_avail[idx + 1] - size); 5449417fa9eSJayachandran C. KASSERT(addr >= phys_avail[idx], 5459417fa9eSJayachandran C. ("Not enough memory for SRAT table items")); 5469417fa9eSJayachandran C. phys_avail[idx + 1] = addr - 1; 5479417fa9eSJayachandran C. 5489417fa9eSJayachandran C. /* 5499417fa9eSJayachandran C. * We cannot rely on PHYS_TO_DMAP because this code is also used in 5509417fa9eSJayachandran C. * i386, so use pmap_mapbios to map the memory, this will end up using 5519417fa9eSJayachandran C. * the default memory attribute (WB), and the DMAP when available. 5529417fa9eSJayachandran C. */ 5539417fa9eSJayachandran C. cpus = (struct cpu_info *)pmap_mapbios(addr, size); 5549417fa9eSJayachandran C. bzero(cpus, size); 5559417fa9eSJayachandran C. return (0); 5569417fa9eSJayachandran C. } 5579417fa9eSJayachandran C. 5589417fa9eSJayachandran C. static int 5599417fa9eSJayachandran C. parse_srat(void) 5609417fa9eSJayachandran C. { 5619417fa9eSJayachandran C. int error; 5629417fa9eSJayachandran C. 5639417fa9eSJayachandran C. /* 5649417fa9eSJayachandran C. * Make a pass over the table to populate the cpus[] and 5659417fa9eSJayachandran C. * mem_info[] tables. 5669417fa9eSJayachandran C. */ 5679417fa9eSJayachandran C. srat = acpi_map_table(srat_physaddr, ACPI_SIG_SRAT); 5689417fa9eSJayachandran C. error = 0; 5699417fa9eSJayachandran C. srat_walk_table(srat_parse_entry, &error); 5709417fa9eSJayachandran C. acpi_unmap_table(srat); 5719417fa9eSJayachandran C. srat = NULL; 5729417fa9eSJayachandran C. if (error || check_domains() != 0 || check_phys_avail() != 0 || 5739417fa9eSJayachandran C. renumber_domains() != 0) { 5749417fa9eSJayachandran C. srat_physaddr = 0; 5759417fa9eSJayachandran C. return (-1); 5769417fa9eSJayachandran C. } 5779417fa9eSJayachandran C. 5789417fa9eSJayachandran C. return (0); 5799417fa9eSJayachandran C. } 5809417fa9eSJayachandran C. 5819417fa9eSJayachandran C. static void 5829417fa9eSJayachandran C. init_mem_locality(void) 5839417fa9eSJayachandran C. { 5849417fa9eSJayachandran C. int i; 5859417fa9eSJayachandran C. 5869417fa9eSJayachandran C. /* 5879417fa9eSJayachandran C. * For now, assume -1 == "no locality information for 5889417fa9eSJayachandran C. * this pairing. 5899417fa9eSJayachandran C. */ 5909417fa9eSJayachandran C. for (i = 0; i < MAXMEMDOM * MAXMEMDOM; i++) 5919417fa9eSJayachandran C. vm_locality_table[i] = -1; 5929417fa9eSJayachandran C. } 5939417fa9eSJayachandran C. 5949417fa9eSJayachandran C. /* 5959417fa9eSJayachandran C. * Parse SRAT and SLIT to save proximity info. Don't do 5969417fa9eSJayachandran C. * anything if SRAT is not available. 5979417fa9eSJayachandran C. */ 5989417fa9eSJayachandran C. void 5999417fa9eSJayachandran C. acpi_pxm_parse_tables(void) 6009417fa9eSJayachandran C. { 6019417fa9eSJayachandran C. 6029417fa9eSJayachandran C. if (srat_physaddr == 0) 6039417fa9eSJayachandran C. return; 6049417fa9eSJayachandran C. if (parse_srat() < 0) 6059417fa9eSJayachandran C. return; 6069417fa9eSJayachandran C. init_mem_locality(); 6079417fa9eSJayachandran C. (void)parse_slit(); 6089417fa9eSJayachandran C. } 6099417fa9eSJayachandran C. 6109417fa9eSJayachandran C. /* 6119417fa9eSJayachandran C. * Use saved data from SRAT/SLIT to update memory locality. 6129417fa9eSJayachandran C. */ 6139417fa9eSJayachandran C. void 6149417fa9eSJayachandran C. acpi_pxm_set_mem_locality(void) 6159417fa9eSJayachandran C. { 6169417fa9eSJayachandran C. 6179417fa9eSJayachandran C. if (srat_physaddr == 0) 6189417fa9eSJayachandran C. return; 6199417fa9eSJayachandran C. vm_phys_register_domains(ndomain, mem_info, vm_locality_table); 6209417fa9eSJayachandran C. } 6219417fa9eSJayachandran C. 6229417fa9eSJayachandran C. static void 6239417fa9eSJayachandran C. srat_walk_table(acpi_subtable_handler *handler, void *arg) 6249417fa9eSJayachandran C. { 6259417fa9eSJayachandran C. 6269417fa9eSJayachandran C. acpi_walk_subtables(srat + 1, (char *)srat + srat->Header.Length, 6279417fa9eSJayachandran C. handler, arg); 6289417fa9eSJayachandran C. } 6299417fa9eSJayachandran C. 6309417fa9eSJayachandran C. /* 6319417fa9eSJayachandran C. * Setup per-CPU domain IDs from information saved in 'cpus'. 6329417fa9eSJayachandran C. */ 6339417fa9eSJayachandran C. void 6349417fa9eSJayachandran C. acpi_pxm_set_cpu_locality(void) 6359417fa9eSJayachandran C. { 6369417fa9eSJayachandran C. struct cpu_info *cpu; 6379417fa9eSJayachandran C. struct pcpu *pc; 6389417fa9eSJayachandran C. u_int i; 6399417fa9eSJayachandran C. 6409417fa9eSJayachandran C. if (srat_physaddr == 0) 6419417fa9eSJayachandran C. return; 6429417fa9eSJayachandran C. for (i = 0; i < MAXCPU; i++) { 6439417fa9eSJayachandran C. if (CPU_ABSENT(i)) 6449417fa9eSJayachandran C. continue; 6459417fa9eSJayachandran C. pc = pcpu_find(i); 6469417fa9eSJayachandran C. KASSERT(pc != NULL, ("no pcpu data for CPU %u", i)); 6479417fa9eSJayachandran C. cpu = cpu_get_info(pc); 6489417fa9eSJayachandran C. pc->pc_domain = vm_ndomains > 1 ? cpu->domain : 0; 6499417fa9eSJayachandran C. CPU_SET(i, &cpuset_domain[pc->pc_domain]); 6509417fa9eSJayachandran C. if (bootverbose) 6519417fa9eSJayachandran C. printf("SRAT: CPU %u has memory domain %d\n", i, 6529417fa9eSJayachandran C. pc->pc_domain); 6539417fa9eSJayachandran C. } 6549417fa9eSJayachandran C. } 6559417fa9eSJayachandran C. 6569417fa9eSJayachandran C. /* 6579417fa9eSJayachandran C. * Free data structures allocated during acpi_pxm_init. 6589417fa9eSJayachandran C. */ 6599417fa9eSJayachandran C. void 6609417fa9eSJayachandran C. acpi_pxm_free(void) 6619417fa9eSJayachandran C. { 6629417fa9eSJayachandran C. 6639417fa9eSJayachandran C. if (srat_physaddr == 0) 6649417fa9eSJayachandran C. return; 6659417fa9eSJayachandran C. pmap_unmapbios((vm_offset_t)cpus, sizeof(*cpus) * max_cpus); 6669417fa9eSJayachandran C. srat_physaddr = 0; 6679417fa9eSJayachandran C. cpus = NULL; 6689417fa9eSJayachandran C. } 6699417fa9eSJayachandran C. 6709417fa9eSJayachandran C. /* 6719417fa9eSJayachandran C. * Map a _PXM value to a VM domain ID. 6729417fa9eSJayachandran C. * 6739417fa9eSJayachandran C. * Returns the domain ID, or -1 if no domain ID was found. 6749417fa9eSJayachandran C. */ 6759417fa9eSJayachandran C. int 6769417fa9eSJayachandran C. acpi_map_pxm_to_vm_domainid(int pxm) 6779417fa9eSJayachandran C. { 6789417fa9eSJayachandran C. int i; 6799417fa9eSJayachandran C. 6809417fa9eSJayachandran C. for (i = 0; i < ndomain; i++) { 6819417fa9eSJayachandran C. if (domain_pxm[i] == pxm) 6829417fa9eSJayachandran C. return (vm_ndomains > 1 ? i : 0); 6839417fa9eSJayachandran C. } 6849417fa9eSJayachandran C. 6859417fa9eSJayachandran C. return (-1); 6869417fa9eSJayachandran C. } 6879417fa9eSJayachandran C. 6889417fa9eSJayachandran C. #else /* MAXMEMDOM == 1 */ 6899417fa9eSJayachandran C. 6909417fa9eSJayachandran C. int 6919417fa9eSJayachandran C. acpi_map_pxm_to_vm_domainid(int pxm) 6929417fa9eSJayachandran C. { 6939417fa9eSJayachandran C. 6949417fa9eSJayachandran C. return (-1); 6959417fa9eSJayachandran C. } 6969417fa9eSJayachandran C. 6979417fa9eSJayachandran C. #endif /* MAXMEMDOM > 1 */ 698